# Example 1
 def wrapped(x, **param_values) -> Kernel:
     """Build a ``Kernel`` for the wrapped function over ``x``.

     For every parameter listed in ``param_defaults``, a caller-supplied
     override in ``param_values`` is wrapped with ``constant=True``;
     otherwise the default value is passed through ``ensure_parametrized``
     as-is. (``param_defaults``, ``f`` and the helpers come from the
     enclosing scope.)
     """
     resolved = {}
     for name, default in param_defaults.items():
         supplied = param_values.get(name)
         if supplied is None:
             resolved[name] = ensure_parametrized(default)
         else:
             # explicit override: freeze it as a constant parameter
             resolved[name] = ensure_parametrized(supplied, constant=True)
     return Kernel(f, x, fname=f.__name__, **resolved)
# Example 2
def exponential_linear_regression(x: Union[pd.DataFrame, np.ndarray],
                                  add_intercept=False,
                                  **constraints) -> Kernel:
    """Computes a trend as the exponential of a linear sum of the columns in the data.

    :param x: the data (or dimensionality) the kernel is computed on; one
              parameter is created per column.
    :param add_intercept: if True, an intercept parameter ``beta_0`` is added.
    :param constraints: fixed values for regression parameters, given as
                        ``beta_i=value`` where ``i`` is the 1-based column
                        index. If ``x`` is a DataFrame whose second column is
                        named ``'cname'``, ``beta_2=2``, ``beta_cname=2`` and
                        ``cname=2`` are equivalent. ``beta_0`` constrains the
                        intercept (only valid with ``add_intercept=True``).
    """
    # A 1-D array has no column axis; a dedicated 1-D kernel exists for that.
    if len(x.shape) <= 1:
        raise ValueError(
            "Consider using kernels.expo for a 1-dimensional data array")
    ndim = x.shape[1]

    # Resolve each constraint name to its 1-based column index and freeze
    # the given value as a constant parameter.
    fixed = {}
    for raw_name, value in constraints.items():
        name = raw_name[len("beta_"):] if raw_name.startswith("beta_") else raw_name
        if isinstance(x, pd.DataFrame) and name in x.columns:
            index = tuple(x.columns).index(name) + 1
        else:
            index = int(name)
        fixed[index] = ensure_parametrized(value, constant=True)

    # Index 0 is reserved for the intercept; fixing it without enabling the
    # intercept would silently have no effect, so fail loudly instead.
    if not add_intercept and 0 in fixed:
        raise ValueError(
            "A fixed value is given for the intercept, but `add_intercept` is not True."
        )

    start = 0 if add_intercept else 1
    params = {}
    for i in range(start, ndim + 1):
        params[f"beta_{i}"] = fixed[i] if i in fixed else Parameter()

    def _compute(data, **params_from_wrapper):
        # Coefficients are applied in the insertion order of `params`.
        ordered = [params_from_wrapper[name] for name in params]
        return np.exp((ordered * data).sum(axis=1))

    return Kernel(_compute,
                  x,
                  **params,
                  fname=exponential_linear_regression.__qualname__)
# Example 3
def categories_qualitative(x: Collection, fixed_values: dict = None) -> Kernel:
    """Builds a kernel with one parameter per distinct category in ``x``.

    Categories are compared by their string representation. Entries of
    ``fixed_values`` (keyed by category, also stringified) are frozen as
    constant parameters; every other category gets a fresh ``Parameter``.
    """
    categories = sorted({str(value) for value in x})
    constants = {str(key): val for key, val in (fixed_values or {}).items()}

    params = {}
    for category in categories:
        if category in constants:
            params[category] = ensure_parametrized(constants[category],
                                                   constant=True)
        else:
            params[category] = Parameter()

    def _compute(data, **params_from_wrapper):
        # Map every observation to the parameter of its category, keeping
        # the container type of the input.
        mapped = [params_from_wrapper[str(item)] for item in data]
        return type(data)(mapped)

    return Kernel(_compute,
                  x,
                  **params,
                  fname=categories_qualitative.__qualname__)
# Example 4
def polynomial_regression(
    x: Union[pd.DataFrame, np.ndarray],
    degree: Union[int, Sequence] = 2,
    **constraints,
) -> Kernel:
    """Computes a trend as the sum of the columns in the data to the power of n for n smaller or equal to degree.

    :param x: the number of dimensions or the data the kernel will be computed on. There will be one parameter for each column.
    :param degree: last exponent computed for the given covariates. Can be a list or np array, but if this is the case, the number of
    exponents should be equal to the number of columns of x.
    :param constraints: fixed values for the parameters of the regression. The following constraints are equivalent:
                        'beta_2_2=2', 'beta_cname_2=2', 'cname_2=2'
                        The last two are valid only if data is given as a dataframe with the second column named 'cname'.
    """
    # 1-D input has no column axis; a dedicated 1-D kernel exists for that case.
    if len(x.shape) > 1:
        ndim = x.shape[1]
    else:
        raise ValueError(
            "Consider using kernels.linear for a 1-dimensional data array")
    # A scalar degree applies uniformly to every column; otherwise one
    # degree must be given per column.
    if isinstance(degree, int):
        assert degree > 0, "This model considers positive power laws only."
        degree = [degree] * ndim
    else:
        assert (
            len(degree) == ndim
        ), "The number of degrees is different than the number of covariates."
    # Total number of expanded columns (one per (column, exponent) pair).
    ncols = sum(degree)
    # Parse constraints into {(1-based column index, exponent): constant}.
    fixed = {}
    for p_name, p_value in constraints.items():
        # The "beta_" prefix is optional.
        if p_name.startswith("beta_"):
            p_name = p_name[len("beta_"):]
        # Split "<column>_<degree>"; the column part may itself be numeric
        # or a DataFrame column name (greedy match keeps underscores in names).
        match = re.match(r"^(.+)_(\d+)$", p_name)
        if match:
            column, deg = match.groups()
        else:
            raise ValueError(f"Unable to parse parameter constraint: {p_name}")
        if isinstance(x, pd.DataFrame) and column in x.columns:
            column = list(x.columns).index(column) + 1
        fixed[(int(column), int(deg))] = ensure_parametrized(p_value,
                                                             constant=True)
    # Build parameters in (column, exponent) order; this insertion order
    # determines the coefficient order used inside _compute.
    # NOTE(review): constraints whose (column, degree) pair falls outside
    # the generated range are silently ignored here — confirm whether an
    # error would be preferable.
    params = {}
    for col_idx, max_degree in enumerate(degree):
        for d in range(1, max_degree + 1):
            name = f"beta_{col_idx+1}_{d}"
            params[name] = fixed.get((col_idx + 1, d), Parameter())

    def _compute(data, **params_from_wrapper):
        data = np.array(data)
        # Expand each original column into its powers 1..max_degree.
        data_with_extra_cols = np.zeros(shape=(len(data), ncols))
        extra_col_idx = 0
        for col_idx, max_degree in enumerate(degree):
            for d in range(1, max_degree + 1):
                data_with_extra_cols[:, extra_col_idx] = data[:, col_idx]**d
                extra_col_idx += 1
        # Coefficients applied in the insertion order of `params`.
        sorted_params = [params_from_wrapper[k] for k in params]
        return (sorted_params * data_with_extra_cols).sum(axis=1)

    return Kernel(_compute,
                  x,
                  **params,
                  fname=polynomial_regression.__qualname__)