Exemple #1
0
 def test_score(self, gpm):
     """Fit a local model on ``gpm`` and check its score lies in [0, 1]."""
     fitted = EpistasisLinearRegression(order=self.order, model_type="local")
     fitted.add_gpm(gpm)
     fitted.fit()
     result = fitted.score()
     # The score must be bounded on both sides.
     assert 0 <= result <= 1
Exemple #2
0
 def test_hypothesis(self, gpm):
     """Check ``hypothesis`` at the fitted coefficients against the phenotypes.

     Both sides are sorted before comparison, so only the set of values
     (not their order) is verified.
     """
     fitted = EpistasisLinearRegression(order=self.order, model_type="local")
     fitted.add_gpm(gpm)
     fitted.fit()
     # Evaluate the model at its own fitted coefficients.
     predicted = sorted(fitted.hypothesis(thetas=fitted.coef_))
     expected = sorted(fitted.gpm.phenotype)
     np.testing.assert_almost_equal(predicted, expected)
Exemple #3
0
    def test_predict(self, gpm):
        """Check that ``predict`` on a fitted model matches the phenotypes.

        Both sides are sorted before comparison, so only the set of values
        (not their order) is verified.
        """
        fitted = EpistasisLinearRegression(order=self.order, model_type="local")
        fitted.add_gpm(gpm)
        fitted.fit()

        predicted = sorted(fitted.predict())
        expected = sorted(fitted.gpm.phenotype)
        np.testing.assert_almost_equal(predicted, expected)
Exemple #4
0
    def test_init(self, gpm):
        """Check that the constructor arguments are exposed as attributes."""
        instance = EpistasisLinearRegression(order=self.order, model_type="local")
        instance.add_gpm(gpm)

        # Read both attributes up front, then compare with what was passed in.
        seen_order, seen_model_type = instance.order, instance.model_type
        assert seen_order == self.order
        assert seen_model_type == "local"
Exemple #5
0
    def test_fit(self, gpm):
        """Check that fitting leaves ``coef_`` and ``epistasis`` on the model."""
        fitted = EpistasisLinearRegression(order=self.order, model_type="local")
        fitted.add_gpm(gpm)
        fitted.fit()

        # Probe both attributes first, then assert on each result.
        present = {
            attr: hasattr(fitted, attr) for attr in ("coef_", "epistasis")
        }

        assert present["coef_"] is True
        assert present["epistasis"] is True
Exemple #6
0
class EpistasisClassifierMixin:
    """Mixin that adds additive-model preprocessing to a classifier.

    The mixin is meant to be combined, through multiple inheritance, with a
    classifier base class that supplies ``fit``, ``predict``,
    ``predict_proba`` and ``predict_log_proba``; those are reached through
    ``super()``. The methods below also read ``self.model_type``,
    ``self.gpm`` and ``self.threshold``, which the host class must provide.

    Every prediction method first builds the additive design matrix, scales
    it by the additive coefficients, and hands the result to the underlying
    classifier.
    """

    def _project_additive(self, X=None):
        """Return the additive design matrix for ``X`` scaled by the
        additive coefficients.

        ``X`` is resolved by ``self.Additive._X`` and the result is
        multiplied by ``self.Additive.epistasis.values``.
        NOTE(review): this presumably relies on NumPy broadcasting across
        columns -- confirm against the type returned by ``_X``.
        """
        design = self.Additive._X(data=X)
        return design * self.Additive.epistasis.values

    def _fit_additive(self, X=None, y=None):
        """Build and fit the first-order model stored as ``self.Additive``.

        Returns
        -------
        self
        """
        # Construct an additive (order 1) model of the same model type.
        self.Additive = EpistasisLinearRegression(
            order=1, model_type=self.model_type)

        self.Additive.add_gpm(self.gpm)

        # Attach an epistasis map built from the additive model's columns.
        self.Additive.epistasis = EpistasisMap(
            sites=self.Additive.Xcolumns,
        )

        # Fit the additive model.
        self.Additive.fit(X=X, y=y)
        return self

    def _fit_classifier(self, X=None, y=None):
        """Fit the underlying classifier on additive-projected data.

        ``y`` is converted to class labels with ``binarize`` using
        ``self.threshold``; the labels are stored on ``self.classes``.

        Returns
        -------
        self
        """
        # Project X into additive-phenotype space.
        projected = self._project_additive(X)

        # Label the observations. ``threshold`` is passed by keyword because
        # it is a keyword-only argument in current scikit-learn releases.
        labels = binarize(y.reshape(1, -1), threshold=self.threshold)[0]
        self.classes = labels

        # Fit classifier.
        super().fit(X=projected, y=labels)
        return self

    def fit_transform(self, X=None, y=None, **kwargs):
        """Fit the model, then return a new map restricted to class 1.

        Rows of ``self.gpm.data`` whose predicted class equals 1 are kept
        and used to build a new ``GenotypePhenotypeMap``.
        """
        self.fit(X=X, y=y, **kwargs)
        ypred = self.predict(X=X)

        # Transform map.
        gpm = GenotypePhenotypeMap.read_dataframe(
            dataframe=self.gpm.data[ypred == 1],
            wildtype=self.gpm.wildtype,
            mutations=self.gpm.mutations
        )
        return gpm

    def predict(self, X=None):
        """Predict classes for ``X`` after additive projection."""
        return super().predict(X=self._project_additive(X))

    def predict_transform(self, X=None, y=None):
        """Replace entries of ``y`` whose prediction is <= 0.5 by the
        threshold.

        ``y`` is modified in place and also returned; pass a copy if the
        original values must be preserved.
        """
        predicted = self.predict(X=X)
        y[predicted <= 0.5] = self.threshold
        return y

    def predict_log_proba(self, X=None):
        """Return the underlying classifier's log-probabilities for ``X``
        after additive projection."""
        return super().predict_log_proba(self._project_additive(X))

    def predict_proba(self, X=None):
        """Return the underlying classifier's probabilities for ``X`` after
        additive projection."""
        return super().predict_proba(X=self._project_additive(X))
Exemple #7
0
class EpistasisNonlinearRegression(BaseModel):
    """Use nonlinear least-squares regression to estimate epistatic coefficients
    and nonlinear scale in a nonlinear genotype-phenotype map.

    This model has two steps:
        1. Fit an additive, linear regression to approximate the average effect
        of individual mutations.
        2. Fit the nonlinear function to the observed phenotypes vs. the
        additive phenotypes estimated in step 1.

    Methods are described in the following publication:
        Sailer, Z. R. & Harms, M. J. 'Detecting High-Order Epistasis in
        Nonlinear Genotype-Phenotype Maps'. Genetics 205, 1079-1088 (2017).

    Several methods are wrapped in ``arghandler``, which is defined outside
    this class. NOTE(review): it presumably resolves default/``None``
    arguments before the method body runs -- confirm against its definition.

    Parameters
    ----------
    function : callable
        Nonlinear function between Pobs and Padd

    model_type : str (default: global)
        type of epistasis model to use. See paper above for more information.

    Keyword Arguments
    -----------------
    Keyword arguments are interpreted as initial guesses for the nonlinear
    function parameters. Must have the same name as parameters in the
    nonlinear function.

    Attributes
    ----------
    Additive : EpistasisLinearRegression
        Linear regression object for fitting additive model

    parameters : Parameters object
        Mapping object for nonlinear coefficients

    minimizer :
        Object that fits data using the function and a least squares minimization.
    """
    def __init__(self,
                 function,
                 model_type="global",
                 **p0):

        # Set up the function for fitting.
        self.function = function
        self.minimizer = FunctionMinimizer(self.function, **p0)
        self.parameters = self.minimizer.parameters
        self.order = 1
        self.Xbuilt = {}

        # Register the model type through ``set_params``; ``self.model_type``
        # is read immediately below, so ``set_params`` is expected to set it.
        self.set_params(model_type=model_type)

        # Store model specs.
        self.model_specs = dict(
            function=self.function,
            model_type=self.model_type,
            **p0)

        # Set up the additive (first-order) linear model.
        self.Additive = EpistasisLinearRegression(
            order=1, model_type=self.model_type)

    def _additive_slice(self, X):
        """Return the leading ``self.Additive.epistasis.n`` columns of ``X``.

        NumPy arrays are sliced with ``X[:, :n]`` and pandas DataFrames with
        ``X.iloc[:, :n]`` (tuple indexing is not valid on a DataFrame). Any
        other value (for example ``None``) is returned unchanged so that the
        additive model can resolve it itself.
        """
        if isinstance(X, pd.DataFrame):
            return X.iloc[:, :self.Additive.epistasis.n]
        if isinstance(X, np.ndarray):
            return X[:, :self.Additive.epistasis.n]
        return X

    def _split_thetas(self, thetas):
        """Split ``thetas`` into (nonlinear parameters, additive coefficients).

        The first ``len(self.parameters.valuesdict())`` entries are the
        nonlinear parameters; the next ``self.Additive.epistasis.n`` entries
        are the additive coefficients.
        """
        n_nonlinear = len(self.parameters.valuesdict())
        n_additive = self.Additive.epistasis.n
        return (thetas[:n_nonlinear],
                thetas[n_nonlinear:n_nonlinear + n_additive])

    def add_gpm(self, gpm):
        """Attach ``gpm`` to this model and to the additive sub-model.

        Returns
        -------
        self
        """
        super().add_gpm(gpm)
        # Add gpm to other models.
        self.Additive.add_gpm(gpm)
        return self

    @property
    def thetas(self):
        """Nonlinear parameter values followed by the additive coefficients,
        concatenated into one array."""
        return np.concatenate((list(self.parameters.values()),
                               self.Additive.coef_))

    @property
    def num_of_params(self):
        """Number of nonlinear parameters plus number of additive
        coefficients."""
        return len(self.parameters) + len(self.Additive.coef_)

    @arghandler
    def transform(self, X=None, y=None):
        """Transform ``y`` using the minimizer and the additive prediction.

        The additive model predicts from the first-order columns of ``X``;
        that prediction and ``y`` are passed to ``self.minimizer.transform``.
        """
        # Predict additive model from a first order matrix only.
        x = self.Additive.predict(X=self._additive_slice(X))

        # Transform y onto x scale
        return self.minimizer.transform(x, y)

    def fit(self,
            X=None,
            y=None,
            **kwargs):
        """Fit the additive model, then the nonlinear function.

        ``kwargs`` are forwarded to ``_fit_nonlinear``.

        Returns
        -------
        self
        """
        # Step 1: fit linear portion
        self._fit_additive(X=X, y=y)

        # Step 2: fit nonlinear function
        self._fit_nonlinear(X=X, y=y, **kwargs)
        return self

    def _fit_additive(self, X=None, y=None, **kwargs):
        """Fit the additive model and store its coefficients in its
        epistasis map. ``kwargs`` are accepted but not used.

        Returns
        -------
        self
        """
        # Fresh epistasis map built from the additive model's columns.
        self.Additive.epistasis = EpistasisMap(
            sites=self.Additive.Xcolumns,
        )

        # Fit Additive model on a first order matrix only.
        self.Additive.fit(X=self._additive_slice(X), y=y)
        self.Additive.epistasis.values = self.Additive.coef_
        return self

    @arghandler
    def _fit_nonlinear(self, X=None, y=None, **kwargs):
        """Estimate the scale of multiple mutations in a genotype-phenotype
        map.

        The additive model predicts from the first-order columns of ``X``
        and the minimizer is fitted on that prediction versus ``y``.
        ``kwargs`` are accepted but not used.
        """
        # Predict additive phenotypes from the same first-order slice used by
        # ``transform`` so that a caller-supplied X stays aligned with y.
        x = self.Additive.predict(X=self._additive_slice(X))

        # Fit function
        self.minimizer.fit(x, y)
        self.parameters = self.minimizer.parameters

    @arghandler
    def fit_transform(self, X=None, y=None, **kwargs):
        """Fit the model and return a new map holding the transformed
        phenotypes.

        A ``GenotypePhenotypeMap`` is rebuilt from ``self.gpm`` and its
        ``'phenotypes'`` data column is set to the output of ``transform``.
        """
        self.fit(X=X, y=y, **kwargs)

        linear_phenotypes = self.transform(X=X, y=y)

        # Transform map.
        gpm = GenotypePhenotypeMap.read_dataframe(
            dataframe=self.gpm.data,
            wildtype=self.gpm.wildtype,
            mutations=self.gpm.mutations
        )

        gpm.data['phenotypes'] = linear_phenotypes
        return gpm

    def predict(self, X=None):
        """Predict phenotypes: additive prediction passed through the
        minimizer's ``predict``."""
        x = self.Additive.predict(X=X)
        return self.minimizer.predict(x)

    def predict_transform(self, X=None, y=None):
        """Apply the minimizer's ``predict`` to ``y`` or, when ``y`` is
        ``None``, to the additive prediction for ``X``."""
        x = self.Additive.predict(X=X) if y is None else y
        return self.minimizer.predict(x)

    @arghandler
    def hypothesis(self, X=None, thetas=None):
        """Evaluate the model for an explicit parameter vector.

        ``thetas`` is split into nonlinear parameters and additive
        coefficients; the result is
        ``function(np.dot(X, coefficients), *parameters)``.
        """
        parameters, epistasis = self._split_thetas(thetas)

        # Part 1: Linear portion
        x = np.dot(X, epistasis)

        # Part 2: Nonlinear portion
        return self.minimizer.function(x, *parameters)

    def hypothesis_transform(self, X=None, y=None, thetas=None):
        """Apply the nonlinear function, with parameters taken from
        ``thetas``, to ``y`` or, when ``y`` is ``None``, to the additive
        hypothesis for ``X``."""
        parameters, epistasis = self._split_thetas(thetas)

        if y is None:
            x = self.Additive.hypothesis(X=X, thetas=epistasis)
        else:
            x = y
        return self.minimizer.function(x, *parameters)

    @arghandler
    def score(self, X=None, y=None):
        """Return ``pearson(y, ypred)`` squared, where ``ypred`` is the
        model prediction for ``X``."""
        x = self.Additive.predict(X=X)
        ypred = self.minimizer.predict(x)
        return pearson(y, ypred)**2

    @arghandler
    def lnlike_of_data(self, X=None, y=None, yerr=None, thetas=None):
        """Return the per-observation Gaussian log-likelihood of ``y`` given
        the model evaluated at ``thetas`` and the errors ``yerr``."""
        # Calculate ymodel
        ymodel = self.hypothesis(X=X, thetas=thetas)

        # Likelihood of data given model
        return (- 0.5 * np.log(2 * np.pi * yerr**2) -
                (0.5 * ((y - ymodel)**2 / yerr**2)))

    @arghandler
    def lnlike_transform(
            self,
            X=None,
            y=None,
            yerr=None,
            lnprior=None,
            thetas=None):
        """Return ``lnlike_of_data(...) + lnprior``."""
        # Update likelihood.
        lnlike = self.lnlike_of_data(X=X, y=y, yerr=yerr, thetas=thetas)
        return lnlike + lnprior