def test_lnlikelihood(self, gpm): model = EpistasisLinearRegression(order=self.order, model_type="local") model.add_gpm(gpm) model.fit() # Calculate lnlikelihood lnlike = model.lnlikelihood() assert lnlike.dtype == float
def test_score(self, gpm): model = EpistasisLinearRegression(order=self.order, model_type="local") model.add_gpm(gpm) model.fit() score = model.score() # Tests assert score >= 0 assert score <= 1
def test_hypothesis(self, gpm): model = EpistasisLinearRegression(order=self.order, model_type="local") model.add_gpm(gpm) model.fit() # Checks check1 = model.hypothesis(thetas=model.coef_) # Tests np.testing.assert_almost_equal(sorted(check1), sorted(model.gpm.phenotype))
def test_predict(self, gpm): model = EpistasisLinearRegression(order=self.order, model_type="local") model.add_gpm(gpm) model.fit() check1 = model.predict() # Tests np.testing.assert_almost_equal(sorted(check1), sorted(model.gpm.phenotype))
def test_fit(self, gpm): model = EpistasisLinearRegression(order=self.order, model_type="local") model.add_gpm(gpm) model.fit() # Checks check2 = hasattr(model, "coef_") check3 = hasattr(model, "epistasis") # Tests assert check2 is True assert check3 is True
class EpistasisClassifierMixin: """A Mixin class for epistasis classifiers """ def _fit_additive(self, X=None, y=None): # Construct an additive model. self.Additive = EpistasisLinearRegression( order=1, model_type=self.model_type) self.Additive.add_gpm(self.gpm) # Prepare a high-order model self.Additive.epistasis = EpistasisMap( sites=self.Additive.Xcolumns, ) # Fit the additive model and infer additive phenotypes self.Additive.fit(X=X, y=y) return self def _fit_classifier(self, X=None, y=None): # This method builds x and y from data. add_coefs = self.Additive.epistasis.values add_X = self.Additive._X(data=X) # Project X into padd space. X = add_X * add_coefs # Label X. y = binarize(y.reshape(1, -1), self.threshold)[0] self.classes = y # Fit classifier. super().fit(X=X, y=y) return self def fit_transform(self, X=None, y=None, **kwargs): self.fit(X=X, y=y, **kwargs) ypred = self.predict(X=X) # Transform map. gpm = GenotypePhenotypeMap.read_dataframe( dataframe=self.gpm.data[ypred==1], wildtype=self.gpm.wildtype, mutations=self.gpm.mutations ) return gpm def predict(self, X=None): Xadd = self.Additive._X(data=X) X = Xadd * self.Additive.epistasis.values return super().predict(X=X) def predict_transform(self, X=None, y=None): x = self.predict(X=X) y[x <= 0.5] = self.threshold return y def predict_log_proba(self, X=None): Xadd = self.Additive._X(data=X) X = Xadd * self.Additive.epistasis.values return super().predict_log_proba(X) def predict_proba(self, X=None): Xadd = self.Additive._X(data=X) X = Xadd * self.Additive.epistasis.values return super().predict_proba(X=X)
class EpistasisNonlinearRegression(BaseModel): """Use nonlinear least-squares regression to estimate epistatic coefficients and nonlinear scale in a nonlinear genotype-phenotype map. This models has two steps: 1. Fit an additive, linear regression to approximate the average effect of individual mutations. 2. Fit the nonlinear function to the observed phenotypes vs. the additive phenotypes estimated in step 1. Methods are described in the following publication: Sailer, Z. R. & Harms, M. J. 'Detecting High-Order Epistasis in Nonlinear Genotype-Phenotype Maps'. Genetics 205, 1079-1088 (2017). Parameters ---------- function : callable Nonlinear function between Pobs and Padd model_type : str (default: global) type of epistasis model to use. See paper above for more information. Keyword Arguments ----------------- Keyword arguments are interpreted as intial guesses for the nonlinear function parameters. Must have the same name as parameters in the nonlinear function. Attributes ---------- Additive : EpistasisLinearRegression Linear regression object for fitting additive model parameters : Parameters object Mapping object for nonlinear coefficients minimizer : Object that fits data using the function and a least squares minimization. """ def __init__(self, function, model_type="global", **p0): # Set up the function for fitting. self.function = function self.minimizer = FunctionMinimizer(self.function, **p0) self.parameters = self.minimizer.parameters self.order = 1 self.Xbuilt = {} # Construct parameters object self.set_params(model_type=model_type) # Store model specs. self.model_specs = dict( function=self.function, model_type=self.model_type, **p0) # Set up additive and high-order linear model self.Additive = EpistasisLinearRegression( order=1, model_type=self.model_type) def add_gpm(self, gpm): super(EpistasisNonlinearRegression, self).add_gpm(gpm) # Add gpm to other models. self.Additive.add_gpm(gpm) return self @property def thetas(self): return np.concatenate((list(self.parameters.values()), self.Additive.coef_)) @property def num_of_params(self): n = 0 n += len(self.parameters) + len(self.Additive.coef_) return n @arghandler def transform(self, X=None, y=None): # Use a first order matrix only. if type(X) == np.ndarray or type(X) == pd.DataFrame: Xadd = X[:, :self.Additive.epistasis.n] else: Xadd = X # Predict additive model. x = self.Additive.predict(X=Xadd) # Transform y onto x scale return self.minimizer.transform(x, y) def fit(self, X=None, y=None, **kwargs): # Fit linear portion self._fit_additive(X=X, y=y) # Step 2: fit nonlinear function self._fit_nonlinear(X=X, y=y, **kwargs) return self def _fit_additive(self, X=None, y=None, **kwargs): # Fit with an additive model self.Additive.epistasis = EpistasisMap( sites=self.Additive.Xcolumns, ) # Use a first order matrix only. if type(X) == np.ndarray or type(X) == pd.DataFrame: Xadd = X[:, :self.Additive.epistasis.n] else: Xadd = X # Fit Additive model self.Additive.fit(X=Xadd, y=y) self.Additive.epistasis.values = self.Additive.coef_ return self @arghandler def _fit_nonlinear(self, X=None, y=None, **kwargs): """Estimate the scale of multiple mutations in a genotype-phenotype map.""" # Use a first order matrix only. if type(X) == np.ndarray or type(X) == pd.DataFrame: Xadd = X[:, :self.Additive.epistasis.n] else: Xadd = X # Predict additive phenotypes. x = self.Additive.predict(X='fit') # Fit function self.minimizer.fit(x, y) self.parameters = self.minimizer.parameters @arghandler def fit_transform(self, X=None, y=None, **kwargs): self.fit(X=X, y=y, **kwargs) linear_phenotypes = self.transform(X=X, y=y) # Transform map. gpm = GenotypePhenotypeMap.read_dataframe( dataframe=self.gpm.data, wildtype=self.gpm.wildtype, mutations=self.gpm.mutations ) gpm.data['phenotypes'] = linear_phenotypes return gpm def predict(self, X=None): x = self.Additive.predict(X=X) y = self.minimizer.predict(x) return y def predict_transform(self, X=None, y=None): if y is None: x = self.Additive.predict(X=X) else: x = y return self.minimizer.predict(x) @arghandler def hypothesis(self, X=None, thetas=None): # ---------------------------------------------------------------------- # Part 0: Break up thetas # ---------------------------------------------------------------------- i, j = len(self.parameters.valuesdict()), self.Additive.epistasis.n parameters = thetas[:i] epistasis = thetas[i:i + j] # Part 1: Linear portion x = np.dot(X, epistasis) # Part 2: Nonlinear portion ynonlin = self.minimizer.function(x, *parameters) return ynonlin def hypothesis_transform(self, X=None, y=None, thetas=None): # Break up thetas i, j = len(self.parameters.valuesdict()), self.Additive.epistasis.n parameters = thetas[:i] epistasis = thetas[i:i + j] if y is None: x = self.Additive.hypothesis(X=X, thetas=epistasis) else: x = y y_transform = self.minimizer.function(x, *parameters) return y_transform @arghandler def score(self, X=None, y=None): x = self.Additive.predict(X=X) ypred = self.minimizer.predict(x) return pearson(y, ypred)**2 @arghandler def lnlike_of_data(self, X=None, y=None, yerr=None, thetas=None): # Calculate ymodel ymodel = self.hypothesis(X=X, thetas=thetas) # Likelihood of data given model return (- 0.5 * np.log(2 * np.pi * yerr**2) - (0.5 * ((y - ymodel)**2 / yerr**2))) @arghandler def lnlike_transform( self, X=None, y=None, yerr=None, lnprior=None, thetas=None): # Update likelihood. lnlike = self.lnlike_of_data(X=X, y=y, yerr=yerr, thetas=thetas) return lnlike + lnprior