class Base(PresencePipe):
    """Linear regression classifiers with elastic net regularization.

    The `Base` model takes a symmetric sample over `l1_ratio` regardless of
    the performance of the classifier at each value of this parameter, so
    that we gain a better understanding of how mixing the LASSO and ridge
    regularization penalties affects classifier behaviour. Models that
    implement tuning grids favouring better-performing values of `l1_ratio`
    are implemented below.
    """

    tune_priors = (
        ('fit__alpha', tuple(10 ** np.linspace(-4, -1 / 3, 12))),
        ('fit__l1_ratio', (0.25, 0.5, 0.75)),
        )

    feat_inst = SelectMeanVar(mean_perc=95, var_perc=95)
    norm_inst = StandardScaler()

    # NOTE(review): loss='log' was renamed 'log_loss' in scikit-learn 1.1
    # and removed in 1.3 — confirm against the project's pinned version.
    fit_inst = SGDClassifier(loss='log', penalty='elasticnet',
                             max_iter=1000, class_weight='balanced')

    def __init__(self):
        # Assemble the feature-filter -> scaler -> classifier pipeline.
        steps = [('feat', self.feat_inst),
                 ('norm', self.norm_inst),
                 ('fit', self.fit_inst)]
        super().__init__(steps)
class SVCrbf(Base, Kernel):
    """Support vector classifier using a radial basis function kernel."""

    test_count = 8
    tune_priors = (
        ('fit__C', tuple(10 ** np.linspace(-3, 4, 8))),
        )

    feat_inst = SelectMeanVar(mean_perc=90, var_perc=100)
    fit_inst = SVC(kernel='rbf', gamma='scale', probability=True,
                   cache_size=500, class_weight='balanced')
class Forests(Base, Trees):
    """Random forest classifier tuned over the minimum leaf size."""

    test_count = 8
    tune_priors = (
        ('fit__min_samples_leaf', (1, 2, 3, 4, 6, 8, 10, 15)),
        )

    feat_inst = SelectMeanVar(mean_perc=90, var_perc=100)
    fit_inst = RandomForestClassifier(n_estimators=5000,
                                      class_weight='balanced')
class Ridge(Base, LinearPipe):
    """Logistic regression with the ridge (l2) penalty, tuned over `C`."""

    test_count = 8
    tune_priors = (
        ('fit__C', tuple(10 ** np.linspace(-7, 0, 8))),
        )

    feat_inst = SelectMeanVar(mean_perc=90, var_perc=100)
    fit_inst = LogisticRegression(solver='liblinear', penalty='l2',
                                  max_iter=200, class_weight='balanced')
class StanPipe(PresencePipe):
    """Pipeline fitting a Stan Gaussian model, tuned over `alpha`."""

    tune_priors = (
        ('fit__alpha', tuple(10 ** np.linspace(-3, 0.68, 24))),
        )

    feat_inst = SelectMeanVar(mean_perc=75)
    norm_inst = StandardScaler()
    fit_inst = OptimModel(model_code=gauss_model)

    def __init__(self):
        # Assemble the feature-filter -> scaler -> model pipeline.
        steps = [('feat', self.feat_inst),
                 ('norm', self.norm_inst),
                 ('fit', self.fit_inst)]
        super().__init__(steps)
class Base(MultiPipe, PresencePipe):
    """Multi-task pipeline built around a single-domain latent model."""

    tune_priors = (
        ('fit__margin', (0.4, 0.6, 0.8, 1.0, 1.2, 1.4)),
        ('fit__sigma_h', (0.04, 0.08, 0.1, 0.12, 0.16, 0.24)),
        )

    feat_inst = SelectMeanVar(mean_perc=95, var_perc=95)
    norm_inst = StandardScaler()
    fit_inst = SingleDomain(latent_features=5, max_iter=500, stop_tol=0.05)

    def __init__(self):
        # Assemble the feature-filter -> scaler -> model pipeline.
        steps = [('feat', self.feat_inst),
                 ('norm', self.norm_inst),
                 ('fit', self.fit_inst)]
        super().__init__(steps)
class Base(PresencePipe):
    """Gradient boosted classifier tuned over tree depth and split size."""

    tune_priors = (
        ('fit__max_depth', (2, 3, 4, 5)),
        ('fit__min_samples_split', tuple(np.linspace(0.003, 0.051, 9))),
        )

    feat_inst = SelectMeanVar(mean_perc=95, var_perc=95)
    norm_inst = StandardScaler()
    fit_inst = GradientBoostingClassifier()

    def __init__(self):
        # Assemble the feature-filter -> scaler -> classifier pipeline.
        steps = [('feat', self.feat_inst),
                 ('norm', self.norm_inst),
                 ('fit', self.fit_inst)]
        super().__init__(steps)
class Base(PresencePipe):
    """Stan model pipeline tuned over `alpha` and `gamma` priors."""

    tune_priors = (
        ('fit__alpha', tuple(10 ** np.linspace(-4, -2.35, 12))),
        ('fit__gamma', (0.5, 1., 2.)),
        )

    feat_inst = SelectMeanVar(mean_perc=95, var_perc=95)
    norm_inst = StandardScaler()
    fit_inst = OptimModel(model_code=base_model)

    def __init__(self):
        # Assemble the feature-filter -> scaler -> model pipeline.
        steps = [('feat', self.feat_inst),
                 ('norm', self.norm_inst),
                 ('fit', self.fit_inst)]
        super().__init__(steps)
class MultiTransfer(TransferPipe, PresencePipe):
    """Transfer-learning pipeline built around a multi-domain model."""

    tune_priors = (
        ('fit__margin', (2. / 3, 24. / 23)),
        ('fit__sigma_h', (1. / 11, 1. / 7)),
        )

    feat_inst = SelectMeanVar(mean_perc=80, var_perc=90)
    norm_inst = StandardScaler()
    fit_inst = MultiDomain(latent_features=3, max_iter=50)

    def __init__(self):
        # Assemble the feature-filter -> scaler -> model pipeline.
        steps = [('feat', self.feat_inst),
                 ('norm', self.norm_inst),
                 ('fit', self.fit_inst)]
        super().__init__(steps)
class Base(PresencePipe):
    """Random forest tuned over feature sampling and leaf size."""

    tune_priors = (
        ('fit__max_features', tuple(10 ** np.linspace(-3, -2 / 3, 6))),
        ('fit__min_samples_leaf', (1, 2, 3, 4, 6, 8)),
        )

    feat_inst = SelectMeanVar(mean_perc=95, var_perc=95)
    norm_inst = StandardScaler()
    fit_inst = RandomForestClassifier(n_estimators=500,
                                      class_weight='balanced')

    def __init__(self):
        # Assemble the feature-filter -> scaler -> classifier pipeline.
        steps = [('feat', self.feat_inst),
                 ('norm', self.norm_inst),
                 ('fit', self.fit_inst)]
        super().__init__(steps)
class Base(OmicPipe):
    """An abstract class for the set of standard transformers.

    The transformers in this module are designed to use all available -omic
    features save those with very low expression in order to get the
    fullest possible picture of the features that can be used to cluster a
    given task.
    """

    feat_inst = SelectMeanVar(mean_perc=90, var_perc=100)
    norm_inst = StandardScaler()

    def __init__(self):
        # `fit_inst` is not defined here — concrete subclasses must
        # supply it before instantiation.
        steps = [('feat', self.feat_inst),
                 ('norm', self.norm_inst),
                 ('fit', self.fit_inst)]
        super().__init__(steps)
class Meanvar(Base):
    """Ridge regression with tuning over feature selection thresholds.

    We fix a value of `C` that tends to work well across a wide variety of
    mutation prediction tasks, and then tune over different filter cutoffs
    for removing expression features with low mean or variance.
    """

    tune_priors = (
        ('feat__mean_perc', (100. / 3, 50, 75, 90, 98, 100)),
        ('feat__var_perc', (100. / 3, 50, 75, 90, 98, 100)),
        )

    feat_inst = SelectMeanVar()
    fit_inst = LogisticRegression(C=0.002, penalty='l2',
                                  class_weight='balanced')
class Meanvar(Base):
    """LASSO regression with tuning over feature selection thresholds.

    We fix a value of `C` that tends to work well across a wide variety of
    mutation prediction tasks, and then tune over different filter cutoffs
    for removing expression features with low mean or variance.
    """

    tune_priors = (
        ('feat__mean_perc', (50, 65, 75, 85, 90, 99)),
        ('feat__var_perc', (50, 65, 75, 85, 90, 99)),
        )

    feat_inst = SelectMeanVar()

    # NOTE(review): penalty='l1' with the default solver ('lbfgs' in
    # scikit-learn >= 0.22) raises at fit time; presumably an l1-capable
    # solver is inherited or an older sklearn is pinned — confirm.
    fit_inst = LogisticRegression(penalty='l1', max_iter=200,
                                  C=np.exp(1), class_weight='balanced')
class Base(PresencePipe):
    """Linear regression classifiers with the ridge regularization penalty.

    Note that the `C` regularization strength parameter should have this
    same testing value grid in all cases where it is tuned over. This
    reflects that optimal values of `C` tend to always fall well within
    this selected range for observed mutation prediction tasks, and also
    that past a certain point all large values of `C` will result in no
    regularization.
    """

    tune_priors = (
        ('fit__C', tuple(10 ** np.linspace(-7.1, 3.4, 36))),
        )

    feat_inst = SelectMeanVar(mean_perc=95, var_perc=95)
    norm_inst = StandardScaler()
    fit_inst = LogisticRegression(penalty='l2', class_weight='balanced')

    def __init__(self):
        # Assemble the feature-filter -> scaler -> classifier pipeline.
        steps = [('feat', self.feat_inst),
                 ('norm', self.norm_inst),
                 ('fit', self.fit_inst)]
        super().__init__(steps)
class Meanvar(Base):
    """Elastic net regression with tuning over feature selection thresholds.

    We fix values of `alpha` and `l1_ratio` that tend to work well across a
    wide variety of mutation prediction tasks, and then tune over different
    filter cutoffs for removing expression features with low mean or
    variance.
    """

    tune_priors = (
        ('feat__mean_perc', (50, 65, 75, 85, 90, 99)),
        ('feat__var_perc', (50, 65, 75, 85, 90, 99)),
        )

    feat_inst = SelectMeanVar()

    # NOTE(review): loss='log' was renamed 'log_loss' in scikit-learn 1.1
    # and removed in 1.3 — confirm against the project's pinned version.
    fit_inst = SGDClassifier(loss='log', penalty='elasticnet',
                             max_iter=1000, l1_ratio=0.5, alpha=0.01,
                             class_weight='balanced')
class Base(PresencePipe):
    """Linear regression classifiers with the LASSO regularization penalty.

    Note that the `C` regularization strength parameter should have this
    same testing value grid in all cases where it is tuned over. The
    selected range of values reflects the finding that setting `C` to less
    than 0.01 doesn't appear to ever work in the context of predicting
    mutation status in any of the variants of LASSO regression given below,
    and also that past a certain point all large values of `C` will simply
    result in no regularization.
    """

    tune_priors = (
        ('fit__C', tuple(10 ** np.linspace(-4.25, 8, 36))),
        )

    feat_inst = SelectMeanVar(mean_perc=95, var_perc=95)
    norm_inst = StandardScaler()
    fit_inst = LogisticRegression(penalty='l1', max_iter=200,
                                  class_weight='balanced')

    def __init__(self):
        # Assemble the feature-filter -> scaler -> classifier pipeline.
        steps = [('feat', self.feat_inst),
                 ('norm', self.norm_inst),
                 ('fit', self.fit_inst)]
        super().__init__(steps)
class Base(PresencePipe):
    """Support Vector Classifiers with various kernels.

    The `Base` model corresponds to the simplest linear kernel; other
    choices of kernels are implemented below. Note that a unique tuning
    grid for the `C` regularization parameter needs to be specified in each
    version of this model due to the differences in characteristics
    associated with each kernel.
    """

    tune_priors = (
        ('fit__C', tuple(10 ** np.linspace(-6.3, -2.8, 36))),
        )

    feat_inst = SelectMeanVar(mean_perc=95, var_perc=95)
    norm_inst = StandardScaler()
    fit_inst = SVC(kernel='linear', probability=True, cache_size=500,
                   class_weight='balanced')

    def __init__(self):
        # Assemble the feature-filter -> scaler -> classifier pipeline.
        steps = [('feat', self.feat_inst),
                 ('norm', self.norm_inst),
                 ('fit', self.fit_inst)]
        super().__init__(steps)
class Cauchy(Base):
    """Variant of `Base` that fits the Cauchy Stan model.

    Uses a looser feature filter (two-thirds percentile cutoffs for both
    mean and variance) than the parent class — presumably to keep more
    features for the heavier-tailed model; confirm against `SelectMeanVar`.
    """
    feat_inst = SelectMeanVar(mean_perc=200. / 3, var_perc=200. / 3)
    fit_inst = OptimModel(model_code=cauchy_model)
class Select_few(Base):
    """Variant of `Base` with a looser feature filter.

    Overrides only the feature selector, using two-thirds percentile
    cutoffs for both mean and variance instead of the parent's settings.
    """
    feat_inst = SelectMeanVar(mean_perc=200. / 3, var_perc=200. / 3)