Example No. 1
class Base(PresencePipe):
    """Linear regression classifiers with elastic net regularization.

    The `Base` model takes a symmetric sample over `l1_ratio` regardless of
    the performance of the classifier at each value of this parameter in order
    for us to gain a better understanding of how mixing the LASSO and ridge
    regularization penalties affects classifier behaviour.

    Models that implement tuning grids that favour better-performing values of
    `l1_ratio` are implemented below.
    """

    tune_priors = (
        ('fit__alpha', tuple(10**np.linspace(-4, -1 / 3, 12))),
        ('fit__l1_ratio', (0.25, 0.5, 0.75)),
    )

    feat_inst = SelectMeanVar(mean_perc=95, var_perc=95)
    norm_inst = StandardScaler()
    fit_inst = SGDClassifier(loss='log',
                             penalty='elasticnet',
                             max_iter=1000,
                             class_weight='balanced')

    def __init__(self):
        super().__init__([('feat', self.feat_inst), ('norm', self.norm_inst),
                          ('fit', self.fit_inst)])
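For reference, the `fit__alpha` expression in `tune_priors` above expands to twelve values spaced evenly on a log scale, running from 1e-4 up to roughly 0.46 with neighbouring values about a factor of 2.15 apart. A quick sketch that simply evaluates the grid on its own (nothing here beyond the expression already used above):

import numpy as np

# The same expression used in tune_priors above: twelve alpha values
# spaced evenly on a log scale between 1e-4 and 10**(-1/3) ~= 0.464.
alphas = tuple(10 ** np.linspace(-4, -1 / 3, 12))
print(alphas[0], alphas[-1])   # ~1.0e-04 ... ~4.6e-01
print(alphas[1] / alphas[0])   # consecutive values differ by a factor of ~2.15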
Example No. 2
class SVCrbf(Base, Kernel):

    feat_inst = SelectMeanVar(mean_perc=90, var_perc=100)

    tune_priors = (
        ('fit__C', tuple(10 ** np.linspace(-3, 4, 8))),
        )
    test_count = 8

    fit_inst = SVC(kernel='rbf', gamma='scale', probability=True,
                   cache_size=500, class_weight='balanced')
Example No. 3
class Forests(Base, Trees):

    feat_inst = SelectMeanVar(mean_perc=90, var_perc=100)

    tune_priors = (
        ('fit__min_samples_leaf', (1, 2, 3, 4, 6, 8, 10, 15)),
        )
    test_count = 8
 
    fit_inst = RandomForestClassifier(n_estimators=5000,
                                      class_weight='balanced')
Example No. 4
class Ridge(Base, LinearPipe):

    feat_inst = SelectMeanVar(mean_perc=90, var_perc=100)

    tune_priors = (
        ('fit__C', tuple(10 ** np.linspace(-7, 0, 8))),
        )
    test_count = 8

    fit_inst = LogisticRegression(solver='liblinear', penalty='l2',
                                  max_iter=200, class_weight='balanced')
Example No. 5
class StanPipe(PresencePipe):

    tune_priors = (('fit__alpha', tuple(10**np.linspace(-3, 0.68, 24))), )

    feat_inst = SelectMeanVar(mean_perc=75)
    norm_inst = StandardScaler()
    fit_inst = OptimModel(model_code=gauss_model)

    def __init__(self):
        super().__init__([('feat', self.feat_inst), ('norm', self.norm_inst),
                          ('fit', self.fit_inst)])
Example No. 6
class Base(MultiPipe, PresencePipe):

    tune_priors = (
        ('fit__margin', (0.4, 0.6, 0.8, 1.0, 1.2, 1.4)),
        ('fit__sigma_h', (0.04, 0.08, 0.1, 0.12, 0.16, 0.24)),
    )

    feat_inst = SelectMeanVar(mean_perc=95, var_perc=95)
    norm_inst = StandardScaler()
    fit_inst = SingleDomain(latent_features=5, max_iter=500, stop_tol=0.05)

    def __init__(self):
        super().__init__([('feat', self.feat_inst), ('norm', self.norm_inst),
                          ('fit', self.fit_inst)])
Example No. 7
class Base(PresencePipe):

    tune_priors = (
        ('fit__max_depth', (2, 3, 4, 5)),
        ('fit__min_samples_split', tuple(np.linspace(0.003, 0.051, 9))),
    )

    feat_inst = SelectMeanVar(mean_perc=95, var_perc=95)
    norm_inst = StandardScaler()
    fit_inst = GradientBoostingClassifier()

    def __init__(self):
        super().__init__([('feat', self.feat_inst), ('norm', self.norm_inst),
                          ('fit', self.fit_inst)])
Example No. 8
class Base(PresencePipe):

    tune_priors = (
        ('fit__alpha', tuple(10**np.linspace(-4, -2.35, 12))),
        ('fit__gamma', (0.5, 1., 2.)),
    )

    feat_inst = SelectMeanVar(mean_perc=95, var_perc=95)
    norm_inst = StandardScaler()
    fit_inst = OptimModel(model_code=base_model)

    def __init__(self):
        super().__init__([('feat', self.feat_inst), ('norm', self.norm_inst),
                          ('fit', self.fit_inst)])
Example No. 9
class MultiTransfer(TransferPipe, PresencePipe):

    tune_priors = (
        ('fit__margin', (2. / 3, 24. / 23)),
        ('fit__sigma_h', (1. / 11, 1. / 7)),
    )

    feat_inst = SelectMeanVar(mean_perc=80, var_perc=90)
    norm_inst = StandardScaler()
    fit_inst = MultiDomain(latent_features=3, max_iter=50)

    def __init__(self):
        super().__init__([('feat', self.feat_inst), ('norm', self.norm_inst),
                          ('fit', self.fit_inst)])
Example No. 10
class Base(PresencePipe):

    tune_priors = (
        ('fit__max_features', tuple(10**np.linspace(-3, -2 / 3, 6))),
        ('fit__min_samples_leaf', (1, 2, 3, 4, 6, 8)),
    )

    feat_inst = SelectMeanVar(mean_perc=95, var_perc=95)
    norm_inst = StandardScaler()
    fit_inst = RandomForestClassifier(n_estimators=500,
                                      class_weight='balanced')

    def __init__(self):
        super().__init__([('feat', self.feat_inst), ('norm', self.norm_inst),
                          ('fit', self.fit_inst)])
Example No. 11
class Base(OmicPipe):
    """An abstract class for the set of standard transformers.

    The transformers in this module are designed to use all available -omic
    features save those with very low expression in order to get the fullest
    possible picture of the features that can be used to cluster a given task.

    """

    feat_inst = SelectMeanVar(mean_perc=90, var_perc=100)
    norm_inst = StandardScaler()

    def __init__(self):
        super().__init__([('feat', self.feat_inst), ('norm', self.norm_inst),
                          ('fit', self.fit_inst)])
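`Base` here is abstract in the sense that it leaves `fit_inst` unset; concrete transformers subclass it and plug a decomposition or clustering step into the `fit` slot. As a purely illustrative sketch (the class name and the choice of PCA are assumptions, not part of the original module), such a subclass might look like:

from sklearn.decomposition import PCA

# Hypothetical concrete transformer in the style of the classes above,
# plugging a PCA decomposition into the 'fit' step of the pipeline.
class PCAomic(Base):
    fit_inst = PCA(n_components=10)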
Example No. 12
class Meanvar(Base):
    """Ridge regression with tuning over feature selection thresholds.

    We fix a value of `C` that tends to work well across a wide variety of
    mutation prediction tasks, and then tune over different filter cutoffs for
    removing expression features with low mean or variance.
    """

    tune_priors = (
        ('feat__mean_perc', (100. / 3, 50, 75, 90, 98, 100)),
        ('feat__var_perc', (100. / 3, 50, 75, 90, 98, 100)),
    )

    feat_inst = SelectMeanVar()
    fit_inst = LogisticRegression(C=0.002,
                                  penalty='l2',
                                  class_weight='balanced')
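`SelectMeanVar` itself is defined elsewhere in the project, so the exact semantics of `mean_perc` and `var_perc` are not shown here. Assuming these give the percentage of features to keep when ranked by per-feature mean and variance, a rough, hypothetical stand-in for such a filter could be written as an ordinary scikit-learn transformer:

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin

class MeanVarFilterSketch(BaseEstimator, TransformerMixin):
    """Hypothetical stand-in for SelectMeanVar, not the project's own code.

    Assumes `mean_perc` and `var_perc` give the percentage of features to
    keep, ranked by per-feature mean and variance respectively.
    """

    def __init__(self, mean_perc=100., var_perc=100.):
        self.mean_perc = mean_perc
        self.var_perc = var_perc

    def fit(self, X, y=None):
        X = np.asarray(X)
        means, variances = X.mean(axis=0), X.var(axis=0)

        # a feature survives if it clears both lower-percentile cutoffs
        mean_cut = np.percentile(means, 100. - self.mean_perc)
        var_cut = np.percentile(variances, 100. - self.var_perc)
        self.mask_ = (means >= mean_cut) & (variances >= var_cut)
        return self

    def transform(self, X):
        return np.asarray(X)[:, self.mask_]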
Example No. 13
class Meanvar(Base):
    """LASSO regression with tuning over feature selection thresholds.

    We fix a value of `C` that tends to work well across a wide variety of
    mutation prediction tasks, and then tune over different filter cutoffs for
    removing expression features with low mean or variance.
    """

    tune_priors = (
        ('feat__mean_perc', (50, 65, 75, 85, 90, 99)),
        ('feat__var_perc', (50, 65, 75, 85, 90, 99)),
    )

    feat_inst = SelectMeanVar()
    fit_inst = LogisticRegression(penalty='l1',
                                  max_iter=200,
                                  C=np.exp(1),
                                  class_weight='balanced')
Example No. 14
class Base(PresencePipe):
    """Linear regression classifiers with the ridge regularization penalty.

    Note that the `C` regularization strength parameter should have this same
    testing value grid in all cases where it is tuned over. This reflects that
    optimal values of `C` tend to always fall well within this selected range
    for observed mutation prediction tasks, and also that past a certain point
    all large values of `C` will result in no regularization.
    """

    tune_priors = (('fit__C', tuple(10**np.linspace(-7.1, 3.4, 36))), )

    feat_inst = SelectMeanVar(mean_perc=95, var_perc=95)
    norm_inst = StandardScaler()
    fit_inst = LogisticRegression(penalty='l2', class_weight='balanced')

    def __init__(self):
        super().__init__([('feat', self.feat_inst), ('norm', self.norm_inst),
                          ('fit', self.fit_inst)])
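Evaluating the `fit__C` grid above makes the range described in the docstring concrete: the 36 values span roughly ten and a half orders of magnitude, from about 8e-8 up to about 2.5e+3, with neighbouring values a factor of two apart.

import numpy as np

# The ridge C grid from tune_priors above: exponents run from -7.1 to 3.4
# in steps of 0.3, i.e. each value is roughly double the previous one.
C_grid = tuple(10 ** np.linspace(-7.1, 3.4, 36))
print(C_grid[0], C_grid[-1])   # ~7.9e-08 ... ~2.5e+03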
Example No. 15
class Meanvar(Base):
    """Elastic net regression with tuning over feature selection thresholds.

    We fix values of `alpha` and `l1_ratio` that tend to work well across a
    wide variety of mutation prediction tasks, and then tune over different
    filter cutoffs for removing expression features with low mean or variance.
    """

    tune_priors = (
        ('feat__mean_perc', (50, 65, 75, 85, 90, 99)),
        ('feat__var_perc', (50, 65, 75, 85, 90, 99)),
    )

    feat_inst = SelectMeanVar()
    fit_inst = SGDClassifier(loss='log',
                             penalty='elasticnet',
                             max_iter=1000,
                             l1_ratio=0.5,
                             alpha=0.01,
                             class_weight='balanced')
Example No. 16
class Base(PresencePipe):
    """Linear regression classifiers with the LASSO regularization penalty.

    Note that the `C` regularization strength parameter should have this same
    testing value grid in all cases where it is tuned over. The selected range
    of values reflects the finding that setting `C` to less than 0.01 doesn't
    appear to ever work in the context of predicting mutation status in any of
    the variants of LASSO regression given below, and also that past a certain
    point all large values of `C` will simply result in no regularization.
    """

    tune_priors = (('fit__C', tuple(10**np.linspace(-4.25, 8, 36))), )

    feat_inst = SelectMeanVar(mean_perc=95, var_perc=95)
    norm_inst = StandardScaler()
    fit_inst = LogisticRegression(penalty='l1',
                                  max_iter=200,
                                  class_weight='balanced')

    def __init__(self):
        super().__init__([('feat', self.feat_inst), ('norm', self.norm_inst),
                          ('fit', self.fit_inst)])
Example No. 17
class Base(PresencePipe):
    """Support Vector Classifiers with various kernels. 

    The `Base` model corresponds to the simplest linear kernel; other choices
    of kernels are implemented below.

    Note that a unique tuning grid for the `C` regularization parameter needs
    to be specified in each version of this model due to the differences in
    characteristics associated with each kernel.
    """

    tune_priors = (('fit__C', tuple(10**np.linspace(-6.3, -2.8, 36))), )

    feat_inst = SelectMeanVar(mean_perc=95, var_perc=95)
    norm_inst = StandardScaler()
    fit_inst = SVC(kernel='linear',
                   probability=True,
                   cache_size=500,
                   class_weight='balanced')

    def __init__(self):
        super().__init__([('feat', self.feat_inst), ('norm', self.norm_inst),
                          ('fit', self.fit_inst)])
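These pipeline classes appear to follow the scikit-learn Pipeline interface, with each entry of `tune_priors` naming a `step__parameter` grid. Assuming that holds, a hypothetical way to run a grid search over the linear-kernel `Base` model above (with stand-in data in place of a real expression matrix and mutation labels) might look like:

import numpy as np
from sklearn.model_selection import GridSearchCV

# Stand-in data: 100 samples by 200 features, binary labels. In practice
# these would be an -omic expression matrix and mutation status calls.
rng = np.random.default_rng(0)
X = rng.normal(size=(100, 200))
y = rng.integers(0, 2, size=100)

# Hypothetical usage: assumes Base behaves like an sklearn Pipeline and that
# tune_priors maps directly onto a GridSearchCV parameter grid.
pipe = Base()
search = GridSearchCV(pipe, param_grid=dict(pipe.tune_priors),
                      scoring='roc_auc', cv=5)
search.fit(X, y)
print(search.best_params_)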
Example No. 18
class Cauchy(Base):

    feat_inst = SelectMeanVar(mean_perc=200. / 3, var_perc=200. / 3)
    fit_inst = OptimModel(model_code=cauchy_model)
Example No. 19
class Select_few(Base):

    feat_inst = SelectMeanVar(mean_perc=200. / 3, var_perc=200. / 3)