Ejemplo n.º 1
0
 def _get_meta_features(self, gene_sets, filter_down):
     """Build gene-set (pathway) level features and cache them on self.

     Calls ``extract_geneset_pcs`` on ``self.df`` and stores its first two
     results as ``self.loadings`` and ``self.pct_var``.  The third result
     is optionally screened against ``self.global_vars.background``,
     standardized along axis 1 and decomposed with ``frame_svd``.

     Side effects: sets ``self.pathways`` and ``self.features['pathways']``
     and writes ``pathway_pc1`` / ``pathway_pc2`` into both
     ``self.global_vars`` and ``self.global_loadings``.  Returns nothing.

     :param gene_sets: forwarded unchanged to ``extract_geneset_pcs``.
     :param filter_down: forwarded unchanged to ``extract_geneset_pcs``.
     """
     loadings, pct_var, scores = extract_geneset_pcs(self.df, gene_sets,
                                                     filter_down)
     self.loadings = loadings
     self.pct_var = pct_var

     # The screen only runs once a 'background' entry is reachable as an
     # attribute of global_vars (presumably a DataFrame column -- confirm).
     if hasattr(self.global_vars, 'background'):
         screen = screen_feature(self.global_vars.background,
                                 pearson_pandas, scores)
         # Rows are kept when the screen p-value is ABOVE the cutoff.
         # NOTE(review): 10e-5 is 1e-4, not 1e-5 -- confirm the intent.
         keep = screen.p > 10e-5
         scores = scores.ix[keep]

     # Standardize along axis 1: subtract the mean, then divide by the std.
     axis_mean = scores.mean(1)
     axis_std = scores.std(1)
     scores = ((scores.T - axis_mean) / axis_std).T

     # The middle element returned by frame_svd is not used here.
     left, _singular, right = frame_svd(scores)

     self.pathways = scores
     self.features['pathways'] = scores

     # Entries 0 and 1 of each SVD factor are stored under the pc1/pc2 keys.
     pc_keys = ('pathway_pc1', 'pathway_pc2')
     for rank, key in enumerate(pc_keys):
         self.global_vars[key] = right[rank]
     for rank, key in enumerate(pc_keys):
         self.global_loadings[key] = left[rank]
Ejemplo n.º 2
0
 def _get_meta_features(self, gene_sets, filter_down):
     """Build gene-set (pathway) level features and cache them on self.

     Calls ``extract_geneset_pcs`` on ``self.df`` and stores its first two
     results as ``self.loadings`` and ``self.pct_var``.  The third result
     is optionally screened against ``self.global_vars.background``,
     standardized along axis 1 and decomposed with ``frame_svd``.

     Side effects: sets ``self.pathways`` and ``self.features['pathways']``
     and writes ``pathway_pc1`` / ``pathway_pc2`` into both
     ``self.global_vars`` and ``self.global_loadings``.  Returns nothing.

     :param gene_sets: forwarded unchanged to ``extract_geneset_pcs``.
     :param filter_down: forwarded unchanged to ``extract_geneset_pcs``.
     """
     loadings, pct_var, scores = extract_geneset_pcs(self.df, gene_sets,
                                                     filter_down)
     self.loadings = loadings
     self.pct_var = pct_var

     # The screen only runs once a 'background' entry is reachable as an
     # attribute of global_vars (presumably a DataFrame column -- confirm).
     if hasattr(self.global_vars, 'background'):
         screen = screen_feature(self.global_vars.background,
                                 pearson_pandas, scores)
         # Rows are kept when the screen p-value is ABOVE the cutoff.
         # NOTE(review): 10e-5 is 1e-4, not 1e-5 -- confirm the intent.
         keep = screen.p > 10e-5
         scores = scores.ix[keep]

     # Standardize along axis 1: subtract the mean, then divide by the std.
     axis_mean = scores.mean(1)
     axis_std = scores.std(1)
     scores = ((scores.T - axis_mean) / axis_std).T

     # The middle element returned by frame_svd is not used here.
     left, _singular, right = frame_svd(scores)

     self.pathways = scores
     self.features['pathways'] = scores

     # Entries 0 and 1 of each SVD factor are stored under the pc1/pc2 keys.
     pc_keys = ('pathway_pc1', 'pathway_pc2')
     for rank, key in enumerate(pc_keys):
         self.global_vars[key] = right[rank]
     for rank, key in enumerate(pc_keys):
         self.global_loadings[key] = left[rank]
Ejemplo n.º 3
0
 def _get_real_features(self):
     """Split ``self.df`` into feature tables and cache summary components.

     ``extract_features(self.df)`` yields three tables (named ``binary``,
     ``singles`` and ``real`` in the original code).  The rows of ``real``
     whose labels are not in ``singles`` are reduced with
     ``extract_pc(..., 0)`` to a "background" result; ``singles`` is then
     screened against that result's ``'pat_vec'`` entry, standardized along
     axis 1 and decomposed with ``frame_svd``.

     Side effects: fills ``self.features['binary']`` and
     ``self.features['real']``, and writes ``background``,
     ``filtered_pc1`` and ``filtered_pc2`` into both ``self.global_vars``
     and ``self.global_loadings``.  Returns nothing.
     """
     binary_tbl, single_tbl, real_tbl = extract_features(self.df)

     # Background = rows of `real` absent from `singles`, after dropna().
     leftover_rows = real_tbl.index.diff(single_tbl.index)
     background = extract_pc(real_tbl.ix[leftover_rows].dropna(), 0)
     pat_vec = background['pat_vec']

     # Rows are kept when the screen p-value is ABOVE the cutoff.
     # NOTE(review): 10e-5 is 1e-4, not 1e-5 -- confirm the intent.
     screen = screen_feature(pat_vec, pearson_pandas, single_tbl)
     keep = screen.p > 10e-5
     single_tbl = single_tbl.ix[keep]

     # Standardize along axis 1: subtract the mean, then divide by the std.
     axis_mean = single_tbl.mean(1)
     axis_std = single_tbl.std(1)
     single_tbl = ((single_tbl.T - axis_mean) / axis_std).T

     # The middle element returned by frame_svd is not used here.
     left, _singular, right = frame_svd(single_tbl)

     self.features['binary'] = binary_tbl
     self.features['real'] = single_tbl

     # Entries 0 and 1 of each SVD factor are stored under the pc1/pc2 keys.
     pc_keys = ('filtered_pc1', 'filtered_pc2')
     self.global_vars['background'] = pat_vec
     for rank, key in enumerate(pc_keys):
         self.global_vars[key] = right[rank]
     self.global_loadings['background'] = background['gene_vec']
     for rank, key in enumerate(pc_keys):
         self.global_loadings[key] = left[rank]
Ejemplo n.º 4
0
 def _get_real_features(self):
     """Split ``self.df`` into feature tables and cache summary components.

     ``extract_features(self.df)`` yields three tables (named ``binary``,
     ``singles`` and ``real`` in the original code).  The rows of ``real``
     whose labels are not in ``singles`` are reduced with
     ``extract_pc(..., 0)`` to a "background" result; ``singles`` is then
     screened against that result's ``'pat_vec'`` entry, standardized along
     axis 1 and decomposed with ``frame_svd``.

     Side effects: fills ``self.features['binary']`` and
     ``self.features['real']``, and writes ``background``,
     ``filtered_pc1`` and ``filtered_pc2`` into both ``self.global_vars``
     and ``self.global_loadings``.  Returns nothing.
     """
     binary_tbl, single_tbl, real_tbl = extract_features(self.df)

     # Background = rows of `real` absent from `singles`, after dropna().
     leftover_rows = real_tbl.index.diff(single_tbl.index)
     background = extract_pc(real_tbl.ix[leftover_rows].dropna(), 0)
     pat_vec = background['pat_vec']

     # Rows are kept when the screen p-value is ABOVE the cutoff.
     # NOTE(review): 10e-5 is 1e-4, not 1e-5 -- confirm the intent.
     screen = screen_feature(pat_vec, pearson_pandas, single_tbl)
     keep = screen.p > 10e-5
     single_tbl = single_tbl.ix[keep]

     # Standardize along axis 1: subtract the mean, then divide by the std.
     axis_mean = single_tbl.mean(1)
     axis_std = single_tbl.std(1)
     single_tbl = ((single_tbl.T - axis_mean) / axis_std).T

     # The middle element returned by frame_svd is not used here.
     left, _singular, right = frame_svd(single_tbl)

     self.features['binary'] = binary_tbl
     self.features['real'] = single_tbl

     # Entries 0 and 1 of each SVD factor are stored under the pc1/pc2 keys.
     pc_keys = ('filtered_pc1', 'filtered_pc2')
     self.global_vars['background'] = pat_vec
     for rank, key in enumerate(pc_keys):
         self.global_vars[key] = right[rank]
     self.global_loadings['background'] = background['gene_vec']
     for rank, key in enumerate(pc_keys):
         self.global_loadings[key] = left[rank]
Ejemplo n.º 5
0
def SVC_fill_old(feature, df):
    """Fill missing entries of ``feature`` with predictions from an SVC.

    ``df`` is binarized via ``to_quants(s, std=1) > 0`` and its rows are
    screened against ``feature`` with ``chi2_cont_test``; rows with a screen
    p-value below .05 become the predictors.  An SVC is tuned with
    ``GridSearchCV`` over rbf, linear and poly kernels on a 65% training
    split (fixed random seed), and the best estimator is applied to every
    column of ``df``.

    :param feature: labelled vector to be completed (indexed with ``.ix``,
        so presumably a pandas Series -- confirm against callers).
    :param df: table supplying the predictors; its columns are matched
        against ``feature.index``.
    :returns: dict with keys ``'auc'`` (grid-search score on all matched
        samples), ``'model'`` (best estimator), ``'decision_function'``,
        ``'inferred_values'`` (predictions for every column of ``df``) and
        ``'filled_feature'`` (``feature`` aligned to the columns of ``df``,
        nulls replaced by the predictions, cast to float).
    """
    binarized = df.apply(lambda col: to_quants(col, std=1) > 0)
    screen = screen_feature(feature, chi2_cont_test, binarized)
    hits = screen[screen.p < .05]

    # Samples usable for fitting: columns of df that also label `feature`.
    shared = binarized.columns.intersection(feature.index)
    predictors = binarized.ix[hits.index]
    X = predictors.ix[:, shared].T.as_matrix()
    Y = np.array(feature.ix[shared])

    # NOTE(review): the held-out part of the split is never used below.
    X_train, _X_test, y_train, _y_test = train_test_split(
        X, Y, test_size=0.35, random_state=5796543)

    rbf_grid = {'kernel': ['rbf'],
                'gamma': [0, .1, .05, .01, 1e-3, 1e-4, 1e-5],
                'C': [.1, 1, 10, 100, 1000],
                'class_weight': ['auto']}
    linear_grid = {'kernel': ['linear'],
                   'C': [1, 10, 100, 1000],
                   'class_weight': ['auto']}
    poly_grid = {'kernel': ['poly'],
                 'C': [1, 10, 100, 1000],
                 'class_weight': ['auto']}
    grid = [rbf_grid, linear_grid, poly_grid]

    search = GridSearchCV(SVC(C=1), grid, score_func=auc_score)
    search.fit(X_train, y_train, cv=5)
    best = search.best_estimator_
    # NOTE(review): scored on all of X/Y, which includes the training rows.
    auc = search.score(X, Y)

    # Predict for every column of df, not only those labelled in `feature`.
    all_samples = binarized.ix[predictors.index].T.as_matrix()
    inferred = pd.Series(best.predict(all_samples), index=binarized.columns)
    # Takes column 0 of the decision-function output (assumes it is 2-D).
    fun = pd.Series(best.decision_function(all_samples)[:, 0],
                    predictors.columns)

    # Align the feature to df's columns, then patch nulls with predictions.
    filled = feature.copy().ix[inferred.index]
    missing = filled.isnull()
    filled[missing] = inferred[missing]
    filled_feature = filled.astype(float)

    return {'auc': auc,
            'model': best,
            'decision_function': fun,
            'inferred_values': inferred,
            'filled_feature': filled_feature}