# Example 1
def rec_feature_elim(data, num_features=17700):
    """Select the top `num_features` genes via recursive feature elimination.

    Fits a linear-kernel SVM, recursively eliminates one feature per step
    until `num_features` remain, then reports the selected genes.

    Parameters:
        data: dataset object exposing get_gene_exp_matrix() and get_labels()
        num_features: number of features RFE should keep (default 17700)
    """
    X = data.get_gene_exp_matrix()
    y = data.get_labels()
    svc = SVC(kernel="linear", C=1)
    rfe = RFE(estimator=svc, n_features_to_select=num_features, step=1)
    selector = rfe.fit(X, y)
    # BUG FIX: selector.support_ holds numpy bools, and `x is True` is
    # always False for np.bool_ values (they are not the Python True
    # singleton), so the old mask was all zeros. Plain truthiness handles
    # both Python and numpy bools.
    mask = [1 if x else 0 for x in selector.support_]
    print_genes_nonzero_coeff(data, mask)
# Example 2
def kFoldGetSparsity(data, logregAlgo, k=4):
    """Measure coefficient sparsity of a model across k-fold cross-validation.

    For each fold, fits `logregAlgo` on the training split, reports the
    genes with nonzero coefficients, and records the percentage of zero
    coefficients.

    Parameters:
        data: dataset object exposing get_gene_exp_matrix() and get_labels()
        logregAlgo: estimator with fit() and a coef_ attribute
        k: number of folds (default 4)

    Returns:
        list of per-fold sparsity percentages (0-100)
    """
    print("--------------------------------------------")
    X = data.get_gene_exp_matrix()
    Y = data.get_labels()
    kf = cross_validation.KFold(len(X), k=k, shuffle=True)
    sparsity = []
    for train_index, test_index in kf:
        # Only the training split is needed; the original also built
        # X_test/y_test but never used them.
        X_train = [X[i] for i in train_index]
        y_train = [Y[i] for i in train_index]
        logregAlgo.fit(X_train, y_train)
        coeffs = logregAlgo.coef_.ravel()
        # BUG FIX: the helper prints the gene report itself and (per its
        # other call sites in this file) returns None, so the original
        # `print print_genes_nonzero_coeff(...)` emitted a stray "None"
        # line after each fold.
        print_genes_nonzero_coeff(data, coeffs)
        sparsity.append(np.mean(coeffs == 0) * 100)
    return sparsity
# Example 3
def rec_feature_elim_with_KFold(data):
    """Recursive feature elimination with a cross-validated feature count.

    Uses RFECV with a linear-kernel SVM and 2-fold stratified CV to pick
    the optimal number of features, then reports the selected genes.

    FIXME: How to pick a kernel?
    WARNING: ridiculously slow?
    """
    X = data.get_gene_exp_matrix()
    y = data.get_labels()
    # Create the RFE object and compute a cross-validated score.
    svc = SVC(kernel="linear")
    rfecv = RFECV(estimator=svc, step=1, cv=StratifiedKFold(y, 2),
                  loss_func=zero_one)
    selector = rfecv.fit(X, y)
    # BUG FIX: selector.support_ holds numpy bools, and `x is True` is
    # always False for np.bool_ values, so the old mask was all zeros.
    mask = [1 if x else 0 for x in selector.support_]
    print_genes_nonzero_coeff(data, mask)
    # Single-argument print(...) is valid in both Python 2 and 3.
    print("Optimal number of features : %d" % rfecv.n_features_)