예제 #1
0
    train_data, train_response=prepare_data(data)
    test_data,test_response=prepare_data(data1)
    #print train_data[0,:],test_data[0,:]

    #train_data,test_data= scale_data(train_data.astype(float),test_data.astype(float))
    #model=svm.SVC(probability=True)

    model=RandomForestClassifier(n_estimators=500, criterion="entropy") 
    imp=preprocessing.Imputer(missing_values=0,strategy="median",axis=0)
    imp.fit(train_data)
    train_data=imp.transform(train_data)
    test_data=imp.transform(test_data)
    #Pipeline([("imputer",preprocessing.Imputer(missing_values=0.0,strategy="median",axis=0)),("forest", RandomForestClassifier(n_estimators=500, criterion="entropy"))])
    #model=ExtraTreesClassifier(n_estimators=500,criterion="entropy")
    print '-------find optimze hyperparameter--------'
    model.C=35.33 #hyperparam(train_data,train_response,model)
    print '---- doing cv now ------------'
    cv_score= cv_loop(train_data, train_response, model)
    print 'cv mean AUC score= ',cv_score
    print '------ fitting full model -------------'
    model.fit(train_data, train_response)
    preds=model.predict_proba(test_data)[:,1]
    #print model.accuracy_score(test_scaled, test_response)
    print 'test data AUC score= ', metrics.roc_auc_score(test_response, preds)
    print '------------------------------'
    print 'entries in test data= ',len(preds)
    ct=0
    for pred in preds:
        #print pred
        if pred>=0.5: 
            ct+=1
예제 #2
0
# Column indices of the `impcount` top-ranked features in `mylist`.
# Each stored index is shifted by one; presumably because column 0 of `data`
# holds the target (it is what Ytr / Ytes are read from below) -- confirm.
top_ranked = mylist[:impcount]
importantidx = [t[1] + 1 for t in top_ranked]

# Rows before `tr` form the training split, the remaining rows the test split.
Xtr, Ytr = data[:tr, importantidx], data[:tr, 0]
Xtes, Ytes = data[tr:, importantidx], data[tr:, 0]


# Re-run the SVM on the selected features only: for every candidate C in K,
# do a shuffled 5-fold cross-validation and record the mean and standard
# deviation of the weighted F1 score.
for k in K:
    # A new shuffled split is drawn for each candidate value.
    kf = KFold(len(Xtr), n_folds=5, shuffle=True)
    Error = []
    F1score = []
    for train, test in kf:
        # Degree-1 polynomial kernel; class 1 is weighted 4x.
        clf = svm.SVC(C=k, kernel='poly', degree=1, class_weight={1: 4})
        clf.fit(Xtr[train], Ytr[train])

        Yhat = clf.predict(Xtr[test])
        abs_diff = abs(Ytr[test] - Yhat)
        # Sum of absolute differences between labels and predictions.
        print(sum(abs_diff))
        # NOTE: despite the name, this stores 1 - mean absolute difference.
        Error.append(1 - abs_diff.mean())
        F1score.append(f1_score(Ytr[test], Yhat, average='weighted'))
    scores.append(np.mean(F1score))
    scores_std.append(np.std(F1score))



"""Plotting section goes here
I will plot scores +, - one standard deviation"""

plt.figure(1, figsize=(4, 3))