# Train/evaluate script: impute missing values, cross-validate a random
# forest, fit it on the full training set and score it on the test set.

# prepare_data returns a (features, response) pair for each input set.
train_data, train_response = prepare_data(data)
test_data, test_response = prepare_data(data1)
# print train_data[0,:],test_data[0,:]
# train_data,test_data= scale_data(train_data.astype(float),test_data.astype(float))

# Disabled alternative: model=svm.SVC(probability=True)
model = RandomForestClassifier(n_estimators=500, criterion="entropy")

# Entries equal to 0 are treated as missing and replaced with the median
# along axis 0.  The imputer is fitted on the training data only and the
# same fitted transform is then applied to both sets.
imp = preprocessing.Imputer(missing_values=0, strategy="median", axis=0)
imp.fit(train_data)
train_data = imp.transform(train_data)
test_data = imp.transform(test_data)
# Disabled alternatives:
# Pipeline([("imputer",preprocessing.Imputer(missing_values=0.0,strategy="median",axis=0)),("forest", RandomForestClassifier(n_estimators=500, criterion="entropy"))])
# model=ExtraTreesClassifier(n_estimators=500,criterion="entropy")

# Every print below is a single-argument call, so it emits exactly what the
# original Python 2 print statements did and also parses on Python 3.
print('-------find optimze hyperparameter--------')
# NOTE(review): the RandomForestClassifier above is not constructed with a
# `C` parameter, so this only sets a plain attribute on the object.  It
# looks like a leftover from the disabled SVC model; kept because it is not
# visible here whether cv_loop reads it -- confirm and remove if unused.
model.C = 35.33
# hyperparam(train_data,train_response,model)

print('---- doing cv now ------------')
cv_score = cv_loop(train_data, train_response, model)
print('cv mean AUC score=  %s' % (cv_score,))

print('------ fitting full model -------------')
model.fit(train_data, train_response)
# Keep only column 1 of the predicted probabilities.
preds = model.predict_proba(test_data)[:, 1]
# print model.accuracy_score(test_scaled, test_response)
test_auc = metrics.roc_auc_score(test_response, preds)
print('test data AUC score=  %s' % (test_auc,))
print('------------------------------')
print('entries in test data=  %s' % (len(preds),))

# Number of test entries whose predicted probability is at least 0.5.
ct = sum(1 for pred in preds if pred >= 0.5)
# Select the columns named by the first `impcount` entries of `mylist`.
# Each entry's second element is a feature index; it is shifted by one,
# presumably because column 0 of `data` holds the label (see Ytr/Ytes).
importantidx = [t[1] + 1 for t in mylist[:impcount]]

# First `tr` rows are the training split, the remaining rows the test split.
Xtr = data[:tr, importantidx]
Ytr = data[:tr, 0]
Xtes = data[tr:, importantidx]
Ytes = data[tr:, 0]

#### New SVM with selected features
# For each candidate C in K, run shuffled 5-fold cross-validation on the
# training split and record the mean and standard deviation of the
# per-fold weighted F1 score.
for k in K:
    kf = KFold(len(Xtr), n_folds=5, shuffle=True)
    Error = []
    F1score = []
    for train, test in kf:
        # clf = svm.SVC(kernel='poly', class_weight={1: 4})
        # C is passed at construction time rather than assigned afterwards.
        clf = svm.SVC(kernel='poly', degree=1, class_weight={1: 4}, C=k)
        clf.fit(Xtr[train], Ytr[train])
        Yhat = clf.predict(Xtr[test])

        # Absolute label/prediction differences, computed once and reused.
        abs_diff = abs(Ytr[test] - Yhat)
        print(sum(abs_diff))
        # NOTE(review): despite the name, this stores 1 - mean(|y - yhat|);
        # kept under the original name in case later code reads it.
        Error.append(1 - abs_diff.mean())
        F1score.append(f1_score(Ytr[test], Yhat, average='weighted'))

    scores.append(np.mean(F1score))
    scores_std.append(np.std(F1score))

# Plotting section goes here: plot scores plus and minus one standard
# deviation.
plt.figure(1, figsize=(4, 3))