def svm_training(train_percent):
    """Train an SVC on the Cleveland data and write its predictions to CSV.

    The four arrays returned by ``data_input.getdata`` are used as training
    features, training labels, features to predict on, and reference labels
    for those predictions (presumably a train/test split controlled by
    ``train_percent`` -- confirm against ``data_input``).

    The result is written to ``./svm/<train_percent>.csv`` with the columns
    ``id``, ``predict_set_y`` and ``target_y``. The ``./svm`` directory is
    not created here and must already exist.
    """
    features, labels, features_to_predict, target_y = data_input.getdata(
        "processed.cleveland.data", train_percent)

    classifier = svm.SVC(gamma='scale')
    # fit() is given a flattened 1-D label vector.
    classifier.fit(features, np.ravel(labels))
    predicted = classifier.predict(features_to_predict)

    out_path = "./svm/" + str(train_percent) + ".csv"
    columns = {
        'id': range(len(predicted)),
        'predict_set_y': np.ravel(predicted),
        'target_y': np.ravel(target_y),
    }
    pd.DataFrame(columns).to_csv(out_path, index=False, sep=',')
def GMM(train_percent):
    """Cluster the data with a Gaussian mixture and print labels per cluster.

    A ``GaussianMixture`` with one component per distinct training label is
    fitted on the training features only (the labels are used solely to
    choose the number of components). The samples to predict are then
    assigned to components, and for every component index the reference
    labels of the samples assigned to it are printed, so cluster purity can
    be inspected by eye.

    Nothing is returned and no file is written.
    """
    train_set_x, train_set_y, predict_set_x, target_y = data_input.getdata(
        "processed.cleveland.data", train_percent)
    # Flatten so that set() sees scalars and boolean-mask indexing below
    # operates on a 1-D array.
    train_set_y = np.ravel(train_set_y)
    target_y = np.ravel(target_y)

    n_clusters = len(set(train_set_y))
    mixture = GaussianMixture(n_components=n_clusters, covariance_type='full')
    mixture.fit(train_set_x)
    assignments = mixture.predict(predict_set_x)

    for k in range(n_clusters):
        # Single-argument call form prints identically on Python 2 and 3.
        print(target_y[assignments == k])
def BR(train_percent):
    """Fit a Bayesian ridge regressor, print its absolute error, write a CSV.

    Both label arrays are passed through ``normalize`` (defined outside this
    function) before use. The regressor's continuous predictions are
    truncated toward zero with ``int()`` -- not rounded -- to obtain integer
    labels.

    The sum of absolute differences between the integer predictions and the
    normalized reference labels is printed, and the predictions are written
    to ``./regression/BR<train_percent>.csv`` with the columns ``id``,
    ``predict_set_y`` and ``target_y``. The ``./regression`` directory must
    already exist.
    """
    from sklearn.linear_model import BayesianRidge

    train_set_x, train_set_y, predict_set_x, target_y = data_input.getdata(
        "processed.cleveland.data", train_percent)
    train_set_y = normalize(train_set_y)
    # Flatten once: if normalize() returns a column vector, subtracting it
    # from the 1-D prediction array below would broadcast to an (n, n)
    # matrix and inflate the printed error.
    target_y = np.ravel(normalize(target_y))

    clf = BayesianRidge(compute_score=True)
    clf.fit(train_set_x, np.ravel(train_set_y))

    # int() truncates toward zero; this is kept as-is on purpose.
    predict_set_y = [int(y) for y in clf.predict(predict_set_x)]

    dataframe = pd.DataFrame({
        'id': range(len(predict_set_y)),
        'predict_set_y': np.ravel(predict_set_y),
        'target_y': target_y,
    })

    # Single-argument call form prints identically on Python 2 and 3.
    print(np.sum(np.abs(np.ravel(predict_set_y) - target_y)))

    dataframe.to_csv("./regression/BR" + str(train_percent) + ".csv",
                     index=False, sep=',')
def LASSO(train_percent):
    """Pick a Lasso alpha by grid search, then write rounded predictions.

    One hundred alphas evenly spaced over [0.5, 1] are tried. For each, a
    ``Lasso`` model (no intercept, ``max_iter=1000``) is fitted on the
    training data and scored with ``score()`` on the prediction data; the
    alpha whose score is closest to 1.0 wins, earlier candidates winning
    ties. Note that the data used to choose alpha is the same data that is
    predicted and written out afterwards.

    A model is then refitted with the winning alpha, its predictions are
    rounded with ``np.round`` and written to
    ``./regression/LASSO<train_percent>.csv`` with the columns ``id``,
    ``predict_set_y`` and ``target_y``. The ``./regression`` directory must
    already exist.
    """
    train_set_x, train_set_y, predict_set_x, target_y = data_input.getdata(
        "processed.cleveland.data", train_percent)

    def fit_lasso(alpha):
        # Build and train one model with the fixed hyper-parameters.
        model = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000)
        model.fit(train_set_x, train_set_y)
        return model

    # The initial score of 10.0 is far from 1.0, so the first finite score
    # in the grid replaces it.
    chosen_alpha = 1
    chosen_score = 10.0
    for candidate in np.linspace(0.5, 1, 100):
        score = fit_lasso(candidate).score(predict_set_x, target_y)
        if (score - 1.0) ** 2 < (chosen_score - 1.0) ** 2:
            chosen_alpha, chosen_score = candidate, score

    final_model = fit_lasso(chosen_alpha)
    rounded = np.round(final_model.predict(predict_set_x))

    out_path = "./regression/LASSO" + str(train_percent) + ".csv"
    table = pd.DataFrame({
        'id': range(len(rounded)),
        'predict_set_y': np.ravel(rounded),
        'target_y': np.ravel(target_y),
    })
    table.to_csv(out_path, index=False, sep=',')