def runRegressionModelTest(featureSet, valueVector, model):
    """Fit and return the regressor selected by ``model``.

    The banner for the chosen model is printed before fitting starts.

    NOTE(review): this file defines ``runRegressionModelTest`` more than
    once; the last definition is the one left bound after import. Confirm
    which variant callers are meant to reach.

    Args:
        featureSet: training inputs, passed unchanged to the fit helper.
        valueVector: training targets, passed unchanged to the fit helper.
        model: numeric code choosing the helper (1, 2, 4, 5, 6, 7, 8, 9
            or 10; there is no entry for 3).

    Returns:
        Whatever the selected ``*_fit`` helper returns, or the integer
        ``0`` when ``model`` matches no known code.
    """

    def fit_and_dump_sgd():
        # Model 4 is the only choice that also writes the estimator to disk.
        fitted = SGD_r_fit(featureSet, valueVector)
        joblib.dump(fitted, 'sgd.pkl')
        return fitted

    # (code, banner, fitter). The fitters are wrapped in callables so a
    # helper is only looked up and run when its code is actually selected.
    choices = (
        (1, "\nLINEAR REGRESSION\n",
         lambda: linear_regression_fit(featureSet, valueVector)),
        (2, "\nSVR\n",
         lambda: SVR_fit(featureSet, valueVector)),
        (4, "\nSTOCHASTIC\n",
         fit_and_dump_sgd),
        (5, "\nNEIGHBOURS\n",
         lambda: neighbours_fit(featureSet, valueVector)),
        (6, "\nLOGISTIC\n",
         lambda: log_regression_fit(featureSet, valueVector)),
        (7, "\nBAYESIANRIDGE\n",
         lambda: bayesian_ridge_fit(featureSet, valueVector)),
        (8, "\nRIDGE\n",
         lambda: ridge_fit(featureSet, valueVector)),
        (9, "\nELASTIC NET\n",
         lambda: elastic_fit(featureSet, valueVector)),
        (10, "\nLASSO\n",
         lambda: lasso_fit(featureSet, valueVector)),
    )

    for code, banner, fitter in choices:
        if model == code:
            print(banner)
            return fitter()

    print('Invalid choice\n')
    return 0
def runRegressionModelTest(featureSet, valueVector, X_test, y_test, model):
    """Fit the regressor selected by ``model`` and return a text report.

    The report contains the model banner, the shuffle-split
    cross-validation scores on the training data, their mean, and the
    mean squared error and R^2 computed on ``X_test`` / ``y_test``.

    NOTE(review): this file defines ``runRegressionModelTest`` more than
    once; the last definition is the one left bound after import. Confirm
    which variant callers are meant to reach.

    Args:
        featureSet: training inputs; ``featureSet.shape[0]`` is used as the
            sample count for the cross-validation splitter.
        valueVector: training targets.
        X_test: held-out inputs passed to ``clf.predict``.
        y_test: held-out targets compared against the predictions.
        model: numeric code 1-7 choosing the estimator. Codes 3 and 4
            also dump the fitted estimator to 'elm.pkl' / 'sgd.pkl'.

    Returns:
        The report string. For an unrecognised ``model`` the report is
        just 'Invalid choice\n'.
    """
    output = ''
    if model == 1:
        output += "\nLINEAR REGRESSION\n"
        clf = linear_regression_fit(featureSet, valueVector)
    elif model == 2:
        output += "\nSVR\n"
        clf = SVR_fit(featureSet, valueVector)
    elif model == 3:
        output += "\nEXTREME LEARNING MACHINE\n"
        clf = elm.ELMRegressor()
        clf.fit(featureSet, valueVector)
        joblib.dump(clf, 'elm.pkl')
    elif model == 4:
        output += "\nSTOCHASTIC\n"
        clf = SGD_r_fit(featureSet, valueVector)
        joblib.dump(clf, 'sgd.pkl')
    elif model == 5:
        output += "\nNEIGHBOURS\n"
        clf = neighbours_fit(featureSet, valueVector)
    elif model == 6:
        output += "\nLOGISTIC\n"
        clf = log_regression_fit(featureSet, valueVector)
    elif model == 7:
        output += "\nBAYESIANRIDGE\n"
        clf = bayesian_ridge_fit(featureSet, valueVector)
    else:
        # Nothing was fitted, so there is nothing to score. Returning here
        # avoids calling .predict on the old integer placeholder, which
        # raised AttributeError instead of reporting the bad choice.
        output += 'Invalid choice\n'
        return output

    # Predict once and reuse the result for both hold-out metrics.
    predictions = clf.predict(X_test)
    score = mean_squared_error(y_test, predictions)
    score2 = r2_score(y_test, predictions)

    cv = cross_validation.ShuffleSplit(featureSet.shape[0], n_iter=50,
                                       test_size=0.25, random_state=0)
    a = cross_validation.cross_val_score(clf, featureSet, valueVector, cv=cv)
    # Only strictly positive fold scores are reported and averaged.
    a = a[a > 0]

    output += 'Cross V score: :' + ' '.join("%10.3f" % x for x in a) + '\n'
    output += ('Mean Score: %.3f\n' % np.mean(a))
    output += ('Mean Squared Error: %.3f\n' % score)
    output += ('R^2: %.3f\n' % score2)
    return output
def runRegressionModelTest(featureSet, valueVector, X_test, y_test, model):
    """Train the regressor chosen by ``model`` and describe how it scored.

    Args:
        featureSet: training inputs; its first dimension
            (``featureSet.shape[0]``) sizes the cross-validation splitter.
        valueVector: training targets.
        X_test: held-out inputs the fitted estimator predicts on.
        y_test: held-out targets the predictions are scored against.
        model: numeric code 1-7 selecting the estimator. Codes 3 and 4
            additionally dump the fitted estimator to 'elm.pkl' and
            'sgd.pkl' respectively.

    Returns:
        A multi-line string: the model banner, the cross-validation scores
        and their mean, then the hold-out mean squared error and R^2. When
        ``model`` is not a known code the string is 'Invalid choice\n'.
    """
    if model == 1:
        banner = "\nLINEAR REGRESSION\n"
        clf = linear_regression_fit(featureSet, valueVector)
    elif model == 2:
        banner = "\nSVR\n"
        clf = SVR_fit(featureSet, valueVector)
    elif model == 3:
        banner = "\nEXTREME LEARNING MACHINE\n"
        clf = elm.ELMRegressor()
        clf.fit(featureSet, valueVector)
        joblib.dump(clf, 'elm.pkl')
    elif model == 4:
        banner = "\nSTOCHASTIC\n"
        clf = SGD_r_fit(featureSet, valueVector)
        joblib.dump(clf, 'sgd.pkl')
    elif model == 5:
        banner = "\nNEIGHBOURS\n"
        clf = neighbours_fit(featureSet, valueVector)
    elif model == 6:
        banner = "\nLOGISTIC\n"
        clf = log_regression_fit(featureSet, valueVector)
    elif model == 7:
        banner = "\nBAYESIANRIDGE\n"
        clf = bayesian_ridge_fit(featureSet, valueVector)
    else:
        # Previously execution continued with an integer placeholder and
        # crashed on .predict; report the bad selection instead.
        return 'Invalid choice\n'

    # A single prediction pass feeds both hold-out metrics.
    predicted = clf.predict(X_test)
    mse = mean_squared_error(y_test, predicted)
    r2 = r2_score(y_test, predicted)

    splitter = cross_validation.ShuffleSplit(featureSet.shape[0], n_iter=50,
                                             test_size=0.25, random_state=0)
    fold_scores = cross_validation.cross_val_score(clf, featureSet,
                                                   valueVector, cv=splitter)
    # Folds scoring zero or below are left out of the listing and the mean.
    fold_scores = fold_scores[fold_scores > 0]

    report = [
        banner,
        'Cross V score: :' + ' '.join("%10.3f" % x for x in fold_scores) + '\n',
        'Mean Score: %.3f\n' % np.mean(fold_scores),
        'Mean Squared Error: %.3f\n' % mse,
        'R^2: %.3f\n' % r2,
    ]
    return ''.join(report)
def runClassificationTest(X, y, Xt, yt, model, labs):
    """Fit the classifier selected by ``model`` and report its scores.

    NOTE(review): this file defines ``runClassificationTest`` more than
    once; the last definition is the one left bound after import. Confirm
    which variant callers are meant to reach.

    Args:
        X: training inputs; ``X.shape[0]`` sizes the cross-validation
            splitter.
        y: training labels.
        Xt: held-out inputs passed to ``clf.predict``.
        yt: held-out labels compared against the predictions.
        model: numeric code 1-6 choosing the fit helper.
        labs: forwarded to ``f1_score`` as its ``labels`` argument.

    Returns:
        A ``(clf, output)`` tuple: the fitted classifier and a text report
        with the banner, cross-validation scores, accuracy and F1 score.
        For an unrecognised ``model`` the tuple is
        ``(0, 'Invalid choice\n')``.
    """
    output = ''
    clf = 0
    if model == 1:
        output += "\nSVC\n"
        clf = svc_fit(X, y)
    elif model == 2:
        output += '\nLinearSVC\n'
        clf = linear_svc_fit(X, y)
    elif model == 3:
        output += '\nStochasticGradientDescent\n'
        clf = SGD_c_fit(X, y)
    elif model == 4:
        output += '\nKNearestNeighbours\n'
        clf = nearest_fit(X, y)
    elif model == 5:
        output += '\nRandomForest\n'
        clf = random_forest_fit(X, y)
    elif model == 6:
        output += '\nLogistic\n'
        clf = log_regression_fit(X, y)
    else:
        # Without this branch the integer placeholder reached .predict and
        # raised AttributeError. Report the bad selection the same way the
        # regression helpers in this file do.
        output += 'Invalid choice\n'
        return clf, output

    # Predict once and reuse the result for both hold-out metrics.
    predictions = clf.predict(Xt)
    accuracy = accuracy_score(yt, predictions)
    f1 = f1_score(yt, predictions, labels=labs)

    cv = cross_validation.ShuffleSplit(X.shape[0], n_iter=50,
                                       test_size=0.3, random_state=0)
    a = cross_validation.cross_val_score(clf, X, y, cv=cv)
    # Only strictly positive fold scores are listed.
    a = a[a > 0]

    output += 'Cross V score: :' + ' '.join("%10.3f" % x for x in a) + '\n'
    output += "\n\nAccuracy " + str(accuracy)
    output += "\nF1 Score " + str(f1)
    return clf, output
def runClassificationTest(X, y, Xt, yt, model, labs):
    """Train the classifier chosen by ``model`` and summarise its scores.

    Args:
        X: training inputs; the splitter is sized from ``X.shape[0]``.
        y: training labels.
        Xt: held-out inputs the fitted classifier predicts on.
        yt: held-out labels the predictions are scored against.
        model: numeric code 1-6 selecting the fit helper.
        labs: passed through as the ``labels`` argument of ``f1_score``.

    Returns:
        ``(clf, output)``: the fitted classifier and a report string made
        of the model banner, the cross-validation scores, the accuracy and
        the F1 score. If ``model`` is not a known code, nothing is fitted
        and ``(0, 'Invalid choice\n')`` is returned.
    """
    if model == 1:
        banner = "\nSVC\n"
        clf = svc_fit(X, y)
    elif model == 2:
        banner = '\nLinearSVC\n'
        clf = linear_svc_fit(X, y)
    elif model == 3:
        banner = '\nStochasticGradientDescent\n'
        clf = SGD_c_fit(X, y)
    elif model == 4:
        banner = '\nKNearestNeighbours\n'
        clf = nearest_fit(X, y)
    elif model == 5:
        banner = '\nRandomForest\n'
        clf = random_forest_fit(X, y)
    elif model == 6:
        banner = '\nLogistic\n'
        clf = log_regression_fit(X, y)
    else:
        # An unknown code used to fall through and fail on .predict of the
        # integer placeholder. Hand back that placeholder with a message,
        # matching the 'Invalid choice' convention used elsewhere here.
        return 0, 'Invalid choice\n'

    # One prediction pass serves both metrics.
    predicted = clf.predict(Xt)
    accuracy = accuracy_score(yt, predicted)
    f1 = f1_score(yt, predicted, labels=labs)

    splitter = cross_validation.ShuffleSplit(X.shape[0], n_iter=50,
                                             test_size=0.3, random_state=0)
    fold_scores = cross_validation.cross_val_score(clf, X, y, cv=splitter)
    # Folds scoring zero or below are omitted from the listing.
    fold_scores = fold_scores[fold_scores > 0]

    report = [
        banner,
        'Cross V score: :' + ' '.join("%10.3f" % x for x in fold_scores) + '\n',
        "\n\nAccuracy " + str(accuracy),
        "\nF1 Score " + str(f1),
    ]
    return clf, ''.join(report)
def train_classifier(X, y, model, featureset, data_source):
    """Fit and return the classifier selected by ``model``.

    Hyper-parameters for the SVC and LinearSVC choices come from a
    hard-coded table with one row per feature set and one table per data
    source. The other models ignore the table.

    Args:
        X: training inputs, passed unchanged to the fit helper.
        y: training labels, passed unchanged to the fit helper.
        model: numeric code 1-6 choosing the fit helper.
        featureset: row index (0-20) into the hyper-parameter table.
        data_source: 1, 2 (TOY) or 3 (SLASHDOT). Any other value leaves
            the table at all zeros.

    Returns:
        Whatever the selected ``*_fit`` helper returns, or the integer
        ``0`` (after printing 'Invalid choice') when ``model`` matches no
        known code.
    """
    kernel = 'rbf'

    # Columns: [C for svc_fit, gamma for svc_fit, C for linear_svc_fit].
    # The slice assignments below require exactly 21 rows of 3 values, so a
    # malformed table fails loudly instead of being silently truncated.
    parameters = np.zeros([21, 3])
    if data_source == 1:
        parameters[:] = [
            [10000, 0.001, 10000],  # Manual
            [10, 0.01, 0.1],  # Unigram
            [10, 0.001, 0.1],
            [10, 0.1, 1],
            [100000, 0.001, 0.1],  # Bigram
            [100, 0.1, 10],
            [100, 0.001, 0.1],  # Trigram
            [10, 0.1, 10000],
            [1000, 0.001, 1],  # Bigram only
            [10, 0.1, 100],
            [0.01, 0.1, 1],  # Trigram only
            [0.01, 10, 100],
            [10, 0.0001, 0.001],  # Character Ngram
            [10, 1, 10],
            [10, 0.001, 0.01],  # Character Skipgram
            [10000, 0.0001, 1],
            [1000000, 10.0, 0.001],  # LDA
            [10000, 0.0001, 1],  # Word2Vec TFIDF
            [10000000, 0.0001, 10000],  # Word2Vec BOC
            [1, 0.01, 10],  # Doc2Vec
            [10, 0.001, 0.01],  # Google Word2vec TFIDF
        ]
    elif data_source == 2:  # TOY
        parameters[:] = [
            [0.1, 0.1, 10],  # Manual
            [100, 0.01, 1],  # Unigram
            [100, 0.01, 1],
            [10, 1, 10],
            [10, 0.01, 0.1],  # Bigram
            [100, 0.1, 1000],
            [10, 0.01, 10],  # Trigram
            [10, 0.1, 10000],
            [1000, 0.1, 1],  # Bigram only
            [10000, 0.1, 10000],
            [10, 0.1, 1],  # Trigram only
            [10, 10, 1000],
            [10, 0.001, 0.1],  # Character Ngram
            [100, 0.1, 100],
            [100, 0.001, 0.1],  # Character Skipgram
            [10, 1, 10],
            [10000000, 1, 1],  # LDA
            [10, 0.01, 0.01],  # Word2Vec TFIDF
            [1000, 0.0001, 0.1],  # Word2Vec BOC
            [100, 0.0001, 0.01],  # Doc2Vec
            [10, 0.001, 1],  # Google Word2vec TFIDF
        ]
    elif data_source == 3:  # SLASHDOT
        parameters[:] = [
            [1000000, 0.0001, 1000],  # Manual
            [1000, 0.1, 1000],  # Unigram
            [1000, 0.1, 1],
            [1000, 1, 100],
            [100, 0.001, 1000],  # Bigram
            [1, 1, 10000],
            [100, 0.001, 10000],  # Trigram
            [1, 0.1, 10000],
            [1000, 0.001, 1],  # Bigram only
            [0.1, 1, 100],
            [10, 0.1, 1],  # Trigram only
            [100, 1, 1000],
            [1000, 0.001, 0.01],  # Character Ngram
            [10, 1, 100],
            [10, 0.001, 0.1],  # Character Skipgram
            [10, 1, 10],
            [1000000, 0.01, 10],  # LDA
            [10, 0.01, 100],  # Word2Vec TFIDF
            [1, 0.001, 0.001],  # Word2Vec BOC
            [1000000, 0.0001, 1000],  # Doc2Vec
            [10000000, 0.0001, 10000],  # Google Word2vec TFIDF
        ]

    selected = parameters[featureset]
    C = selected[0]
    gamma = selected[1]
    Lc = selected[2]

    if model == 1:
        print("\nSVC\n")
        clf = svc_fit(X, y, kernel=kernel, C=C, gamma=gamma)
    elif model == 2:
        print('\nLinearSVC\n')
        clf = linear_svc_fit(X, y, C=Lc)
    elif model == 3:
        print('\nStochasticGradientDescent\n')
        clf = SGD_c_fit(X, y)
    elif model == 4:
        print('\nKNearestNeighbours\n')
        clf = nearest_fit(X, y)
    elif model == 5:
        print('\nRandomForest\n')
        clf = random_forest_fit(X, y)
    elif model == 6:
        print('\nLogistic\n')
        clf = log_regression_fit(X, y)
    else:
        # Previously ``clf`` was never bound on this path, so the return
        # below raised UnboundLocalError. Use the same message and 0
        # placeholder as the regression trainer in this file.
        print('Invalid choice\n')
        clf = 0
    return clf