def train_classifier(fmat, labels, method, c_param, nu_param, learn_rate,
                     n_estimators):
    """Build the classifier selected by ``method`` and train it.

    Only the requested estimator is instantiated, so a hyper-parameter that
    is irrelevant to ``method`` can no longer make the call fail.

    Parameters:
        fmat: training samples handed to ``learn``/``fit``.
        labels: training labels handed to ``learn``/``fit``.
        method: one of ``'c_svc'``, ``'nu_svc_linear'``, ``'nu_svc_sigmoid'``,
            ``'c_svc_prob'``, ``'nu_svc_linear_prob'``,
            ``'nu_svc_sigmoid_prob'``, ``'lr'`` or ``'gboost'``.
        c_param: ``C`` passed to the C-SVC variants.
        nu_param: ``nu`` passed to the nu-SVC variants.
        learn_rate: ``learning_rate`` passed to the gradient boosting model.
        n_estimators: ``n_estimators`` passed to the gradient boosting model.

    Returns:
        The trained classifier object.

    Raises:
        KeyError: if ``method`` is not one of the names listed above.
    """
    # Factories instead of instances: nothing is constructed until we know
    # which estimator is actually wanted.
    factories = {
        'c_svc': lambda: mlpy.LibSvm(C=c_param),
        'nu_svc_linear': lambda: mlpy.LibSvm('nu_svc', 'linear', nu=nu_param),
        'nu_svc_sigmoid': lambda: mlpy.LibSvm('nu_svc', 'sigmoid',
                                              nu=nu_param),
        'c_svc_prob': lambda: mlpy.LibSvm(probability=True, C=c_param),
        'nu_svc_linear_prob': lambda: mlpy.LibSvm(
            'nu_svc', 'linear', nu=nu_param, probability=True),
        'nu_svc_sigmoid_prob': lambda: mlpy.LibSvm(
            'nu_svc', 'sigmoid', nu=nu_param, probability=True),
        'lr': lambda: mlpy.LibLinear(),
        'gboost': lambda: GradientBoostingClassifier(
            learning_rate=learn_rate, n_estimators=n_estimators),
    }

    if method not in factories:
        # Same exception type as the plain dict lookup used to raise, but
        # with a message that tells the caller what is accepted.
        raise KeyError("unknown classifier method %r; expected one of: %s"
                       % (method, ", ".join(sorted(factories))))

    clas = factories[method]()

    # mlpy estimators train through learn(); the gradient boosting model
    # exposes fit() instead.
    if hasattr(clas, 'learn'):
        clas.learn(fmat, labels)
    else:
        clas.fit(fmat, labels)
    return clas
def logisticRegressionLearn(filename):
    """Fit an L1-regularised logistic regression and dump its weights.

    The samples and label rows come from ``parseFile(filename, False)``;
    the first entry of each label row is used as that sample's label.  The
    learned weight vector is written to ``getFilename() + ".txt"``.

    Returns:
        The path of the results file that was written.
    """
    samples, label_rows = parseFile(filename, False)
    targets = [row[0] for row in label_rows]

    model = mlpy.LibLinear(solver_type='l1r_lr')
    model.learn(samples, targets)
    learned = model.w()

    # write results to a file
    out_path = getFilename() + ".txt"
    report_line = "".join(["The weights are : ", str(learned), "\n"])
    with open(out_path, 'w+') as out:
        out.write(report_line)
        out.flush()

    return out_path
def main(xfile, yfile, algorithm=""):
    """Grid-search LibLinear over class weight and C, logging accuracies.

    Loads the samples from ``xfile`` (space separated) and the labels from
    ``yfile`` (comma separated), shuffles them together, then uses the first
    6000 rows for training and the following 4000 rows for testing.  For each
    solver type, every combination of class-1 weight (0.5 .. 4.5) and
    ``C`` (0.1 .. 1.9) is trained and passed to ``printStats`` for both the
    training and the test split; results go to ``<solver>_Train.csv`` and
    ``<solver>_Test.csv``.

    Parameters:
        xfile: path of the feature matrix file.
        yfile: path of the label file.
        algorithm: currently not consulted; the solver list below is fixed.
    """
    # Open the inputs ourselves so the handles are closed deterministically.
    with open(xfile, "rb") as xhandle:
        x = np.loadtxt(xhandle, delimiter=" ")
    with open(yfile, "rb") as yhandle:
        y = np.loadtxt(yhandle, delimiter=",")

    x, y = shuffle_in_unison_inplace(x, y)

    tr_size = 6000
    te_size = 4000
    split_end = tr_size + te_size
    xtrain, xtest = x[:tr_size], x[tr_size:split_end]
    ytrain, ytest = y[:tr_size], y[tr_size:split_end]

    # NOTE(review): the `algorithm` argument was always overwritten by this
    # hard-coded list; it is left unused here to keep existing behaviour.
    # Confirm whether callers expect it to select the solver.
    solver_types = ['l1r_l2loss_svc']
    header = "Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids\n"

    for solver_type in solver_types:
        print(solver_type)
        test_path = str(solver_type) + '_Test.csv'
        train_path = str(solver_type) + '_Train.csv'
        # Context managers guarantee both reports are closed even if a
        # training run raises half-way through the grid.
        with open(test_path, 'w') as ftest, open(train_path, 'w') as ftrain:
            ftest.write(header)
            ftrain.write(header)
            for i in range(1, 10):
                # Weight applied to class 1.  The same value is reported in
                # the "Weight" column; previously i * .2 was logged while
                # i * .5 was used for training.
                class_weight = i * .5
                for b in range(1, 20):
                    beta = .1 * b
                    solver = mlpy.LibLinear(solver_type=solver_type,
                                            C=beta,
                                            eps=0.01,
                                            weight={0: 1, 1: class_weight})
                    solver.learn(xtrain, ytrain)

                    yhat = solver.pred(xtrain)
                    printStats(ytrain, yhat, solver_type, class_weight, beta,
                               "train errors", ftrain)

                    yhat = solver.pred(xtest)
                    printStats(ytest, yhat, solver_type, class_weight, beta,
                               "test errors", ftest)
def train_classifier(fmat, labels, method):
    """Build the default-configured classifier named by ``method`` and train it.

    Only the requested estimator is instantiated; the other candidates are
    never constructed.

    Parameters:
        fmat: training samples handed to ``learn``/``fit``.
        labels: training labels handed to ``learn``/``fit``.
        method: one of ``'c_svc'``, ``'nu_svc_linear'``, ``'nu_svc_sigmoid'``,
            ``'c_svc_prob'``, ``'nu_svc_linear_prob'``,
            ``'nu_svc_sigmoid_prob'``, ``'lr'`` or ``'gboost'``.

    Returns:
        The trained classifier object.

    Raises:
        KeyError: if ``method`` is not one of the names listed above.
    """
    # Factories instead of instances: construction is deferred until the
    # wanted estimator is known.
    factories = {
        'c_svc': lambda: mlpy.LibSvm(),
        'nu_svc_linear': lambda: mlpy.LibSvm('nu_svc', 'linear'),
        'nu_svc_sigmoid': lambda: mlpy.LibSvm('nu_svc', 'sigmoid'),
        'c_svc_prob': lambda: mlpy.LibSvm(probability=True),
        'nu_svc_linear_prob': lambda: mlpy.LibSvm('nu_svc', 'linear',
                                                  probability=True),
        'nu_svc_sigmoid_prob': lambda: mlpy.LibSvm('nu_svc', 'sigmoid',
                                                   probability=True),
        'lr': lambda: mlpy.LibLinear(),
        'gboost': lambda: GradientBoostingClassifier(),
    }

    if method not in factories:
        # Same exception type as the plain dict lookup used to raise, but
        # with a message that lists the accepted names.
        raise KeyError("unknown classifier method %r; expected one of: %s"
                       % (method, ", ".join(sorted(factories))))

    clas = factories[method]()

    # mlpy estimators train through learn(); the gradient boosting model
    # exposes fit() instead.
    if hasattr(clas, 'learn'):
        clas.learn(fmat, labels)
    else:
        clas.fit(fmat, labels)
    return clas
except: print "Error in", modelName directory = raw_input("What directory are the XML files located:\n") regexParse = raw_input("How would you like to parse the words, leave it blank if you would like to parse by whitespace:\n") if(regexParse == ""): regexParse = None [vocab,indexToWord,fullDataPoints] = parseDataPoints(directory,regexParse) [X,Y] = packageData(fullDataPoints,regexParse,vocab, indexToWord) testModel(mlpy.Perceptron(alpha=0.1, thr=0.05, maxiters=1000), X, Y, "Perceptron") testModel(mlpy.ElasticNetC(lmb=0.01, eps=0.001),X,Y, "ElasticNet") testModel(mlpy.LibLinear(solver_type='l2r_l2loss_svc_dual', C=1), X, Y, "LibLinear") testModel(mlpy.DLDA(delta=0.1), X, Y, "DLDA") testModel(mlpy.Golub(), X, Y, "Golub") testModel(mlpy.Parzen(),X,Y,"Parzen") testModel(mlpy.KNN(2),X,Y,"KNN") testModel(mlpy.ClassTree(),X,Y,"Classification Tree") testModel(mlpy.MaximumLikelihoodC(),X,Y,"Maximum Likelihood Classifer")
def getYhat(xtrain, ytrain, solver):
    """Rebind ``solver`` to a fresh L2-regularised logistic regression model.

    As visible here, the incoming ``solver`` argument is discarded and
    replaced by a new ``mlpy.LibLinear`` (``l2r_lr``, ``C=1``, ``eps=0.01``,
    empty class-weight dict).  ``xtrain`` and ``ytrain`` are not used and
    nothing is returned.
    """
    # NOTE(review): the body looks truncated -- presumably the model should be
    # trained on (xtrain, ytrain) and predictions returned; confirm against
    # the full source before relying on this function.
    solver = mlpy.LibLinear(solver_type='l2r_lr', C=1, eps=0.01, weight={})
class training_model:
    """Train, evaluate and save LibLinear models for CDS / NCDS prediction.

    ``model_type`` selects what ``make_model`` does: any spelling listed in
    ``COMBINED_TYPES`` trains two base models (feature sets ``"u"`` and
    ``"c"``) plus a third model stacked on their class-1 probabilities;
    every other value trains a single model on that feature set.

    ``datasets`` must provide ``training()`` and ``testing()``; each item
    they yield is passed to ``select_fet2`` as ``(item[0], item[1], kind)``,
    and the last column of the resulting row is used as the label.
    """

    # Default LibLinear settings.  classifier_type() overrides them per
    # instance and rebuilds the models.
    solver_type_s = "l1r_l2loss_svc"
    C_v = 5
    eps_v = 0.001
    weight_d = {}
    fet_names = []

    # Every model_type spelling that means "stacked / combined model".
    # Shared by make_model() and save() so the two can never disagree.
    COMBINED_TYPES = ("Combined", "comb", "Comb", "overall", "Overall",
                      "o", "O")

    def __init__(self, datasets, model_type):
        """Store the data source and model type, and create untrained models."""
        self.model_type = model_type
        self.datasets = datasets
        # Per-instance list so instances do not share one mutable object.
        self.fet_names = []
        self._build_models()

    def _build_models(self):
        """Create three fresh, untrained LibLinear models from the settings."""

        def fresh():
            return mlpy.LibLinear(solver_type=self.solver_type_s,
                                  C=self.C_v,
                                  eps=self.eps_v,
                                  weight=self.weight_d)

        # Instance attributes: models are no longer shared between
        # instances of this class.
        self.logit1 = fresh()
        self.logit2 = fresh()
        self.logit3 = fresh()

    def _is_combined(self):
        """Return True when ``model_type`` selects the stacked model."""
        return self.model_type in self.COMBINED_TYPES

    def classifier_type(self, solver, C_val, eps_val, weight_dict):
        """Change the LibLinear settings and rebuild the models with them.

        Any training done so far is discarded, so call this before
        ``make_model``.
        """
        self.solver_type_s = solver
        self.C_v = C_val
        self.eps_v = eps_val
        self.weight_d = weight_dict
        # Without this the new settings would never reach the models.
        self._build_models()

    def _features(self, kind):
        """Return ``(training, testing)`` feature arrays for feature set ``kind``."""
        training = np.array([
            select_fet2(i[0], i[1], kind) for i in self.datasets.training()
        ])
        testing = np.array([
            select_fet2(i[0], i[1], kind) for i in self.datasets.testing()
        ])
        return training, testing

    def _stack(self, rows_1, rows_2):
        """Build stacked rows: [P1(class 1), P2(class 1), label] as floats."""
        # NOTE(review): pred_probability is documented by mlpy for logistic
        # regression solvers; the default solver here is an SVM variant --
        # confirm the probabilities are meaningful for the chosen solver.
        stacked = [[
            self.logit1.pred_probability(rows_1[i][:-1])[1],
            self.logit2.pred_probability(rows_2[i][:-1])[1],
            rows_1[i][-1]
        ] for i in range(len(rows_1))]
        return np.array(stacked, dtype=float)

    def _report(self, model, rows, cds_label, ncds_label):
        """Print overall, CDS-only and NCDS-only accuracy of ``model`` on ``rows``."""
        # Predict each row once and reuse the result for all three figures.
        predicted = [model.pred(row[:-1]) for row in rows]
        actual = [row[-1] for row in rows]
        pairs = list(zip(actual, predicted))

        cds_vals = [a for a, _ in pairs if a == cds_label]
        cds_pred = [p for a, p in pairs if a == cds_label]
        ncds_vals = [a for a, _ in pairs if a == ncds_label]
        ncds_pred = [p for a, p in pairs if a == ncds_label]

        print("=-" * 5 + "Accuracies" + "=-" * 5)
        print("Overall accuracy of predictor : %s" % mlpy.accuracy(
            actual, predicted))
        print("Accuracy of predicting CDS correctly : %s" % mlpy.accuracy(
            cds_vals, cds_pred))
        print("Accuracy of predicting NCDS correctly : %s" % mlpy.accuracy(
            ncds_vals, ncds_pred))

    def make_model(self):
        """Train the model(s) for ``model_type`` and print test accuracies."""
        if not self._is_combined():
            training_fet, testing_fet = self._features(self.model_type)
            self.logit1.learn(training_fet[:, :-1], training_fet[:, -1])
            # NOTE(review): labels are compared as the strings '1' / '0'
            # here, which presumably relies on select_fet2 returning string
            # rows -- confirm.
            self._report(self.logit1, testing_fet, '1', '0')
            return

        training_fet_1, testing_fet_1 = self._features("u")
        self.logit1.learn(training_fet_1[:, :-1], training_fet_1[:, -1])

        training_fet_2, testing_fet_2 = self._features("c")
        self.logit2.learn(training_fet_2[:, :-1], training_fet_2[:, -1])

        # Second stage: learn on the two base models' class-1 probabilities.
        comb_training = self._stack(training_fet_1, training_fet_2)
        self.logit3.learn(comb_training[:, :-1], comb_training[:, -1])

        comb_testing = self._stack(testing_fet_1, testing_fet_2)
        # Stacked rows are float arrays, so labels are numeric here.
        self._report(self.logit3, comb_testing, 1, 0)

    def save(self):
        """Prompt for a model name, create its directory and save the model(s).

        Raises:
            OSError: if the directory cannot be created for any reason other
                than already existing.
        """
        import errno

        model_name = raw_input("Model name")
        mydir = "%s/%s" % (os.getcwd(), model_name)
        try:
            os.makedirs(mydir)
        except OSError as e:
            # An existing directory is fine (the old retry loop spun forever
            # in that case); anything else is a real error.
            if e.errno != errno.EEXIST:
                raise

        if not self._is_combined():
            # NOTE(review): the single model is written to the current
            # directory under the model_type name, not into `mydir` --
            # kept as-is; confirm this is intended.
            self.logit1.save_model(self.model_type)
        else:
            self.logit1.save_model("%s/1_Pos_utr_model_%s" %
                                   (model_name, self.model_type))
            self.logit2.save_model("%s/2_Comp_model_%s" %
                                   (model_name, self.model_type))
            self.logit3.save_model("%s/3_Overall_model_%s" %
                                   (model_name, self.model_type))
# Classification tree: train, count correct test predictions, log the result.
tree = mlpy.ClassTree(minsize=10)
tree.learn(basic.training_data, basic.training_label)
classified = 0
for i, expected in enumerate(basic.testing_label):
    if int(expected) == int(tree.pred(basic.testing_data[i])):
        classified += 1
row = (datasets[d][0], "GINI", k, size, classified)
fd.write("%s,%s,%d,%d,%d\n" % row)

# Maximum likelihood classifier: same evaluation as above.
ml = mlpy.MaximumLikelihoodC()
ml.learn(basic.training_data, basic.training_label)
classified = 0
for i, expected in enumerate(basic.testing_label):
    if int(expected) == int(ml.pred(basic.testing_data[i])):
        classified += 1
row = (datasets[d][0], "MACL", k, size, classified)
fd.write("%s,%s,%d,%d,%d\n" % row)

# Multi-class linear SVM (Crammer-Singer solver): same evaluation as above.
svm = mlpy.LibLinear(solver_type='mcsvm_cs', C=0.01)
svm.learn(basic.training_data, basic.training_label)
classified = 0
for i, expected in enumerate(basic.testing_label):
    if int(expected) == int(svm.pred(basic.testing_data[i])):
        classified += 1
row = (datasets[d][0], "SVM", k, size, classified)
fd.write("%s,%s,%d,%d,%d\n" % row)

fd.close()