Esempio n. 1
0
def train_classifier(fmat, labels, method, c_param, nu_param, learn_rate,
                     n_estimators):
    """Build the classifier selected by ``method`` and train it.

    Only the requested classifier is instantiated; the others are never
    constructed.

    Args:
        fmat: Training samples, passed unchanged to the classifier.
        labels: Training labels, passed unchanged to the classifier.
        method: Key selecting the model. One of 'c_svc', 'nu_svc_linear',
            'nu_svc_sigmoid', 'c_svc_prob', 'nu_svc_linear_prob',
            'nu_svc_sigmoid_prob', 'lr' or 'gboost'.
        c_param: ``C`` value for the 'c_svc' / 'c_svc_prob' models.
        nu_param: ``nu`` value for the 'nu_svc_*' models.
        learn_rate: ``learning_rate`` for the 'gboost' model.
        n_estimators: ``n_estimators`` for the 'gboost' model.

    Returns:
        The trained classifier object.

    Raises:
        KeyError: If ``method`` is not one of the supported keys.
    """
    # Zero-argument factories: looking a method up costs nothing, and the
    # model is only created once we know which one is wanted.
    factories = {
        'c_svc':
        lambda: mlpy.LibSvm(C=c_param),
        'nu_svc_linear':
        lambda: mlpy.LibSvm('nu_svc', 'linear', nu=nu_param),
        'nu_svc_sigmoid':
        lambda: mlpy.LibSvm('nu_svc', 'sigmoid', nu=nu_param),
        'c_svc_prob':
        lambda: mlpy.LibSvm(probability=True, C=c_param),
        'nu_svc_linear_prob':
        lambda: mlpy.LibSvm('nu_svc', 'linear', nu=nu_param,
                            probability=True),
        'nu_svc_sigmoid_prob':
        lambda: mlpy.LibSvm('nu_svc', 'sigmoid', nu=nu_param,
                            probability=True),
        'lr':
        lambda: mlpy.LibLinear(),
        'gboost':
        lambda: GradientBoostingClassifier(learning_rate=learn_rate,
                                           n_estimators=n_estimators),
    }
    clas = factories[method]()
    # Some models expose ``learn`` and others ``fit``; use whichever the
    # selected object provides.
    if hasattr(clas, 'learn'):
        clas.learn(fmat, labels)
    else:
        clas.fit(fmat, labels)
    return clas
Esempio n. 2
0
def logisticRegressionLearn(filename):
    """Train a LibLinear model (solver type 'l1r_lr') and write its weights.

    The data is read with ``parseFile(filename, False)``; the first item of
    every label entry is used as that sample's target. The learned weights
    are written to a text file whose base name comes from ``getFilename()``.

    Args:
        filename: Path handed to ``parseFile``.

    Returns:
        The name of the results file that was written.
    """
    samples, label_rows = parseFile(filename, False)
    # Keep only the first item of each label entry.
    targets = [row[0] for row in label_rows]

    model = mlpy.LibLinear(solver_type='l1r_lr')
    model.learn(samples, targets)
    learned_weights = model.w()

    # Write the results to a file.
    out_path = getFilename() + ".txt"
    with open(out_path, 'w+') as out:
        out.write("The weights are : " + str(learned_weights) + "\n")
        out.flush()
    return out_path
Esempio n. 3
0
def main(xfile, yfile, algorithm=""):
    """Grid-search LibLinear over class weight and C, logging statistics.

    Loads the sample matrix (space-delimited) and the label vector
    (comma-delimited), shuffles them together, takes the first 6000 rows for
    training and the next 4000 for testing, then trains one model for every
    (weight, beta) combination. Each model's train and test predictions are
    handed to ``printStats`` together with the matching output file,
    ``<solver>_Train.csv`` or ``<solver>_Test.csv``.

    Args:
        xfile: Path of the space-delimited sample file.
        yfile: Path of the comma-delimited label file.
        algorithm: Currently ignored; the solver list below is hard-coded.
            NOTE(review): the original loop overwrote this argument too, so
            that behaviour is preserved here.
    """
    # Close the input files as soon as they have been parsed.
    with open(xfile, "rb") as x_handle:
        x = np.loadtxt(x_handle, delimiter=" ")
    with open(yfile, "rb") as y_handle:
        y = np.loadtxt(y_handle, delimiter=",")

    x, y = shuffle_in_unison_inplace(x, y)

    tr_size = 6000
    te_size = 4000

    xtrain = x[0:tr_size]
    xtest = x[tr_size:(tr_size + te_size)]

    ytrain = y[0:tr_size]
    ytest = y[tr_size:(tr_size + te_size)]

    header = (
        "Weight beta Accuracy_on_winning_bids Accuracy_on_nonwinning_bids\n"
    )

    algorithms = ['l1r_l2loss_svc']
    for solver_type in algorithms:
        print(solver_type)
        # ``with`` guarantees both result files are closed even when training
        # or printStats raises part-way through the grid.
        with open(str(solver_type) + '_Test.csv', 'w') as ftest, \
                open(str(solver_type) + '_Train.csv', 'w') as ftrain:
            ftest.write(header)
            ftrain.write(header)
            for i in range(1, 10):
                # NOTE(review): the model is trained with class-1 weight
                # i * .5, while i * .2 is what gets passed to printStats.
                # The mismatch is kept as-is; confirm which value is intended.
                class_weights = {0: 1, 1: (i * .5)}
                reported_weight = .0 + i * .2
                for b in range(1, 20):
                    beta = 0 + .1 * b
                    solver = mlpy.LibLinear(solver_type=solver_type,
                                            C=beta,
                                            eps=0.01,
                                            weight=class_weights)
                    solver.learn(xtrain, ytrain)

                    yhat = solver.pred(xtrain)
                    printStats(ytrain, yhat, solver_type, reported_weight,
                               beta, "train errors", ftrain)

                    yhat = solver.pred(xtest)
                    printStats(ytest, yhat, solver_type, reported_weight,
                               beta, "test errors", ftest)
Esempio n. 4
0
def train_classifier(fmat, labels, method):
    """Build the default-configured classifier named by ``method`` and train it.

    Only the requested classifier is instantiated; the others are never
    constructed.

    Args:
        fmat: Training samples, passed unchanged to the classifier.
        labels: Training labels, passed unchanged to the classifier.
        method: Key selecting the model. One of 'c_svc', 'nu_svc_linear',
            'nu_svc_sigmoid', 'c_svc_prob', 'nu_svc_linear_prob',
            'nu_svc_sigmoid_prob', 'lr' or 'gboost'.

    Returns:
        The trained classifier object.

    Raises:
        KeyError: If ``method`` is not one of the supported keys.
    """
    # Zero-argument factories: the model is only created once we know which
    # one is wanted.
    factories = {
        'c_svc': lambda: mlpy.LibSvm(),
        'nu_svc_linear': lambda: mlpy.LibSvm('nu_svc', 'linear'),
        'nu_svc_sigmoid': lambda: mlpy.LibSvm('nu_svc', 'sigmoid'),
        'c_svc_prob': lambda: mlpy.LibSvm(probability=True),
        'nu_svc_linear_prob': lambda: mlpy.LibSvm('nu_svc', 'linear',
                                                  probability=True),
        'nu_svc_sigmoid_prob': lambda: mlpy.LibSvm('nu_svc', 'sigmoid',
                                                   probability=True),
        'lr': lambda: mlpy.LibLinear(),
        'gboost': lambda: GradientBoostingClassifier(),
    }
    clas = factories[method]()
    # Some models expose ``learn`` and others ``fit``; use whichever the
    # selected object provides.
    if hasattr(clas, 'learn'):
        clas.learn(fmat, labels)
    else:
        clas.fit(fmat, labels)
    return clas
Esempio n. 5
0
	except:
		print "Error in", modelName



# Ask where the XML files live and how words should be split.
directory = raw_input("What directory are the XML files located:\n")
# An empty answer becomes None, which the prompt describes as whitespace
# parsing.
regexParse = raw_input("How would you like to parse the words, leave it blank if you would like to parse by whitespace:\n") or None

vocab, indexToWord, fullDataPoints = parseDataPoints(directory, regexParse)
X, Y = packageData(fullDataPoints, regexParse, vocab, indexToWord)


# Run each classifier in turn on the same packaged data.
testModel(
    mlpy.Perceptron(alpha=0.1, thr=0.05, maxiters=1000),
    X, Y, "Perceptron")
testModel(
    mlpy.ElasticNetC(lmb=0.01, eps=0.001),
    X, Y, "ElasticNet")
testModel(
    mlpy.LibLinear(solver_type='l2r_l2loss_svc_dual', C=1),
    X, Y, "LibLinear")
testModel(mlpy.DLDA(delta=0.1), X, Y, "DLDA")
testModel(mlpy.Golub(), X, Y, "Golub")
testModel(mlpy.Parzen(), X, Y, "Parzen")
testModel(mlpy.KNN(2), X, Y, "KNN")
testModel(mlpy.ClassTree(), X, Y, "Classification Tree")
testModel(mlpy.MaximumLikelihoodC(), X, Y, "Maximum Likelihood Classifer")








Esempio n. 6
0
def getYhat(xtrain, ytrain, solver):
    """Create a LibLinear model with solver type 'l2r_lr', C=1 and eps=0.01.

    NOTE(review): as visible here, the ``solver`` argument is overwritten
    immediately, ``xtrain`` and ``ytrain`` are never used, and nothing is
    returned (the call yields ``None``). The body looks truncated; the
    training and prediction steps are presumably missing. Confirm against
    the full source.
    """
    solver = mlpy.LibLinear(solver_type='l2r_lr', C=1, eps=0.01, weight={})
Esempio n. 7
0
class training_model:
    """Train, evaluate and save LibLinear models for CDS / NCDS prediction.

    ``model_type`` picks the mode. Any name in ``_COMBINED_TYPES`` selects
    the combined mode: two base models are trained on the "u" and "c"
    feature sets, and a third model is trained on their predicted
    probabilities. Every other value trains a single model on the features
    ``select_fet2`` returns for that ``model_type``.

    Each instance owns its three LibLinear models, so instances do not share
    trained state.
    """

    # Default hyper-parameters; override them per instance with
    # classifier_type().
    solver_type_s = "l1r_l2loss_svc"  # model details
    C_v = 5
    eps_v = 0.001
    weight_d = {}
    fet_names = []

    # Every model_type spelling that selects the combined mode. Shared by
    # make_model() and save() so both always agree.
    _COMBINED_TYPES = ("Combined", "comb", "Comb", "overall", "Overall", "o",
                       "O")

    def __init__(self, datasets, model_type):
        """Store the data source and mode, and create untrained models.

        Args:
            datasets: Object whose ``training()`` and ``testing()`` yield
                items of which the first two entries are passed to
                ``select_fet2``.
            model_type: Mode / feature-set selector (see the class docstring).
        """
        self.model_type = model_type
        self.datasets = datasets
        self._build_models()

    def _new_model(self):
        """Return an untrained LibLinear model with the current settings."""
        return mlpy.LibLinear(solver_type=self.solver_type_s,
                              C=self.C_v,
                              eps=self.eps_v,
                              weight=self.weight_d)

    def _build_models(self):
        """(Re)create the three per-instance models from current settings."""
        self.logit1 = self._new_model()
        self.logit2 = self._new_model()
        self.logit3 = self._new_model()

    def _is_combined(self):
        """Return True when ``model_type`` selects the combined mode."""
        return self.model_type in self._COMBINED_TYPES

    def classifier_type(self, solver, C_val, eps_val, weight_dict):
        """Change the LibLinear settings and rebuild the models with them.

        Any previously trained state is discarded, so call this before
        ``make_model()``.

        Args:
            solver: LibLinear ``solver_type`` string.
            C_val: LibLinear ``C`` value.
            eps_val: LibLinear ``eps`` value.
            weight_dict: LibLinear ``weight`` dictionary.
        """
        self.solver_type_s = solver
        self.C_v = C_val
        self.eps_v = eps_val
        self.weight_d = weight_dict
        # Without this rebuild the new settings would never reach the models.
        self._build_models()

    def _report_accuracies(self, test_vals, test_pred, cds_label, ncds_label):
        """Print overall, CDS-only and NCDS-only accuracy.

        Predictions are reused rather than recomputed for the per-class
        subsets.

        Args:
            test_vals: True labels of the test rows.
            test_pred: Predictions, aligned with ``test_vals``.
            cds_label: Label value identifying CDS rows.
            ncds_label: Label value identifying NCDS rows.
        """
        pairs = list(zip(test_vals, test_pred))
        cds_pairs = [(v, p) for v, p in pairs if v == cds_label]
        ncds_pairs = [(v, p) for v, p in pairs if v == ncds_label]
        print("=-" * 5 + "Accuracies" + "=-" * 5)
        print("Overall accuracy of predictor : %s" % mlpy.accuracy(
            test_vals, test_pred))
        print("Accuracy of predicting CDS correctly : %s" % mlpy.accuracy(
            [v for v, _ in cds_pairs], [p for _, p in cds_pairs]))
        print("Accuracy of predicting NCDS correctly : %s" % mlpy.accuracy(
            [v for v, _ in ncds_pairs], [p for _, p in ncds_pairs]))

    def _make_single_model(self):
        """Train ``logit1`` on the ``model_type`` features and report."""
        training_fet = np.array([
            select_fet2(i[0], i[1], self.model_type)
            for i in self.datasets.training()
        ])
        testing_fet = np.array([
            select_fet2(i[0], i[1], self.model_type)
            for i in self.datasets.testing()
        ])
        # The last column is the label; everything before it is a feature.
        self.logit1.learn(training_fet[:, :-1], training_fet[:, -1])
        test_pred = [self.logit1.pred(row[:-1]) for row in testing_fet]
        test_vals = [row[-1] for row in testing_fet]
        # NOTE(review): labels are compared with the strings '1' / '0' here
        # but with numbers in the combined mode; this only matches when the
        # feature array holds strings. Kept as in the original; confirm.
        self._report_accuracies(test_vals, test_pred, '1', '0')

    def _make_combined_model(self):
        """Train the two base models and the combining model, then report."""
        training_fet_1 = np.array([
            select_fet2(i[0], i[1], "u") for i in self.datasets.training()
        ])
        testing_fet_1 = np.array([
            select_fet2(i[0], i[1], "u") for i in self.datasets.testing()
        ])
        self.logit1.learn(training_fet_1[:, :-1], training_fet_1[:, -1])
        training_fet_2 = np.array([
            select_fet2(i[0], i[1], "c") for i in self.datasets.training()
        ])
        testing_fet_2 = np.array([
            select_fet2(i[0], i[1], "c") for i in self.datasets.testing()
        ])
        self.logit2.learn(training_fet_2[:, :-1], training_fet_2[:, -1])

        # Combined rows are [probability from model 1, probability from
        # model 2, label], where each probability is entry [1] of
        # pred_probability().
        comb_training = np.array([[
            self.logit1.pred_probability(row_1[:-1])[1],
            self.logit2.pred_probability(row_2[:-1])[1],
            row_1[-1]
        ] for row_1, row_2 in zip(training_fet_1, training_fet_2)],
                                 dtype=float)  # training file
        self.logit3.learn(comb_training[:, :-1], comb_training[:, -1])

        comb_testing = np.array([[
            self.logit1.pred_probability(row_1[:-1])[1],
            self.logit2.pred_probability(row_2[:-1])[1],
            row_1[-1]
        ] for row_1, row_2 in zip(testing_fet_1, testing_fet_2)],
                                dtype=float)  # testing file
        test_pred = [self.logit3.pred(row[:-1]) for row in comb_testing]
        test_vals = [row[-1] for row in comb_testing]
        self._report_accuracies(test_vals, test_pred, 1, 0)

    def make_model(self):
        """Train the model(s) for ``model_type`` and print test accuracies."""
        if self._is_combined():
            self._make_combined_model()
        else:
            self._make_single_model()

    @staticmethod
    def _ensure_dir(path):
        """Create ``path`` (with parents); an existing directory is fine.

        Raises:
            OSError: For any failure other than "already exists".
        """
        import errno
        try:
            os.makedirs(path)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

    def save(self):
        """Prompt for a model name and write the trained model file(s).

        A directory named after the model is created under the current
        working directory. In combined mode the three models are saved
        inside it. Otherwise ``logit1`` is saved to a file named after
        ``model_type``.

        Raises:
            OSError: If the directory cannot be created for a reason other
                than already existing.
        """
        model_name = raw_input("Model name")
        # One attempt is enough: retrying makedirs on an existing directory
        # can never succeed and would loop forever.
        self._ensure_dir("%s/%s" % (os.getcwd(), model_name))
        if not self._is_combined():
            # NOTE(review): this file goes to the working directory, not the
            # directory created above. Kept as in the original; confirm.
            self.logit1.save_model(self.model_type)
        else:
            self.logit1.save_model("%s/1_Pos_utr_model_%s" %
                                   (model_name, self.model_type))
            self.logit2.save_model("%s/2_Comp_model_%s" %
                                   (model_name, self.model_type))
            self.logit3.save_model("%s/3_Overall_model_%s" %
                                   (model_name, self.model_type))
Esempio n. 8
0
            tree = mlpy.ClassTree(minsize=10)
            tree.learn(basic.training_data, basic.training_label)
            classified = 0
            for i in range(len(basic.testing_label)):
                if (int)(basic.testing_label[i]) == (int)(tree.pred(
                        basic.testing_data[i])):
                    classified += 1
            fd.write("%s,%s,%d,%d,%d\n" %
                     (datasets[d][0], "GINI", k, size, classified))

            ml = mlpy.MaximumLikelihoodC()
            ml.learn(basic.training_data, basic.training_label)
            classified = 0
            for i in range(len(basic.testing_label)):
                if (int)(basic.testing_label[i]) == (int)(ml.pred(
                        basic.testing_data[i])):
                    classified += 1
            fd.write("%s,%s,%d,%d,%d\n" %
                     (datasets[d][0], "MACL", k, size, classified))

            svm = mlpy.LibLinear(solver_type='mcsvm_cs', C=0.01)
            svm.learn(basic.training_data, basic.training_label)
            classified = 0
            for i in range(len(basic.testing_label)):
                if (int)(basic.testing_label[i]) == (int)(svm.pred(
                        basic.testing_data[i])):
                    classified += 1
            fd.write("%s,%s,%d,%d,%d\n" %
                     (datasets[d][0], "SVM", k, size, classified))
    fd.close()