Beispiel #1
0
    def train(self):
        """
        Train the liblinear model on the features already stored on this instance.

        Reads self.label_list_train / self.features_list_train, trains the model and switches
        the feature extractor out of training mode. If self.data_test is not None, the test data
        is predicted, the predictions are written under self.prediction_path and scored with
        report_fscore_from_file; the scores are displayed (unless self.quiet) and saved.

        @rtype: tuple
        @return: (exact_score, inexact_score, conllEval); all three are None when
                 self.data_test is None, because nothing can be evaluated
        """

        # keep the training start timestamp
        start_time = time.time()

        # print some useful information about the data
        if not self.quiet:
            print("{0} labeled words (out of {1})".format(len(self.label_list_train), self.data_train.num_of_words))
            print("{0} label types".format(len(self.data_train.label_count)))
            print("label types: ", self.data_train.label_count)
            print("{0} word types".format(len(self.data_train.word_count)))
            print("\"{0}\" feature template".format(self.feature_extractor.feature_template))
            print("{0} feature types".format(self.feature_extractor.num_feature_types()))

        # define problem to be trained using the labels/features prepared beforehand
        problem = liblinearutil.problem(self.label_list_train, self.features_list_train)
        # train the model; only "-q" (quiet) is passed, so self.epsilon / self.cost are
        # NOT forwarded here and liblinear uses its own defaults for them
        self.__liblinear_model = liblinearutil.train(problem, liblinearutil.parameter(" -q"))

        # training is done, set is_training to False, so that prediction can be done
        self.feature_extractor.is_training = False

        if not self.quiet:
            # how long did the training last
            num_seconds = int(math.ceil(time.time() - start_time))
            print("Training time: {0}".format(str(datetime.timedelta(seconds=num_seconds))))

        # without test data there are no scores to display, save or return;
        # bail out instead of hitting unbound score variables further down
        if self.data_test is None:
            return None, None, None

        # silence predict() while evaluating, and always restore the caller's setting,
        # even if prediction raises
        quiet_value = self.quiet
        self.quiet = True
        try:
            pred_labels, _ = self.predict()
        finally:
            self.quiet = quiet_value

        self.data_test.save_prediction_to_file(pred_labels, self.prediction_path)
        exact_score, inexact_score, conllEval = report_fscore_from_file(self.prediction_path + "/predictions.txt",
                                                                        wikiner=self.wikiner)
        # create some files useful for debugging
        if self.debug:
            self.__debug(self.data_test, pred_labels)

        if not self.quiet:
            self.display_results("Conll", conllEval)
            self.display_results("Exact", exact_score)
            self.display_results("Inexact", inexact_score)

        self.save_results(conllEval, exact_score, inexact_score)

        return exact_score, inexact_score, conllEval
Beispiel #2
0
def model_training(n, i):
    """
    Run a 5-fold cross-validation sweep over the module-level ``cost`` values.

    The data set is read from the "original_<method[i]>" file of the current ``name``.
    With ``-v 5`` liblinear's ``train`` returns a scalar cross-validation score instead
    of a model (per the LIBLINEAR Python interface docs); that score is stored per cost.

    NOTE: the result buffer has a fixed 13 slots and is uninitialised, so slots beyond
    ``len(cost)`` hold arbitrary values and more than 13 costs raises IndexError.

    :param n: opaque value passed straight through to the result tuple
    :param i: index into the module-level ``method`` list
    :return: ``(i, f_idx, n, scores)`` where ``scores`` is the 13-slot numpy array
    """
    meth = method[i]
    data_path = "other_method/kmeans_linear/%s/original_%s" % (name, meth)
    labels, features = liblinearutil.svm_read_problem(data_path)
    prob = liblinearutil.problem(labels, features)

    cv_scores = np.empty((13))
    for slot, c_value in enumerate(cost):
        options = '-v 5 -q -c %f' % (c_value)
        cv_scores[slot] = liblinearutil.train(prob, liblinearutil.parameter(options))

    return (i, f_idx, n, cv_scores)
Beispiel #3
0
def model_training(x,y,x_test,y_test):
    """
    Train a liblinear model with default options and time training and prediction.

    Both durations are CPU time (``time.process_time``). The training measurement
    also covers construction of the liblinear parameter object. The predictions
    themselves are discarded; only the timings are returned.

    :param x: training features
    :param y: training labels
    :param x_test: test features
    :param y_test: test labels
    :return: ``(train_time, predict_time)``
    """
    prob = liblinearutil.problem(y, x)

    # --- training phase ---
    train_begin = time.process_time()
    options = liblinearutil.parameter('-q')
    model = liblinearutil.train(prob, options)
    train_time = time.process_time() - train_begin

    # --- prediction phase ---
    predict_begin = time.process_time()
    _labels, (_acc, _mse, _scc), _values = liblinearutil.predict(y_test, x_test, model)
    predict_time = time.process_time() - predict_begin

    return train_time,predict_time
Beispiel #4
0
def model_training(n, i):
    """
    Train one model per value in the module-level ``cost`` and score it on the test split.

    Train and test sets are read from the "train_<method[i]>" / "test_<method[i]>" files
    of the current ``name``. For each cost, the first element of liblinear's evaluation
    tuple (accuracy for classification, per the LIBLINEAR Python docs) is recorded.

    NOTE: the result buffer has a fixed 14 slots and is uninitialised, so slots beyond
    ``len(cost)`` hold arbitrary values and more than 14 costs raises IndexError.

    :param n: opaque value passed straight through to the result tuple
    :param i: index into the module-level ``method`` list
    :return: ``(i, f_idx, n, accuracies)`` where ``accuracies`` is the 14-slot numpy array
    """
    meth = method[i]
    train_path = "other_method/kmeans_linear/%s/train_%s" % (name, meth)
    y, x = liblinearutil.svm_read_problem(train_path)
    test_path = "other_method/kmeans_linear/%s/test_%s" % (name, meth)
    y_test, x_test = liblinearutil.svm_read_problem(test_path)

    prob = liblinearutil.problem(y, x)
    accuracies = np.empty((14))

    for slot, c_value in enumerate(cost):
        options = liblinearutil.parameter(' -q -c %f' % (c_value))
        model = liblinearutil.train(prob, options)
        _labels, (accuracy, _mse, _scc), _values = liblinearutil.predict(y_test, x_test, model)
        accuracies[slot] = accuracy

    return (i, f_idx, n, accuracies)
Beispiel #5
0
def model_training():
    """
    Sweep the module-level ``cost`` values for ``method[0]`` and report the best score.

    For every cost a model is trained on the train split and evaluated on the test
    split; the first element of liblinear's evaluation tuple is stored in a 12-slot,
    zero-initialised buffer.

    NOTE(review): the timer is restarted at the top of every iteration but only read
    after the loop, so the returned duration covers just the LAST cost value, and an
    empty ``cost`` leaves the timer unset (NameError). Confirm whether total sweep
    time was intended.

    :return: ``(best_score, seconds)`` - maximum over the buffer and wall-clock time
             of the final iteration
    """
    meth = method[0]
    train_path = "other_method/kmeans_linear/%s/train_%s" % (name, meth)
    y, x = liblinearutil.svm_read_problem(train_path)
    test_path = "other_method/kmeans_linear/%s/test_%s" % (name, meth)
    y_test, x_test = liblinearutil.svm_read_problem(test_path)

    prob = liblinearutil.problem(y, x)
    accuracies = np.zeros((12))

    for slot, c_value in enumerate(cost):
        start = time.time()
        options = liblinearutil.parameter(' -q -c %f' % (c_value))
        model = liblinearutil.train(prob, options)
        _labels, (accuracy, _mse, _scc), _values = liblinearutil.predict(y_test, x_test, model)
        accuracies[slot] = accuracy

    elapsed = time.time()-start
    return np.max(accuracies),elapsed
Beispiel #6
0
def model_training():
    """
    Benchmark liblinear on the BudgetedSVM "original" data set of the current ``name``.

    Loads the train and test files, trains with default options, prints the CPU time
    spent training, then prints the CPU time spent predicting together with the first
    element of liblinear's evaluation tuple. The process is terminated afterwards, so
    this function never returns.

    NOTE(review): ``load_svmlight_file`` is presumably scikit-learn's loader, which
    returns ``(features, labels)`` - confirm against the file's imports.

    :raises SystemExit: always, once the timings have been printed
    """
    # local import: keeps this block self-contained without touching file-level imports
    import sys

    x, y = load_svmlight_file("svm/BudgetedSVM/original/%s/train" % (name))
    x_test, y_test = load_svmlight_file("svm/BudgetedSVM/original/%s/test" %
                                        (name))
    prob = liblinearutil.problem(y, x)

    param = liblinearutil.parameter(' -q ')
    start = time.process_time()
    m = liblinearutil.train(prob, param)
    print(time.process_time() - start)

    start2 = time.process_time()
    pred_labels, (acc, MSE,
                  SCC), pred_values = liblinearutil.predict(y_test, x_test, m)
    print(time.process_time() - start2, acc)

    # sys.exit() instead of the interactive-only ``exit`` helper injected by ``site``
    sys.exit()
Beispiel #7
0
    def train_with_step(self, data_train, data_test):
        """
        Train and evaluate repeatedly on growing amounts of training data.

        50 evenly spaced sizes between 10 and len(data_train.sequence_pairs) are tried.
        For each size the feature extractor is reset, features are re-extracted, a model is
        trained with "-q -p <epsilon>", the test data is predicted and scored, and the
        iteration (size, elapsed seconds, scores, parameters) is appended to a Score object
        that is written to self.model_path after every iteration.

        NOTE(review): np.linspace yields floats, so num_data receives non-integer values;
        verify that extract_features handles that.

        @param data_train: the training data set (forwarded to extract_features)
        @param data_test: the test data set (forwarded to extract_features)
        """
        feature_keys = self.feature_extractor._map_feature_str2num.keys()
        print("number of feature:", len(feature_keys))

        score = Score("finale_score", None)
        param = {"epsilon": self.epsilon}

        step_sizes = np.linspace(10, len(data_train.sequence_pairs), 50)
        for subset_size in step_sizes:
            self.feature_extractor.reset()
            start_time = time.time()
            self.extract_features(data_train, data_test, num_data=subset_size)

            # train on whatever extract_features left in the train label/feature lists
            problem = liblinearutil.problem(self.label_list_train, self.features_list_train)
            options = "-q -p " + str(self.epsilon)
            self.__liblinear_model = liblinearutil.train(problem, liblinearutil.parameter(options))

            # predict, dump predictions to disk and score them from that file
            pred_labels, acc = self.predict()
            self.data_test.save_prediction_to_file(pred_labels, self.prediction_path)
            predictions_file = self.prediction_path + "/predictions.txt"
            exact_score, inexact_score, conllEval = report_fscore_from_file(predictions_file,
                                                                            wikiner=self.wikiner, quiet=True)

            score.add_new_iteration(subset_size, time.time() - start_time, conllEval, exact_score, inexact_score, param)
            print("Iteration:", time.time() - start_time, "score: ", conllEval)

            # persist after every iteration so partial results survive an interruption
            score.save_class_to_file(self.model_path)