def train(self):
    """
    Trains Minitagger on the training data already stored on this instance
    (self.label_list_train / self.features_list_train).

    If self.data_test is set, the trained model is used to predict its labels,
    the predictions are written under self.prediction_path, scored with
    report_fscore_from_file, optionally displayed, and saved via
    self.save_results.

    @rtype: tuple or None
    @return: (exact_score, inexact_score, conllEval) when self.data_test is
             not None, otherwise None
    """
    # keep the training start timestamp
    start_time = time.time()

    # print some useful information about the data
    if not self.quiet:
        print("{0} labeled words (out of {1})".format(len(self.label_list_train), self.data_train.num_of_words))
        print("{0} label types".format(len(self.data_train.label_count)))
        print("label types: ", self.data_train.label_count)
        print("{0} word types".format(len(self.data_train.word_count)))
        print("\"{0}\" feature template".format(self.feature_extractor.feature_template))
        print("{0} feature types".format(self.feature_extractor.num_feature_types()))

    # define problem to be trained using the parameters received from the feature_extractor
    problem = liblinearutil.problem(self.label_list_train, self.features_list_train)
    # train the model (-q stands for quiet = True in the liblinearutil)
    # NOTE: only "-q" is passed; self.epsilon and self.cost are not forwarded here
    self.__liblinear_model = liblinearutil.train(problem, liblinearutil.parameter(" -q"))

    # training is done, set is_training to False, so that prediction can be done
    self.feature_extractor.is_training = False

    # print some useful information
    if not self.quiet:
        # how long did the training last
        num_seconds = int(math.ceil(time.time() - start_time))
        print("Training time: {0}".format(str(datetime.timedelta(seconds=num_seconds))))

    # nothing to evaluate on: training alone was requested
    if self.data_test is None:
        return None

    # silence predict() while evaluating; always restore the caller's setting,
    # even when prediction raises
    quiet_value = self.quiet
    self.quiet = True
    try:
        pred_labels, _ = self.predict()
    finally:
        self.quiet = quiet_value

    self.data_test.save_prediction_to_file(pred_labels, self.prediction_path)
    exact_score, inexact_score, conllEval = report_fscore_from_file(
        self.prediction_path + "/predictions.txt", wikiner=self.wikiner)

    # create some files useful for debugging
    if self.debug:
        self.__debug(self.data_test, pred_labels)

    if not self.quiet:
        self.display_results("Conll", conllEval)
        self.display_results("Exact", exact_score)
        self.display_results("Inexact", inexact_score)

    self.save_results(conllEval, exact_score, inexact_score)
    return exact_score, inexact_score, conllEval
def model_training(n, i):
    """
    Run 5-fold cross-validation with liblinear for every value in ``cost``.

    Reads the module-level names ``method``, ``name``, ``cost`` and ``f_idx``.

    @param n: value passed through unchanged into the returned tuple
    @param i: index into ``method`` selecting which data file to load
    @return: tuple (i, f_idx, n, scores) where ``scores`` holds one value per
             entry of ``cost``, in the same order
    """
    meth = method[i]
    y, x = liblinearutil.svm_read_problem(
        "other_method/kmeans_linear/%s/original_%s" % (name, meth))
    prob = liblinearutil.problem(y, x)

    # Size the buffer from ``cost`` itself: a fixed length either leaves
    # uninitialised slots (np.empty) or overflows when ``cost`` changes.
    temp_result = np.empty(len(cost))
    for idx, val in enumerate(cost):
        # with "-v 5" train() yields the cross-validation score, not a model
        param = liblinearutil.parameter('-v 5 -q -c %f' % (val))
        temp_result[idx] = liblinearutil.train(prob, param)
    return (i, f_idx, n, temp_result)
def model_training(x, y, x_test, y_test):
    """
    Train a liblinear model on (x, y), predict on (x_test, y_test) and
    report how much process time each phase took.

    @param x: training instances
    @param y: training labels
    @param x_test: test instances
    @param y_test: test labels
    @return: tuple (train_time, predict_time) measured with time.process_time()
    """
    problem = liblinearutil.problem(y, x)

    # the training clock also covers building the parameter object
    train_began = time.process_time()
    options = liblinearutil.parameter('-q')
    model = liblinearutil.train(problem, options)
    train_time = time.process_time() - train_began

    predict_began = time.process_time()
    # the prediction outputs themselves are not needed, only the elapsed time
    _labels, (_acc, _mse, _scc), _values = liblinearutil.predict(y_test, x_test, model)
    predict_time = time.process_time() - predict_began

    return train_time, predict_time
def model_training(n, i):
    """
    Train one liblinear model per value in ``cost`` and evaluate each on the
    matching test file.

    Reads the module-level names ``method``, ``name``, ``cost`` and ``f_idx``.

    @param n: value passed through unchanged into the returned tuple
    @param i: index into ``method`` selecting which train/test files to load
    @return: tuple (i, f_idx, n, scores) where ``scores`` holds, per entry of
             ``cost``, the first evaluation value returned by
             liblinearutil.predict
    """
    meth = method[i]
    y, x = liblinearutil.svm_read_problem(
        "other_method/kmeans_linear/%s/train_%s" % (name, meth))
    y_test, x_test = liblinearutil.svm_read_problem(
        "other_method/kmeans_linear/%s/test_%s" % (name, meth))
    prob = liblinearutil.problem(y, x)

    # Size the buffer from ``cost`` itself: a fixed length either leaves
    # uninitialised slots (np.empty) or overflows when ``cost`` changes.
    temp_result = np.empty(len(cost))
    for idx, val in enumerate(cost):
        param = liblinearutil.parameter(' -q -c %f' % (val))
        m = liblinearutil.train(prob, param)
        # only the first element of the evaluation triple is kept
        _labels, (temp_result[idx], _mse, _scc), _values = liblinearutil.predict(y_test, x_test, m)
    return (i, f_idx, n, temp_result)
def model_training():
    """
    Train and evaluate one liblinear model per value in ``cost`` on the data
    of the first entry of ``method``.

    Reads the module-level names ``method``, ``name`` and ``cost``.

    @return: tuple (best, t2) where ``best`` is the largest first evaluation
             value returned by liblinearutil.predict across all cost values,
             and ``t2`` is the wall-clock time of the last iteration only
             (the timer is restarted for every cost value)
    """
    meth = method[0]
    y, x = liblinearutil.svm_read_problem(
        "other_method/kmeans_linear/%s/train_%s" % (name, meth))
    y_test, x_test = liblinearutil.svm_read_problem(
        "other_method/kmeans_linear/%s/test_%s" % (name, meth))
    prob = liblinearutil.problem(y, x)

    # One slot per cost value; a fixed length raises IndexError as soon as
    # ``cost`` has more entries than the constant.
    temp_result = np.zeros(len(cost))
    for idx, val in enumerate(cost):
        start = time.time()
        param = liblinearutil.parameter(' -q -c %f' % (val))
        m = liblinearutil.train(prob, param)
        _labels, (temp_result[idx], _mse, _scc), _values = liblinearutil.predict(y_test, x_test, m)
        t2 = time.time() - start
    return np.max(temp_result), t2
def model_training():
    """
    Train a liblinear model on the BudgetedSVM "original" train split, time
    training and prediction, print both timings, then terminate the process.

    Reads the module-level name ``name`` to locate the data files.
    Prints the training process time, then the prediction process time
    together with the first evaluation value returned by
    liblinearutil.predict. Never returns: it raises SystemExit.
    """
    # sys.exit() is the programmatic way to stop; the bare exit() helper is
    # injected by the site module for interactive use and may be absent.
    import sys

    x, y = load_svmlight_file("svm/BudgetedSVM/original/%s/train" % (name))
    x_test, y_test = load_svmlight_file("svm/BudgetedSVM/original/%s/test" % (name))

    prob = liblinearutil.problem(y, x)
    param = liblinearutil.parameter(' -q ')

    start = time.process_time()
    m = liblinearutil.train(prob, param)
    print(time.process_time() - start)

    start2 = time.process_time()
    _labels, (acc, _mse, _scc), _values = liblinearutil.predict(y_test, x_test, m)
    print(time.process_time() - start2, acc)

    sys.exit()
def train_with_step(self, data_train, data_test):
    """
    Retrain and re-evaluate the tagger for 50 training-set sizes and record
    the outcome of every run in a Score object.

    The sizes are 50 evenly spaced values from 10 up to
    len(data_train.sequence_pairs). Each run resets the feature extractor,
    re-extracts features, trains liblinear with "-q -p <epsilon>", predicts,
    writes the predictions under self.prediction_path and scores them with
    report_fscore_from_file. The accumulated Score is saved to
    self.model_path at the end.

    @param data_train: the training data set
    @param data_test: the test data set
    """
    print("number of feature:", len(self.feature_extractor._map_feature_str2num.keys()))

    history = Score("finale_score", None)
    hyper_params = {"epsilon": self.epsilon}

    step_sizes = np.linspace(10, len(data_train.sequence_pairs), 50)
    for step in step_sizes:
        # start every run from a clean feature mapping
        self.feature_extractor.reset()
        began = time.time()

        self.extract_features(data_train, data_test, num_data=step)
        problem = liblinearutil.problem(self.label_list_train, self.features_list_train)
        options = liblinearutil.parameter(" ".join(("-q", "-p", str(self.epsilon))))
        self.__liblinear_model = liblinearutil.train(problem, options)

        pred_labels, _acc = self.predict()
        self.data_test.save_prediction_to_file(pred_labels, self.prediction_path)

        predictions_file = self.prediction_path + "/predictions.txt"
        exact_score, inexact_score, conllEval = report_fscore_from_file(
            predictions_file, wikiner=self.wikiner, quiet=True)

        history.add_new_iteration(step, time.time() - began, conllEval,
                                  exact_score, inexact_score, hyper_params)
        print("Iteration:", time.time() - began, "score: ", conllEval)

    history.save_class_to_file(self.model_path)