def main(): print("Model_testing") while True: user = input( "1. ARIMA\n2. Linear Regression\n3. Polynomial Regression\n4. SVR\n5. SVR_2\n6. Random Forest\n7. Quit\n" ) if user == '1': ARIMA.run() elif user == '2': Lin_reg.run() elif user == '3': Poly_reg.run() elif user == '4': SVR.run() elif user == '5': SVR_2.run() elif user == '6': RF.run() elif user == '7': break print("\n----------MAIN-----------\n")
def main(run=1, force_run=False):
    mkdir(_model_folder)
    if not force_run and len(os.listdir(_model_folder)) > 0:
        ans = input("Found something in '%s', which may be overwritten.\n"
                    "Proceed? [y/n]: " % _model_folder)
        if ans.lower() == 'n':
            exit(-1)
    for k in range(run):
        samples = preprocessing.tp_sample.get_samples(_sample_folder)
        if _name_filter is not None:
            samples = [s for s in samples if s.batch_name in _name_filter]
        # Report label variance across the full sample set.
        print(np.var([get_label(s) for s in samples]))
        random.shuffle(samples)
        batches = preprocessing.batch_data(samples, cross_valid)
        for i in range(cross_valid):
            # Fold i is held out for validation; the rest form the training set.
            valid_samples = batches[i]
            train_samples = []
            savedir = "%s/%d/" % (_model_folder, i + 1)
            mkdir(savedir)
            for j in range(cross_valid):
                if j != i:
                    train_samples.extend(batches[j])
            if _filter_samples:
                train_samples = preprocessing.score_portion(
                    train_samples, get_label, _high_portion, _low_portion)
            train_texts = [sample.text for sample in train_samples]
            valid_texts = [sample.text for sample in valid_samples]
            train_matrix, valid_matrix, words = preprocessing.preprocess(
                train_texts, valid_texts, savedir=savedir, **_strategy_parameters)
            train_labels = np.asarray([get_label(sample) for sample in train_samples])
            valid_labels = np.asarray([get_label(sample) for sample in valid_samples])
            if _model_type == "NN":
                model = Neural_Network(_attributes, _hidden_nodes=hidden_nodes,
                                       _learning_rate=learning_rate)
                valid_mse = model.train(train_matrix, train_labels,
                                        valid_matrix, valid_labels, max_iter=15000)
            else:
                model = SVR(**_svm_parameters)
                valid_mse = model.train(train_matrix, train_labels,
                                        valid_matrix, valid_labels)
            model.save(savedir)
            model.destroy()
            print("Fold %2d: %.4f" % (i + 1, valid_mse))
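# `preprocessing.batch_data` is not shown in this section. A minimal sketch
# of the fold-splitting behavior the loop above relies on (nearly equal
# folds, order preserved after the shuffle); the body is an assumption,
# only the call signature is taken from the call site:
def batch_data(samples, n_folds):
    folds = []
    base, extra = divmod(len(samples), n_folds)
    start = 0
    for i in range(n_folds):
        # Distribute the remainder one extra sample per leading fold.
        size = base + (1 if i < extra else 0)
        folds.append(samples[start:start + size])
        start += size
    return folds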
def SVR_forecasting(dataset, lookBack, C=2.0, epsilon=0.01, plot_flag=False):
    # Normalize the time series to [0, 1].
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(dataset)

    # Divide the series into training/testing samples.
    # NOTE: flat feature vectors, not RNN format.
    train, test = util.divideTrainTest(dataset)
    trainX, trainY = util.createSamples(train, lookBack, RNN=False)
    testX, testY = util.createSamples(test, lookBack, RNN=False)
    print("trainX shape is", trainX.shape)
    print("trainY shape is", trainY.shape)
    print("testX shape is", testX.shape)
    print("testY shape is", testY.shape)

    # Build the model and train.
    SVRModel = SVR.SVRModel(C=C, epsilon=epsilon)
    SVRModel.train(trainX, trainY)

    # Forecast on both splits.
    trainPred = SVRModel.predict(trainX).reshape(-1, 1)
    testPred = SVRModel.predict(testX).reshape(-1, 1)

    # Invert the scaling back to the original range.
    trainPred = scaler.inverse_transform(trainPred)
    trainY = scaler.inverse_transform(trainY)
    testPred = scaler.inverse_transform(testPred)
    testY = scaler.inverse_transform(testY)

    # Evaluate on the test split.
    MAE = eval.calcMAE(testY, testPred)
    print("test MAE", MAE)
    RMSE = eval.calcRMSE(testY, testPred)
    print("test RMSE", RMSE)
    MAPE = eval.calcMAPE(testY, testPred)
    print("test MAPE", MAPE)
    SMAPE = eval.calcSMAPE(testY, testPred)
    print("test SMAPE", SMAPE)

    if plot_flag:
        util.plot(trainPred, trainY, testPred, testY)

    return trainPred, testPred, MAE, RMSE, SMAPE
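# `util.createSamples` is defined elsewhere. A minimal sketch of the
# sliding-window construction the call above assumes -- flat (n, lookBack)
# features when RNN=False, with the next value as the target; the body is
# an assumption:
import numpy as np

def createSamples(series, lookBack, RNN=False):
    X, y = [], []
    for i in range(len(series) - lookBack):
        X.append(series[i:i + lookBack, 0])   # window of past values
        y.append(series[i + lookBack, 0])     # next value to predict
    X, y = np.asarray(X), np.asarray(y).reshape(-1, 1)
    if RNN:
        X = X.reshape(X.shape[0], lookBack, 1)  # (samples, timesteps, features)
    return X, y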
def main(run=1, force_run=False):
    mkdir(_model_folder)
    if not force_run and len(os.listdir(_model_folder)) > 0:
        ans = input("Found something in '%s', which may be overwritten.\n"
                    "Proceed? [y/n]: " % _model_folder)
        if ans.lower() == 'n':
            exit(-1)
    for k in range(run):
        samples = preprocessing.tp_sample.get_samples(_sample_folder)
        if _name_filter is not None:
            samples = [s for s in samples if s.batch_name in _name_filter]
        # Report label variance across the full sample set.
        print(np.var([get_label(s) for s in samples]))
        random.shuffle(samples)
        batches = preprocessing.batch_data(samples, _cross_valid)
        for i in range(_cross_valid):
            valid_samples = batches[i]
            train_samples = []
            savedir = "%s/%d/" % (_model_folder, i + 1)
            mkdir(savedir)
            for j in range(_cross_valid):
                if j != i:
                    train_samples.extend(batches[j])
            # Textbook-similarity features instead of bag-of-words.
            train_matrix = preprocessing.tb_similarity(train_samples, chs=_tb_chs)
            valid_matrix = preprocessing.tb_similarity(valid_samples, chs=_tb_chs)
            train_labels = np.asarray([get_label(sample) for sample in train_samples])
            valid_labels = np.asarray([get_label(sample) for sample in valid_samples])
            model = SVR(**_svm_parameters)
            valid_mse = model.train(train_matrix, train_labels,
                                    valid_matrix, valid_labels)
            model.save(savedir)
            model.destroy()
            print("Fold %2d: %.4f" % (i + 1, valid_mse))
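# The `SVR` class used across these scripts is a project wrapper, not
# sklearn.svm.SVR directly. A minimal sketch of the interface the call
# sites rely on -- train() returning validation MSE when a validation
# split is given, plus predict/save/destroy; the internals are assumptions:
import pickle
import numpy as np
from sklearn import svm

class SVR:
    def __init__(self, **svm_parameters):
        self.model = svm.SVR(**svm_parameters)

    def train(self, X, y, valid_X=None, valid_y=None):
        self.model.fit(X, y)
        if valid_X is not None:
            pred = self.model.predict(valid_X)
            return float(np.mean((pred - valid_y) ** 2))  # validation MSE

    def predict(self, X):
        return self.model.predict(X)

    def save(self, savedir):
        # Persist the fitted estimator under the fold/section directory.
        with open(savedir + "svr.pkl", "wb") as f:
            pickle.dump(self.model, f)

    def destroy(self):
        self.model = None  # free the fitted estimator between folds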
def main(run=1, force_run=False):
    mkdir(_model_folder)
    if not force_run and len(os.listdir(_model_folder)) > 0:
        ans = input("Found something in '%s', which may be overwritten.\n"
                    "Proceed? [y/n]: " % _model_folder)
        if ans.lower() == 'n':
            exit(-1)
    for k in range(run):
        samples = preprocessing.tp_sample.get_samples(_sample_folder)
        if _name_filter is not None:
            samples = [s for s in samples if s.batch_name in _name_filter]
        print("Variance: %.3f" % np.var([get_label(s) for s in samples]))
        random.shuffle(samples)
        batches = preprocessing.batch_data(samples, _cross_valid)
        for i in range(_cross_valid):
            valid_samples = batches[i]
            train_samples = []
            savedir = "%s/%d/" % (_model_folder, i + 1)
            mkdir(savedir)
            for j in range(_cross_valid):
                if j != i:
                    train_samples.extend(batches[j])
            # Bag-of-words features built from the comment text.
            train_texts = [sample.comment for sample in train_samples]
            valid_texts = [sample.comment for sample in valid_samples]
            train_matrix, valid_matrix, words = preprocessing.preprocess(
                train_texts, valid_texts, savedir=savedir, **_strategy_parameters)
            # print("\tBag of words: %d" % len(words))
            train_labels = np.asarray([get_label(sample) for sample in train_samples])
            valid_labels = np.asarray([get_label(sample) for sample in valid_samples])
            model = SVR(**_svm_parameters)
            valid_mse = model.train(train_matrix, train_labels,
                                    valid_matrix, valid_labels)
            model.save(savedir)
            model.destroy()
            print("Fold %2d: %.4f" % (i + 1, valid_mse))
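# `preprocessing.preprocess` is shared by the variants above. A minimal
# sketch of the contract they assume -- fit the vocabulary on the training
# texts only, transform both splits, return the word list. The real
# implementation also handles savedir, reduction, and stemming (here
# accepted but ignored); the CountVectorizer choice is an assumption:
from sklearn.feature_extraction.text import CountVectorizer

def preprocess(train_texts, valid_texts, savedir=None, **kwargs):
    vectorizer = CountVectorizer()
    train_matrix = vectorizer.fit_transform(train_texts).toarray()  # fit on train only
    valid_matrix = vectorizer.transform(valid_texts).toarray()      # no leakage
    words = vectorizer.get_feature_names_out()
    return train_matrix, valid_matrix, words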
print("Train set distribution:", preprocessing.samples_statistics(train_samples, _sections, get_section)) print("Test set distribution:", preprocessing.samples_statistics(test_samples, _sections, get_section)) train_texts = [sample.text for sample in train_samples] test_texts = [sample.text for sample in test_samples] tfidf_vectorizer = get_tfidfVectorizer_of_essay_top_tf_words() print("Vectorizer built..") train_matrix, test_matrix, words = preprocessing.preprocess(train_texts, test_texts, savedir = _save_dir, words_src = tfidf_vectorizer, normalize_flag = False, reduction = _reduction, reduce_n_attr = _reduce_n_attr, stem_words = _stem_words) for section in _sections: train_labels = preprocessing.samples_to_binary(train_samples, [section], get_section) test_labels = preprocessing.samples_to_binary(test_samples, [section], get_section) model = SVR() print("Training for %s section.. "%section) model.train(train_matrix, train_labels) predict = model.predict(test_matrix) accuracy = 0 for i in range(predict.shape[0]): if predict[i] >= 0.5: predict[i] = 1 else: predict[i] = 0 if predict[i] == test_labels[i]: accuracy += 1.0 accuracy /= predict.shape[0] model.save("%s/%s/"%(_save_dir,section)) print("Accuracy: %.3f"%accuracy)
selection="tfidf", select_top=_textbook_words, savedir=_save_dir, words_src="textbook", normalize_flag=False, reduction=_reduction, reduce_n_attr=_reduce_n_attr, stem_words=_stem_words) for section in _sections: train_labels = preprocessing.samples_to_binary(train_samples, [section], get_section) test_labels = preprocessing.samples_to_binary(test_samples, [section], get_section) model = SVR() print("Training for %s section.. " % section) model.train(train_matrix, train_labels) predict = model.predict(test_matrix) accuracy = 0 for i in range(predict.shape[0]): if predict[i] >= 0.5: predict[i] = 1 else: predict[i] = 0 if predict[i] == test_labels[i]: accuracy += 1.0 accuracy /= predict.shape[0] model.save("%s/%s/" % (_save_dir, section)) print("Accuracy: %.3f" % accuracy)