# Example #1 (score: 0)
def main():
    """Interactive menu loop: run the chosen forecasting model until the user quits."""
    print("Model_testing")

    # Map menu choices to the corresponding model runners.
    actions = {
        '1': ARIMA.run,
        '2': Lin_reg.run,
        '3': Poly_reg.run,
        '4': SVR.run,
        '5': SVR_2.run,
        '6': RF.run,
    }
    prompt = ("1. ARIMA\n2. Linear Regression\n3. Polynomial Regression\n"
              "4. SVR\n5. SVR_2\n6. Random Forest\n7. Quit\n")

    while True:
        choice = input(prompt)
        if choice == '7':
            break
        runner = actions.get(choice)
        if runner is not None:
            runner()
        # Separator printed after every non-quit iteration, valid or not.
        print("\n----------MAIN-----------\n")
# Example #2 (score: 0)
def main(run=1, force_run=False):
    """Cross-validated training driver: preprocess text samples, fit either a
    neural network or an SVR per fold, and report each fold's validation MSE.

    run: number of full shuffle/train cycles to perform.
    force_run: when True, skip the overwrite-confirmation prompt.
    Relies on module-level configuration (_model_folder, _sample_folder,
    cross_valid, _model_type, ...) defined elsewhere in the file.
    """
    mkdir(_model_folder)
    # Ask before clobbering any previously saved models.
    if not force_run and len(os.listdir(_model_folder)) > 0:
        ans = input(
            "Found something in '%s', which may be overwitten.\nProceed? [y/n]: "
            % _model_folder)
        if ans.lower() == 'n':
            exit(-1)

    for _ in range(run):
        samples = preprocessing.tp_sample.get_samples(_sample_folder)
        if _name_filter is not None:
            samples = [s for s in samples if s.batch_name in _name_filter]
        # Label variance over the whole pool (sanity check before training).
        print(np.var([get_label(s) for s in samples]))
        random.shuffle(samples)
        batches = preprocessing.batch_data(samples, cross_valid)
        for fold in range(cross_valid):
            valid_samples = batches[fold]
            # Every batch except the held-out one becomes training data.
            train_samples = [
                s for j in range(cross_valid) if j != fold for s in batches[j]
            ]

            savedir = "%s/%d/" % (_model_folder, fold + 1)
            mkdir(savedir)

            if _filter_samples:
                # Optionally keep only the highest/lowest scoring portions.
                train_samples = preprocessing.score_portion(
                    train_samples, get_label, _high_portion, _low_portion)
            train_texts = [s.text for s in train_samples]
            valid_texts = [s.text for s in valid_samples]
            train_matrix, valid_matrix, words = preprocessing.preprocess(
                train_texts, valid_texts, savedir=savedir,
                **_strategy_parameters)
            train_labels = np.asarray([get_label(s) for s in train_samples])
            valid_labels = np.asarray([get_label(s) for s in valid_samples])

            if _model_type == "NN":
                model = Neural_Network(_attributes,
                                       _hidden_nodes=hidden_nodes,
                                       _learning_rate=learning_rate)
                valid_mse = model.train(train_matrix, train_labels,
                                        valid_matrix, valid_labels,
                                        max_iter=15000)
            else:
                model = SVR(**_svm_parameters)
                valid_mse = model.train(train_matrix, train_labels,
                                        valid_matrix, valid_labels)
            model.save(savedir)
            model.destroy()

            print("Fold %2d: %.4f" % (fold + 1, valid_mse))
def SVR_forecasting(dataset, lookBack, C=2.0, epsilon=0.01, plot_flag=False):
    """Fit an SVR on a min-max-scaled time series and evaluate its forecasts.

    dataset: raw time series (2-D, single feature column — as required by
        MinMaxScaler).
    lookBack: window length used to build supervised (X, y) samples.
    C, epsilon: hyper-parameters forwarded to SVR.SVRModel.
    plot_flag: when True, plot predictions against the ground truth.
    Returns (trainPred, testPred, MAE, MRSE, SMAPE) on the original scale.
    """
    # Scale into [0, 1] before windowing; inverted again before scoring.
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(dataset)

    # Split the series, then build flat (non-RNN layout) training samples.
    train, test = util.divideTrainTest(dataset)
    trainX, trainY = util.createSamples(train, lookBack, RNN=False)
    testX, testY = util.createSamples(test, lookBack, RNN=False)
    for tag, arr in (("trainX", trainX), ("trainY", trainY),
                     ("testX", testX), ("testY", testY)):
        print(tag + " shape is", arr.shape)

    # Build the model and train it.
    model = SVR.SVRModel(C=C, epsilon=epsilon)
    model.train(trainX, trainY)

    # Forecast, then map predictions and targets back to the original scale.
    trainPred = scaler.inverse_transform(model.predict(trainX).reshape(-1, 1))
    testPred = scaler.inverse_transform(model.predict(testX).reshape(-1, 1))
    trainY = scaler.inverse_transform(trainY)
    testY = scaler.inverse_transform(testY)

    # Error metrics on the test split.
    MAE = eval.calcMAE(testY, testPred)
    print("test MAE", MAE)
    MRSE = eval.calcRMSE(testY, testPred)
    print("test RMSE", MRSE)
    MAPE = eval.calcMAPE(testY, testPred)
    print("test MAPE", MAPE)
    SMAPE = eval.calcSMAPE(testY, testPred)
    print("test SMAPE", SMAPE)

    if plot_flag:
        util.plot(trainPred, trainY, testPred, testY)

    # NOTE(review): MAPE is printed but not returned — callers receive only
    # MAE, RMSE (local name MRSE) and SMAPE.
    return trainPred, testPred, MAE, MRSE, SMAPE
# Example #4 (score: 0)
def main(run=1, force_run=False):
    """Cross-validated SVR training on textbook-similarity features.

    run: number of shuffle/train cycles.
    force_run: when True, skip the overwrite-confirmation prompt.
    Relies on module-level settings (_model_folder, _cross_valid, _tb_chs, ...).
    """
    mkdir(_model_folder)
    # Warn before overwriting earlier results.
    if not force_run and len(os.listdir(_model_folder)) > 0:
        ans = input(
            "Found something in '%s', which may be overwitten.\nProceed? [y/n]: "
            % _model_folder)
        if ans.lower() == 'n':
            exit(-1)

    for _ in range(run):
        samples = preprocessing.tp_sample.get_samples(_sample_folder)
        if _name_filter is not None:
            samples = [s for s in samples if s.batch_name in _name_filter]
        # Label variance over the whole pool (sanity check).
        print(np.var([get_label(s) for s in samples]))
        random.shuffle(samples)
        batches = preprocessing.batch_data(samples, _cross_valid)
        for fold in range(_cross_valid):
            valid_samples = batches[fold]
            # All batches except the held-out one form the training split.
            train_samples = [
                s for j in range(_cross_valid) if j != fold for s in batches[j]
            ]

            savedir = "%s/%d/" % (_model_folder, fold + 1)
            mkdir(savedir)

            # Feature matrices: similarity of each sample to textbook chapters.
            train_matrix = preprocessing.tb_similarity(train_samples,
                                                       chs=_tb_chs)
            valid_matrix = preprocessing.tb_similarity(valid_samples,
                                                       chs=_tb_chs)

            train_labels = np.asarray([get_label(s) for s in train_samples])
            valid_labels = np.asarray([get_label(s) for s in valid_samples])

            model = SVR(**_svm_parameters)
            valid_mse = model.train(train_matrix, train_labels,
                                    valid_matrix, valid_labels)
            model.save(savedir)
            model.destroy()

            print("Fold %2d: %.4f" % (fold + 1, valid_mse))
def main(run=1, force_run=False):
    """Cross-validated SVR training on each sample's comment text.

    run: number of shuffle/train cycles.
    force_run: when True, skip the overwrite-confirmation prompt.
    """
    mkdir(_model_folder)
    # Confirm before overwriting any previously saved models.
    if not force_run and len(os.listdir(_model_folder)) > 0:
        ans = input("Found something in '%s', which may be overwitten.\nProceed? [y/n]: " % _model_folder)
        if ans.lower() == 'n':
            exit(-1)

    for _ in range(run):
        samples = preprocessing.tp_sample.get_samples(_sample_folder)
        if _name_filter is not None:
            samples = [s for s in samples if s.batch_name in _name_filter]
        print("Variance: %.3f" % np.var([get_label(s) for s in samples]))
        random.shuffle(samples)
        batches = preprocessing.batch_data(samples, _cross_valid)
        for fold in range(_cross_valid):
            valid_samples = batches[fold]
            # Everything outside the held-out batch is training data.
            train_samples = [
                s for j in range(_cross_valid) if j != fold for s in batches[j]
            ]

            savedir = "%s/%d/" % (_model_folder, fold + 1)
            mkdir(savedir)

            # Features come from each sample's free-text comment field.
            train_texts = [s.comment for s in train_samples]
            valid_texts = [s.comment for s in valid_samples]
            train_matrix, valid_matrix, words = preprocessing.preprocess(
                train_texts, valid_texts, savedir=savedir,
                **_strategy_parameters)

            train_labels = np.asarray([get_label(s) for s in train_samples])
            valid_labels = np.asarray([get_label(s) for s in valid_samples])

            model = SVR(**_svm_parameters)
            valid_mse = model.train(train_matrix, train_labels,
                                    valid_matrix, valid_labels)
            model.save(savedir)
            model.destroy()

            print("Fold %2d: %.4f" % (fold + 1, valid_mse))
# Report label distribution of the fixed train/test split.
print("Train set distribution:", preprocessing.samples_statistics(train_samples, _sections, get_section))
print("Test set distribution:", preprocessing.samples_statistics(test_samples, _sections, get_section))

train_texts = [s.text for s in train_samples]
test_texts = [s.text for s in test_samples]

# Vocabulary comes from the top tf-idf words of the essay corpus.
tfidf_vectorizer = get_tfidfVectorizer_of_essay_top_tf_words()
print("Vectorizer built..")
train_matrix, test_matrix, words = preprocessing.preprocess(
    train_texts, test_texts, savedir=_save_dir, words_src=tfidf_vectorizer,
    normalize_flag=False, reduction=_reduction, reduce_n_attr=_reduce_n_attr,
    stem_words=_stem_words)


# Train one binary one-vs-rest classifier per section.
for section in _sections:
    train_labels = preprocessing.samples_to_binary(train_samples, [section], get_section)
    test_labels = preprocessing.samples_to_binary(test_samples, [section], get_section)

    model = SVR()
    print("Training for %s section.. " % section)
    model.train(train_matrix, train_labels)

    predict = model.predict(test_matrix)
    correct = 0
    for i in range(predict.shape[0]):
        # Threshold the regression output at 0.5, then score exact matches.
        predict[i] = 1 if predict[i] >= 0.5 else 0
        if predict[i] == test_labels[i]:
            correct += 1.0
    accuracy = correct / predict.shape[0]

    model.save("%s/%s/" % (_save_dir, section))
    print("Accuracy: %.3f" % accuracy)
    selection="tfidf",
    select_top=_textbook_words,
    savedir=_save_dir,
    words_src="textbook",
    normalize_flag=False,
    reduction=_reduction,
    reduce_n_attr=_reduce_n_attr,
    stem_words=_stem_words)

# Train and score one binary classifier per section label.
for section in _sections:
    train_labels = preprocessing.samples_to_binary(train_samples, [section],
                                                   get_section)
    test_labels = preprocessing.samples_to_binary(test_samples, [section],
                                                  get_section)

    model = SVR()
    print("Training for %s section.. " % section)
    model.train(train_matrix, train_labels)

    # Threshold regression outputs at 0.5 and measure exact-match accuracy.
    predict = model.predict(test_matrix)
    hits = 0
    for i in range(predict.shape[0]):
        predict[i] = 1 if predict[i] >= 0.5 else 0
        if predict[i] == test_labels[i]:
            hits += 1.0
    accuracy = hits / predict.shape[0]

    model.save("%s/%s/" % (_save_dir, section))
    print("Accuracy: %.3f" % accuracy)