Example 1
import time

import numpy as np
import torch
from torch.autograd import Variable
from sklearn.metrics import f1_score


def test(args, shared_model, dataset, targets, log):
    start_time = time.time()
    log.info('Test time ' + time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - start_time)) + ', ' + 'Start testing.')
    local_model = SVM()  # project-local model class (a compatible sketch follows the example)
    local_model.load_state_dict(shared_model.state_dict())
    if args.gpu:
        local_model = local_model.cuda()

    correct_cnt = 0
    predictions = np.zeros([targets.shape[0]], dtype=np.int64)

    for idx in range(targets.shape[0]):
        data = dataset[idx]
        data = Variable(torch.from_numpy(data))
        if args.gpu:
            data = data.cuda()

        target = targets[idx]
        output = local_model(data)
        if args.gpu:
            output = output.cpu()
        predict_class = output.max(0)[1].data.numpy()[0]
        predictions[idx] = predict_class
        if target == predict_class:
            correct_cnt += 1
        # else:
        #     print(predict_class)

        # if (idx + 1) % 100 == 0:
        #     log.info('Test time ' + time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - start_time)) + ', ' + 'Accuracy: %d / %d\t%0.4f' % (correct_cnt, idx + 1, correct_cnt / (idx + 1)))

    log.info('Overall f1 score = %0.4f' % (f1_score(list(targets), list(predictions), average='weighted')))
    log.info('Overall accuracy = %0.2f%%' % (100 * correct_cnt / targets.shape[0]))
    return correct_cnt / targets.shape[0]
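The `SVM` class used above is project-local and not shown; a minimal sketch of a compatible module, assuming a single linear layer and guessed dimensions, could be:

import torch.nn as nn

class SVM(nn.Module):
    # Hypothetical stand-in: raw class scores from one linear layer,
    # so output.max(0)[1] picks the highest-scoring class.
    def __init__(self, in_features=256, n_classes=10):
        super(SVM, self).__init__()
        self.fc = nn.Linear(in_features, n_classes)

    def forward(self, x):
        return self.fc(x)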
Example 2
def svm():
    # *********************    load the dataset and split into X and y   ***********************
    from sklearn.datasets import make_blobs
    X, Y = make_blobs(cluster_std=0.9,
                      random_state=20,
                      n_samples=1000,
                      centers=10,
                      n_features=10)

    from Algorithms.ML_.helper.data_helper import split_train_val_test
    X, Xv, y, Yv, Xt, Yt = split_train_val_test(X, Y)
    print(X.shape, y.shape, Xv.shape, Yv.shape, Xt.shape, Yt.shape)

    # *********************   build model    ***********************
    from model import SVM
    from activation import Activation, Softmax, Hinge
    from regularization import Regularization, L1, L2, L12
    from optimizer import Vanilla
    model = SVM()
    learning_rate, reg_rate = 1e-3, 5e-1
    model.compile(alpha=learning_rate,
                  lambda_=reg_rate,
                  activation=Softmax(),
                  reg=L2(),
                  opt=Vanilla())
    model.describe()
    # *********************    train   ***********************
    loss_train, loss_val = model.train(X,
                                       y,
                                       val=(Xv, Yv),
                                       iter_=1000,
                                       return_loss=True,
                                       verbose=True,
                                       eps=1e-3)
    import matplotlib.pyplot as plt
    plt.plot(range(len(loss_train)), loss_train)
    plt.plot(range(len(loss_val)), loss_val)
    plt.legend(['train', 'val'])
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.title('Loss history')
    plt.show()
    # *********************    predict   ***********************
    pred_train = model.predict(X)
    pred_val = model.predict(Xv)
    pred_test = model.predict(Xt)

    import metrics

    print('train accuracy=', metrics.accuracy(y, pred_train))
    print('val accuracy=', metrics.accuracy(Yv, pred_val))
    print('test accuracy=', metrics.accuracy(Yt, pred_test))
    print('null accuracy=', metrics.null_accuracy(y))
    metrics.print_metrics(Yt, pred_test)
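`split_train_val_test` returns splits in the unusual order `X, Xv, y, Yv, Xt, Yt`; a hedged sketch of such a helper (the 60/20/20 ratios are assumptions) is:

from sklearn.model_selection import train_test_split

def split_train_val_test(X, y, val=0.2, test=0.2, random_state=0):
    # Hypothetical helper matching the return order used above.
    X_rest, Xt, y_rest, Yt = train_test_split(X, y, test_size=test,
                                              random_state=random_state)
    X_tr, Xv, y_tr, Yv = train_test_split(X_rest, y_rest,
                                          test_size=val / (1 - test),
                                          random_state=random_state)
    return X_tr, Xv, y_tr, Yv, Xt, Yt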
Example 3
def run():
    X_train, Y_train, X_test, Y_test = load_dataset()

    print("SVM model...")
    SVM(X_train, Y_train, X_test, Y_test)
    print("Done SVM")

    print("Random forest model...")
    random_forest(X_train, Y_train, X_test, Y_test)
    print("Done random forest")

    print("Neural network model...")

    # one-hot encode the 2-class labels; axis=0 gives shape (classes, m)
    C = tf.constant(2, name='C')
    one_hot_matrix_train = tf.one_hot(Y_train, C, axis=0)
    one_hot_matrix_test = tf.one_hot(Y_test, C, axis=0)
    with tf.Session() as sess:
        one_hot_train = sess.run(one_hot_matrix_train)
        one_hot_test = sess.run(one_hot_matrix_test)

    Y_train = one_hot_train
    Y_test = one_hot_test
    X_train = X_train.T
    X_test = X_test.T

    weights = neural_network(X_train, Y_train, X_test, Y_test,
                             [18, 8, 2], print_cost=True)

    return weights
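The `tf.Session` block above is TensorFlow 1.x style; under TensorFlow 2.x eager execution the same one-hot conversion needs no session (a sketch, assuming integer labels):

# TF 2.x equivalent of the session block above
one_hot_train = tf.one_hot(Y_train, 2, axis=0).numpy()
one_hot_test = tf.one_hot(Y_test, 2, axis=0).numpy()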
Example 4
def train_new_model(data_obj,
                    with_weights,
                    n_of_models,
                    training_steps,
                    svm=False,
                    with_importance=False):
    LAMBDA = 0.005

    models = []
    accuracies, fscores = np.zeros(n_of_models), np.zeros(n_of_models)

    for k in range(n_of_models):  # Cross-validation
        data_obj.compute_splits(RANDOM_STATE + k)

        if not svm:
            the_model = Model(data_obj,
                              with_weights=with_weights,
                              with_importance=with_importance)
        else:
            the_model = SVM(data_obj)

        print("\nTraining model {:d} ({:s})".format(k, data_obj.country))
        print("===============================")

        the_model.train(training_steps=training_steps, lambda0=LAMBDA)

        models.append(the_model)
        accuracies[k], fscores[k], _, _ = the_model.get_accuracy("test")

    print("Test accuracy (cross-validation): ", np.mean(accuracies),
          np.std(accuracies), accuracies)
    print("Test fscore (cross-validation): ", np.mean(fscores),
          np.std(fscores), fscores)

    # Save models
    fname = MODELS_PATH + "models-" + models[-1].name + ".pkl"
    print("Saving model in " + fname)
    if not svm:
        if with_weights:
            models_data = [(model.train_params, model.d.get_value(),
                            model.w.get_value()) for model in models]
        else:
            models_data = [(model.train_params, model.d.get_value())
                           for model in models]
    else:
        models_data = [model.predictions for model in models]

    with open(fname, 'wb') as f:
        pickle.dump(models_data, f, pickle.HIGHEST_PROTOCOL)

    if not svm:
        # Plot training loss
        err_vec = models[-1].train_params['training_loss']
        plt.semilogy(range(5, training_steps, 5), err_vec[1:])
        plt.show()

    return models
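A hedged usage sketch; `data_obj`, `RANDOM_STATE`, and `MODELS_PATH` must already be defined by the surrounding project:

# Hypothetical call: five repeated splits of the SVM baseline
models = train_new_model(data_obj, with_weights=False, n_of_models=5,
                         training_steps=500, svm=True)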
Example 5
def train():

    train_loader = DataLoader(dataset=dataset,
                              batch_size=config.batch,
                              shuffle=True,
                              collate_fn=collate_fn,
                              num_workers=0)

    model = SVM(config.embedding, config.strmaxlen, dataset.get_vocab_size(), config.output_size)
    optimizer = optim.SGD(model.parameters(), lr=config.lr)
    model.train()
    for epoch in range(config.epoch):

        sum_loss = 0
        for i, (data, labels) in enumerate(train_loader):
            optimizer.zero_grad()
            output = model(data).squeeze()
            weight = model.weight.squeeze()
            weight = weight.reshape((weight.shape[0], 1))

            # soft-margin objective: hinge loss plus (c/2) * ||w||^2
            loss = model.loss(output, labels)
            tmp = weight.t() @ weight
            loss += config.c * tmp[0][0] / 2.0

            loss.backward()
            optimizer.step()

            sum_loss += float(loss)

        print("Epoch: {:4d}\tloss: {}".format(epoch, sum_loss /len(dataset)))
Example 6
def test_svm(train_data,
             test_data,
             kernel_func=linear_kernel,
             lambda_param=.1):
    """
    Create an SVM classifier with a specificied kernel_func, train it with
    train_data and print the accuracy of model on test_data

    :param train_data: a namedtuple including training inputs and training labels
    :param test_data: a namedtuple including test inputs and test labels
    :param kernel_func: kernel function to use in the SVM
    :return: None
    """
    svm_model = SVM(kernel_func=kernel_func, lambda_param=lambda_param)
    svm_model.train(train_data.inputs, train_data.labels)
    train_accuracy = svm_model.accuracy(train_data.inputs, train_data.labels)
    test_accuracy = svm_model.accuracy(test_data.inputs, test_data.labels)
    if train_accuracy is not None:
        print('Train accuracy:', round(train_accuracy * 100, 2), '%')
    if test_accuracy is not None:
        print('Test accuracy:', round(test_accuracy * 100, 2), '%')
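`linear_kernel` is referenced but not defined here; standard definitions (the RBF `gamma` default is an assumption) would be:

import numpy as np

def linear_kernel(x, y):
    # k(x, y) = <x, y>
    return np.dot(x, y)

def rbf_kernel(x, y, gamma=0.5):
    # k(x, y) = exp(-gamma * ||x - y||^2)
    return np.exp(-gamma * np.sum((np.asarray(x) - np.asarray(y)) ** 2))

# e.g. swap in the RBF kernel:
test_svm(train_data, test_data, kernel_func=rbf_kernel)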
Example 7
	exp = re.compile(POS, re.IGNORECASE)
	
	# load sentiment dictionary
	bag = utils.load_dictionary()

	# load the model if it exists
	try:
		with open("../Resources/models/model", "rb") as model_file:
			model = pickle.load(model_file)
	except IOError:
		# load training reviews from file
		train_review = utils.load_reviews("../Resources/samples/train_data")
		# get features from the training data
		train_data, train_label = feature_data(tagger, exp, bag, train_review)
		# initialize the classifier
		model = SVM()
		# train the model
		model.train(train_data, train_label)
		# save the model
		with open("../Resources/models/model", "wb") as model_file:
			pickle.dump(model, model_file)
	else:
		print("using saved model...")
	
	# load test reviews from file
	test_review = utils.load_reviews("../Resources/samples/test_data")
	# get features from the test data
	test_data, test_label = feature_data(tagger, exp, bag, test_review)
	
	# predict on the test data
	result = model.predict(test_data)
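The snippet stops at prediction; a hedged follow-up that scores `result` against the gold labels (assuming both are equal-length label sequences) might be:

# Hypothetical evaluation of the predictions above
correct = sum(1 for pred, gold in zip(result, test_label) if pred == gold)
print("accuracy: {:.4f}".format(correct / len(test_label)))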
Example 8
# zero-center every split with the training-set mean
mu = X.mean(axis=0)
X, Xv, Xte, Xd = X - mu, Xv - mu, Xte - mu, Xd - mu

# *********************    train   ***********************
# model = SVM()
# model.compile(lambda_=2.5e4, alpha=1e-7)  # 1e-7, reg=2.5e4,
# loss_history = model.train(X, y, eps=0.001, batch=200, iter_=1500)
#
# plt.plot(range(len(loss_history)), loss_history)
# plt.xlabel('Iteration number')
# plt.ylabel('Loss value')
# plt.show()
# print(loss_history[::100])
# lr, rg = SVM.ff(X, y, Xv, Yv, [1e-7, 1e-6],[2e4, 2.5e4, 3e4, 3.5e4, 4e4, 4.5e4, 5e4, 6e4])
# print(lr, rg)
model = SVM()
model.compile(alpha=1e-7, lambda_=2, activation=Softmax, reg=L2)
# model.compile(alpha=0, lambda_=0, activation=Hinge, Reg=L2, dReg=dL2)
history = model.train(Xd, Yd, iter_=0, eps=0.0001)  # iter_=0: initialize only, for the gradient checks below
print(model.loss(model.X, model.y, add_ones=False),
      np.sum(model.grad(model.X, model.y, False)))
L, dW = model.grad(model.X, model.y, True)
print(L, np.sum(dW))
# print(np.sum(model.W))

# print(np.sum(model.grad(model.X, model.y, loss_=False)))
# print(np.sum(model.grad1(model.X, model.y)))
# L, dW = model.activation.loss_grad_loop(model.X, model.W, model.y)
# print(L, np.sum(dW))

loss_history = model.train(X, y, eps=0.0001, batch=200, iter_=1500)
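The commented-out `SVM.ff(...)` line suggests a grid search over learning rates and regularization strengths; a generic sketch of that search (the model API and selection metric are assumptions) could be:

# Hypothetical grid search mirroring the commented-out SVM.ff call
best, best_acc = None, -1.0
for lr in [1e-7, 1e-6]:
    for rg in [2e4, 2.5e4, 3e4, 3.5e4, 4e4, 4.5e4, 5e4, 6e4]:
        m = SVM()
        m.compile(alpha=lr, lambda_=rg, activation=Softmax, reg=L2)
        m.train(X, y, eps=0.0001, batch=200, iter_=500)
        acc = np.mean(m.predict(Xv) == Yv)
        if acc > best_acc:
            best, best_acc = (lr, rg), acc
print(best, best_acc)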
Example 9
    def MFE(self, X_split, y_split, model):
        if model == 'SVM':
            X_split_scaled = standard_scale(X_split)
            Model = SVM()
            Model.fit(X_split_scaled[0], y_split[0])
            y_hat = Model.predict(X_split_scaled[2])

        elif model == 'RF':
            Model = RF()
            Model.fit(np.concatenate([X_split[0], X_split[1]]),
                      np.concatenate([y_split[0], y_split[1]]))
            y_hat = Model.predict(X_split[2])

        elif model == 'FNN':
            X_split_scaled = standard_scale(X_split)
            Model = FNN(model)
            Model.fit(X_split_scaled[0],
                      y_split[0],
                      validation_data=[X_split_scaled[1], y_split[1]],
                      epochs=self.MAX_EPOCH,
                      batch_size=self.BATCH_SIZE,
                      callbacks=[self.es])
            y_hat = Model.predict_classes(X_split_scaled[2])

        else:
            raise ValueError('model undefined: ' + model)

        return self.evaluate(y_split[2], y_hat)
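`standard_scale` is not shown; a plausible sketch that fits scaling statistics on the training split only (an assumption about the helper) is:

def standard_scale(X_split):
    # Hypothetical helper: standardize every split with train-split statistics
    mu = X_split[0].mean(axis=0)
    sigma = X_split[0].std(axis=0) + 1e-8  # guard against zero variance
    return [(X - mu) / sigma for X in X_split]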
Example 10
dataset_path = "../output/data/dataset_test.npy"
target_path = "../output/data/target_test.npy"

if __name__ == '__main__':
    args = parser.parse_args()
    torch.set_default_tensor_type('torch.DoubleTensor')
    torch.manual_seed(args.seed)
    random.seed(args.seed)

    if not os.path.exists(args.model_dir):
        os.mkdir(args.model_dir)
    if not os.path.exists(args.log_dir):
        os.mkdir(args.log_dir)

    if args.train:
        model = SVM()
        if args.model_load:
            try:
                saved_state = torch.load(
                    os.path.join(args.model_dir, 'best_model.dat'))
                model.load_state_dict(saved_state)
            except Exception:
                print('Cannot load existing model from file!')
        if args.gpu:
            model = model.cuda()

        dataset = torch.from_numpy(np.load("../output/data/dataset_train.npy"))
        targets = torch.from_numpy(
            np.int64(np.load("../output/data/target_train.npy")))
        dataset_test = np.load(dataset_path)
        targets_test = np.int64(np.load(target_path))