ans2_pros.append(result[i + 1][0])
            i += 2
        if sorted(ans1_pros, reverse=True)[0] > sorted(ans2_pros,
                                                       reverse=True)[0]:
            predicty[key][0] = 1
            predicty[key + 1][0] = 0
        else:
            predicty[key][0] = 0
            predicty[key + 1][0] = 1
        print("update")

    return predicty


# train
# RBF-kernel SVM; probability=True is required so use_csk can read
# class probabilities when re-ranking paired answers.
model = svm.SVC(gamma=10, probability=True)
svm_train(data.trainset, model, 1470)
y, predicty = svm_test(data.testset, model)

# Baseline accuracy before the knowledge-based re-ranking; wrong cases
# are dumped to result_svm.txt for inspection.
eval1 = Evaluation()
eval1.accuracy(y, predicty, data)
with open('result_svm.txt', 'w') as f:
    for index, maxd in enumerate(eval1.wrong):
        f.write("Case #{}: {} ".format(index + 1, maxd) + '\n')

# predicty=[[0.1], [0.2], [0.1], [0.2], [0.1], [0.2]]
predicty = use_csk(data.testset, predicty, model)

# Evaluation after re-ranking. Named eval2 (consistent with eval1 above)
# instead of `eval`, which would shadow the builtin eval().
eval2 = Evaluation()
eval2.accuracy(y, predicty, data)

final = time.time()
print("time:", final - begin)
# Esempio n. 2
if __name__ == '__main__':
    # Earlier learning-curve experiment (accuracy vs. train-set size),
    # kept for reference:
    # datalen = [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000,
    #            2200, 2400, 2600, 2800, 3000]
    # data = Dataset()
    # results = []
    # for i in datalen:
    #     model = svm.SVC(gamma=10, probability=True)
    #     train(data.trainset, model, i)
    #     y, predicty = test(data.testset, model)
    #     eval = Evaluation()
    #     results.append(eval.accuracy(y, predicty, data))
    #
    # plt.xlabel("Train data text amount")
    # plt.ylabel("accuracy")
    # plt.plot(datalen, results)
    # plt.show()

    begin = time.time()
    data = Dataset()
    # RBF-kernel SVM with probability estimates enabled.
    model = svm.SVC(gamma=10, probability=True)
    train(data.trainset, model, 1470)
    y, predicty = test(data.testset, model)
    # Named `evaluator` rather than `eval` so the builtin eval() is not
    # shadowed.
    evaluator = Evaluation()
    evaluator.accuracy(y, predicty, data)
    # Dump misclassified cases for inspection.
    with open('result_svm.txt', 'w') as f:
        for index, maxd in enumerate(evaluator.wrong):
            f.write("Case #{}: {} ".format(index + 1, maxd) + '\n')
    # final = time.time()
    # print("time", final - begin)
# Esempio n. 3
 def run(self):
     """Fit against the validation split, then run an Evaluation on the
     results (non-verbose)."""
     fit_results = self.fit_to_validation()
     Evaluation(fit_results, verbose=False).run()
# Esempio n. 4
def main():
    """Train a binary BERT text classifier and report test-set metrics.

    Loads train/test CSVs, truncates each text to ``max_len`` tokens,
    converts texts to BERT features, fits the model with checkpointing on
    validation accuracy, then prints a classification report, confusion
    matrix, and accuracy.
    """
    # Directory Setting
    train_dir = "../data/binary_train.csv"
    test_dir = "../data/binary_test.csv"
    model_dir = "./model_save"

    # HyperParameter
    max_len = 50   # max whitespace tokens kept per text
    epoch = 2
    batch = 512
    # (removed unused `hidden_units` — it was never referenced)

    def _prepare(texts):
        # Truncate each text to max_len whitespace tokens and reshape to an
        # (n, 1) object array — the layout convert_text_to_examples expects.
        clipped = [' '.join(t.split()[0:max_len]) for t in texts]
        return np.array(clipped, dtype=object)[:, np.newaxis]

    # Flow
    print("0. Setting Environment")
    set_env()

    print("1. load data")
    train_x, train_y, test_x, test_y, val_x, val_y = load_data(
        train_dir, test_dir)

    print("2. pre processing")
    train_x = _prepare(train_x.tolist())
    val_x = _prepare(val_x.tolist())
    test_x = _prepare(test_x.tolist())

    tokenizer = create_tokenizer_from_hub_module()

    train_examples = convert_text_to_examples(train_x, train_y)
    val_examples = convert_text_to_examples(val_x, val_y)
    test_examples = convert_text_to_examples(test_x, test_y)

    train_input_ids, train_input_masks, train_segment_ids, train_labels = convert_examples_to_features(
        tokenizer, train_examples, max_len)
    val_input_ids, val_input_masks, val_segment_ids, val_labels = convert_examples_to_features(
        tokenizer, val_examples, max_len)
    test_input_ids, test_input_masks, test_segment_ids, test_labels = convert_examples_to_features(
        tokenizer, test_examples, max_len)

    print("3. build model")
    model = BERT(max_len, data_type="binary")
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    # NOTE(review): `sess` comes from module scope — confirm it is a live
    # TF1-style session before this call.
    initialize_vars(sess)

    # Keep only checkpoints that improve validation accuracy.
    cp_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=model_dir + "/model-weights.{epoch:02d}-{val_acc:.6f}.hdf5",
        monitor='val_acc',
        save_best_only=True,
        verbose=1)
    model.fit(
        [train_input_ids, train_input_masks, train_segment_ids],
        train_labels,
        validation_data=([val_input_ids, val_input_masks,
                          val_segment_ids], val_labels),
        epochs=epoch,
        batch_size=batch,
        callbacks=[cp_callback])

    print("4. evaluation")
    evaluation = Evaluation(
        model, [test_input_ids, test_input_masks, test_segment_ids], test_y)
    accuracy, cf_matrix, report = evaluation.eval_classification_bert(
        data_type="binary")
    print("## Classification Report \n", report)
    print("## Confusion Matrix \n", cf_matrix)
    print("## Accuracy \n", accuracy)