Esempio n. 1
0
def main():
    """Load the dataset, compute feature correlations, and (optionally)
    generate the scatter/cluster plots.

    Relies on module-level configuration constants (FILE_NAME, RANDOM_SEED,
    GENERATE_PLOTS, ...) and the project modules dataset/analyses/vis.
    """
    np.set_printoptions(precision=3)
    # Seed both the stdlib and NumPy RNGs so the train/test split and any
    # stochastic training are reproducible.
    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)

    my_data = dataset.Dataset(FILE_NAME, TRAINING_PROPORTION, NORMALIZE,
                              SVD_DIM, VERBOSE)

    analyses.compute_feature_correlations(my_data, True)

    # BUG FIX: my_logreg was only assigned inside the commented-out section
    # below, so the `if my_logreg is not None` guard raised NameError whenever
    # GENERATE_PLOTS was true. Initialize it so the guard works as intended.
    my_logreg = None

    # my_knn = knn.Knn(my_data, MIN_MAX_KNN, DISTANCE_METRIC, VERBOSE)
    # my_knn.train()
    # my_knn.test(my_data.test_list, my_data.training_list, my_knn.best_k)
    # knn_test_accuracy = my_knn.test_accuracy

    # my_logreg = lr.LogisticRegression(my_data, LEARNING_RATE, NUM_EPOCHS, VERBOSE, OUTPUT_FILE_NAME)
    # my_logreg.train()
    # my_logreg.test()
    # logreg_test_accuracy = my_logreg.test_accuracy

    if GENERATE_PLOTS:
        vis.plot_feature_scatter(my_data, WORD_LABELS, F1, F2, PLOT_SVDS)
        vis.plot_feature_by_category_scatter(my_data, F_INDEX, WORD_LABELS,
                                             PLOT_SVDS)
        vis.plot_hierarchical_cluster(my_data, PLOT_SVDS, SIM)

        # Only plot model diagnostics when a model was actually trained
        # (i.e. the logistic-regression section above is re-enabled).
        if my_logreg is not None:
            vis.plot_weight_heat_map(my_logreg)
            vis.plot_ypredict_yactual_scatter(my_logreg, WORD_LABELS, C_INDEX)
Esempio n. 2
0
def main():
    """Train and evaluate logistic regression repeatedly and collect the
    per-run test accuracies.

    Returns:
        list: test accuracy of each of the 100 training runs. (Previously
        the list was built and silently discarded; returning it is
        backward-compatible since the function used to return None.)
    """
    np.set_printoptions(precision=3)
    # Seed both RNGs for a reproducible split and training sequence.
    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)

    my_data = dataset.Dataset(FILE_NAME, TRAINING_PROPORTION, NORMALIZE,
                              SVD_DIM, VERBOSE)

    # analyses.compute_feature_correlations(my_data, VERBOSE)
    #
    # vis.plot_feature_scatter(my_data, WORD_LABELS, F1, F2, PLOT_SVDS)
    # vis.plot_feature_by_category_scatter(my_data, F_INDEX, WORD_LABELS, PLOT_SVDS)
    # vis.plot_hierarchical_cluster(my_data, PLOT_SVDS, SIM)
    #
    # my_knn = knn.Knn(my_data, MIN_MAX_KNN, DISTANCE_METRIC, VERBOSE)
    # my_knn.train()
    # my_knn.test(my_data.test_list, my_data.training_list, my_knn.best_k)

    # NOTE(review): the original code trained and tested one extra model
    # before the loop and threw the result away; that dead run has been
    # removed (this shifts the RNG stream by one run's worth of draws).
    accuracy_list = []
    for _ in range(100):
        my_logreg = lr.LogisticRegression(my_data, LEARNING_RATE, NUM_EPOCHS,
                                          VERBOSE, OUTPUT_FILE_NAME)
        my_logreg.train()
        my_logreg.test()
        accuracy_list.append(my_logreg.test_accuracy)

    return accuracy_list
Esempio n. 3
0
def main():
    """Build the dataset, report feature correlations, and draw the
    per-category feature scatter plot."""
    np.set_printoptions(precision=3)
    # Fixed seeds keep the split and any stochastic steps reproducible.
    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)

    my_data = dataset.Dataset(
        FILE_NAME, TRAINING_PROPORTION, NORMALIZE, SVD_DIM, VERBOSE)

    analyses.compute_feature_correlations(my_data, VERBOSE)
    #
    # vis.plot_feature_scatter(my_data, WORD_LABELS, F1, F2, PLOT_SVDS)
    vis.plot_feature_by_category_scatter(
        my_data, F_INDEX, WORD_LABELS, PLOT_SVDS)
Esempio n. 4
0
#     look_ahead_mask=None,
#     dec_padding_mask=None,
# )

# logger.info(
#     "output weights: {}, attention_weights: {}".format(fn_out.shape)
# )  # (batch_size, tar_seq_len, target_vocab_size)
# logger.info("END\n\n\n\n")

import src.dataset as dt

logger.info("Dataset Testing")
BUFFER_SIZE = 20000
BATCH_SIZE = 32

# Build the train/test split and the source/target tokenizers from the
# TSV corpus. (Dataset/tokenizer semantics come from src.dataset — the
# examples appear to be TF tensors, given .numpy()/.map below.)
dataset = dt.Dataset(filename="./data/test.tsv")
dataset.build_train_test(test=0.2)
train_examples, test_examples = dataset.format_train_test()
tokenizer_source, tokenizer_target = dataset.tokenizer(train_examples)

train_dataset = train_examples.map(dataset.tf_encode)

test_dataset = test_examples.map(dataset.tf_encode)

logger.info("######## Source")

# PERF FIX: the original built a Python list of EVERY example (decoding each
# one) just to index [0]. Take only the first example instead.
first_example = next(iter(train_examples))
sample_string = dt.preprocess_sentence(
    first_example.numpy().decode("UTF-8").split("\t")[0].encode()
)

tokenized_string = tokenizer_source.encode(sample_string)
Esempio n. 5
0
def main():

    my_data = dataset.Dataset(FILE_NAME, TRAINING_PROPORTION, NORMALIZE,
                              SVD_DIM, VERBOSE)