def main():
    """Seed the RNGs, load the dataset, run feature-correlation analysis,
    and (optionally) produce the diagnostic plots.

    Relies on module-level configuration constants (FILE_NAME, RANDOM_SEED,
    GENERATE_PLOTS, ...) and the project modules `dataset`, `analyses`, `vis`.
    """
    np.set_printoptions(precision=3)
    # Seed both stdlib and NumPy RNGs so runs are reproducible.
    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)

    my_data = dataset.Dataset(FILE_NAME, TRAINING_PROPORTION, NORMALIZE, SVD_DIM, VERBOSE)
    analyses.compute_feature_correlations(my_data, True)

    # my_knn = knn.Knn(my_data, MIN_MAX_KNN, DISTANCE_METRIC, VERBOSE)
    # my_knn.train()
    # my_knn.test(my_data.test_list, my_data.training_list, my_knn.best_k)
    # knn_test_accuracy = my_knn.test_accuracy

    # BUG FIX: `my_logreg` was only ever assigned inside the commented-out
    # block below, so the `my_logreg is not None` guard further down raised
    # NameError whenever GENERATE_PLOTS was true. Initialize it to None so
    # the guard works as intended.
    my_logreg = None
    # my_logreg = lr.LogisticRegression(my_data, LEARNING_RATE, NUM_EPOCHS, VERBOSE, OUTPUT_FILE_NAME)
    # my_logreg.train()
    # my_logreg.test()
    # logreg_test_accuracy = my_logreg.test_accuracy

    if GENERATE_PLOTS:
        vis.plot_feature_scatter(my_data, WORD_LABELS, F1, F2, PLOT_SVDS)
        vis.plot_feature_by_category_scatter(my_data, F_INDEX, WORD_LABELS, PLOT_SVDS)
        vis.plot_hierarchical_cluster(my_data, PLOT_SVDS, SIM)
        # Regression-dependent plots only make sense if the model was trained.
        if my_logreg is not None:
            vis.plot_weight_heat_map(my_logreg)
            vis.plot_ypredict_yactual_scatter(my_logreg, WORD_LABELS, C_INDEX)
def main():
    """Load the dataset, train/test a logistic-regression model once, then
    repeat the train/test cycle 100 times and collect the test accuracies.

    Uses module-level configuration constants and the project modules
    `dataset` and `lr`.
    """
    np.set_printoptions(precision=3)
    # Reproducibility: seed both the stdlib and NumPy generators.
    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)

    my_data = dataset.Dataset(FILE_NAME, TRAINING_PROPORTION, NORMALIZE, SVD_DIM, VERBOSE)

    # analyses.compute_feature_correlations(my_data, VERBOSE)
    #
    # vis.plot_feature_scatter(my_data, WORD_LABELS, F1, F2, PLOT_SVDS)
    # vis.plot_feature_by_category_scatter(my_data, F_INDEX, WORD_LABELS, PLOT_SVDS)
    # vis.plot_hierarchical_cluster(my_data, PLOT_SVDS, SIM)
    #
    # my_knn = knn.Knn(my_data, MIN_MAX_KNN, DISTANCE_METRIC, VERBOSE)
    # my_knn.train()
    # my_knn.test(my_data.test_list, my_data.training_list, my_knn.best_k)

    # Initial single run (result not collected below).
    my_logreg = lr.LogisticRegression(my_data, LEARNING_RATE, NUM_EPOCHS, VERBOSE, OUTPUT_FILE_NAME)
    my_logreg.train()
    my_logreg.test()

    def _run_once():
        # Train and evaluate one fresh model; return its test accuracy.
        model = lr.LogisticRegression(my_data, LEARNING_RATE, NUM_EPOCHS, VERBOSE, OUTPUT_FILE_NAME)
        model.train()
        model.test()
        return model.test_accuracy

    # Repeat 100 times, gathering the test accuracy of each run.
    accuracy_list = [_run_once() for _ in range(100)]
def main():
    """Seed the RNGs, build the dataset, compute feature correlations, and
    plot the per-category feature scatter.

    Depends on module-level constants and the project modules `dataset`,
    `analyses`, and `vis`.
    """
    np.set_printoptions(precision=3)
    # Seed both RNG sources for reproducible runs.
    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)

    loaded = dataset.Dataset(FILE_NAME, TRAINING_PROPORTION, NORMALIZE, SVD_DIM, VERBOSE)
    analyses.compute_feature_correlations(loaded, VERBOSE)

    # vis.plot_feature_scatter(loaded, WORD_LABELS, F1, F2, PLOT_SVDS)
    vis.plot_feature_by_category_scatter(loaded, F_INDEX, WORD_LABELS, PLOT_SVDS)
# Remnants of an earlier transformer test (left commented out):
# look_ahead_mask=None,
# dec_padding_mask=None,
# )
# logger.info(
#     "output weights: {}, attention_weights: {}".format(fn_out.shape)
# )  # (batch_size, tar_seq_len, target_vocab_size)
# logger.info("END\n\n\n\n")

# --- Dataset pipeline smoke test -------------------------------------------
# Build train/test splits, fit the tokenizers, encode both splits, and
# inspect the tokenization of one source sample.
import src.dataset as dt

logger.info("Dataset Testing")

BUFFER_SIZE = 20000
BATCH_SIZE = 32

dataset = dt.Dataset(filename="./data/test.tsv")
dataset.build_train_test(test=0.2)
train_examples, test_examples = dataset.format_train_test()
tokenizer_source, tokenizer_target = dataset.tokenizer(train_examples)

# Encode both splits element-wise with the dataset's tf_encode mapper.
train_dataset = train_examples.map(dataset.tf_encode)
test_dataset = test_examples.map(dataset.tf_encode)

logger.info("######## Source")
# Take the first training example, keep the source side of the TSV pair,
# and run it through preprocessing before tokenizing.
decoded_sources = [
    i.numpy().decode("UTF-8").split("\t")[0].encode() for i in train_examples
]
sample_string = dt.preprocess_sentence(decoded_sources[0])
tokenized_string = tokenizer_source.encode(sample_string)
def main():
    """Entry point: construct the Dataset from module-level configuration."""
    data = dataset.Dataset(FILE_NAME, TRAINING_PROPORTION, NORMALIZE, SVD_DIM, VERBOSE)