def train():
    """Train the binary sentiment classifier.

    Builds the graph via ``imp.define_graph()``, streams balanced batches of
    pre-embedded reviews through the optimizer, logs scalar summaries to a
    timestamped tensorboard directory, and checkpoints every SAVE_FREQ steps.
    Relies on module-level globals: BATCH_SIZE, MAX_WORDS_IN_REVIEW,
    EMBEDDING_SIZE, iterations, SAVE_FREQ, checkpoints_dir.
    """

    def _next_training_batch():
        """Return (batch, labels): even slots positive, odd slots negative.

        Assumes rows 0-12499 of the embedded data are positive reviews and
        rows 12500-24999 negative — TODO confirm against load_data().
        """
        batch = np.zeros([BATCH_SIZE, MAX_WORDS_IN_REVIEW, EMBEDDING_SIZE])
        onehots = []
        for slot in range(BATCH_SIZE):
            if slot % 2 == 0:
                review = randint(0, 12499)
                onehots.append([1, 0])
            else:
                review = randint(12500, 24999)
                onehots.append([0, 1])
            batch[slot] = training_data_embedded[review, :, :]
        return batch, onehots

    tf.reset_default_graph()

    # Call implementation: embeddings, data, and the model graph.
    glove_array, glove_dict = load_glove_embeddings()
    training_data_text = load_data()
    training_data_embedded = embedd_data(training_data_text, glove_array,
                                         glove_dict)
    input_data, labels, dropout_keep_prob, optimizer, accuracy, loss = \
        imp.define_graph()

    # Tensorboard scalar summaries.
    tf.summary.scalar("training_accuracy", accuracy)
    tf.summary.scalar("loss", loss)
    summary_op = tf.summary.merge_all()

    # Checkpoint saver.
    all_saver = tf.train.Saver()

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    logdir = "tensorboard/" + datetime.datetime.now().strftime(
        "%Y%m%d-%H%M%S") + "/"
    writer = tf.summary.FileWriter(logdir, sess.graph)

    for step in range(iterations):
        batch_data, batch_labels = _next_training_batch()
        sess.run(optimizer, {input_data: batch_data,
                             labels: batch_labels,
                             dropout_keep_prob: 0.6})
        if step % 50 == 0:
            # dropout_keep_prob is not fed here — presumably a
            # placeholder_with_default; verify in imp.define_graph.
            loss_value, accuracy_value, summary = sess.run(
                [loss, accuracy, summary_op],
                {input_data: batch_data, labels: batch_labels})
            writer.add_summary(summary, step)
            print("Iteration: ", step)
            print("loss", loss_value)
            print("acc", accuracy_value)
        if step % SAVE_FREQ == 0 and step != 0:
            if not os.path.exists(checkpoints_dir):
                os.makedirs(checkpoints_dir)
            save_path = all_saver.save(sess,
                                       checkpoints_dir + "/trained_model.ckpt",
                                       global_step=step)
            print("Saved model to %s" % save_path)
    sess.close()
# NOTE(review): fragment — the enclosing batch builder's `def`, loop header,
# and the `labels`/`arr` initialisation were cut off above this excerpt.
    # Samples only reviews 11499-13499, a narrow window around the
    # positive/negative boundary — TODO confirm this is intentional.
    num = randint(11499, 13499)
    if (num <= 12499):
        # Presumably rows 0-12499 are positive reviews — verify against
        # imp.load_data.
        labels.append([1, 0])
    else:
        labels.append([0, 1])
    arr[i] = training_data[num]
    return arr, labels

# Call implementation: embeddings, training data, and the model graph.
glove_array, glove_dict = imp.load_glove_embeddings()
training_data = imp.load_data(glove_dict)
print('Size of training data: ', len(training_data))
input_data, labels, dropout_keep_prob, optimizer, accuracy, loss = imp.define_graph(
    glove_array)

# tensorboard: scalar summaries for accuracy and loss.
train_accuracy_op = tf.summary.scalar("accuracy", accuracy)
tf.summary.scalar("loss", loss)
summary_op = tf.summary.merge_all()

# saver for periodic checkpoints.
all_saver = tf.train.Saver()

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

# One timestamp shared by the train and test log directories so the two
# runs pair up in tensorboard.
str_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_logdir = "tensorboard/" + str_time + "-train/"
test_logdir = "tensorboard/" + str_time + "-test/"
arr = np.zeros([batch_size, 40]) for i in range(batch_size): num = randint(10000, 14499) if (num <= 12499): labels.append([1, 0]) else: labels.append([0, 1]) arr[i] = training_data[num - 1:num] return arr, labels # Call implementation glove_array, glove_dict = imp.load_glove_embeddings() training_data = imp.load_data(glove_dict) input_data, labels, dropout_keep_prob, optimizer, accuracy, loss = \ imp.define_graph(glove_array) # tensorboard train_accuracy_op = tf.summary.scalar("training_accuracy", accuracy) tf.summary.scalar("loss", loss) summary_op = tf.summary.merge_all() # saver all_saver = tf.train.Saver() sess = tf.InteractiveSession() sess.run(tf.global_variables_initializer()) logdir = "tensorboard/" + datetime.datetime.now().strftime( "%Y%m%d-%H%M%S") + "/" writer = tf.summary.FileWriter(logdir, sess.graph)
def train():
    """Train the 5-class review classifier, checkpoint periodically, then
    evaluate once on the validation split.

    Relies on module-level globals: BATCH_SIZE, MAX_WORDS_IN_REVIEW,
    EMBEDDING_SIZE, iterations, SAVE_FREQ, checkpoints_dir, dir_path.
    """

    def getTrainBatch():
        # Class-balanced batch: slot i draws a random review from class
        # i % 5. Assumes the embedded training data is laid out as five
        # contiguous blocks of 36000 reviews each — TODO confirm against
        # load_data().
        labels = []
        arr = np.zeros([BATCH_SIZE, MAX_WORDS_IN_REVIEW, EMBEDDING_SIZE])
        for i in range(BATCH_SIZE):
            if (i % 5 == 0):
                num = randint(0, 35999)
                labels.append([1, 0, 0, 0, 0])
            elif (i % 5 == 1):
                num = randint(36000, 71999)
                labels.append([0, 1, 0, 0, 0])
            elif (i % 5 == 2):
                num = randint(72000, 107999)
                labels.append([0, 0, 1, 0, 0])
            elif (i % 5 == 3):
                num = randint(108000, 143999)
                labels.append([0, 0, 0, 1, 0])
            elif (i % 5 == 4):
                num = randint(144000, 179999)
                labels.append([0, 0, 0, 0, 1])
            arr[i] = training_data_embedded[num, :, :]
        return arr, labels

    # Call implementation
    glove_array, glove_dict = load_glove_embeddings()
    training_data_text = load_data()
    # pprint(training_data_text)

    # ========= FOR EVAL =========
    data_text = load_data(path=dir_path + "/data/validate")
    test_data = embedd_data(data_text, glove_array, glove_dict)
    num_samples = len(test_data)
    # ========= FOR EVAL =========

    training_data_embedded = embedd_data(training_data_text, glove_array,
                                         glove_dict)
    input_data, labels, dropout_keep_prob, optimizer, accuracy, loss = \
        imp.define_graph()

    # tensorboard
    tf.summary.scalar("training_accuracy", accuracy)
    tf.summary.scalar("loss", loss)
    summary_op = tf.summary.merge_all()

    # saver
    all_saver = tf.train.Saver()

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    # FIX: summary_op was evaluated every 50 steps but no FileWriter existed
    # and add_summary was never called, so the summaries were computed and
    # discarded. Create a writer (as the sibling training scripts do) so
    # they actually reach tensorboard.
    logdir = "tensorboard/" + datetime.datetime.now().strftime(
        "%Y%m%d-%H%M%S") + "/"
    writer = tf.summary.FileWriter(logdir, sess.graph)

    for i in range(iterations):
        batch_data, batch_labels = getTrainBatch()
        sess.run(optimizer, {
            input_data: batch_data,
            labels: batch_labels,
            dropout_keep_prob: 0.6
        })
        if (i % 50 == 0):
            # dropout_keep_prob is not fed here — presumably a
            # placeholder_with_default; verify in imp.define_graph.
            loss_value, accuracy_value, summary = sess.run(
                [loss, accuracy, summary_op], {
                    input_data: batch_data,
                    labels: batch_labels
                })
            writer.add_summary(summary, i)
            print("Iteration: ", i)
            print("loss", loss_value)
            print("acc", accuracy_value)
        if (i % SAVE_FREQ == 0 and i != 0):
            if not os.path.exists(checkpoints_dir):
                os.makedirs(checkpoints_dir)
            save_path = all_saver.save(sess,
                                       checkpoints_dir + "/trained_model.ckpt",
                                       global_step=i)
            print("Saved model to %s" % save_path)

    print("Running eval now")
    num_batches = num_samples // BATCH_SIZE
    # Validation labels: assumes the validation set is ordered as five equal
    # contiguous class blocks — TODO confirm against the data on disk.
    label_list = [[1, 0, 0, 0, 0]] * (num_samples // 5)
    label_list.extend([[0, 1, 0, 0, 0]] * (num_samples // 5))
    label_list.extend([[0, 0, 1, 0, 0]] * (num_samples // 5))
    label_list.extend([[0, 0, 0, 1, 0]] * (num_samples // 5))
    label_list.extend([[0, 0, 0, 0, 1]] * (num_samples // 5))
    assert (len(label_list) == num_samples)
    total_acc = 0
    for i in range(num_batches):
        sample_index = i * BATCH_SIZE
        batch = test_data[sample_index:sample_index + BATCH_SIZE]
        batch_labels = label_list[sample_index:sample_index + BATCH_SIZE]
        lossV, accuracyV = sess.run([loss, accuracy], {
            input_data: batch,
            labels: batch_labels
        })
        total_acc += accuracyV
        print("Accuracy %s, Loss: %s" % (accuracyV, lossV))
    print('-' * 40)
    print("FINAL ACC:", total_acc / num_batches)
    #new_sess.close()
    sess.close()
arr = np.zeros([batch_size, seq_length]) for i in range(batch_size): if (i % 2 == 0): num = randint(0, 12499) labels.append([1, 0]) else: num = randint(12500, 24999) labels.append([0, 1]) arr[i] = training_data[num] return arr, labels # Call implementation glove_array, glove_dict = imp.load_glove_embeddings() training_data = imp.load_data(glove_dict) input_data, labels, dropout_keep_prob, optimizer, accuracy, loss = \ imp.define_graph(glove_array) # tensorboard train_accuracy_op = tf.summary.scalar("training_accuracy", accuracy) tf.summary.scalar("loss", loss) summary_op = tf.summary.merge_all() # saver all_saver = tf.train.Saver() sess = tf.InteractiveSession() sess.run(tf.global_variables_initializer()) logdir = "tensorboard/" + datetime.datetime.now().strftime( "%Y%m%d-%H%M%S") + "/" writer = tf.summary.FileWriter(logdir, sess.graph)
labels = [] arr = np.zeros([batch_size, seq_length]) for i in range(batch_size): num = randint(0, training_data.shape[0] - 1) label = [0, 0, 0, 0] label[training_classes[num]] = 1 labels.append(label) arr[i] = training_data[num] return arr, labels # Call implementation word2vec_array, word2vec_dict = imp.load_word2vec_embeddings() training_data, training_classes = imp.load_data(word2vec_dict) input_data, labels, dropout_keep_prob, optimizer, accuracy, loss = \ imp.define_graph(word2vec_array) # tensorboard train_accuracy_op = tf.summary.scalar("training_accuracy", accuracy) tf.summary.scalar("loss", loss) summary_op = tf.summary.merge_all() # saver all_saver = tf.train.Saver() sess = tf.InteractiveSession() sess.run(tf.global_variables_initializer()) logdir = "tensorboard/" + datetime.datetime.now().strftime( "%Y%m%d-%H%M%S") + "/" writer = tf.summary.FileWriter(logdir, sess.graph)
# Call implementation
# To regenerate the cached .npy files below, run once:
# glove_array, glove_dict = imp.load_glove_embeddings()
# training_data = imp.load_data(glove_dict)
# np.save('glove_array', glove_array)
# np.save('glove_dict', glove_dict)
# np.save('training_data',training_data)
glove_array = np.load('glove_array.npy')
# FIX: glove_dict was saved as a pickled 0-d object array (a Python dict);
# NumPy >= 1.16.3 defaults allow_pickle=False, so np.load raised ValueError
# here. The numeric arrays above/below load fine without it.
glove_dict = np.load('glove_dict.npy', allow_pickle=True).item()
training_data = np.load('training_data.npy')
Xtrain, Ytrain, Xtest, Ytest = imp.split_data(training_data, train_test_split)
input_data, labels, dropout_keep_prob, optimizer, accuracy, loss = \
    imp.define_graph(glove_array, batch_size)

# tensorboard: scalar summaries for accuracy and loss.
train_accuracy_op = tf.summary.scalar("training_accuracy", accuracy)
tf.summary.scalar("loss", loss)
summary_op = tf.summary.merge_all()

# saver for periodic checkpoints.
all_saver = tf.train.Saver()

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

logdir = "tensorboard/" + datetime.datetime.now().strftime(
    "%Y%m%d-%H%M%S") + "/"
writer = tf.summary.FileWriter(logdir, sess.graph)
def train():
    """Train the binary sentiment classifier with periodic validation.

    Every 50 steps the training loss/accuracy are pushed through two
    placeholder_with_default tensors into the merged summary op and written
    to the train log; every `validatefreq` steps the whole validation set is
    scored and the averages are written to the test log. Relies on
    module-level globals: BATCH_SIZE, MAX_WORDS_IN_REVIEW, EMBEDDING_SIZE,
    iterations, SAVE_FREQ, validatefreq, checkpoints_dir.
    """

    def getTrainBatch():
        # Balanced batch: even slots positive (rows 0-12499), odd slots
        # negative (rows 12500-24999) — layout presumed from the ranges;
        # verify against load_zip/embedd_data.
        labels = []
        arr = np.zeros([BATCH_SIZE, MAX_WORDS_IN_REVIEW, EMBEDDING_SIZE])
        for i in range(BATCH_SIZE):
            if (i % 2 == 0):
                num = randint(0, 12499)
                labels.append([1, 0])
            else:
                num = randint(12500, 24999)
                labels.append([0, 1])
            arr[i] = training_data_embedded[num, :, :]
        return arr, labels

    # Call implementation
    glove_array, glove_dict = load_glove_embeddings()
    training_data_text = load_zip(dataset='train')
    training_data_embedded = embedd_data(training_data_text, glove_array,
                                         glove_dict)
    # This variant's graph also exposes a `training` placeholder (fed True
    # during optimisation, left at its default elsewhere).
    input_data, labels, dropout_keep_prob, optimizer, accuracy, loss, training = \
        imp.define_graph()

    # call the validation data (embeddings reloaded — redundant but harmless)
    glove_array, glove_dict = load_glove_embeddings()
    data_text = load_zip(dataset='validate')
    test_data = embedd_data(data_text, glove_array, glove_dict)
    num_samples = len(test_data)
    num_batches = num_samples // BATCH_SIZE
    # Assumes the validation set is first half positive, second half
    # negative — TODO confirm against load_zip.
    label_list = [[1, 0]] * (num_samples // 2)  # pos always first, neg always second
    label_list.extend([[0, 1]] * (num_samples // 2))
    assert (len(label_list) == num_samples)

    # tensorboard: the ONLY summaries defined are these two scalar
    # placeholders, so summary_op can be evaluated for both train and
    # validation numbers by feeding the placeholders directly.
    accuracy_validation = tf.placeholder_with_default(
        0.0, shape=(), name="accuracy_validation")
    loss_validation = tf.placeholder_with_default(0.0, shape=(),
                                                  name="loss_validation")
    tf.summary.scalar("dev_acc", accuracy_validation)
    tf.summary.scalar("dev_loss", loss_validation)
    summary_op = tf.summary.merge_all()

    # saver
    all_saver = tf.train.Saver()

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    # Separate writers so train and validation curves appear as two runs.
    logdir_train = "tensorboard/" + datetime.datetime.now().strftime(
        "%Y%m%d-%H%M%S-train") + "/"
    logdir_test = "tensorboard/" + datetime.datetime.now().strftime(
        "%Y%m%d-%H%M%S-test") + "/"
    writer_train = tf.summary.FileWriter(logdir_train, sess.graph)
    writer_test = tf.summary.FileWriter(logdir_test, sess.graph)

    for i in range(iterations):
        batch_data, batch_labels = getTrainBatch()
        sess.run(
            optimizer, {
                input_data: batch_data,
                labels: batch_labels,
                dropout_keep_prob: 0.6,
                training: True
            })
        if (i % 50 == 0):
            # Neither dropout_keep_prob nor training is fed here —
            # presumably both default to eval-mode values; verify in
            # imp.define_graph.
            loss_value, accuracy_value = sess.run([loss, accuracy], {
                input_data: batch_data,
                labels: batch_labels
            })
            # Re-run the summary op with the measured numbers fed into the
            # placeholders; the first two fetches just echo the fed values.
            _, _, sum_train = sess.run(
                [loss_validation, accuracy_validation, summary_op], {
                    loss_validation: loss_value,
                    accuracy_validation: accuracy_value
                })
            writer_train.add_summary(sum_train, i)
            print("INFO-Iteration: ", i, end=' - ')
            print("loss: ", loss_value, end=' - ')
            print("accuracy: ", accuracy_value)
        if (i % SAVE_FREQ == 0 and i != 0):
            if not os.path.exists(checkpoints_dir):
                os.makedirs(checkpoints_dir)
            save_path = all_saver.save(sess,
                                       checkpoints_dir + "/trained_model.ckpt",
                                       global_step=i)
            print("Saved model to %s" % save_path)
        if i % validatefreq == 0 and i != 0:
            print(
                "------------------validation mode activated---------------------"
            )
            total_acc = 0
            total_lost = 0
            # Score the whole validation set batch by batch.
            for j in range(num_batches):
                sample_index = j * BATCH_SIZE
                batch_dev = test_data[sample_index:sample_index + BATCH_SIZE]
                batch_labels_dev = label_list[sample_index:sample_index +
                                              BATCH_SIZE]
                lossV, accuracyV = sess.run([loss, accuracy], {
                    input_data: batch_dev,
                    labels: batch_labels_dev
                })
                total_acc += accuracyV
                total_lost += lossV
            # Write the averaged numbers to the test log via the same
            # placeholder-fed summary op.
            _, _, validation = sess.run(
                [accuracy_validation, loss_validation, summary_op],
                feed_dict={
                    accuracy_validation: total_acc / num_batches,
                    loss_validation: total_lost / num_batches
                })
            writer_test.add_summary(validation, i)
            print("Validation INFO-", end='')
            print("average accuracy: ", total_acc / num_batches, end=' - ')
            print("average loss: ", total_lost / num_batches)
            print(
                "------------------------------end-------------------------------"
            )
    sess.close()
arr = np.zeros([batch_size, seq_length]) for i in range(batch_size): if (i % 2 == 0): num = randint(0, 12499) labels.append([1, 0]) else: num = randint(12500, 24999) labels.append([0, 1]) arr[i] = training_data[num] return arr, labels # Call implementation glove_array, glove_dict = imp.load_glove_embeddings() training_data = imp.load_data(glove_dict) input_data, labels, optimizer, accuracy, loss = imp.define_graph(glove_array) # tensorboard train_accuracy_op = tf.summary.scalar("training_accuracy", accuracy) tf.summary.scalar("loss", loss) summary_op = tf.summary.merge_all() # saver all_saver = tf.train.Saver() sess = tf.InteractiveSession() sess.run(tf.global_variables_initializer()) logdir = "tensorboard/" + datetime.datetime.now().strftime( "%Y%m%d-%H%M%S") + "/" writer = tf.summary.FileWriter(logdir, sess.graph)