Example no. 1
0
def train():
    """Train the binary sentiment model, logging metrics and saving checkpoints.

    Relies on module-level config (BATCH_SIZE, MAX_WORDS_IN_REVIEW,
    EMBEDDING_SIZE, iterations, SAVE_FREQ, checkpoints_dir) and helpers
    (load_glove_embeddings, load_data, embedd_data, imp.define_graph)
    defined elsewhere in this file.
    """

    def getTrainBatch():
        # One class-balanced batch: even slots sample the positive half
        # [0, 12499], odd slots the negative half [12500, 24999].
        batch_labels = []
        batch_arr = np.zeros([BATCH_SIZE, MAX_WORDS_IN_REVIEW, EMBEDDING_SIZE])
        for slot in range(BATCH_SIZE):
            if slot % 2 == 0:
                idx = randint(0, 12499)
                batch_labels.append([1, 0])
            else:
                idx = randint(12500, 24999)
                batch_labels.append([0, 1])
            batch_arr[slot] = training_data_embedded[idx, :, :]
        return batch_arr, batch_labels

    # Start from a clean graph before the implementation builds its ops.
    tf.reset_default_graph()

    # Call implementation: embeddings, data, and the model graph.
    glove_array, glove_dict = load_glove_embeddings()
    training_data_text = load_data()
    training_data_embedded = embedd_data(training_data_text, glove_array, glove_dict)
    input_data, labels, dropout_keep_prob, optimizer, accuracy, loss = \
        imp.define_graph()

    # TensorBoard: scalar summaries merged into a single fetchable op.
    tf.summary.scalar("training_accuracy", accuracy)
    tf.summary.scalar("loss", loss)
    summary_op = tf.summary.merge_all()

    # Checkpoint saver over all graph variables.
    saver = tf.train.Saver()

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    # Timestamped log directory so successive runs don't overwrite each other.
    run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    logdir = "tensorboard/" + run_stamp + "/"
    writer = tf.summary.FileWriter(logdir, sess.graph)

    for step in range(iterations):
        batch_data, batch_labels = getTrainBatch()
        train_feed = {input_data: batch_data,
                      labels: batch_labels,
                      dropout_keep_prob: 0.6}
        sess.run(optimizer, train_feed)

        if step % 50 == 0:
            # Re-run metrics on the same batch; no dropout feed here, so the
            # graph's default keep probability applies during evaluation.
            loss_value, accuracy_value, summary = sess.run(
                [loss, accuracy, summary_op],
                {input_data: batch_data, labels: batch_labels})
            writer.add_summary(summary, step)
            print("Iteration: ", step)
            print("loss", loss_value)
            print("acc", accuracy_value)

        if step % SAVE_FREQ == 0 and step != 0:
            if not os.path.exists(checkpoints_dir):
                os.makedirs(checkpoints_dir)
            save_path = saver.save(sess,
                                   checkpoints_dir + "/trained_model.ckpt",
                                   global_step=step)
            print("Saved model to %s" % save_path)

    sess.close()
Example no. 2
0
        # NOTE(review): fragment -- the enclosing batch-builder's header (and
        # the bindings for `labels`, `arr`, `i`, `training_data`) is outside
        # this view.
        # Sample an index from a band straddling the pos/neg split at 12499;
        # one-hot label: [1, 0] at or below the split, [0, 1] above it.
        num = randint(11499, 13499)
        if (num <= 12499):
            labels.append([1, 0])
        else:
            labels.append([0, 1])
        arr[i] = training_data[num]  # copy the sampled review row into the batch
    return arr, labels


# Call implementation
# NOTE(review): script tail fragment -- `imp`, `tf`, and `datetime` come from
# imports not shown in this view.
glove_array, glove_dict = imp.load_glove_embeddings()
training_data = imp.load_data(glove_dict)

print('Size of training data: ', len(training_data))

# Build the model graph; define_graph returns placeholders and training ops.
input_data, labels, dropout_keep_prob, optimizer, accuracy, loss = imp.define_graph(
    glove_array)

# tensorboard: scalar summaries for accuracy and loss, merged into one op
train_accuracy_op = tf.summary.scalar("accuracy", accuracy)
tf.summary.scalar("loss", loss)
summary_op = tf.summary.merge_all()

# saver for checkpointing all graph variables
all_saver = tf.train.Saver()

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

# Separate train/test log directories sharing one timestamp.
str_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_logdir = "tensorboard/" + str_time + "-train/"
test_logdir = "tensorboard/" + str_time + "-test/"
    # NOTE(review): fragment of a batch builder -- its `def` header and the
    # bindings for `labels`, `batch_size`, and `training_data` are outside
    # this view. The second dimension is hard-coded to 40 here.
    arr = np.zeros([batch_size, 40])
    for i in range(batch_size):
        # Sample around the pos/neg boundary at 12499 and one-hot label it.
        num = randint(10000, 14499)
        if (num <= 12499):
            labels.append([1, 0])
        else:
            labels.append([0, 1])
        # NOTE(review): sibling examples index `training_data[num]`; the
        # `num - 1:num` slice here selects row num-1 -- looks off by one,
        # confirm against the data loader.
        arr[i] = training_data[num - 1:num]
    return arr, labels


# Call implementation
# NOTE(review): script tail fragment -- `imp`, `tf`, and `datetime` come from
# imports not shown in this view.
glove_array, glove_dict = imp.load_glove_embeddings()
training_data = imp.load_data(glove_dict)
input_data, labels, dropout_keep_prob, optimizer, accuracy, loss = \
    imp.define_graph(glove_array)

# tensorboard: scalar summaries merged into a single fetchable op
train_accuracy_op = tf.summary.scalar("training_accuracy", accuracy)
tf.summary.scalar("loss", loss)
summary_op = tf.summary.merge_all()

# saver for checkpointing all graph variables
all_saver = tf.train.Saver()

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

# Timestamped log directory so successive runs don't overwrite each other.
logdir = "tensorboard/" + datetime.datetime.now().strftime(
    "%Y%m%d-%H%M%S") + "/"
writer = tf.summary.FileWriter(logdir, sess.graph)
Example no. 4
0
def train():
    """Train the 5-class classifier, checkpointing and evaluating periodically.

    Relies on module-level config (BATCH_SIZE, MAX_WORDS_IN_REVIEW,
    EMBEDDING_SIZE, iterations, SAVE_FREQ, checkpoints_dir, dir_path) and
    helpers (load_glove_embeddings, load_data, embedd_data, imp.define_graph)
    defined elsewhere in this file.
    """

    def getTrainBatch():
        # Class-balanced batch: slot i gets class (i % 5), sampled uniformly
        # from that class's contiguous 36000-review range.
        labels = []
        arr = np.zeros([BATCH_SIZE, MAX_WORDS_IN_REVIEW, EMBEDDING_SIZE])
        for i in range(BATCH_SIZE):
            if (i % 5 == 0):
                num = randint(0, 35999)
                labels.append([1, 0, 0, 0, 0])
            elif (i % 5 == 1):
                num = randint(36000, 71999)
                labels.append([0, 1, 0, 0, 0])
            elif (i % 5 == 2):
                num = randint(72000, 107999)
                labels.append([0, 0, 1, 0, 0])
            elif (i % 5 == 3):
                num = randint(108000, 143999)
                labels.append([0, 0, 0, 1, 0])
            elif (i % 5 == 4):
                num = randint(144000, 179999)
                labels.append([0, 0, 0, 0, 1])
            arr[i] = training_data_embedded[num, :, :]
        return arr, labels

    # Call implementation
    glove_array, glove_dict = load_glove_embeddings()

    training_data_text = load_data()

    # ========= FOR EVAL =========
    data_text = load_data(path=dir_path + "/data/validate")
    test_data = embedd_data(data_text, glove_array, glove_dict)
    num_samples = len(test_data)
    # ========= FOR EVAL =========

    training_data_embedded = embedd_data(training_data_text, glove_array,
                                         glove_dict)
    input_data, labels, dropout_keep_prob, optimizer, accuracy, loss = \
        imp.define_graph()

    # tensorboard summaries (NOTE(review): merged `summary` is fetched below
    # but never written -- no FileWriter exists in this variant)
    tf.summary.scalar("training_accuracy", accuracy)
    tf.summary.scalar("loss", loss)
    summary_op = tf.summary.merge_all()

    # saver for checkpointing all graph variables
    all_saver = tf.train.Saver()

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    for i in range(iterations):
        batch_data, batch_labels = getTrainBatch()
        sess.run(optimizer, {
            input_data: batch_data,
            labels: batch_labels,
            dropout_keep_prob: 0.6
        })
        if (i % 50 == 0):
            # Re-run metrics on the same batch; no dropout feed, so the
            # graph's default keep probability applies.
            loss_value, accuracy_value, summary = sess.run(
                [loss, accuracy, summary_op], {
                    input_data: batch_data,
                    labels: batch_labels
                })
            print("Iteration: ", i)
            print("loss", loss_value)
            print("acc", accuracy_value)

        if (i % SAVE_FREQ == 0 and i != 0):
            if not os.path.exists(checkpoints_dir):
                os.makedirs(checkpoints_dir)
            save_path = all_saver.save(sess,
                                       checkpoints_dir + "/trained_model.ckpt",
                                       global_step=i)
            print("Saved model to %s" % save_path)

            print("Running eval now")

            # Validation labels assume five equal contiguous class chunks in
            # the validate set -- TODO confirm against the loader.
            num_batches = num_samples // BATCH_SIZE
            label_list = [[1, 0, 0, 0, 0]] * (num_samples // 5)
            label_list.extend([[0, 1, 0, 0, 0]] * (num_samples // 5))
            label_list.extend([[0, 0, 1, 0, 0]] * (num_samples // 5))
            label_list.extend([[0, 0, 0, 1, 0]] * (num_samples // 5))
            label_list.extend([[0, 0, 0, 0, 1]] * (num_samples // 5))
            assert (len(label_list) == num_samples)
            total_acc = 0
            # BUGFIX: the eval loop previously reused `i`, clobbering the
            # training-loop counter; a distinct `j` keeps the outer step
            # number intact for any code added after this loop.
            for j in range(num_batches):
                sample_index = j * BATCH_SIZE
                batch = test_data[sample_index:sample_index + BATCH_SIZE]
                eval_labels = label_list[sample_index:sample_index +
                                         BATCH_SIZE]
                lossV, accuracyV = sess.run([loss, accuracy], {
                    input_data: batch,
                    labels: eval_labels
                })
                total_acc += accuracyV
                print("Accuracy %s, Loss: %s" % (accuracyV, lossV))
            print('-' * 40)
            print("FINAL ACC:", total_acc / num_batches)
    sess.close()
Example no. 5
0
    # NOTE(review): fragment of a batch builder -- `labels`, `batch_size`,
    # `seq_length`, and `training_data` are bound outside this view.
    arr = np.zeros([batch_size, seq_length])
    for i in range(batch_size):
        # Even slots sample the positive half [0, 12499]; odd slots the
        # negative half [12500, 24999]; labels are one-hot pairs.
        if (i % 2 == 0):
            num = randint(0, 12499)
            labels.append([1, 0])
        else:
            num = randint(12500, 24999)
            labels.append([0, 1])
        arr[i] = training_data[num]
    return arr, labels

# Call implementation
# NOTE(review): script tail fragment -- `imp`, `tf`, and `datetime` come from
# imports not shown in this view.
glove_array, glove_dict = imp.load_glove_embeddings()
training_data = imp.load_data(glove_dict)
input_data, labels, dropout_keep_prob, optimizer, accuracy, loss = \
    imp.define_graph(glove_array)

# tensorboard: scalar summaries merged into a single fetchable op
train_accuracy_op = tf.summary.scalar("training_accuracy", accuracy)
tf.summary.scalar("loss", loss)
summary_op = tf.summary.merge_all()

# saver for checkpointing all graph variables
all_saver = tf.train.Saver()

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

# Timestamped log directory so successive runs don't overwrite each other.
logdir = "tensorboard/" + datetime.datetime.now().strftime(
    "%Y%m%d-%H%M%S") + "/"
writer = tf.summary.FileWriter(logdir, sess.graph)
Example no. 6
0
    # NOTE(review): fragment -- builds a 4-class one-hot batch; `batch_size`,
    # `seq_length`, `training_data`, and `training_classes` are bound outside
    # this view.
    labels = []
    arr = np.zeros([batch_size, seq_length])
    for i in range(batch_size):
        # Uniformly sample any training row; one-hot encode its class id.
        num = randint(0, training_data.shape[0] - 1)
        label = [0, 0, 0, 0]
        label[training_classes[num]] = 1
        labels.append(label)
        arr[i] = training_data[num]
    return arr, labels


# Call implementation
# NOTE(review): script tail fragment using word2vec embeddings; `imp`, `tf`,
# and `datetime` come from imports not shown in this view.
word2vec_array, word2vec_dict = imp.load_word2vec_embeddings()
training_data, training_classes = imp.load_data(word2vec_dict)
input_data, labels, dropout_keep_prob, optimizer, accuracy, loss = \
    imp.define_graph(word2vec_array)

# tensorboard: scalar summaries merged into a single fetchable op
train_accuracy_op = tf.summary.scalar("training_accuracy", accuracy)
tf.summary.scalar("loss", loss)
summary_op = tf.summary.merge_all()

# saver for checkpointing all graph variables
all_saver = tf.train.Saver()

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

# Timestamped log directory so successive runs don't overwrite each other.
logdir = "tensorboard/" + datetime.datetime.now().strftime(
    "%Y%m%d-%H%M%S") + "/"
writer = tf.summary.FileWriter(logdir, sess.graph)
Example no. 7
0
# Call implementation
# The commented lines below regenerate the .npy caches from scratch; kept for
# reference.
# glove_array, glove_dict = imp.load_glove_embeddings()
# training_data = imp.load_data(glove_dict)
# np.save('glove_array', glove_array)
# np.save('glove_dict', glove_dict)
# np.save('training_data',training_data)

glove_array = np.load('glove_array.npy')
# BUGFIX: np.save stores a dict as a pickled 0-d object array, and np.load
# defaults to allow_pickle=False since NumPy 1.16.3, so loading it without
# the flag raises ValueError; .item() then unwraps the stored dict.
glove_dict = np.load('glove_dict.npy', allow_pickle=True).item()
training_data = np.load('training_data.npy')

Xtrain, Ytrain, Xtest, Ytest = imp.split_data(training_data, train_test_split)

input_data, labels, dropout_keep_prob, optimizer, accuracy, loss = \
    imp.define_graph(glove_array, batch_size)

# tensorboard: scalar summaries merged into a single fetchable op
train_accuracy_op = tf.summary.scalar("training_accuracy", accuracy)
tf.summary.scalar("loss", loss)
summary_op = tf.summary.merge_all()

# saver for checkpointing all graph variables
all_saver = tf.train.Saver()

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

# Timestamped log directory so successive runs don't overwrite each other.
logdir = "tensorboard/" + datetime.datetime.now().strftime(
    "%Y%m%d-%H%M%S") + "/"
writer = tf.summary.FileWriter(logdir, sess.graph)
def train():
    """Train the binary sentiment model with periodic validation passes.

    Relies on module-level config (BATCH_SIZE, MAX_WORDS_IN_REVIEW,
    EMBEDDING_SIZE, iterations, SAVE_FREQ, validatefreq, checkpoints_dir)
    and helpers (load_glove_embeddings, load_zip, embedd_data,
    imp.define_graph) defined elsewhere in this file.
    """
    def getTrainBatch():
        # Balanced batch: even slots sample the positive half [0, 12499],
        # odd slots the negative half [12500, 24999].
        labels = []
        arr = np.zeros([BATCH_SIZE, MAX_WORDS_IN_REVIEW, EMBEDDING_SIZE])
        for i in range(BATCH_SIZE):
            if (i % 2 == 0):
                num = randint(0, 12499)
                labels.append([1, 0])
            else:
                num = randint(12500, 24999)
                labels.append([0, 1])
            arr[i] = training_data_embedded[num, :, :]
        return arr, labels

    # Call implementation
    glove_array, glove_dict = load_glove_embeddings()
    training_data_text = load_zip(dataset='train')
    training_data_embedded = embedd_data(training_data_text, glove_array,
                                         glove_dict)
    # This define_graph variant also returns a `training` flag placeholder.
    input_data, labels, dropout_keep_prob, optimizer, accuracy, loss, training = \
        imp.define_graph()

    # call the validation data
    glove_array, glove_dict = load_glove_embeddings()
    data_text = load_zip(dataset='validate')
    test_data = embedd_data(data_text, glove_array, glove_dict)

    num_samples = len(test_data)
    num_batches = num_samples // BATCH_SIZE
    # Validation labels assume the set is ordered: first half positive,
    # second half negative (see comment below).
    label_list = [[1, 0]] * (num_samples // 2
                             )  # pos always first, neg always second
    label_list.extend([[0, 1]] * (num_samples // 2))
    assert (len(label_list) == num_samples)

    # tensorboard
    # Scalar placeholders let metric values computed on the Python side be
    # fed back through the merged summary op for logging.
    accuracy_validation = tf.placeholder_with_default(
        0.0, shape=(), name="accuracy_validation")
    loss_validation = tf.placeholder_with_default(0.0,
                                                  shape=(),
                                                  name="loss_validation")
    tf.summary.scalar("dev_acc", accuracy_validation)
    tf.summary.scalar("dev_loss", loss_validation)
    summary_op = tf.summary.merge_all()

    # saver for checkpointing all graph variables
    all_saver = tf.train.Saver()

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    # Separate timestamped writers for training and validation curves.
    logdir_train = "tensorboard/" + datetime.datetime.now().strftime(
        "%Y%m%d-%H%M%S-train") + "/"
    logdir_test = "tensorboard/" + datetime.datetime.now().strftime(
        "%Y%m%d-%H%M%S-test") + "/"

    writer_train = tf.summary.FileWriter(logdir_train, sess.graph)
    writer_test = tf.summary.FileWriter(logdir_test, sess.graph)

    for i in range(iterations):
        batch_data, batch_labels = getTrainBatch()
        sess.run(
            optimizer, {
                input_data: batch_data,
                labels: batch_labels,
                dropout_keep_prob: 0.6,
                training: True
            })

        if (i % 50 == 0):
            # Re-run metrics on the same batch without the dropout/training
            # feeds, so the placeholders' graph defaults apply.
            loss_value, accuracy_value = sess.run([loss, accuracy], {
                input_data: batch_data,
                labels: batch_labels
            })

            # Route the Python-side metric values through the placeholders so
            # the merged summary records them for the train writer.
            _, _, sum_train = sess.run(
                [loss_validation, accuracy_validation, summary_op], {
                    loss_validation: loss_value,
                    accuracy_validation: accuracy_value
                })

            writer_train.add_summary(sum_train, i)
            print("INFO-Iteration: ", i, end=' - ')
            print("loss: ", loss_value, end=' - ')
            print("accuracy: ", accuracy_value)

        if (i % SAVE_FREQ == 0 and i != 0):
            if not os.path.exists(checkpoints_dir):
                os.makedirs(checkpoints_dir)
            save_path = all_saver.save(sess,
                                       checkpoints_dir + "/trained_model.ckpt",
                                       global_step=i)
            print("Saved model to %s" % save_path)

        if i % validatefreq == 0 and i != 0:
            print(
                "------------------validation mode activated---------------------"
            )
            # Average loss/accuracy over every full validation batch.
            total_acc = 0
            total_lost = 0
            for j in range(num_batches):
                sample_index = j * BATCH_SIZE
                batch_dev = test_data[sample_index:sample_index + BATCH_SIZE]
                batch_labels_dev = label_list[sample_index:sample_index +
                                              BATCH_SIZE]
                lossV, accuracyV = sess.run([loss, accuracy], {
                    input_data: batch_dev,
                    labels: batch_labels_dev
                })
                total_acc += accuracyV
                total_lost += lossV

            # Log the averaged validation metrics via the placeholders.
            _, _, validation = sess.run(
                [accuracy_validation, loss_validation, summary_op],
                feed_dict={
                    accuracy_validation: total_acc / num_batches,
                    loss_validation: total_lost / num_batches
                })

            writer_test.add_summary(validation, i)
            print("Validation INFO-", end='')
            print("average accuracy: ", total_acc / num_batches, end=' - ')
            print("average loss: ", total_lost / num_batches)
            print(
                "------------------------------end-------------------------------"
            )

    sess.close()
Example no. 9
0
    # NOTE(review): fragment of a batch builder -- `labels`, `batch_size`,
    # `seq_length`, and `training_data` are bound outside this view.
    arr = np.zeros([batch_size, seq_length])
    for i in range(batch_size):
        # Even slots sample the positive half [0, 12499]; odd slots the
        # negative half [12500, 24999]; labels are one-hot pairs.
        if (i % 2 == 0):
            num = randint(0, 12499)
            labels.append([1, 0])
        else:
            num = randint(12500, 24999)
            labels.append([0, 1])
        arr[i] = training_data[num]
    return arr, labels


# Call implementation
# NOTE(review): script tail fragment; unlike the sibling examples, this
# define_graph variant returns five values -- no dropout placeholder.
glove_array, glove_dict = imp.load_glove_embeddings()
training_data = imp.load_data(glove_dict)
input_data, labels, optimizer, accuracy, loss = imp.define_graph(glove_array)

# tensorboard: scalar summaries merged into a single fetchable op
train_accuracy_op = tf.summary.scalar("training_accuracy", accuracy)
tf.summary.scalar("loss", loss)
summary_op = tf.summary.merge_all()

# saver for checkpointing all graph variables
all_saver = tf.train.Saver()

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

# Timestamped log directory so successive runs don't overwrite each other.
logdir = "tensorboard/" + datetime.datetime.now().strftime(
    "%Y%m%d-%H%M%S") + "/"
writer = tf.summary.FileWriter(logdir, sess.graph)