Ejemplo n.º 1
0
def batch2input(batch_data, num_items):
    """
    Turn a batch of (sentence, label) pairs into padded network inputs.

    Every sentence is embedded word by word into its own slice of a
    zero-initialised matrix. Words that are not in VOCAB keep an all-zero
    vector but still occupy a position. Sentences longer than MAX_SENT_LEN
    are truncated to their first MAX_SENT_LEN words (writing past the end of
    the matrix would raise IndexError), and the reported lengths are capped
    to match what the matrix actually holds.

    :param batch_data: of form [(sentence, label)], where each sentence is a
        sized iterable of words
    :param num_items: size of the first axis of the returned matrix; must be
        at least len(batch_data), any extra rows stay all-zero
    :return: tuple of (3D float32 matrix of shape
        (num_items, MAX_SENT_LEN, WORD_DIMENSIONS), list of sentence lengths
        capped at MAX_SENT_LEN, list of labels converted with num2onehot)
    """
    batch_labels = [num2onehot(y, NUM_CLASSES) for _, y in batch_data]
    sentences = [x for x, _ in batch_data]
    # Cap the lengths so they never exceed the number of positions available
    # along the second axis of the matrix.
    lens = [min(len(sentence), MAX_SENT_LEN) for sentence in sentences]
    inputs_matrix = np.zeros((num_items, MAX_SENT_LEN, WORD_DIMENSIONS),
                             dtype=np.float32)
    for i, sentence in enumerate(sentences):
        # zip() stops once the range is exhausted, so words beyond
        # MAX_SENT_LEN are dropped instead of indexing out of bounds.
        for pos, word in zip(range(MAX_SENT_LEN), sentence):
            if word in VOCAB:
                inputs_matrix[i, pos, :] = WORD2VEC[word]
    return inputs_matrix, lens, batch_labels
Ejemplo n.º 2
0
def get_data():
    """
    Load the data from useful_functions.load_cspubsumext() and flatten it.

    Each loaded item carries a "sentences" list of 3-tuples. The first
    element of every tuple is collected as the sentence and the third is
    converted with num2onehot; the middle element is ignored.

    :return: dict with keys "sentences" and "labels", two lists of equal
        length in the order the sentences were encountered
    """
    print("Loading Data...")
    start_time = time.time()

    # Walk the loaded items exactly once, pairing each sentence with its
    # converted label so the two output lists stay aligned.
    labelled = [
        (sentence, num2onehot(label, NUM_CLASSES))
        for entry in useful_functions.load_cspubsumext()
        for sentence, _sec, label in entry["sentences"]
    ]
    sents = [sentence for sentence, _ in labelled]
    labs = [onehot for _, onehot in labelled]

    print("Done, took ", time.time() - start_time, " seconds")

    return {"sentences": sents, "labels": labs}
Ejemplo n.º 3
0
def batch2input(batch_data, num_items):
    """
    Turn a batch of 4-tuples into padded network inputs plus side inputs.

    Every sentence is embedded word by word into its own slice of a
    zero-initialised matrix. Words that are not in VOCAB keep an all-zero
    vector but still occupy a position. Sentences longer than MAX_SENT_LEN
    are truncated to their first MAX_SENT_LEN words (writing past the end of
    the matrix would raise IndexError), and the reported lengths are capped
    to match what the matrix actually holds. Abstract and feature vectors
    are passed through unchanged.

    :param batch_data: of form [(sentence, abstract_vector, feature_vector, label)],
        where each sentence is a sized iterable of words
    :param num_items: size of the first axis of the returned matrix; must be
        at least len(batch_data), any extra rows stay all-zero
    :return: 3D float32 matrix of embedded sentences with shape
        (num_items, MAX_SENT_LEN, WORD_DIMENSIONS), sentence lengths capped
        at MAX_SENT_LEN, labels converted with num2onehot, abstracts, features
    """
    batch_labels = [num2onehot(y, NUM_CLASSES) for _, _, _, y in batch_data]
    batch_sentences = [x for x, _, _, _ in batch_data]
    batch_abstracts = [x for _, x, _, _ in batch_data]
    batch_features = [x for _, _, x, _ in batch_data]
    # Cap the lengths so they never exceed the number of positions available
    # along the second axis of the matrix.
    lens = [min(len(sentence), MAX_SENT_LEN) for sentence in batch_sentences]
    inputs_matrix = np.zeros((num_items, MAX_SENT_LEN, WORD_DIMENSIONS), dtype=np.float32)
    for i, sentence in enumerate(batch_sentences):
        # zip() stops once the range is exhausted, so words beyond
        # MAX_SENT_LEN are dropped instead of indexing out of bounds.
        for pos, word in zip(range(MAX_SENT_LEN), sentence):
            if word in VOCAB:
                inputs_matrix[i, pos, :] = WORD2VEC[word]
    return inputs_matrix, lens, batch_labels, batch_abstracts, batch_features
Ejemplo n.º 4
0
            # Training loop: at most MAX_EPOCHS epochs, each running
            # num_batches optimisation steps on randomly sampled batches.
            for epoch in range(MAX_EPOCHS):

                # `breakout` is assigned outside this excerpt; once it is
                # truthy no further epoch is started.
                if breakout:
                    break

                for batch in range(num_batches):

                    # end="\r" moves the cursor back to the start of the line
                    # so the next progress message overwrites this one.
                    print("Running Batch: ", batch, " / ", num_batches, end="\r")

                    # Sample a random batch of data (BATCH_SIZE distinct
                    # items drawn from train_data)
                    batch_data = random.sample(train_data, BATCH_SIZE)

                    # Extract the data into two numpy arrays: the first
                    # element of each pair, and the second element converted
                    # with num2onehot
                    batch_sentences = np.asarray([x for x, _ in batch_data])
                    batch_labels = np.asarray([num2onehot(x, NUM_CLASSES) for _, x in batch_data])

                    # Create the feed_dict
                    # NOTE(review): keep_prob is presumably the dropout keep
                    # probability used during training - confirm against the
                    # graph definition.
                    feed_dict = {
                        sentence_input: batch_sentences,
                        labels: batch_labels,
                        keep_prob: 0.5
                    }

                    # Runs optimisation
                    sess.run(opt, feed_dict=feed_dict)

                    # Every DISPLAY_EVERY batches, starting with batch 0
                    if batch % DISPLAY_EVERY == 0:

                        # Use the whole of test_data as the batch (no
                        # sampling, unlike the training batch above)
                        batch_data = test_data
        # Training loop: at most MAX_EPOCHS epochs, each running num_batches
        # optimisation steps on randomly sampled batches.
        for epoch in range(MAX_EPOCHS):

            # `breakout` is assigned outside this excerpt; once it is truthy
            # no further epoch is started.
            if breakout:
                break

            for batch in range(num_batches):

                # end="\r" moves the cursor back to the start of the line so
                # the next progress message overwrites this one.
                print("Running Batch: ", batch, " / ", num_batches, end="\r")

                # Sample a random batch of data (BATCH_SIZE distinct items
                # drawn from train_data)
                batch_data = random.sample(train_data, BATCH_SIZE)

                # Extract the data into two numpy arrays: the first element
                # of each 3-tuple, and the second element converted with
                # num2onehot (the third element is not used here)
                batch_sentences = np.asarray([x for x, _, _ in batch_data])
                batch_labels = np.asarray(
                    [num2onehot(x, NUM_CLASSES) for _, x, _ in batch_data])

                # Create the feed_dict
                feed_dict = {
                    sentence_input: batch_sentences,
                    labels: batch_labels
                }

                # Runs optimisation
                sess.run(opt, feed_dict=feed_dict)

                # Every DISPLAY_EVERY batches, starting with batch 0
                if batch % DISPLAY_EVERY == 0:

                    # Use the whole of test_data as the batch (no sampling,
                    # unlike the training batch above)
                    batch_data = test_data
Ejemplo n.º 6
0
# Accuracy entry of the features graph (not used within this excerpt).
accuracy = features_graph["accuracy"]

with tf.Session() as sess:

    # Initialise all variables
    sess.run(tf.global_variables_initializer())

    # Saving object
    saver = tf.train.Saver()

    # ====> Run the second graph <====
    # Restore the variables saved at features_mlp.SAVE_PATH into this session.
    saver.restore(sess, features_mlp.SAVE_PATH)

    # Inputs are the third element of each 4-tuple in test_1, labels the
    # fourth element converted with num2onehot.
    # NOTE(review): despite the name, batch_sentences presumably holds the
    # feature vectors expected by the features graph - confirm.
    batch_sentences = np.asarray([x for _, _, x, _ in test_1])
    batch_labels = np.asarray(
        [num2onehot(x, NUM_CLASSES) for _, _, _, x in test_1])

    # Create the feed_dict
    feed_dict = {sentence_input: batch_sentences, labels: batch_labels}

    # Run the predictions op only (accuracy and loss are not evaluated here)
    raw_probs_feats = sess.run(predictions, feed_dict=feed_dict)
    # Keep column 1 of every prediction row.
    # NOTE(review): presumably the probability of the positive class - confirm.
    prob_pos_feats = raw_probs_feats[:, 1]

# ====> Combine the results <====

# Empty accumulators; they are filled beyond the end of this excerpt.
summary = []
sents_already_added = set()

# ====> Attempt Four <====
final_probs = []