def batch2input(batch_data, num_items):
    """
    :param batch_data: of form [(sentence, label)]
    :param num_items: number of items in the batch
    :return: 3D matrix of embedded sentences, sentence lengths, one-hot labels
    """
    batch_labels = [num2onehot(y, NUM_CLASSES) for _, y in batch_data]
    lens = [len(x) for x, _ in batch_data]
    batch_data = [x for x, _ in batch_data]

    inputs_matrix = np.zeros((num_items, MAX_SENT_LEN, WORD_DIMENSIONS), dtype=np.float32)

    for i, sentence in enumerate(batch_data):
        pos = 0
        for word in sentence:
            # Embed only in-vocabulary words; out-of-vocabulary words are skipped
            if word in VOCAB:
                vec = WORD2VEC[word]
                inputs_matrix[i, pos, :] = vec
                pos += 1

    return inputs_matrix, lens, batch_labels
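The helper num2onehot is used throughout these listings but is not shown here. A minimal sketch of what it is assumed to do, mapping an integer class index to a one-hot vector, is given below; the implementation details are an assumption, not the original helper.

import numpy as np

def num2onehot(label, num_classes):
    """Hypothetical helper: map an integer class label to a one-hot vector."""
    onehot = np.zeros(num_classes, dtype=np.float32)
    onehot[label] = 1.0
    return onehot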
def get_data():
    """Loads CSPubSumExt and flattens it into parallel lists of sentences and one-hot labels."""
    print("Loading Data...")
    t = time.time()

    data = useful_functions.load_cspubsumext()
    sents = []
    labs = []
    for item in data:
        sentences = item["sentences"]
        for sent, sec, y in sentences:
            sents.append(sent)
            labs.append(num2onehot(y, NUM_CLASSES))
    print("Done, took ", time.time() - t, " seconds")

    data = {"sentences": sents, "labels": labs}
    return data
def batch2input(batch_data, num_items):
    """
    :param batch_data: of form [(sentence, abstract_vector, feature_vector, label)]
    :return: 3D matrix of embedded sentences, sentence lengths, labels, abstracts, features
    """
    batch_labels = [num2onehot(y, NUM_CLASSES) for _, _, _, y in batch_data]
    lens = [len(x) for x, _, _, _ in batch_data]
    batch_sentences = [x for x, _, _, _ in batch_data]
    batch_abstracts = [x for _, x, _, _ in batch_data]
    batch_features = [x for _, _, x, _ in batch_data]

    inputs_matrix = np.zeros((num_items, MAX_SENT_LEN, WORD_DIMENSIONS), dtype=np.float32)

    for i, sentence in enumerate(batch_sentences):
        pos = 0
        for word in sentence:
            if word in VOCAB:
                vec = WORD2VEC[word]
                inputs_matrix[i, pos, :] = vec
                pos += 1

    return inputs_matrix, lens, batch_labels, batch_abstracts, batch_features
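For illustration, a sketch of how this version of batch2input could be wired into a feed_dict follows. The placeholder names sequence_lengths, abstract_input and features_input are assumptions and may differ from the actual graph definition; sentence_input and labels appear in the training loops below.

# Hypothetical usage sketch; placeholder names other than sentence_input and labels are assumed.
batch_data = random.sample(train_data, BATCH_SIZE)
inputs_matrix, lens, batch_labels, batch_abstracts, batch_features = batch2input(batch_data, BATCH_SIZE)

feed_dict = {
    sentence_input: inputs_matrix,              # shape (BATCH_SIZE, MAX_SENT_LEN, WORD_DIMENSIONS)
    sequence_lengths: lens,                     # true sentence lengths (assumed placeholder)
    abstract_input: np.asarray(batch_abstracts),
    features_input: np.asarray(batch_features),
    labels: np.asarray(batch_labels)
}
sess.run(opt, feed_dict=feed_dict)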
for epoch in range(MAX_EPOCHS):
    if breakout:
        break

    for batch in range(num_batches):
        print("Running Batch: ", batch, " / ", num_batches, end="\r")

        # Sample a random batch of data
        batch_data = random.sample(train_data, BATCH_SIZE)

        # Extract the data into two numpy arrays
        batch_sentences = np.asarray([x for x, _ in batch_data])
        batch_labels = np.asarray([num2onehot(x, NUM_CLASSES) for _, x in batch_data])

        # Create the feed_dict
        feed_dict = {
            sentence_input: batch_sentences,
            labels: batch_labels,
            keep_prob: 0.5
        }

        # Run the optimisation step
        sess.run(opt, feed_dict=feed_dict)

        if batch % DISPLAY_EVERY == 0:
            # Get the batch of test data
            batch_data = test_data
for epoch in range(MAX_EPOCHS):
    if breakout:
        break

    for batch in range(num_batches):
        print("Running Batch: ", batch, " / ", num_batches, end="\r")

        # Sample a random batch of data
        batch_data = random.sample(train_data, BATCH_SIZE)

        # Extract the data into two numpy arrays
        batch_sentences = np.asarray([x for x, _, _ in batch_data])
        batch_labels = np.asarray([num2onehot(x, NUM_CLASSES) for _, x, _ in batch_data])

        # Create the feed_dict
        feed_dict = {
            sentence_input: batch_sentences,
            labels: batch_labels
        }

        # Run the optimisation step
        sess.run(opt, feed_dict=feed_dict)

        if batch % DISPLAY_EVERY == 0:
            # Get the batch of test data
            batch_data = test_data
accuracy = features_graph["accuracy"] with tf.Session() as sess: # Initialise all variables sess.run(tf.global_variables_initializer()) # Saving object saver = tf.train.Saver() # ====> Run the second graph <==== saver.restore(sess, features_mlp.SAVE_PATH) batch_sentences = np.asarray([x for _, _, x, _ in test_1]) batch_labels = np.asarray( [num2onehot(x, NUM_CLASSES) for _, _, _, x in test_1]) # Create the feed_dict feed_dict = {sentence_input: batch_sentences, labels: batch_labels} # Run accuracy and loss raw_probs_feats = sess.run(predictions, feed_dict=feed_dict) prob_pos_feats = raw_probs_feats[:, 1] # ====> Combine the results <==== summary = [] sents_already_added = set() # ====> Attempt Four <==== final_probs = []