# Evaluate the trained model on the test set and print a classification report.
#
# Reads `test_data`, `args`, `reader` and `model` from the surrounding scope and
# leaves `test_data` (story texts), `test_label` (gold labels) and `predictions`
# (predicted class ids) behind for any code that follows.

# Shuffle the rows, then keep only the leading `args.size` fraction of them.
test_data = test_data.sample(frac=1.0).reset_index(drop=True)
test_data = test_data.head(int(len(test_data) * args.size))
print(test_data.columns)

# Scoring needs a label column next to the text column. This is an explicit
# raise rather than an `assert` statement so the check still runs under
# `python -O`; the exception type and message are unchanged.
if len(test_data.columns) <= 1:
    raise AssertionError("Labels of Test set not available.")

test_label = list(test_data.SECTION)
test_data = list(test_data.STORY)

# Class ids used both for sentence-pair scoring and for the final report, kept
# in one place so the two cannot drift apart.
label_ids = [0, 1, 2, 3]

is_sentence_pair = args.model == 'sentence_pair'
if is_sentence_pair:
    # The label-side inputs do not depend on the story being scored, so they
    # are built once here instead of once per story inside the loop.
    label_inputs = [
        np.array([reader.embedding[reader.label_map[label_id]]])
        for label_id in label_ids
    ]

predictions = []
for story in tqdm(test_data):
    words = nltk.word_tokenize(story.lower())
    # Wrap the sentence vector in an outer array to form a batch of one.
    embedding = np.array([reader.sent2vec(words)])
    if is_sentence_pair:
        # Pair the story with every label and collect one model output each.
        prediction = [
            model.predict([embedding, label_input])[0]
            for label_input in label_inputs
        ]
    else:
        prediction = model.predict(embedding)[0]
    # np.argmax flattens its input. NOTE(review): in the sentence-pair case the
    # result equals the label id only if each model output holds a single
    # score - confirm against the model definition.
    predictions.append(np.argmax(prediction))

print(classification_report(test_label, predictions, labels=label_ids))