def sentence_prediction(sentence):
    """Predict the label for a single sentence.

    The sentence is run through ``preprocess``, wrapped in a one-item
    ``dataset.BERTDataset`` with a placeholder target of 0, and scored by a
    ``BERTBaseUncased`` model whose weights are loaded from
    ``config.MODEL_PATH``.

    Args:
        sentence: The raw text to classify.

    Returns:
        The first element of the outputs returned by ``engine.predict_fn``.
    """
    sentence = preprocess(sentence)
    model_path = config.MODEL_PATH

    # The target is never used for inference; 0 is only a placeholder.
    test_dataset = dataset.BERTDataset(review=[sentence], target=[0])
    test_data_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=3,
    )

    device = config.device
    model = BERTBaseUncased()
    model.load_state_dict(
        torch.load(model_path, map_location=torch.device(device))
    )
    model.to(device)

    # Discard the second element of the returned pair with a throwaway name.
    # An empty-list target (``outputs, [] = ...``) would raise ValueError as
    # soon as that element is a non-empty iterable.
    outputs, _ = engine.predict_fn(test_data_loader, model, device)
    print(outputs)
    return outputs[0]
def main(_):
    """Run the model over a fixed-width text file and save the predictions.

    The input file, output file and model checkpoint default to
    ``config.EVAL_PROC``, ``'predictions.csv'`` and ``config.MODEL_PATH``;
    each can be overridden through ``FLAGS.input``, ``FLAGS.output`` and
    ``FLAGS.model_path`` respectively.

    Args:
        _: Unused positional argument supplied by the caller.
    """
    # ``input_path`` rather than ``input`` so the builtin is not shadowed.
    input_path = FLAGS.input or config.EVAL_PROC
    # The output override must come from FLAGS.output; reading FLAGS.input
    # here would write the predictions over the input file.
    output_path = FLAGS.output or 'predictions.csv'
    model_path = FLAGS.model_path or config.MODEL_PATH

    df_test = pd.read_fwf(input_path)

    logger.info(f"Bert Model: {config.BERT_PATH}")
    logger.info(
        f"Current date and time :{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} "
    )
    logger.info(f"Test file: {input_path}")
    logger.info(f"Test size : {len(df_test):.4f}")

    # No gold labels are available for this file; use 0 as a placeholder
    # target for every row.
    placeholder_targets = [0] * len(df_test.values)

    test_dataset = dataset.BERTDataset(
        text=df_test.values, target=placeholder_targets)
    test_data_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=3)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = BERTBaseUncased(config.DROPOUT)
    model.load_state_dict(
        torch.load(model_path, map_location=torch.device(device)))
    model.to(device)

    # The extracted features are not used by this entry point.
    outputs, _ = engine.predict_fn(
        test_data_loader, model, device, extract_features=FLAGS.features)

    df_test["predicted"] = outputs
    # Save the rows plus the new "predicted" column, without header or index.
    df_test.to_csv(output_path, header=None, index=False)
def _plot_feature_map(extracted_features, outputs, labels, figure_path):
    """Project features to 2-D (PCA, then t-SNE) and save an annotated plot.

    Each point is annotated with its decoded label and row number. Correct
    predictions are coloured according to the first letter of the decoded
    label (U: blue, P: green, N: red); mismatches are drawn in black as
    ``<decoded gold>-<decoded predicted>``.

    Args:
        extracted_features: Feature matrix accepted by ``PCA.fit_transform``.
        outputs: Predicted labels, one per row of ``extracted_features``.
        labels: Gold labels aligned with ``outputs``.
        figure_path: Destination of the SVG file.
    """
    reduced = PCA(n_components=50, random_state=7).fit_transform(
        extracted_features)
    embedded = TSNE(
        n_components=2,
        perplexity=10,
        random_state=6,
        learning_rate=1000,
        n_iter=1500,
    ).fit_transform(reduced)
    print("Shape after t-SNE: ", embedded.shape)

    points = pd.DataFrame(embedded, columns=["x1", "y1"])
    points = points.astype({"x1": float, "y1": float})

    plt.figure(figsize=(20, 15))
    axes = sns.scatterplot(x=points["x1"], y=points["y1"], palette="coolwarm")

    texts = []
    for predicted, gold in zip(outputs, labels):
        decoded = label_decoder(predicted)
        if predicted == gold:
            # "@" + first letter is a marker used below to choose the colour.
            texts.append("@" + decoded[0] + decoded)
        else:
            texts.append(label_decoder(gold) + "-" + decoded)
    points["texts"] = texts

    # Checked in order; the first marker found in the caption wins.
    marker_colors = (("@U", "blue"), ("@P", "green"), ("@N", "red"))

    # Label each data point with its caption and row number.
    for row in points.itertuples():
        caption = row.texts + "-" + str(row.Index)
        color = "black"
        for marker, marker_color in marker_colors:
            if marker in caption:
                # Drop the two-character marker from the displayed text.
                caption = caption[2:]
                color = marker_color
                break
        axes.text(row.x1 + 0.2, row.y1, caption,
                  horizontalalignment='left',
                  size='medium',
                  color=color,
                  weight='semibold')

    # Save before showing: with an interactive backend the figure may be
    # gone once show() returns, so a later savefig() can write an empty
    # image.
    plt.savefig(figure_path, format="svg")
    plt.show()


def main(_):
    """Evaluate the model on a labelled CSV file and save the predictions.

    The test file and model checkpoint default to ``config.EVAL_PROC`` and
    ``config.MODEL_PATH`` and can be overridden through ``FLAGS.test_file``
    and ``FLAGS.model_path``. The CSV must provide ``text`` and ``label``
    columns. Predictions are written to ``<run>.csv`` where ``<run>`` is the
    second-to-last ``/``-separated component of the model path. When
    ``FLAGS.features`` is set, a 2-D map of the extracted features is also
    saved as ``<run>-figure.svg``.

    Args:
        _: Unused positional argument supplied by the caller.
    """
    test_file = FLAGS.test_file or config.EVAL_PROC
    model_path = FLAGS.model_path or config.MODEL_PATH

    df_test = pd.read_csv(test_file).fillna("none")

    logger.info(f"Bert Model: {config.BERT_PATH}")
    logger.info(
        f"Current date and time :{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} "
    )
    logger.info(f"Test file: {test_file}")
    logger.info(f"Test size : {len(df_test):.4f}")

    test_dataset = dataset.BERTDataset(review=df_test.text.values,
                                       target=df_test.label.values)
    test_data_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=3)

    device = config.device
    model = BERTBaseUncased()
    model.load_state_dict(
        torch.load(model_path, map_location=torch.device(device)))
    model.to(device)

    outputs, extracted_features = engine.predict_fn(
        test_data_loader, model, device, extract_features=FLAGS.features)

    df_test["predicted"] = outputs

    # Name the output files after the directory that holds the checkpoint.
    run_name = model_path.split("/")[-2]
    df_test.to_csv(run_name + '.csv', header=None, index=False)

    if FLAGS.features:
        _plot_feature_map(extracted_features, outputs,
                          df_test.label.values, run_name + '-figure.svg')