def evaluate(args, model, eval_dataset, mode, global_step=None):
    results = {}
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=eval_sampler,
                                 batch_size=args.eval_batch_size)

    # Eval!
    if global_step is not None:
        logger.info("***** Running evaluation on {} dataset ({} step) *****".format(mode, global_step))
    else:
        logger.info("***** Running evaluation on {} dataset *****".format(mode))
    logger.info("  Num examples = {}".format(len(eval_dataset)))
    logger.info("  Eval batch size = {}".format(args.eval_batch_size))

    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None

    for batch in progress_bar(eval_dataloader):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "labels": batch[3],
            }
            if args.model_type not in ["distilkobert", "xlm-roberta"]:
                # DistilKoBERT and XLM-RoBERTa don't use segment (token type) ids
                inputs["token_type_ids"] = batch[2]
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]
            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs["labels"].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids,
                                      inputs["labels"].detach().cpu().numpy(),
                                      axis=0)

    eval_loss = eval_loss / nb_eval_steps
    if output_modes[args.task] == "classification":
        preds = np.argmax(preds, axis=1)
    elif output_modes[args.task] == "regression":
        preds = np.squeeze(preds)

    result = compute_metrics(args.task, out_label_ids, preds)
    results.update(result)

    output_dir = os.path.join(args.output_dir, mode)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    output_eval_file = os.path.join(
        output_dir,
        "{}-{}.txt".format(mode, global_step) if global_step else "{}.txt".format(mode))
    with open(output_eval_file, "w") as f_w:
        logger.info("***** Eval results on {} dataset *****".format(mode))
        for key in sorted(results.keys()):
            logger.info("  {} = {}".format(key, str(results[key])))
            f_w.write("  {} = {}\n".format(key, str(results[key])))

    return results
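# --- Usage sketch ---
# Hypothetical driver (not part of the original repo) showing how the
# evaluate() above is typically called. It assumes `args` carries the fields
# the function reads (device, eval_batch_size, model_type, task, output_dir)
# and that the dataset is a TensorDataset laid out as
# (input_ids, attention_mask, token_type_ids, labels).
def _example_run_eval(args, model, test_dataset):
    model.to(args.device)
    results = evaluate(args, model, test_dataset, mode="test", global_step=None)
    for key, value in sorted(results.items()):
        logger.info("final {} = {}".format(key, value))
    return results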
def evaluate(args, model, eval_dataset, mode, global_step=None):
    results = {}
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=eval_sampler,
                                 batch_size=args.eval_batch_size)

    # Eval!
    if global_step is not None:
        logger.info("***** Running evaluation on {} dataset ({} step) *****".format(mode, global_step))
    else:
        logger.info("***** Running evaluation on {} dataset *****".format(mode))
    logger.info("  Num examples = {}".format(len(eval_dataset)))
    logger.info("  Eval batch size = {}".format(args.eval_batch_size))

    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None
    ep_loss = []

    for (batch, txt) in progress_bar(eval_dataloader):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
                "labels": batch[3],
            }
            # Lexicon-augmented variants consume extra per-token features.
            if "KOSAC" in args.model_mode:
                inputs["polarity_ids"] = batch[4]
                inputs["intensity_ids"] = batch[5]
            if "KNU" in args.model_mode:
                inputs["polarity_ids"] = batch[4]
            if "CHAR" in args.model_mode:
                inputs["char_token_data"] = txt[1]
                inputs["word_token_data"] = txt[2]
                txt = txt[0]
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            # Multi-objective models return one loss per objective: record the
            # components separately, then sum them into a single scalar.
            if isinstance(tmp_eval_loss, tuple):
                ep_loss.append([x.item() for x in tmp_eval_loss])
                tmp_eval_loss = sum(tmp_eval_loss)
            else:
                ep_loss.append([tmp_eval_loss.item()])

            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs["labels"].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids,
                                      inputs["labels"].detach().cpu().numpy(),
                                      axis=0)

    eval_loss = eval_loss / nb_eval_steps
    if output_modes[args.task] == "classification":
        preds = np.argmax(preds, axis=1)
    elif output_modes[args.task] == "regression":
        preds = np.squeeze(preds)

    result = compute_metrics(args.task, out_label_ids, preds)
    results.update(result)

    output_dir = os.path.join(args.output_dir, mode)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    output_eval_file = os.path.join(
        output_dir,
        "{}-{}.txt".format(mode, global_step) if global_step else "{}.txt".format(mode))
    with open(output_eval_file, "w") as f_w:
        logger.info("***** Eval results on {} dataset *****".format(mode))
        for key in sorted(results.keys()):
            logger.info("  {} = {}".format(key, str(results[key])))
            f_w.write("  {} = {}\n".format(key, str(results[key])))
        epoch_loss = np.mean(np.array(ep_loss), axis=0)
        logger.info("Epoch loss = {}".format(epoch_loss))
        f_w.write("Epoch loss = {}\n".format(epoch_loss))

    return results
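# Minimal toy (an illustration, not the repo's actual model) of the
# tuple-loss handling in the evaluate() above: some model_mode variants may
# return one loss per objective (e.g. a classification loss plus an auxiliary
# lexicon loss); the components are logged separately in ep_loss and summed
# into a single scalar for the running eval_loss.
def _sum_multi_loss_example():
    import torch
    tmp_eval_loss = (torch.tensor(0.7), torch.tensor(0.3))  # e.g. (cls, aux)
    per_part = [x.item() for x in tmp_eval_loss]            # -> [0.7, 0.3]
    total = sum(tmp_eval_loss)                              # -> tensor(1.)
    return per_part, total.item()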
def evaluate(args, model, eval_dataset, mode, global_step=None):
    results = {}
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=eval_sampler,
                                 batch_size=args.eval_batch_size)

    # Eval!
    if global_step is not None:
        logger.info("***** Running evaluation on {} dataset ({} step) *****".format(mode, global_step))
    else:
        logger.info("***** Running evaluation on {} dataset *****".format(mode))
    logger.info("  Num examples = {}".format(len(eval_dataset)))
    logger.info("  Eval batch size = {}".format(args.eval_batch_size))

    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None

    for batch in eval_dataloader:
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "labels": batch[3],
            }
            if args.model_type not in ["distilkobert", "xlm-roberta"]:
                # DistilKoBERT and XLM-RoBERTa don't use segment (token type) ids
                inputs["token_type_ids"] = batch[2]
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]
            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs["labels"].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids,
                                      inputs["labels"].detach().cpu().numpy(),
                                      axis=0)

    eval_loss = eval_loss / nb_eval_steps
    results = {"loss": eval_loss}

    # Token-level argmax over the label dimension: (batch, seq_len, num_labels)
    preds = np.argmax(preds, axis=2)

    labels = processors[args.task](args).get_labels()
    label_map = {i: label for i, label in enumerate(labels)}

    out_label_list = [[] for _ in range(out_label_ids.shape[0])]
    preds_list = [[] for _ in range(out_label_ids.shape[0])]

    # Positions labeled with ignore_index (-100) are padding / sub-word pieces
    # and are excluded from the entity-level metrics.
    pad_token_label_id = CrossEntropyLoss().ignore_index
    for i in range(out_label_ids.shape[0]):
        for j in range(out_label_ids.shape[1]):
            if out_label_ids[i, j] != pad_token_label_id:
                out_label_list[i].append(label_map[out_label_ids[i][j]])
                preds_list[i].append(label_map[preds[i][j]])

    result = compute_metrics(args.task, out_label_list, preds_list)
    results.update(result)

    output_dir = os.path.join(args.output_dir, mode)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    output_eval_file = os.path.join(
        output_dir,
        "{}-{}.txt".format(mode, global_step) if global_step else "{}.txt".format(mode))
    with open(output_eval_file, "w") as f_w:
        logger.info("***** Eval results on {} dataset *****".format(mode))
        for key in sorted(results.keys()):
            logger.info("  {} = {}".format(key, str(results[key])))
            f_w.write("  {} = {}\n".format(key, str(results[key])))
        logger.info("\n" + show_ner_report(out_label_list, preds_list))  # Show report for each tag
        f_w.write("\n" + show_ner_report(out_label_list, preds_list))

    return results
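# Worked illustration (not part of the original file) of the label-alignment
# step above: gold positions equal to CrossEntropyLoss().ignore_index (-100
# by default) mark padding and non-first sub-word pieces, and are skipped
# before the entity-level metrics are computed.
def _align_predictions_example():
    pad_id = CrossEntropyLoss().ignore_index  # -100
    label_map = {0: "O", 1: "B-PER", 2: "I-PER"}
    out_label_ids = np.array([[1, 2, pad_id, 0]])
    preds = np.array([[1, 0, 2, 0]])

    gold, hyp = [], []
    for i in range(out_label_ids.shape[0]):
        for j in range(out_label_ids.shape[1]):
            if out_label_ids[i, j] != pad_id:  # keep real tokens only
                gold.append(label_map[out_label_ids[i, j]])
                hyp.append(label_map[preds[i, j]])
    # gold == ["B-PER", "I-PER", "O"], hyp == ["B-PER", "O", "O"]
    return gold, hyp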
def evaluate(args, model, eval_dataset, mode, global_step=None):
    results = {}
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=eval_sampler,
                                 batch_size=args.eval_batch_size)

    # Eval!
    if global_step is not None:
        logger.info("***** Running Test on {} dataset ({} step) *****".format(mode, global_step))
    else:
        logger.info("***** Running Test on {} dataset *****".format(mode))
    logger.info("  Num examples = {}".format(len(eval_dataset)))
    logger.info("  Eval batch size = {}".format(args.eval_batch_size))

    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None
    txt_all = []
    ep_loss = []
    pcaDF = pd.DataFrame(
        columns=['principal component 1', 'principal component 2', "label"])

    for (batch, txt) in progress_bar(eval_dataloader):
        model.eval()
        txt_all = txt_all + list(txt)
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            if len(batch) == 4:
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "token_type_ids": batch[2],
                    "labels": batch[3],
                }
            else:
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "token_type_ids": None,
                    "labels": batch[2],
                }
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            # Project the sentence embeddings (third model output) to 2D for
            # visualization and clustering.
            emb = outputs[2].detach().cpu().numpy()
            labels = inputs["labels"].detach().cpu().numpy()
            pca = PCA(n_components=2)
            principalComponents = pca.fit_transform(emb)
            principalDf = pd.DataFrame(
                data=principalComponents,
                columns=['principal component 1', 'principal component 2'])
            principalDf["label"] = labels
            pcaDF = pd.concat([pcaDF, principalDf], ignore_index=True)

            # Multi-objective models return one loss per objective: record the
            # components separately, then sum them into a single scalar.
            if isinstance(tmp_eval_loss, tuple):
                ep_loss.append([x.item() for x in tmp_eval_loss])
                tmp_eval_loss = sum(tmp_eval_loss)
            else:
                ep_loss.append([tmp_eval_loss.item()])

            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs["labels"].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids,
                                      inputs["labels"].detach().cpu().numpy(),
                                      axis=0)

    eval_loss = eval_loss / nb_eval_steps
    preds = np.argmax(preds, axis=1)

    # Scatter plot of the 2D PCA projection, one color per gold label
    # (ColorBrewer Accent palette).
    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(1, 1, 1)
    ax.set_xlabel('Principal Component 1', fontsize=15)
    ax.set_ylabel('Principal Component 2', fontsize=15)
    ax.set_title('2 Component PCA', fontsize=20)
    colors = [
        "#7fc97f", "#beaed4", "#fdc086", "#ffff99", "#386cb0", "#f0027f",
        "#bf5b17", "#666666"
    ]
    # Use the labels collected across all batches, not just the last one.
    label_list = sorted(set(pcaDF['label']))
    colors = colors[:len(label_list)]
    for label, color in zip(label_list, colors):
        indicesToKeep = pcaDF['label'] == label
        ax.scatter(pcaDF.loc[indicesToKeep, 'principal component 1'],
                   pcaDF.loc[indicesToKeep, 'principal component 2'],
                   c=color,
                   s=10)
    ax.legend(label_list)
    ax.grid()
    plt.show()

    # How well do unsupervised clusters recover the gold labels?
    kmeans = KMeans(n_clusters=2, random_state=0).fit(
        pcaDF.loc[:, ['principal component 1', 'principal component 2']])
    logger.info("KMeans completeness = {}".format(
        completeness_score(pcaDF['label'], kmeans.labels_)))

    dbscan = DBSCAN(eps=3, min_samples=2).fit(
        pcaDF.loc[:, ['principal component 1', 'principal component 2']])
    logger.info("DBSCAN clusters = {}".format(set(dbscan.labels_)))
    logger.info("DBSCAN completeness = {}".format(
        completeness_score(pcaDF['label'], dbscan.labels_)))

    result = compute_metrics(out_label_ids, preds)
    results.update(result)

    output_dir = os.path.join(args.output_dir, mode)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    output_eval_file = os.path.join(
        output_dir,
        "{}-{}.txt".format(mode, global_step) if global_step else "{}.txt".format(mode))
    with open(output_eval_file, "w") as f_w:
        logger.info("***** Eval results on {} dataset *****".format(mode))
        for key in sorted(results.keys()):
            logger.info("  {} = {}".format(key, str(results[key])))
            f_w.write("  {} = {}\n".format(key, str(results[key])))

    # This variant also surfaces the raw predictions, gold labels and input
    # texts so callers can run error analysis.
    return preds, out_label_ids, results, txt_all
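# Hypothetical consumer (not from the original repo) of the richer return
# value above: since this variant hands back (preds, out_label_ids, results,
# txt_all), a caller can line the three sequences up to inspect misclassified
# sentences.
def _dump_errors_example(args, model, test_dataset):
    preds, out_label_ids, results, txt_all = evaluate(
        args, model, test_dataset, mode="test")
    for text, pred, gold in zip(txt_all, preds, out_label_ids):
        if pred != gold:
            logger.info("misclassified: pred={} gold={} text={}".format(
                pred, gold, text))
    return results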