def evaluate(args, model, tokenizer, prefix=''):
    eval_output_dir = args.output_dir
    results = {}
    eval_dataset = load_and_cache_examples(args, tokenizer, set_type='dev')

    if not os.path.exists(eval_output_dir):
        os.makedirs(eval_output_dir)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

    # Eval!
    logger.info('***** Running evaluation {} *****'.format(prefix))
    logger.info(' Num examples = %d', len(eval_dataset))
    logger.info(' Batch size = %d', args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None
    for batch in eval_dataloader:
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            inputs = {'input_ids': batch[0],
                      'attention_mask': batch[1],
                      'token_type_ids': batch[2],
                      'labels': batch[3],
                      'ct_clf_input_ids': batch[4],
                      'ct_clf_attention_mask': batch[5],
                      'ct_clf_token_type_ids': batch[6],
                      'categories': batch[7],
                      'hand_features': batch[8]}
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[0][:2]
            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1
        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs['labels'].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    preds = np.argmax(preds, axis=1)
    result = compute_metrics(preds, out_label_ids)
    results.update(result)

    output_eval_file = os.path.join(eval_output_dir, 'eval_results.txt')
    with open(output_eval_file, 'a') as writer:
        for key in sorted(result.keys()):
            logger.info(' %s = %s', key, str(result[key]))
            writer.write('%s = %s\n' % (key, str(result[key])))
        writer.write('=' * 20 + '\n')
    return results

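# A minimal sketch of the TensorDataset layout the loop above assumes: nine
# aligned tensor slots, unpacked positionally as batch[0]..batch[8]. All
# names and shapes here are illustrative assumptions, not the actual
# load_and_cache_examples() implementation.
import torch
from torch.utils.data import TensorDataset

num_examples, max_len, num_hand_features = 32, 128, 6  # hypothetical sizes
eval_dataset_sketch = TensorDataset(
    torch.zeros(num_examples, max_len, dtype=torch.long),  # input_ids
    torch.ones(num_examples, max_len, dtype=torch.long),   # attention_mask
    torch.zeros(num_examples, max_len, dtype=torch.long),  # token_type_ids
    torch.zeros(num_examples, dtype=torch.long),           # labels
    torch.zeros(num_examples, max_len, dtype=torch.long),  # ct_clf_input_ids
    torch.ones(num_examples, max_len, dtype=torch.long),   # ct_clf_attention_mask
    torch.zeros(num_examples, max_len, dtype=torch.long),  # ct_clf_token_type_ids
    torch.zeros(num_examples, dtype=torch.long),           # categories
    torch.zeros(num_examples, num_hand_features),          # hand_features
)
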
def main():
    weather = Weather()
    df = process_dataframe(get_dataframe(FIREBASE_URL))
    print('Last five rows:\n', df[-5:][['url', 'time']], '\n')
    save_to_csv(df)

    text = 'Hi! Here is how you have been doing so far.'
    sound(text)

    results = compute_metrics(df)
    # print(results)
    time_work = str(10.03)
    time_media = str(0.69)
    time_other = str(1.78)
    top_hosts = ['localhost', 'google', 'firebase console']
    text = 'You spend ' + time_work + ' hours on work tabs, ' + time_media \
        + ' hours on media and ' + time_other + ' hours on other. ' \
        + 'Your top three tabs are: ' + top_hosts[0] + ', ' + top_hosts[1] + ', ' + top_hosts[2]
    sound(text)

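# sound() is not defined in this file. Below is a minimal sketch of one way
# it could be implemented with the pyttsx3 offline text-to-speech library;
# this is an assumption about the helper, not the project's actual code.
import pyttsx3

def sound_sketch(text):
    # Speak the given text synchronously through the default audio device.
    engine = pyttsx3.init()
    engine.say(text)
    engine.runAndWait()
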
def evaluate(args, model, tokenizer, prefix="", is_eval=True): # Loop to handle MNLI double evaluation (matched, mis-matched) eval_task_names = ("mnli", "mnli-mm") if args.task_name == "mnli" else ( args.task_name, ) eval_outputs_dirs = (args.output_dir, args.output_dir + '-MM') if args.task_name == "mnli" else ( args.output_dir, ) results = {} for eval_task, eval_output_dir in zip(eval_task_names, eval_outputs_dirs): eval_dataset, eval_examples = load_and_cache_examples( args, eval_task, tokenizer, evaluate=is_eval, test=(not is_eval)) if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]: os.makedirs(eval_output_dir) args.eval_batch_size = args.per_gpu_eval_batch_size * max( 1, args.n_gpu) # Note that DistributedSampler samples randomly eval_sampler = SequentialSampler( eval_dataset) if args.local_rank == -1 else DistributedSampler( eval_dataset) eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size) # Eval! logger.info("***** Running evaluation {} *****".format(prefix)) logger.info(" Num examples = %d", len(eval_dataset)) logger.info(" Batch size = %d", args.eval_batch_size) eval_loss = 0.0 nb_eval_steps = 0 preds = None out_label_ids = None for batch in tqdm(eval_dataloader, desc="Evaluating"): model.eval() batch = tuple(t.to(args.device) for t in batch) with torch.no_grad(): inputs = { 'input_ids': batch[0], 'attention_mask': batch[1], 'token_type_ids': batch[2] if args.model_type in ['bert', 'xlnet'] else None, # XLM don't use segment_ids 'labels': batch[3] } outputs = model(**inputs) tmp_eval_loss, logits = outputs[:2] eval_loss += tmp_eval_loss.mean().item() nb_eval_steps += 1 if preds is None: preds = logits.detach().cpu().numpy() out_label_ids = inputs['labels'].detach().cpu().numpy() else: preds = np.append(preds, logits.detach().cpu().numpy(), axis=0) out_label_ids = np.append( out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0) eval_loss = eval_loss / nb_eval_steps if args.output_mode == "classification": preds = np.argmax(preds, axis=1) elif args.output_mode == "regression": preds = np.squeeze(preds) result = compute_metrics(eval_task, preds, out_label_ids) results.update(result) output_eval_file = os.path.join(eval_output_dir, "eval_results.txt") with open(output_eval_file, "w") as writer: logger.info("***** Eval results {} *****".format(prefix)) for key in sorted(result.keys()): logger.info(" %s = %s", key, str(result[key])) writer.write("%s = %s\n" % (key, str(result[key]))) lines = [] mapping = ['non-propaganda', 'propaganda'] if args.output_mode == "multi-label-classification": preds = preds[:, 0] for example, pred in zip(eval_examples, preds): line = example.guid.split('-')[1].split('_') if example.text_a == '[EMPTY]': line.append('non-propaganda') else: line.append(mapping[int(pred)]) lines.append(line) import time end_date = time.strftime("%Y-%m-%d(%H:%M:%S)", time.localtime()) output_pred_file = os.path.join(eval_output_dir, "preds_%s.txt" % end_date) with open(output_pred_file, "w") as writer: for line in lines: writer.write('\t'.join(line) + '\n') return results
def prediction(args, model, tokenizer, prefix=""): pred_task_names = (args.task_name,) pred_outputs_dirs = (args.output_dir,) result = {} for p_task, p_output_dir in zip(pred_task_names, pred_outputs_dirs): pred_dataset = load_and_cache_examples( args, p_task, tokenizer, predict=True) if not os.path.exists(p_output_dir): os.makedirs(p_output_dir) args.pred_batch_size = args.per_gpu_eval_batch_size pred_sampler = SequentialSampler(pred_dataset) pred_dataloader = DataLoader( pred_dataset, sampler=pred_sampler, batch_size=args.pred_batch_size) # Prediction logger.info("******* Running prediction ****") logger.info(" Num examples = %d", len(pred_dataset)) logger.info(" Batch size = %d", args.pred_batch_size) nb_eval_steps = 0 preds = None for batch in tqdm(pred_dataloader, desc="Predicting"): model.eval() batch = tuple(t.to(args.device) for t in batch) with torch.no_grad(): inputs = { "input_ids": batch[0], "attention_mask": batch[1], "labels": batch[3]} if args.model_type != "distilbert": inputs["token_type_ids"] = ( batch[2] if args.model_type in [ "bert", "xlnet", "albert"] else None ) # XLM, DistilBERT, RoBERTa, and XLM-RoBERTa don't use segment_ids outputs = model(**inputs) _, logits = outputs[:2] nb_eval_steps += 1 if preds is None: preds = logits.detach().cpu().numpy() out_label_ids = inputs["labels"].detach().cpu().numpy() else: preds = np.append(preds, logits.detach().cpu().numpy(), axis=0) out_label_ids = np.append( out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0) if args.output_mode == "regression": preds = np.squeeze(preds) result = compute_metrics(p_task, preds, out_label_ids) # make the prefix dir Path(os.path.join(p_output_dir, prefix)).mkdir(exist_ok=True) output_prediction = os.path.join( p_output_dir, prefix, "prediction.txt") with open(output_prediction, "w") as w: for l in preds: w.write(str(l) + "\n") output_eval_file = os.path.join( p_output_dir, prefix, "pred_results.txt") with open(output_eval_file, "w") as writer: logger.info("***** Eval results {} *****".format(prefix)) writer.write("hyperparameter: batch %s, epoch %s\n" % (args.per_gpu_train_batch_size, args.num_train_epochs)) for key in sorted(result.keys()): logger.info(" %s = %s", key, str(result[key])) writer.write("%s = %s\n" % (key, str(result[key]))) return result
def evaluate(args, model, tokenizer, prefix=""): # Loop to handle MNLI double evaluation (matched, mis-matched) eval_task_names = (args.task_name,) eval_outputs_dirs = (args.output_dir,) results = {} for eval_task, eval_output_dir in zip(eval_task_names, eval_outputs_dirs): eval_dataset = load_and_cache_examples( args, eval_task, tokenizer, evaluate=True) if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]: os.makedirs(eval_output_dir) args.eval_batch_size = args.per_gpu_eval_batch_size * \ max(1, args.n_gpu) # Note that DistributedSampler samples randomly eval_sampler = SequentialSampler(eval_dataset) eval_dataloader = DataLoader( eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size) # Eval! logger.info("***** Running evaluation {} *****".format(prefix)) logger.info(" Num examples = %d", len(eval_dataset)) logger.info(" Batch size = %d", args.eval_batch_size) eval_loss = 0.0 nb_eval_steps = 0 preds = None out_label_ids = None for batch in tqdm(eval_dataloader, desc="Evaluating"): model.eval() batch = tuple(t.to(args.device) for t in batch) with torch.no_grad(): inputs = { "input_ids": batch[0], "attention_mask": batch[1], "labels": batch[3]} if args.model_type != "distilbert": inputs["token_type_ids"] = ( batch[2] if args.model_type in [ "bert", "xlnet", "albert"] else None ) # XLM, DistilBERT, RoBERTa, and XLM-RoBERTa don't use segment_ids outputs = model(**inputs) tmp_eval_loss, logits = outputs[:2] eval_loss += tmp_eval_loss.mean().item() nb_eval_steps += 1 if preds is None: preds = logits.detach().cpu().numpy() out_label_ids = inputs["labels"].detach().cpu().numpy() else: preds = np.append(preds, logits.detach().cpu().numpy(), axis=0) out_label_ids = np.append( out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0) eval_loss = eval_loss / nb_eval_steps if args.output_mode == "classification": preds = np.argmax(preds, axis=1) elif args.output_mode == "regression": preds = np.squeeze(preds) result = compute_metrics(eval_task, preds, out_label_ids) results.update(result) # make the prefix dir Path(os.path.join(eval_output_dir, prefix)).mkdir(exist_ok=True) output_eval_file = os.path.join( eval_output_dir, prefix, "eval_results.txt") with open(output_eval_file, "w") as writer: logger.info("***** Eval results {} *****".format(prefix)) writer.write("hyperparameter: batch %s, epoch %s\n" % (args.per_gpu_train_batch_size, args.num_train_epochs)) for key in sorted(result.keys()): logger.info(" %s = %s", key, str(result[key])) writer.write("%s = %s\n" % (key, str(result[key]))) logger.info(" %s = %s", "eval_loss", str(eval_loss)) writer.write("%s = %s\n" % ("eval_loss", str(eval_loss))) return results