def med_vs_lowres(params, n_art):
    """Compare a medium-resolution model against a low-resolution one.

    Loads both featurized test sets (which must share gold labels), scores
    each model's predictions, runs an approximate randomization test (ART)
    on the micro-F difference, and appends a single CSV row — including a
    Bonferroni-corrected p-value — to the comparison log file.
    """
    out = get_medres_vs_lowres_outfile(params['logpath'])
    lo_feats, lo_labels, y_to_idx, ids = load_featurized(params['lowres_path'])
    md_feats, md_labels, y_to_idx, ids = load_featurized(params['medres_path'])
    num_classes = len(y_to_idx)
    # Both featurized sets must be aligned on the same gold labels.
    assert np.array_equal(lo_labels, md_labels)
    gold = lo_labels

    lo_model = load_model(params['lowres_id'], params['model_dir'])
    md_model = load_model(params['medres_id'], params['model_dir'])
    lo_pred = get_pred(lo_model, lo_feats, num_classes)
    md_pred = get_pred(md_model, md_feats, num_classes)

    lo_acc = accuracy_score(gold, lo_pred)
    md_acc = accuracy_score(gold, md_pred)
    # Index [2] of the ART result is the p-value for the score difference.
    p_diff = art_test(gold, lo_pred, md_pred, absolute=True, n=n_art,
                      scoring=precision_recall_fscore_micro,
                      return_distribution=False)[2]

    row = [n_art, md_acc, lo_acc, md_acc - lo_acc,
           p_diff, p_diff * params['bonferroni']]
    csv.writer(out).writerow(row)
    out.close()
def per_vs_cnn(state, n_art):
    """Compare perceptron and CNN classifiers at low and medium resolution.

    Loads the featurized low-res and med-res test sets (which must share
    gold labels) and four trained models (perceptron/CNN at each
    resolution), computes accuracies, runs an approximate randomization
    test (ART) on each CNN-vs-perceptron difference, and appends one CSV
    row per resolution — with a Bonferroni-corrected p-value — to the log.
    """
    log = get_per_vs_cnn_outfile(state['logpath'])
    low_xs, low_ys, y_to_idx, ids = load_featurized(state['lowres_path'])
    med_xs, med_ys, y_to_idx, ids = load_featurized(state['medres_path'])
    n_classes = len(y_to_idx)
    # Both featurized sets must be aligned on the same gold labels.
    assert np.array_equal(low_ys, med_ys)
    ys = med_ys

    per_medres = load_model(state['medres_id'], state['per_dir'])
    per_lowres = load_model(state['lowres_id'], state['per_dir'])
    cnn_medres = load_model(state['medres_id'], state['cnn_dir'])
    cnn_lowres = load_model(state['lowres_id'], state['cnn_dir'])
    preds_per_medres = get_pred(per_medres, med_xs, n_classes)
    preds_per_lowres = get_pred(per_lowres, low_xs, n_classes)
    preds_cnn_medres = get_pred(cnn_medres, med_xs, n_classes)
    preds_cnn_lowres = get_pred(cnn_lowres, low_xs, n_classes)

    acc_per_medres = accuracy_score(ys, preds_per_medres)
    acc_per_lowres = accuracy_score(ys, preds_per_lowres)
    acc_cnn_medres = accuracy_score(ys, preds_cnn_medres)
    # Fix: a stray third positional argument (n_classes) was being passed
    # here; sklearn's accuracy_score takes (y_true, y_pred) and interprets
    # a third positional value as `normalize`.
    acc_cnn_lowres = accuracy_score(ys, preds_cnn_lowres)

    # Index [2] of the ART result is the p-value for the score difference.
    p_diff_ch = art_test(ys, preds_per_lowres, preds_cnn_lowres,
                         absolute=True, n=n_art,
                         scoring=precision_recall_fscore_micro,
                         return_distribution=False)[2]
    p_diff_plain = art_test(ys, preds_per_medres, preds_cnn_medres,
                            absolute=True, n=n_art,
                            scoring=precision_recall_fscore_micro,
                            return_distribution=False)[2]

    writer = csv.writer(log)
    writer.writerow([n_art, 'low_res', acc_cnn_lowres, acc_per_lowres,
                     acc_cnn_lowres - acc_per_lowres,
                     p_diff_ch, p_diff_ch * state['bonferroni']])
    # Fix: this row previously reported p_diff_ch (the low-res p-value)
    # in the raw-p column while correcting p_diff_plain — use the med-res
    # p-value (p_diff_plain) consistently for both columns.
    writer.writerow([n_art, 'med_res', acc_cnn_medres, acc_per_medres,
                     acc_cnn_medres - acc_per_medres,
                     p_diff_plain, p_diff_plain * state['bonferroni']])
    log.close()
def __init__(self, model_to_use, params): self.params = params # self.params["device"]='cuda' self.embeddings = None if (self.params['bert_tokens']): self.train, self.val, self.test = createDatasetSplit(params) self.vocab = None vocab_size = 0 padding_idx = 0 else: self.train, self.val, self.test, vocab_own = createDatasetSplit( params) self.params['embed_size'] = vocab_own.embeddings.shape[1] self.params['vocab_size'] = vocab_own.embeddings.shape[0] self.vocab = vocab_own self.embeddings = vocab_own.embeddings if torch.cuda.is_available() and self.params['device'] == 'cuda': # Tell PyTorch to use the GPU. self.device = torch.device("cuda") deviceID = get_gpu(self.params) torch.cuda.set_device(deviceID[0]) else: print('Since you dont want to use GPU, using the CPU instead.') self.device = torch.device("cpu") self.model = select_model(self.params, self.embeddings) if (self.params['bert_tokens'] == False): #pass self.model = load_model(self.model, self.params) if (self.params["device"] == 'cuda'): self.model.cuda() self.model.eval()
def train_pd_models(get_model, params, params_dtypes):
    """Train postlingually-deaf models at medium and low resolution.

    If no pretrained directory is configured, first trains a hi-res
    'normally_hearing' base model and records its directory as the
    pretrained source; otherwise trains a dummy model. Then, for each of
    the 'medres' and 'lores' resolutions, fine-tunes a fresh copy of the
    pretrained model on the 'postlingually_deaf' condition.

    Note: `params` is mutated in place to steer each training run.
    """
    if params['pretrained_dir']:
        # A pretrained source already exists; only a dummy model is trained.
        params['deafness_type'] = 'dummy_model'
        train_dummy_model(get_model, params, params_dtypes)
    else:
        # Build the hi-res base model and use its output as the
        # pretrained source for the fine-tuning runs below.
        params['deafness_type'] = 'normally_hearing'
        params['resolution'] = 'hires'
        train_model(get_model, params, params_dtypes)
        params['pretrained_dir'] = params['model_dir']

    for resolution in ('medres', 'lores'):
        params['deafness_type'] = 'postlingually_deaf'
        params['resolution'] = resolution
        # Reload the pretrained weights fresh for each resolution so the
        # two fine-tuning runs start from the same checkpoint.
        base_model = load_model(0, params['pretrained_dir'])
        train_model(get_model, params, params_dtypes,
                    pre_trained_model=base_model)
def standaloneEval_with_rational(params,
                                 test_data=None,
                                 extra_data_path=None,
                                 topk=2,
                                 use_ext_df=False):
    """Evaluate a trained model and collect predictions plus attention rationales.

    Runs the model over the test set (or an external dataframe / extra data
    file), prints macro metrics when evaluating the standard test split, and
    builds one ERASER-style record per post: predicted label, per-class
    scores, and top-k attention positions as hard rationales.

    Args:
        params: configuration dict; keys read include 'device',
            'bert_tokens', 'auto_weights', 'num_classes'.
        test_data: external dataframe with 'Post_id' (used when
            use_ext_df is True; overwritten when extra_data_path is given).
        extra_data_path: optional path to an extra annotated data file.
        topk: number of highest-attention tokens kept as hard rationales.
        use_ext_df: evaluate `test_data` instead of the standard split.

    Returns:
        (list_dict, test_data): the per-post records and the dataframe used.
    """
    # device = torch.device("cpu")
    if torch.cuda.is_available() and params['device'] == 'cuda':
        # Tell PyTorch to use the GPU.
        device = torch.device("cuda")
        deviceID = get_gpu(params)
        torch.cuda.set_device(deviceID[0])
    else:
        print('Since you dont want to use GPU, using the CPU instead.')
        device = torch.device("cpu")
    embeddings = None
    if (params['bert_tokens']):
        # BERT path: tokenizer supplies the vocabulary (locals are placeholders).
        train, val, test = createDatasetSplit(params)
        vocab_own = None
        vocab_size = 0
        padding_idx = 0
    else:
        # Non-BERT path: the learned vocabulary fixes embed/vocab size.
        train, val, test, vocab_own = createDatasetSplit(params)
        params['embed_size'] = vocab_own.embeddings.shape[1]
        params['vocab_size'] = vocab_own.embeddings.shape[0]
        embeddings = vocab_own.embeddings
    if (params['auto_weights']):
        # Derive balanced class weights from the test labels.
        # NOTE(review): weights computed on *test* labels — confirm intended.
        y_test = [ele[2] for ele in test]
        encoder = LabelEncoder()
        encoder.classes_ = np.load('Data/classes.npy')
        params['weights'] = class_weight.compute_class_weight(
            'balanced', np.unique(y_test), y_test).astype('float32')
    if (extra_data_path != None):
        # Load extra annotated data, keep only test-split hateful/offensive
        # posts, and encode it for evaluation.
        params_dash = {}
        params_dash['num_classes'] = 3
        params_dash['data_file'] = extra_data_path
        params_dash['class_names'] = dict_data_folder[str(
            params['num_classes'])]['class_label']
        temp_read = get_annotated_data(params_dash)
        with open('Data/post_id_divisions.json', 'r') as fp:
            post_id_dict = json.load(fp)
        temp_read = temp_read[
            temp_read['post_id'].isin(post_id_dict['test'])
            & (temp_read['final_label'].isin(['hatespeech', 'offensive']))]
        test_data = get_test_data(temp_read, params, message='text')
        test_extra = encodeData(test_data, vocab_own, params)
        test_dataloader = combine_features(test_extra, params, is_train=False)
    elif (use_ext_df):
        # Evaluate the caller-supplied dataframe.
        test_extra = encodeData(test_data, vocab_own, params)
        test_dataloader = combine_features(test_extra, params, is_train=False)
    else:
        # Default: evaluate the standard test split.
        test_dataloader = combine_features(test, params, is_train=False)
    model = select_model(params, embeddings)
    if (params['bert_tokens'] == False):
        # Non-BERT models are restored from a saved checkpoint.
        model = load_model(model, params)
    if (params["device"] == 'cuda'):
        model.cuda()
    model.eval()
    # Put the model in evaluation mode--the dropout layers behave differently
    # during evaluation.
    # Tracking variables
    if ((extra_data_path != None) or (use_ext_df == True)):
        post_id_all = list(test_data['Post_id'])
    else:
        post_id_all = list(test['Post_id'])
    print("Running eval on test data...")
    t0 = time.time()
    true_labels = []
    pred_labels = []
    logits_all = []
    attention_all = []
    input_mask_all = []
    # Evaluate data for one epoch
    for step, batch in tqdm(enumerate(test_dataloader),
                            total=len(test_dataloader)):
        # Progress update every 40 batches.
        if step % 40 == 0 and not step == 0:
            # Calculate elapsed time in minutes.
            # NOTE(review): `elapsed` is computed but never printed/used.
            elapsed = format_time(time.time() - t0)
        # `batch` contains four pytorch tensors:
        # [0]: input ids
        # [1]: attention vals
        # [2]: attention mask
        # [3]: labels
        b_input_ids = batch[0].to(device)
        b_att_val = batch[1].to(device)
        b_input_mask = batch[2].to(device)
        b_labels = batch[3].to(device)
        # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)
        #model.zero_grad()
        outputs = model(b_input_ids,
                        attention_vals=b_att_val,
                        attention_mask=b_input_mask,
                        labels=None,
                        device=device)
        # m = nn.Softmax(dim=1)
        logits = outputs[0]
        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.detach().cpu().numpy()
        if (params['bert_tokens']):
            # Average layer-11 heads' attention to the [CLS] position.
            # NOTE(review): assumes outputs[1] is per-layer attention — confirm.
            attention_vectors = np.mean(
                outputs[1][11][:, :, 0, :].detach().cpu().numpy(), axis=1)
        else:
            attention_vectors = outputs[1].detach().cpu().numpy()
        # Calculate the accuracy for this batch of test sentences.
        # Accumulate the total accuracy.
        pred_labels += list(np.argmax(logits, axis=1).flatten())
        true_labels += list(label_ids.flatten())
        logits_all += list(logits)
        attention_all += list(attention_vectors)
        input_mask_all += list(batch[2].detach().cpu().numpy())
    # Convert raw logits to per-class probabilities.
    logits_all_final = []
    for logits in logits_all:
        logits_all_final.append(softmax(logits))
    if (use_ext_df == False):
        testf1 = f1_score(true_labels, pred_labels, average='macro')
        testacc = accuracy_score(true_labels, pred_labels)
        #testrocauc=roc_auc_score(true_labels, logits_all_final,multi_class='ovo',average='macro')
        testprecision = precision_score(true_labels, pred_labels,
                                        average='macro')
        testrecall = recall_score(true_labels, pred_labels, average='macro')
        # Report the final accuracy for this validation run.
        print(" Accuracy: {0:.3f}".format(testacc))
        print(" Fscore: {0:.3f}".format(testf1))
        print(" Precision: {0:.3f}".format(testprecision))
        print(" Recall: {0:.3f}".format(testrecall))
        #print(" Roc Auc: {0:.3f}".format(testrocauc))
        print(" Test took: {:}".format(format_time(time.time() - t0)))
    # Keep only attention values at real-token positions (mask == 1).
    attention_vector_final = []
    for x, y in zip(attention_all, input_mask_all):
        temp = []
        for x_ele, y_ele in zip(x, y):
            if (y_ele == 1):
                temp.append(x_ele)
        attention_vector_final.append(temp)
    # Build one ERASER-style record per post.
    list_dict = []
    for post_id, attention, logits, pred, ground_truth in zip(
            post_id_all, attention_vector_final, logits_all_final,
            pred_labels, true_labels):
        # if(ground_truth==1):
        #     continue
        temp = {}
        encoder = LabelEncoder()
        encoder.classes_ = np.load('Data/classes.npy')
        pred_label = encoder.inverse_transform([pred])[0]
        ground_label = encoder.inverse_transform([ground_truth])[0]
        temp["annotation_id"] = post_id
        temp["classification"] = pred_label
        # NOTE(review): assumes class index order hatespeech/normal/offensive
        # matches Data/classes.npy — confirm.
        temp["classification_scores"] = {
            "hatespeech": logits[0],
            "normal": logits[1],
            "offensive": logits[2]
        }
        # Indices of the top-k attention values (ascending sort, take tail).
        topk_indicies = sorted(range(len(attention)),
                               key=lambda i: attention[i])[-topk:]
        temp_hard_rationales = []
        for ind in topk_indicies:
            temp_hard_rationales.append({
                'end_token': ind + 1,
                'start_token': ind
            })
        temp["rationales"] = [{
            "docid": post_id,
            "hard_rationale_predictions": temp_hard_rationales,
            "soft_rationale_predictions": attention,
            #"soft_sentence_predictions":[1.0],
            "truth": ground_truth
        }]
        list_dict.append(temp)
    return list_dict, test_data