def __call__(self, net, data, trust_model=None):
    """
    Runs a trained neural network classifier on validation data, and
    iterates through the top prediction for each datum.

    TODO: write some unit tests for this function
    """
    net.eval()
    net = cudaify(net)
    with torch.no_grad():
        for inst_ids, targets, evidence, response, zones in tqdm(
                data, total=len(data)):
            output, conf = net(cudaify(evidence), zones)
            ps = F.softmax(output.clamp(min=-25, max=25), dim=-1)
            abs_i = output.shape[1] - 1  # last class is the abstention class
            preds = ps[:, :-1].argmax(dim=-1)  # best non-abstention class
            max_weight_class = ps.argmax(dim=-1)
            is_abs = (max_weight_class == abs_i)
            for pred, gold, c, abstained in zip(preds, response, conf, is_abs):
                yield {
                    'pred': pred.item(),
                    'gold': gold.item(),
                    'confidence': c.item(),
                    'abstained': abstained.item()
                }
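# Standalone sketch (illustrative, not part of the original API) of the
# abstention decoding used above: the last logit column is the abstention
# class, and the reported prediction is the argmax over the remaining columns.
def _demo_abstention_decode():
    import torch
    import torch.nn.functional as F
    output = torch.tensor([[2.0, 0.5, 3.0],   # abstention column wins
                           [4.0, 1.0, 0.5]])  # class 0 wins
    ps = F.softmax(output.clamp(min=-25, max=25), dim=-1)
    abs_i = output.shape[1] - 1
    preds = ps[:, :-1].argmax(dim=-1)
    is_abs = ps.argmax(dim=-1) == abs_i
    assert preds.tolist() == [0, 0]
    assert is_abs.tolist() == [True, False]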
def forward(self, contexts, glosses, pos):
    scores = []
    context_inputs = contexts['input_ids']
    context_masks = contexts['attention_mask']
    if self.gpu:
        context_inputs = cudaify(context_inputs)
        context_masks = cudaify(context_masks)
    context_rep = self.context_encoder(
        input_ids=context_inputs,
        attention_mask=context_masks)[0]  # last hidden state
    target_rep = self.target_representation(context_rep, pos)
    for i, g in enumerate(glosses):
        input_ids = g['input_ids']
        attention_mask = g['attention_mask']
        if self.gpu:
            input_ids = cudaify(input_ids)
            attention_mask = cudaify(attention_mask)
        last_layer = self.gloss_encoder(input_ids=input_ids,
                                        attention_mask=attention_mask)[0]
        if 'span' not in g:
            gloss_reps = last_layer[:, 0, :]  # the vector for [CLS]
        else:
            gloss_reps = self.target_representation(last_layer, g['span'])
        score = (target_rep[i] * gloss_reps).sum(dim=1)  # dot products
        scores.append(score)
    return pad_sequence(scores, batch_first=True)
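# Illustrative sketch (not from the original source) of the scoring scheme in
# forward: each target vector is dotted with each of its candidate gloss
# vectors, and pad_sequence right-pads words with different sense counts.
def _demo_gloss_scoring():
    import torch
    from torch.nn.utils.rnn import pad_sequence
    target_rep = torch.randn(2, 768)    # two target words
    gloss_reps = [torch.randn(3, 768),  # word 0: three candidate senses
                  torch.randn(5, 768)]  # word 1: five candidate senses
    scores = [(target_rep[i] * g).sum(dim=1) for i, g in enumerate(gloss_reps)]
    result = pad_sequence(scores, batch_first=True)
    assert result.shape == (2, 5)  # padded to the max number of senses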
def __call__(self, net, data):
    """
    Runs a trained neural network classifier on validation data, and
    iterates through the top prediction for each datum.

    TODO: write some unit tests for this function
    """
    net.eval()
    net = cudaify(net)
    with torch.no_grad():
        for inst_ids, targets, evidence, response, zones in data:
            output, conf = net(cudaify(evidence), zones)
            abs_i = output.shape[1] - 1  # last class is the abstention class
            preds = output.argmax(dim=-1)
            preds[preds == abs_i] = -1  # flag abstentions with -1
            for pred, gold, c in zip(preds, response, conf):
                yield {
                    'pred': pred.item(),
                    'gold': gold.item(),
                    'confidence': c.item()
                }
def _epoch_step(self, model):
    running_loss = 0.0
    denom = 0
    for images, labels in tqdm(self.train_loader,
                               total=len(self.train_loader)):
        self.optimizer.zero_grad()
        output, conf = model(cudaify(images))
        loss = self.criterion(output, conf, cudaify(labels))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
        self.optimizer.step()
        running_loss += loss.item()
        denom += 1
    return running_loss / denom
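# Interface sketch (illustrative, not from the original source): the criterion
# called in the loop above takes (output, confidence, labels). A plain
# cross-entropy baseline that ignores the confidence channel would look like:
def _xent_ignoring_confidence(output, conf, labels):
    import torch.nn.functional as F
    return F.cross_entropy(output, labels)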
def __call__(self, net, data, loss_f=None):
    net.eval()
    self.running_loss_total = 0.0
    self.running_loss_denom = 0
    for images, labels in tqdm(data, total=len(data)):
        with torch.no_grad():
            outputs, conf = net(cudaify(images))
            if loss_f is not None:
                loss = loss_f(outputs, conf, cudaify(labels))
                self.running_loss_total += loss.item()
                self.running_loss_denom += 1  # TODO: why 1 and not len(images)?
        for pred in self.make_predictions(outputs, labels, conf):
            yield pred
def _epoch_step(self, model):
    model = cudaify(model)
    running_loss = 0.0
    denom = 0
    for (_, _, evidence, response, zones) in tqdm(
            self.train_loader, total=len(self.train_loader)):
        self.optimizer.zero_grad()
        outputs, conf = model(cudaify(evidence), zones)
        loss_size = self.criterion(outputs, conf, cudaify(response))
        loss_size.backward()
        self.optimizer.step()
        running_loss += loss_size.item()
        denom += 1
    return running_loss / denom
def _epoch_step(self, model):
    running_loss = 0.0
    denom = 0
    for img_x, img_y, lbl_x, lbl_y in tqdm(self.train_loader,
                                           total=len(self.train_loader)):
        self.optimizer.zero_grad()
        output_x, conf_x = model(cudaify(img_x))
        output_y, conf_y = model(cudaify(img_y))
        loss = self.criterion(output_x, output_y, cudaify(lbl_x),
                              cudaify(lbl_y), conf_x, conf_y)
        loss.backward()
        self.optimizer.step()
        running_loss += loss.item()
        denom += 1
    return running_loss / denom
def __init__(self,
             input_size=784,
             hidden_sizes=(128, 64),
             output_size=10,
             confidence_extractor=None):
    super().__init__(input_size, hidden_sizes, output_size)
    self.confidence_layer = cudaify(nn.Linear(hidden_sizes[1], 1))
def __call__(self, net, data, trust_model):
    net.eval()
    for images, labels in tqdm(data, total=len(data)):
        with torch.no_grad():
            output, conf = net(cudaify(images))
        ps = F.softmax(output.clamp(min=-25, max=25), dim=1)
        preds = ps.argmax(dim=1)
        if trust_model is not None:
            trust_score = trust_model.get_score(images.cpu().numpy(),
                                                preds.cpu().numpy())
            trust_score = torch.from_numpy(trust_score.astype(np.float64))
        else:
            trust_score = [None] * labels.shape[0]
        for p, g, c, t in zip(preds, labels, conf, trust_score):
            yield {
                'pred': p.item(),
                'gold': g.item(),
                'confidence': c.item() if t is None else t.item(),
                'abstained': False
            }
def _epoch_step(self, model):
    model = cudaify(model)
    running_loss = 0.0
    denom = 0
    for batch in tqdm(self.train_loader, total=len(self.train_loader)):
        contexts = batch['contexts']
        glosses = batch['glosses']
        span = batch['span']
        gold = batch['gold']
        self.optimizer.zero_grad()  # clear gradients from the previous batch
        scores = model(contexts, glosses, span)
        loss_size = self.criterion(scores, cudaify(torch.tensor(gold)))
        loss_size.backward()
        self.optimizer.step()
        running_loss += loss_size.item()
        denom += 1
    return running_loss / denom
def __init__(self,
             input_size=784,
             hidden_sizes=(128, 64),
             output_size=10,
             confidence_extractor='max_prob'):
    super().__init__()
    self.input_size = input_size
    self.output_size = output_size
    self.confidence_extractor = confidence_extractor_lookup[
        confidence_extractor]
    self.dropout = nn.Dropout(p=0.2)
    self.linear1 = cudaify(nn.Linear(input_size, hidden_sizes[0]))
    self.linear2 = cudaify(nn.Linear(hidden_sizes[0], hidden_sizes[1]))
    self.final = cudaify(nn.Linear(hidden_sizes[1], output_size))
    self.softmax = cudaify(nn.Softmax(dim=1))
    self.relu1 = nn.ReLU()
    self.relu2 = nn.ReLU()
def __init__(self,
             input_size=784,
             hidden_sizes=(128, 64),
             output_size=10,
             confidence_extractor='max_non_abs'):
    super().__init__(input_size, hidden_sizes, output_size,
                     confidence_extractor)
    # one extra output unit for the abstention class
    self.final = cudaify(nn.Linear(hidden_sizes[1], output_size + 1))
def __init__(self, input_size, output_size, zone_applicant='max_prob'):
    super().__init__()
    self.input_size = input_size
    self.output_size = output_size
    self.linear = cudaify(nn.Linear(input_size, output_size))
    self.zone_applicant = apply_zones_lookup[zone_applicant]
    torch.nn.init.xavier_uniform_(self.linear.weight)
    print('confidence:', self.zone_applicant.__name__)
def __call__(self, input_batch, confidence, target_batch):
    if self.epoch <= self.learn_epochs:
        # warm-up phase: train with standard cross-entropy
        loss = F.cross_entropy(input_batch, target_batch, reduction='none')
        h_c = F.cross_entropy(input_batch[:, :-1], target_batch).detach()
        p_out = torch.exp(F.log_softmax(input_batch, dim=1)).detach()
        p_out_abstain = p_out[:, -1].detach()
        # update instantaneous alpha_thresh
        self.alpha_thresh = Variable(
            ((1. - p_out_abstain) * h_c).mean().data)
        # update alpha_thresh_ewma
        if self.alpha_thresh_ewma is None:
            self.alpha_thresh_ewma = self.alpha_thresh
        else:
            self.alpha_thresh_ewma = Variable(
                self.ewma_mu * self.alpha_thresh.data +
                (1. - self.ewma_mu) * self.alpha_thresh_ewma.data)
        return loss.mean()
    else:
        # calculate cross-entropy only over the true (non-abstention) classes
        h_c = F.cross_entropy(input_batch[:, :-1], target_batch,
                              reduction='none')
        # probability of the abstention class, capped away from 1
        p_out = torch.exp(F.log_softmax(input_batch, dim=1))
        p_out_abstain = torch.min(
            p_out[:, -1],
            cudaify(Variable(torch.tensor([1. - epsilon]))))
        # update instantaneous alpha_thresh
        self.alpha_thresh = Variable(
            ((1. - p_out_abstain) * h_c).mean().data)
        try:
            # update alpha_thresh_ewma
            if self.alpha_thresh_ewma is None:
                self.alpha_thresh_ewma = self.alpha_thresh
            else:
                self.alpha_thresh_ewma = Variable(
                    self.ewma_mu * self.alpha_thresh.data +
                    (1. - self.ewma_mu) * self.alpha_thresh_ewma.data)
            if self.alpha_var is None:
                # hasn't been initialized; do it now. We create a fresh
                # Variable here so that the history of alpha_var computation
                # (which depends on alpha_thresh_ewma) is forgotten. This
                # makes self.alpha_var a leaf variable, which will not be
                # differentiated.
                # aggressive initialization of alpha to jump-start abstention
                self.alpha_var = Variable(self.alpha_thresh_ewma.data /
                                          self.alpha_init_factor)
                self.alpha_inc = ((self.alpha_final - self.alpha_var.data) /
                                  (self.total_epochs - self.epoch))
                self.alpha_set_epoch = self.epoch
            else:
                # we only update alpha once per epoch
                if self.epoch > self.alpha_set_epoch:
                    self.alpha_var = Variable(self.alpha_var.data +
                                              self.alpha_inc)
                    self.alpha_set_epoch = self.epoch
            loss = ((1. - p_out_abstain) * h_c -
                    self.alpha_var * torch.log(1. - p_out_abstain))
            self.vars = [h_c, p_out_abstain]
            return loss.mean()
        except RuntimeError as e:
            print(e)
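# A worked sketch (illustrative only) of the abstention loss above, with a
# fixed alpha instead of the scheduled self.alpha_var: when the abstention
# probability is low, the loss recovers ordinary cross-entropy, while the
# -alpha * log(1 - p_abstain) term penalizes abstaining.
def _demo_abstention_loss(alpha=0.5):
    import torch
    import torch.nn.functional as F
    logits = torch.randn(4, 11)  # 10 true classes + 1 abstention class
    targets = torch.tensor([0, 3, 7, 9])
    h_c = F.cross_entropy(logits[:, :-1], targets, reduction='none')
    p_abstain = torch.softmax(logits, dim=1)[:, -1].clamp(max=1. - 1e-6)
    loss = (1. - p_abstain) * h_c - alpha * torch.log(1. - p_abstain)
    return loss.mean()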
def initial_layers(self, input_vec):
    nextout = cudaify(input_vec)
    nextout = self.linear1(nextout)
    nextout = self.relu1(nextout)
    nextout = self.dropout(nextout)
    nextout = self.linear2(nextout)
    nextout = self.relu2(nextout)
    nextout = self.dropout(nextout)
    return nextout
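# Shape sketch (illustrative): with the default sizes in __init__ above,
# initial_layers maps a flattened 784-dim batch to the 64-dim penultimate
# representation. `net` stands for any instance of the classifier above.
def _check_initial_layers_shape(net):
    import torch
    x = torch.randn(32, 784)
    h = net.initial_layers(x)  # Linear -> ReLU -> Dropout, twice
    assert h.shape == (32, 64)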
def __call__(self, sent):
    # add_special_tokens takes care of adding [CLS], [SEP], <s>... tokens
    # in the right way for each model
    input_ids = self.tokenizer.encode(sent, add_special_tokens=True)
    bert_toks = self.tokenizer.convert_ids_to_tokens(input_ids)
    input_ids = torch.tensor([input_ids])
    with torch.no_grad():
        last_hidden_states = self.model(cudaify(input_ids))[0]
    last_hidden_states = last_hidden_states.squeeze(0)
    return bert_toks, last_hidden_states
def passage_to_vec(passage, tokenizer, bert, max_seq_length=512):
    tknz_output = tokenizer(passage,
                            add_special_tokens=True,
                            return_tensors='pt',
                            verbose=False,
                            truncation=True,
                            max_length=max_seq_length)
    tknz_output = cudaify(tknz_output)
    with torch.no_grad():
        last_hidden_states = bert(**tknz_output)[0].squeeze(0)
    cls_embedding = last_hidden_states[0]  # the [CLS] token vector
    return cls_embedding
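# Usage sketch (illustrative; assumes the HuggingFace transformers package and
# the cudaify device helper used throughout this file):
def _demo_passage_to_vec():
    from transformers import BertModel, BertTokenizer
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    bert = cudaify(BertModel.from_pretrained('bert-base-uncased'))
    vec = passage_to_vec("An example passage.", tokenizer, bert)
    assert vec.shape == (bert.config.hidden_size,)  # (768,) for bert-base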
def __call__(self, net, data, trust_model):
    """
    Runs a trained neural network classifier on validation data, and
    iterates through the top prediction for each datum.

    TODO: write some unit tests for this function
    """
    net.eval()
    net = cudaify(net)
    with torch.no_grad():
        for inst_ids, targets, evidence, response, zones in tqdm(
                data, total=len(data)):
            output, conf = net(cudaify(evidence), zones)
            ps = F.softmax(output.clamp(min=-25, max=25), dim=-1)
            preds = ps.argmax(dim=-1)
            if trust_model is not None:
                trust_score = trust_model.get_score(evidence.cpu().numpy(),
                                                    preds.cpu().numpy())
                trust_score = torch.from_numpy(
                    trust_score.astype(np.float64))
            else:
                trust_score = [None] * len(targets)
            for pred, gold, c, t in zip(preds, response, conf, trust_score):
                yield {
                    'pred': pred.item(),
                    'gold': gold.item(),
                    'confidence': c.item() if t is None else t.item(),
                    'abstained': False
                }
def main(data_dir):
    batch_size = 16
    print("Initializing data loader.")
    dev_loader = init_loader(data_dir, "dev", batch_size)
    input_size = 768  # TODO: what is it in general?
    output_size = dev_loader.num_senses()
    print("Loading saved neural network.")
    net = AffineClassifier(input_size, output_size)
    net.load_state_dict(torch.load(join(file_dir, "../saved/bert_simple.pt")))
    net = cudaify(net)
    print("Computing PR curve.")
    py_curve = precision_yield_curve(net, dev_loader)
    print(py_curve)
    return py_curve
def __call__(self, model):
    print("Training with config:")
    print(self.config)
    model = cudaify(model)
    epoch_results = []
    for e in range(1, self.n_epochs + 1):
        self.criterion.notify(e)
        batch_loss = self._epoch_step(model)
        if self.scheduler is not None:
            self.scheduler.step()
        eval_result = self.validate_and_analyze(model)
        epoch_results.append(EpochResult(e, batch_loss, eval_result))
        print("epoch {}:".format(e))
        print("  training loss: " + str(batch_loss))
        print(str(eval_result))
    result = ExperimentResult(self.config, epoch_results)
    result.show_training_dashboard()
    return model, result
def __call__(self, net, data, trust_model=None):
    # trust_model is a dummy argument here; correct usage should only ever
    # pass None for it
    net.eval()
    for images, labels in tqdm(data, total=len(data)):
        with torch.no_grad():
            output, conf = net(cudaify(images))
        ps = F.softmax(output.clamp(min=-25, max=25), dim=1)
        abs_i = output.shape[1] - 1  # last class is the abstention class
        max_weight_class = ps.argmax(dim=-1)
        is_abs = (max_weight_class == abs_i)
        preds = ps[:, :-1].argmax(dim=-1)  # best non-abstention class
        for pred, gold, c, abstained in zip(preds, labels, conf, is_abs):
            yield {
                'pred': pred.item(),
                'gold': gold.item(),
                'confidence': c.item(),
                'abstained': abstained.item()
            }
def __call__(self, model):
    model = cudaify(model)
    abs_rate_graph = []
    best_model = None
    best_summary = -1
    best_analytics = None
    best_validation = None
    coverages = []
    for e in range(self.n_epochs):
        self.criterion.notify(e)
        batch_loss = self._epoch_step(model)
        analytics, validation = self.validate_and_analyze(model)
        coverages.append(analytics['coverage'])
        if best_validation is None:
            best_validation = validation
        # an earlier scoring heuristic, kept for reference:
        #   summary = (analytics['auroc'] / 50
        #              + 0.5 * (analytics['aupr/succ'] / analytics['precision']
        #                       + analytics['aupr/err'] / (1 - analytics['precision']))
        #              + analytics['capacity'] / analytics['precision'])
        summary = e  # currently the most recent epoch always wins
        log(analytics)
        log("epoch {} training loss: ".format(e) + str(batch_loss))
        if summary > best_summary:
            best_model = copy.deepcopy(model)
            best_analytics = analytics
            best_summary = summary
            best_validation = validation
        if self.scheduler is not None:
            self.scheduler.step()
        abs_rate_graph.append([e, 1 - analytics['coverage']])
        log("\n")
    log("Best model performance\n" + str(best_analytics))
    return best_model, best_analytics, best_validation
def _epoch_step(self, model):
    model = cudaify(model)
    running_loss = 0.0
    denom = 0
    for (pkg1, pkg2) in tqdm(self.train_loader,
                             total=len(self.train_loader)):
        (_, _, evidence1, response1, zones1) = pkg1
        (_, _, evidence2, response2, zones2) = pkg2
        zones1 = cudaify(torch.tensor(zones1))
        zones2 = cudaify(torch.tensor(zones2))
        self.optimizer.zero_grad()
        outputs1, conf1 = model(cudaify(evidence1), zones1)
        outputs2, conf2 = model(cudaify(evidence2), zones2)
        loss_size = self.criterion(outputs1, outputs2, cudaify(response1),
                                   cudaify(response2), conf1, conf2)
        loss_size.backward()
        self.optimizer.step()
        running_loss += loss_size.item()
        denom += 1
    return running_loss / denom
def __init__(self, input_size=784, hidden_sizes=(128, 64), output_size=10):
    super().__init__(input_size, hidden_sizes, output_size)
    self.confidence_layer = cudaify(nn.Linear(hidden_sizes[1], 1))
def __init__(self, input_size, output_size, zone_applicant='max_non_abs'):
    super().__init__(input_size, output_size, zone_applicant)
    # one extra output unit for the abstention class
    self.linear = cudaify(nn.Linear(input_size, output_size + 1))
def __init__(self):
    self.bert = cudaify(BertModel.from_pretrained('bert-base-uncased'))
    self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    self.train_path = path.join(imdb_path, 'train')
    self.test_path = path.join(imdb_path, 'test')
def __init__(self):
    self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    self.model = cudaify(BertModel.from_pretrained('bert-base-uncased'))
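# Hypothetical usage (the wrapper's class name is not shown above, so
# BertWrapper is an assumed stand-in for the class this __init__ belongs to):
#
#     wrapper = BertWrapper()
#     ids = wrapper.tokenizer.encode("hello world", add_special_tokens=True)
#     with torch.no_grad():
#         states = wrapper.model(cudaify(torch.tensor([ids])))[0]
#     # states has shape (1, seq_len, 768) for bert-base-uncased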