def compute_loss_and_accuracy(
        dataloader: torch.utils.data.DataLoader,
        model: torch.nn.Module,
        loss_criterion: torch.nn.modules.loss._Loss):
    """
    Computes the average loss and the accuracy over the whole dataset
    in dataloader.
    Args:
        dataloader: Validation/Test dataloader
        model: torch.nn.Module
        loss_criterion: The loss criterion, e.g: torch.nn.CrossEntropyLoss()
    Returns:
        [average_loss, accuracy]: both scalar.
    """
    average_loss = 0
    accuracy = 0
    num_batches = 0
    with torch.no_grad():
        for (X_batch, Y_batch) in dataloader:
            # Transfer images/labels to GPU VRAM, if possible
            X_batch = utils.to_cuda(X_batch)
            Y_batch = utils.to_cuda(Y_batch)
            # Forward pass the images through our model
            output_probs = model(X_batch)
            # Compute Loss and Accuracy
            average_loss += loss_criterion(output_probs, Y_batch).item()
            predictions = output_probs.argmax(dim=1)
            accuracy += (predictions == Y_batch).float().mean().item()
            num_batches += 1
    average_loss /= num_batches
    accuracy /= num_batches
    return average_loss, accuracy
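# Note: the snippets in this collection rely on a `to_cuda` / `utils.to_cuda`
# helper defined elsewhere in each project (the exact signature varies: some
# variants also take a `use_cuda` flag or a `gpuid`). A minimal sketch of what
# such a helper typically looks like; this is an assumption for context, not
# any project's actual implementation:
import torch

def to_cuda(elements):
    """Move a tensor/module (or a list/tuple of them) to the GPU, if available."""
    if not torch.cuda.is_available():
        return elements
    if isinstance(elements, (list, tuple)):
        return [x.cuda() for x in elements]
    return elements.cuda()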
def comb_mats_no_which_dataset(self, rel_tensor, in_batch=True, cpu=False):
    stack_mats = to_cuda(
        torch.zeros(rel_tensor.shape[-1], 2, self.embedding_size))
    i = 0
    for rel_tent in rel_tensor:
        rel = rel_tent.item()
        if rel in self.equiv_flip_dict:
            rel = self.equiv_flip_dict[rel]
        p_rel = self.r_id2_vrr[rel][0]
        # no need to stack
        if p_rel == rel or (self.rank_start[rel] == self.rank_start[p_rel]
                            and self.rank_end[rel] == self.rank_end[p_rel]):
            if self.rank_end[rel] == 0:
                result = to_cuda(torch.zeros(2, self.embedding_size))
                result[0] = self.dict_of_random_mats[str(p_rel)]
            else:
                result = self.dict_of_random_mats[str(p_rel)]
            if cpu:
                stack_mats[i] = result.cpu()
            else:
                stack_mats[i] = result
        else:
            # stack everything from above and return
            result = torch.mm(self.dict_of_linenar_comb[str(rel)],
                              self.dict_of_random_mats[str(p_rel)])
            result_num_rank = result.shape[0]
            if cpu:
                stack_mats[i][:result_num_rank] = result.cpu()
            else:
                stack_mats[i][:result_num_rank] = result
        i += 1
    return stack_mats
def compute_loss_and_accuracy(dataloader: torch.utils.data.DataLoader,
                              model: torch.nn.Module,
                              loss_criterion: torch.nn.modules.loss._Loss):
    """
    Computes the average loss and the accuracy over the whole dataset
    in dataloader.
    Args:
        dataloader: Validation/Test dataloader
        model: torch.nn.Module
        loss_criterion: The loss criterion, e.g: torch.nn.CrossEntropyLoss()
    Returns:
        [average_loss, accuracy]: both scalar.
    """
    average_loss = 0
    accuracy = 0
    i = 0
    with torch.no_grad():
        for (X_batch, Y_batch) in dataloader:
            i += 1
            # Transfer images/labels to GPU VRAM, if possible
            X_batch = utils.to_cuda(X_batch)
            Y_batch = utils.to_cuda(Y_batch)
            # Forward pass the images through our model
            output_probs = model(X_batch)
            # Compute Loss and Accuracy
            average_loss += loss_criterion(output_probs, Y_batch)
            # use dim=1, since dim=0 is the batch dimension
            _, predicted = torch.max(output_probs.data, 1)
            accuracy += (predicted == Y_batch).sum() / Y_batch.shape[0]
    average_loss = average_loss / i
    accuracy = accuracy / i
    return average_loss.detach().cpu().float(), accuracy.detach().cpu().float()
def compute_loss_and_accuracy(dataloader: torch.utils.data.DataLoader,
                              model: torch.nn.Module,
                              loss_criterion: torch.nn.modules.loss._Loss):
    """
    Computes the average loss and the accuracy over the whole dataset
    in dataloader.
    Args:
        dataloader: Validation/Test dataloader
        model: torch.nn.Module
        loss_criterion: The loss criterion, e.g: torch.nn.CrossEntropyLoss()
    Returns:
        [average_loss, accuracy]: both scalar.
    """
    average_loss = []
    accuracy = []
    with torch.no_grad():
        for (X_batch, Y_batch) in dataloader:
            # Transfer images/labels to GPU VRAM, if possible
            X_batch = utils.to_cuda(X_batch)
            Y_batch = utils.to_cuda(Y_batch)
            # Forward pass the images through our model
            output_probs = model(X_batch)
            # Compute Loss and Accuracy
            average_loss.append(loss_criterion(output_probs, Y_batch))
            accuracy.append(
                (output_probs.argmax(dim=-1) == Y_batch).float().mean())
    return sum(average_loss) / len(average_loss), sum(accuracy) / len(accuracy)
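# A minimal usage sketch for compute_loss_and_accuracy above, using a tiny random
# dataset and toy model purely for illustration (the real project wires in its
# own dataloaders instead; `utils` here is assumed to be the project module that
# provides to_cuda):
import torch
from torch.utils.data import DataLoader, TensorDataset
import utils  # hypothetical: the project's utils module providing to_cuda

if __name__ == "__main__":
    X = torch.randn(64, 3, 32, 32)
    Y = torch.randint(0, 10, (64,))
    loader = DataLoader(TensorDataset(X, Y), batch_size=16)
    toy_model = utils.to_cuda(torch.nn.Sequential(
        torch.nn.Flatten(), torch.nn.Linear(3 * 32 * 32, 10)))
    criterion = torch.nn.CrossEntropyLoss()
    avg_loss, acc = compute_loss_and_accuracy(loader, toy_model, criterion)
    print(f"loss={avg_loss:.4f} accuracy={acc:.4f}")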
def compute_class_accuracy(testloader, model, name, use_cuda=True):
    class_correct, class_total = [0] * 10, [0] * 10
    confusion_matrix = np.zeros((10, 10), dtype=np.int32)
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images = to_cuda(images, use_cuda)
            labels = to_cuda(labels, use_cuda)
            outputs = F.softmax(model(images), dim=1)
            _, predicted = torch.max(outputs, dim=1)
            for i in range(predicted.shape[0]):
                class_total[labels[i].item()] += 1
                if labels[i].item() == predicted[i].item():
                    class_correct[predicted[i].item()] += 1
                confusion_matrix[predicted[i].item(), labels[i].item()] += 1
    save_confusion_matrix(confusion_matrix, name)
    print(sum(class_correct))
    return class_correct, confusion_matrix
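# `save_confusion_matrix` is called above but not shown here. A minimal sketch of
# such a helper using matplotlib (hypothetical; the project's version may differ):
import numpy as np
import matplotlib.pyplot as plt

def save_confusion_matrix(confusion_matrix: np.ndarray, name: str):
    """Render the matrix (rows = predictions, columns = true labels) and save it."""
    fig, ax = plt.subplots(figsize=(6, 6))
    im = ax.imshow(confusion_matrix, cmap="Blues")
    ax.set_xlabel("True label")
    ax.set_ylabel("Predicted label")
    fig.colorbar(im, ax=ax)
    for i in range(confusion_matrix.shape[0]):
        for j in range(confusion_matrix.shape[1]):
            ax.text(j, i, int(confusion_matrix[i, j]),
                    ha="center", va="center", fontsize=7)
    fig.savefig(f"confusion_matrix_{name}.png", bbox_inches="tight")
    plt.close(fig)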
def get_candidate_labels(self, candidate_starts, candidate_ends,
                         labeled_starts, labeled_ends, labels):
    same_start = torch.eq(
        to_cuda(labeled_starts.view(-1, 1)),
        to_cuda(candidate_starts.view(1, -1)))  # [num_labeled, num_candidates]
    same_end = torch.eq(
        to_cuda(labeled_ends.view(-1, 1)),
        to_cuda(candidate_ends.view(1, -1)))  # [num_labeled, num_candidates]
    same_span = same_start & same_end  # [num_labeled, num_candidates]
    candidate_labels = torch.matmul(
        to_cuda(labels.view(1, -1).float()),
        same_span.float())  # [1, num_candidates]
    candidate_labels = candidate_labels.squeeze(0)  # [num_candidates]
    return candidate_labels
def compute_loss_and_accuracy(dataloader: torch.utils.data.DataLoader,
                              model: torch.nn.Module,
                              loss_criterion: torch.nn.modules.loss._Loss):
    """
    Computes the average loss and the accuracy over the whole dataset
    in dataloader.
    Args:
        dataloader: Validation/Test dataloader
        model: torch.nn.Module
        loss_criterion: The loss criterion, e.g: torch.nn.CrossEntropyLoss()
    Returns:
        [average_loss, accuracy]: both scalar.
    """
    total_loss = 0
    total_correct = 0
    total_images = 0
    with torch.no_grad():
        for (X_batch, Y_batch) in dataloader:
            # Transfer images/labels to GPU VRAM, if possible
            X_batch = utils.to_cuda(X_batch)
            Y_batch = utils.to_cuda(Y_batch)
            # Forward pass the images through our model
            output_probs = model(X_batch)
            # Compute Loss and Accuracy
            total_loss += loss_criterion(output_probs, Y_batch)
            total_correct += num_correct_preds(output_probs, Y_batch)
            total_images += X_batch.shape[0]
    average_loss = total_loss / len(dataloader)
    accuracy = total_correct / total_images
    return average_loss, accuracy
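# `num_correct_preds` is used above but not defined in this snippet. A minimal
# sketch of what it presumably computes (hypothetical helper):
import torch

def num_correct_preds(output_probs: torch.Tensor, targets: torch.Tensor) -> int:
    """Count how many argmax predictions match the target labels."""
    predictions = output_probs.argmax(dim=1)
    return (predictions == targets).sum().item()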
def train_step(self, X_batch, Y_batch):
    """
    Perform forward, backward and gradient descent step here.
    The function is called once for every batch (see trainer.py)
    to perform the train step.
    The function returns the mean loss value which is then automatically
    logged in our variable self.train_history.

    Args:
        X_batch: one batch of images
        Y_batch: one batch of labels
    Returns:
        loss value (float) on batch
    """
    # X_batch is the CIFAR10 images. Shape: [batch_size, 3, 32, 32]
    # Y_batch is the CIFAR10 image label. Shape: [batch_size]
    # Transfer images / labels to GPU VRAM, if possible
    X_batch = utils.to_cuda(X_batch)
    Y_batch = utils.to_cuda(Y_batch)

    # Perform the forward pass
    predictions = self.model(X_batch)
    # Compute the cross entropy loss for the batch
    loss = self.loss_criterion(predictions, Y_batch)

    # Backpropagation
    loss.backward()
    # Gradient descent step
    self.optimizer.step()
    # Reset all computed gradients to 0
    self.optimizer.zero_grad()

    return loss.detach().cpu().item()
def compute_loss_and_accuracy(dataloader: torch.utils.data.DataLoader,
                              model: torch.nn.Module,
                              loss_criterion: torch.nn.modules.loss._Loss):
    """
    Computes the average loss and the accuracy over the whole dataset
    in dataloader.
    Args:
        dataloader: Validation/Test dataloader
        model: torch.nn.Module
        loss_criterion: The loss criterion, e.g: torch.nn.CrossEntropyLoss()
    Returns:
        [average_loss, accuracy]: both scalar.
    """
    average_loss = 0
    correct_predictions = 0
    num_imgs = 0
    with torch.no_grad():
        for i, (X_batch, Y_batch) in enumerate(dataloader):
            # Transfer images/labels to GPU VRAM, if possible
            X_batch = utils.to_cuda(X_batch)
            Y_batch = utils.to_cuda(Y_batch)
            # Forward pass the images through our model
            output_probs = model(X_batch)
            # Compute Loss and Accuracy
            _, prediction = torch.max(output_probs, 1, keepdim=False)
            correct_predictions += (prediction == Y_batch).sum().item()
            num_imgs += Y_batch.size(0)
            average_loss += loss_criterion(output_probs, Y_batch).item()
    accuracy = correct_predictions / num_imgs
    # enumerate starts at 0, so the number of batches is i + 1
    return average_loss / (i + 1), accuracy
def set_forward_adaptation(self, x, is_feature=True):
    # further adaptation: keep the features fixed and train a new softmax classifier
    assert is_feature == True, 'Feature is fixed in further adaptation'
    z_support, z_query = self.parse_feature(x, is_feature)

    z_support = z_support.contiguous().view(self.n_way * self.n_support, -1)
    z_query = z_query.contiguous().view(self.n_way * self.n_query, -1)

    y_support = torch.from_numpy(np.repeat(range(self.n_way), self.n_support))
    y_support = Variable(to_cuda(y_support))

    linear_clf = nn.Linear(self.feat_dim, self.n_way)
    linear_clf = to_cuda(linear_clf)

    set_optimizer = torch.optim.SGD(linear_clf.parameters(),
                                    lr=0.01,
                                    momentum=0.9,
                                    dampening=0.9,
                                    weight_decay=0.001)

    loss_function = nn.CrossEntropyLoss()
    loss_function = to_cuda(loss_function)

    batch_size = 4
    support_size = self.n_way * self.n_support
    for epoch in range(100):
        rand_id = np.random.permutation(support_size)
        for i in range(0, support_size, batch_size):
            set_optimizer.zero_grad()
            selected_id = to_cuda(
                torch.from_numpy(rand_id[i:min(i + batch_size, support_size)]))
            z_batch = z_support[selected_id]
            y_batch = y_support[selected_id]
            scores = linear_clf(z_batch)
            loss = loss_function(scores, y_batch)
            loss.backward()
            set_optimizer.step()

    scores = linear_clf(z_query)
    return scores
def run():
    parser = create_arg_parser()
    args = parser.parse_args()
    save_dir = create_model_id(args)

    generator = utils.to_cuda(Generator())
    discriminator = utils.to_cuda(Discriminator())

    # optimizer
    optim_gen = optim.Adam(generator.parameters(), lr=args.lr, betas=(0.5, 0.999))
    optim_dis = optim.Adam(discriminator.parameters(), lr=args.lr, betas=(0.5, 0.999))

    # loss
    criterion = nn.BCELoss()

    # dataset loader
    transform = transforms.Compose([transforms.ToTensor()])
    dataset = datasets.MNIST('datasets/mnist', train=True, download=True,
                             transform=transform)
    data_loader = DataLoader(dataset, batch_size=args.batch, shuffle=True)

    train_loop(args, discriminator, generator, criterion, optim_dis, optim_gen,
               data_loader, save_dir)
    plot_loss(save_dir)
def train(self):
    """
    Trains the model for [self.epochs] epochs.
    """
    # Track initial loss/accuracy
    self.validation_epoch()
    for epoch in range(self.epochs):
        # Perform a full pass through all the training samples
        for batch_it, (X_batch, Y_batch) in enumerate(self.dataloader_train):
            # X_batch is the CIFAR10 images. Shape: [batch_size, 3, 32, 32]
            # Y_batch is the CIFAR10 image label. Shape: [batch_size]
            # Transfer images / labels to GPU VRAM, if possible
            X_batch = to_cuda(X_batch)
            Y_batch = to_cuda(Y_batch)

            # Perform the forward pass
            predictions = self.model(X_batch)
            # Compute the cross entropy loss for the batch
            loss = self.loss_criterion(predictions, Y_batch)

            # Backpropagation
            loss.backward()
            # Gradient descent step
            self.optimizer.step()
            # Reset all computed gradients to 0
            self.optimizer.zero_grad()

            # Compute loss/accuracy for all three datasets.
            if batch_it % self.validation_check == 0:
                self.validation_epoch()
                # Check early stopping criteria.
                if self.should_early_stop():
                    print("Early stopping.")
                    return
def train():
    if args.resume:
        model.load_state_dict(torch.load(args.checkpoint))
    for epoch in range(args.start_epoch, 1000):  # loop over the dataset multiple times
        print("=== Epoch", epoch, "===")
        running_loss, epoch_avg = 0.0, 0.0
        for i, data in enumerate(trainloader, start=1):
            # get the inputs
            images, volumes, landmarks = data
            images = to_cuda(images, True)
            volumes = to_cuda(volumes, True)
            # landmarks = to_cuda(landmarks, True)
            # l_shape = landmarks.shape
            # m = landmarks.view(l_shape[0], l_shape[1], l_shape[2] * l_shape[3]).argmax(2)
            # idx_gt = to_cuda(torch.stack((m // 128, m % 128), dim=2), True) / 128.0

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            # out_volumes, landmarks_predictions = net(images, landmarks)
            out_volumes = torch.sigmoid(model(images))  # F.sigmoid is deprecated
            loss = F.binary_cross_entropy(out_volumes, volumes)
            # loss2 = F.mse_loss(landmarks_predictions, idx_gt)
            # loss = loss1 + 0.5 * loss2
            loss.backward()
            torch.nn.utils.clip_grad_value_(model.parameters(), 5)
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            epoch_avg += loss.item()
            if i % 1 == 0:
                print("[%2d, %5d/%5d] loss: %.8f lr %.8f" % (
                    epoch,
                    i,
                    len(trainloader),
                    running_loss / 1,
                    scheduler.get_lr()[0],
                ))
                running_loss = 0.0

        # step the LR scheduler once per epoch, after the optimizer updates
        scheduler.step()
        print("EPOCH AVG", epoch_avg / len(trainloader))
        if epoch % 5 == 0:
            torch.save(model.state_dict(),
                       "../checkpoints/2hourglass_%d_schd_with_aug" % epoch)
def train_student_normal(model, args, trainloader, testloader, seed):
    if torch.cuda.is_available() and args.use_cuda:
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    model.to(device)
    model.set_train_mode()

    # get loss function
    criterion = nn.CrossEntropyLoss(reduction='mean')
    optimizer = get_optimizer(model, args)

    loss_values = []
    total_accuracy = []
    epoch_eval = []

    # train the student network
    for epoch in range(args.nr_epochs):
        loss_epoch = 0.0
        for i, data in enumerate(trainloader, 0):
            samples, labels = data
            samples = to_cuda(samples, args.use_cuda)
            labels = to_cuda(labels, args.use_cuda)

            # zero the gradients of network params
            optimizer.zero_grad()

            # define loss
            output_logits = model(samples)
            loss = criterion(output_logits, labels)
            loss.backward()
            optimizer.step()
            loss_epoch += loss.item()

        # average over the number of batches (enumerate starts at 0, so i + 1)
        loss_epoch /= float(i + 1)
        loss_values.append(loss_epoch)
        print("Loss at epoch {} is {}".format(epoch, loss_epoch))

        if epoch % args.eval_interval == 0:
            model.eval()
            acc = compute_overall_accuracy(testloader, model, args.use_cuda)
            total_accuracy.append(acc)
            epoch_eval.append(epoch)
            model.train()
            print("Accuracy at epoch {} is {}".format(epoch, acc))

        if epoch % args.save_interval == 0:
            print("Saving model at {} epoch".format(epoch))
            with open(
                    args.dataset + "_student_network_simple" +
                    args.student_model + str(seed) + "_" + str(args.id),
                    "wb") as f:
                torch.save(model.state_dict(), f)

    return epoch_eval, loss_values, total_accuracy
def triple_classification(self, pos_h, pos_t, pos_r):
    pos_h_e = self.ent_embeddings(pos_h)
    pos_t_e = self.ent_embeddings(pos_t)
    #pos_r_e = self.rel_embeddings(pos_r)
    #\vv{r}
    pos_r_e = self.vvrel_embedding_func(pos_r)

    pos_sub = pos_h_e + pos_r_e - pos_t_e
    num_ent = pos_h_e.shape[0]

    unique_rels_2_unique_proj_idx = self.unique_rels(pos_r)
    unique_rel_tensor = to_cuda(
        longTensor([int(key) for key in unique_rels_2_unique_proj_idx]))

    # This has shape [#unique_rel x 2 x emb_dim]
    unique_bases = self.comb_mats(unique_rel_tensor)  # this is AT, A is unique_bases.t()

    # this will have shape [#unique_rel x 2]
    ####THIS HERE IS WRONG
    ATA_inverse = self.inverse(
        torch.bmm(unique_bases, unique_bases.transpose(1, 2)))

    unique_projmat = to_cuda(
        torch.cat([torch.eye(self.embedding_size)] *
                  len(unique_rels_2_unique_proj_idx)).view(
                      len(unique_rels_2_unique_proj_idx), self.embedding_size,
                      self.embedding_size)) - torch.bmm(
                          torch.bmm(unique_bases.transpose(1, 2), ATA_inverse),
                          unique_bases)

    # Projection Matrices
    # Make in the shape of num_ent x emb_dim x emb_dim (every proj matrix at every num_ent)
    unique_proj_idx = [
        unique_rels_2_unique_proj_idx[pos_r[i].item()] for i in range(num_ent)
    ]
    P_pos_r = unique_projmat[unique_proj_idx]

    # need to fix here
    bmmed = torch.bmm(
        P_pos_r,
        torch.cat(pos_sub).view(num_ent, self.embedding_size, 1)).squeeze(2)

    pos_sub = bmmed[:num_ent]
    if self.L1_flag:
        pos = torch.sum(torch.abs(pos_sub), 1)
    else:
        pos = torch.sum(pos_sub ** 2, 1)
    # Only the positive-triple score is computed here.
    return pos
def select_vectors_from_pairs(x_src, y_tgt, pairs, gpuid):
    n = len(pairs)
    d = x_src.shape[1]
    x = to_cuda(torch.zeros([n, d]), gpuid)
    y = to_cuda(torch.zeros([n, d]), gpuid)
    for k, ij in enumerate(pairs):
        i, j = ij
        x[k, :] = x_src[i, :]
        y[k, :] = y_tgt[j, :]
    return x, y
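# The per-pair copy loop above can also be written as one fancy-indexing step.
# A sketch of an equivalent vectorized version, assuming `x_src`/`y_tgt` are
# torch tensors and `pairs` is a sequence of (i, j) index tuples (the function
# name is hypothetical):
import torch

def select_vectors_from_pairs_vectorized(x_src, y_tgt, pairs, gpuid):
    idx = torch.as_tensor(pairs, dtype=torch.long)
    x = to_cuda(x_src[idx[:, 0]].clone(), gpuid)
    y = to_cuda(y_tgt[idx[:, 1]].clone(), gpuid)
    return x, y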
def batch_projection_colvec_transINT(column_vectors, list_of_basis_of_H_in_rows):
    list_proj = projection_matrix_transINT(list_of_basis_of_H_in_rows)
    return_tensor = floatTensor(column_vectors.shape[1], column_vectors.shape[0])
    for i in range(column_vectors.shape[1]):
        single_col_vec = column_vectors[:, i].view(-1, 1)
        return_tensor[i] = torch.mm(to_cuda(list_proj[i]),
                                    to_cuda(single_col_vec)).t()
    # return tensor of dim [num_ent x embed_dim] (embed vectors in rows, not columns)
    return return_tensor
def run_iteration(self, data_generator, do_backprop=True, run_online_evaluation=False):
    data_dict = next(data_generator)
    data = data_dict['data']
    target = data_dict['target']

    data = maybe_to_torch(data)
    target = maybe_to_torch(target)

    if torch.cuda.is_available():
        data = to_cuda(data)
        target = to_cuda(target)

    self.optimizer.zero_grad()

    if self.fp16:
        with autocast():
            ret = self.network(data, target, return_hard_tp_fp_fn=run_online_evaluation)
            if run_online_evaluation:
                ces, tps, fps, fns, tp_hard, fp_hard, fn_hard = ret
                self.run_online_evaluation(tp_hard, fp_hard, fn_hard)
            else:
                ces, tps, fps, fns = ret
            del data, target
            l = self.compute_loss(ces, tps, fps, fns)

        if do_backprop:
            self.amp_grad_scaler.scale(l).backward()
            self.amp_grad_scaler.unscale_(self.optimizer)
            clip_grad_norm_(self.network.parameters(), 12)
            self.amp_grad_scaler.step(self.optimizer)
            self.amp_grad_scaler.update()
    else:
        ret = self.network(data, target, return_hard_tp_fp_fn=run_online_evaluation)
        if run_online_evaluation:
            ces, tps, fps, fns, tp_hard, fp_hard, fn_hard = ret
            self.run_online_evaluation(tp_hard, fp_hard, fn_hard)
        else:
            ces, tps, fps, fns = ret
        del data, target
        l = self.compute_loss(ces, tps, fps, fns)

        if do_backprop:
            l.backward()
            clip_grad_norm_(self.network.parameters(), 12)
            self.optimizer.step()

    return l.detach().cpu().numpy()
def get_masked_mention_word_scores(self, encoded_doc, span_starts, span_ends):
    num_words = encoded_doc.shape[0]
    num_c = span_starts.shape[0]

    doc_range = torch.arange(num_words).view(1, -1).repeat(num_c, 1)
    mention_mask = (doc_range >= span_starts.view(-1, 1)) & (doc_range <= span_ends.view(-1, 1))

    word_attn = self.masked_mention_score(encoded_doc)
    mention_word_attn = F.softmax(
        torch.log(to_cuda(mention_mask.float())) + to_cuda(word_attn.view(1, -1)),
        dim=-1)
    return mention_word_attn
def set_forward_loss(self, x):
    y = torch.from_numpy(np.repeat(range(self.n_way), self.n_query))
    scores = self.set_forward(x)
    if self.loss_type == 'mse':
        y_oh = utils.one_hot(y, self.n_way)
        y_oh = Variable(to_cuda(y_oh))
        return self.loss_fn(scores, y_oh)
    else:
        y = Variable(to_cuda(y))
        return self.loss_fn(scores, y)
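# `utils.one_hot` is assumed above but not shown. A minimal sketch of a one-hot
# encoder compatible with the MSE branch (hypothetical helper):
import torch

def one_hot(labels: torch.Tensor, num_classes: int) -> torch.Tensor:
    """Convert integer labels of shape [N] into a float one-hot matrix [N, num_classes]."""
    encoded = torch.zeros(labels.size(0), num_classes)
    encoded.scatter_(1, labels.view(-1, 1).long(), 1.0)
    return encoded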
def batch_projection_colvec_transINT_given_proj_mat_list(column_vectors, list_proj):
    return_tensor = floatTensor(column_vectors.shape[1], column_vectors.shape[0])
    for i in range(column_vectors.shape[1]):
        single_col_vec = column_vectors[:, i].view(-1, 1)
        return_tensor[i] = torch.mm(to_cuda(list_proj[i]),
                                    to_cuda(single_col_vec)).t()
    # return tensor of dim [num_ent x embed_dim] (embed vectors in rows, not columns)
    return return_tensor
def beamstep(self, decoder, ifcuda=False, ifAttn=False, *args, **kwargs):
    # one step of beam search
    tmp1 = to_cuda(torch.Tensor(1), ifcuda)
    tmp2 = to_cuda(torch.LongTensor(1), ifcuda)
    tmp3 = torch.LongTensor(1)
    m = []
    att = []
    hid = []
    for k in range(self.K):
        # compare token ids with !=, not `is not` (identity checks are unreliable for ints)
        if self.beamseq[k][-1].self_id != self.eos_id:
            m.append(k)
            word_id = Variable(torch.LongTensor([self.beamseq[k][-1].self_id]))
            word_id = to_cuda(word_id, ifcuda)
            hidden = self.beamseq[k][-1].hidden_next
            if ifAttn:
                output, hidden, attn_dist = decoder.predict(word_id, hidden=hidden,
                                                            *args, **kwargs)
                att.append(attn_dist)
            else:
                output, hidden = decoder.predict(word_id, hidden=hidden,
                                                 *args, **kwargs)
                att.append([])
            hid.append(hidden)
            scores, inds = output.data.topk(self.K)
            tmp1 = torch.cat([tmp1, scores + self.beamseq[k][-1].score])
            tmp2 = torch.cat([tmp2, inds])
            tmp3 = torch.cat([tmp3, torch.LongTensor([k] * self.K)])
    if len(m) == 0:
        print('All beams have met <EOS>!')
        return
    tmp1 = tmp1[1:]
    tmp2 = tmp2[1:]
    tmp3 = tmp3[1:]
    order = tmp1.topk(len(m))[1]
    for k, d in enumerate(m):
        score = tmp1[order[k]]
        self_id = tmp2[order[k]]
        pre_loc = tmp3[order[k]]
        if ifAttn:
            self.beamseq[d].append(BeamUnit(self_id, pre_loc, score,
                                            hid[tmp3[order[k]]], att[tmp3[order[k]]]))
        else:
            self.beamseq[d].append(BeamUnit(self_id, pre_loc, score,
                                            hid[tmp3[order[k]]]))
        self.Kscores[d] = score
    self.step += 1
    return
def compute_loss_and_accuracy(
        dataloader: torch.utils.data.DataLoader,
        model: torch.nn.Module,
        loss_criterion: torch.nn.modules.loss._Loss):
    """
    Computes the average loss and the accuracy over the whole dataset
    in dataloader.
    Args:
        dataloader: Validation/Test dataloader
        model: torch.nn.Module
        loss_criterion: The loss criterion, e.g: torch.nn.CrossEntropyLoss()
    Returns:
        [average_loss, accuracy]: both scalar.
    """
    average_loss = 0
    accuracy = 0
    N = 0  # number of images seen; divide by this in the end
    iterations = 0
    with torch.no_grad():
        for (X_batch, Y_batch) in dataloader:
            # Transfer images/labels to GPU VRAM, if possible
            X_batch = utils.to_cuda(X_batch)
            Y_batch = utils.to_cuda(Y_batch)
            # Forward pass the images through our model
            pred = model(X_batch)
            _, predicted = torch.max(pred, dim=1)
            accuracy += (predicted == Y_batch).sum().item()
            N += X_batch.size(0)
            iterations += 1
            loss = loss_criterion(pred, Y_batch)
            average_loss += torch.sum(loss)
    try:
        accuracy /= N
        average_loss /= iterations
    except ZeroDivisionError:
        print("Dividing by 0: the dataloader is empty")
    return average_loss, accuracy
def train(self):
    """
    Trains the model for [self.epochs] epochs.
    """
    # Track initial loss/accuracy
    def should_validate_model():
        return self.global_step % self.num_steps_per_val == 0

    # model 2
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, 20)

    for epoch in range(self.epochs):
        self.epoch = epoch
        # Perform a full pass through all the training samples
        for X_batch, Y_batch in self.dataloader_train:
            # X_batch is the CIFAR10 images. Shape: [batch_size, 3, 32, 32]
            # Y_batch is the CIFAR10 image label. Shape: [batch_size]
            # Transfer images / labels to GPU VRAM, if possible
            X_batch = utils.to_cuda(X_batch)
            Y_batch = utils.to_cuda(Y_batch)

            # Perform the forward pass
            predictions = self.model(X_batch)
            # Compute the cross entropy loss for the batch
            loss = self.loss_criterion(predictions, Y_batch)
            self.TRAIN_LOSS[self.global_step] = loss.detach().cpu().item()

            # Backpropagation
            loss.backward()
            # Gradient descent step
            self.optimizer.step()
            # Reset all computed gradients to 0
            self.optimizer.zero_grad()
            self.global_step += 1

            # Compute loss/accuracy for all three datasets.
            if should_validate_model():
                self.validation_epoch()
                self.save_model()
                if self.should_early_stop():
                    print("Early stopping.")
                    return

        # model 2
        scheduler.step()
def smoothTransformer2D(inp):
    if len(inp) == 3:
        [im, defgrad, affine] = inp  # defgrad in range [-1,1]
    else:
        [im, defgrad] = inp  # defgrad in range [-1,1]

    defgrad = logisticGrowth(defgrad, 2.0)
    base_grid = U.to_cuda(
        integralImage(
            torch.ones((defgrad.shape[0], defgrad.shape[1], defgrad.shape[2],
                        defgrad.shape[3]))))
    sampling_grid = integralImage(defgrad)

    samples = im.shape[0]
    channels = im.shape[1]
    height = im.shape[2]
    width = im.shape[3]

    try:
        identity = U.to_cuda(
            torch.cat(samples * [torch.Tensor([[1, 0, 0, 0, 1, 0, 0, 0, 1]])]))
        affine = affine + identity
        affine = torch.reshape(affine, (samples, 3, 3))
        sampling_grid = torch.cat(
            (sampling_grid, U.to_cuda(torch.ones((samples, 1, height, width)))), 1)
        sampling_grid = sampling_grid.permute(0, 2, 3, 1)
        sampling_grid = torch.matmul(sampling_grid.view(samples, -1, 3),
                                     torch.transpose(affine, 1, 2))  # ********
        sampling_grid = sampling_grid.view(samples, height, width, 3)  # ********
        sampling_grid = sampling_grid.permute(0, 3, 1, 2)
        sampling_grid = sampling_grid[:, 0:2, :, :]
    except NameError:
        # no affine component was passed in, so skip the affine warp
        pass

    sampling_grid_norm = normalize(sampling_grid, height, width)
    sampling_grid_inverse = 2 * base_grid - sampling_grid_norm

    mov_def = resample2D(im, sampling_grid_norm, height, width, samples, channels)
    ref_def = resample2D(mov_def, sampling_grid_inverse, height, width, samples, channels)

    return mov_def, ref_def, sampling_grid_norm, sampling_grid_inverse
def triple_classification(mod, pos_h, pos_t, orig_r, proj_r):
    pos_h_e = mod.ent_embeddings(pos_h)
    pos_t_e = mod.ent_embeddings(pos_t)
    #pos_r_e = self.rel_embeddings(pos_r)
    #\vv{r}

    pos_sub = pos_h_e - pos_t_e
    num_ent = pos_h_e.shape[0]

    # This has shape [#unique_rel x 2 x emb_dim]
    unique_bases = torch.tensor(null_spaces[proj_r]).cuda().t()  # this is AT, A is unique_bases.t()
    orig_bases = torch.tensor(null_spaces[orig_r]).cuda().t()

    # this will have shape [#unique_rel x 2]
    ####THIS HERE IS WRONG
    ATA_inverse = torch.inverse(torch.mm(unique_bases, unique_bases.t())).cuda()
    orig_ATA_inverse = torch.inverse(torch.mm(orig_bases, orig_bases.t())).cuda()

    unique_projmat = to_cuda(
        torch.eye(mod.embedding_size).type(floatTensor)) - torch.mm(
            torch.mm(unique_bases.t(), ATA_inverse), unique_bases).type(floatTensor)
    orig_unique_projmat = to_cuda(
        torch.eye(mod.embedding_size).type(floatTensor)) - torch.mm(
            torch.mm(orig_bases.t(), orig_ATA_inverse), orig_bases).type(floatTensor)

    orig_P_pos_r = torch.cat([orig_unique_projmat.unsqueeze(0)] * pos_h_e.shape[0], dim=0)
    P_pos_r = torch.cat([unique_projmat.unsqueeze(0)] * pos_h_e.shape[0], dim=0)

    # need to fix here
    pos_sub = torch.bmm(orig_P_pos_r,
                        pos_sub.view(num_ent, mod.embedding_size, 1)).squeeze(2)
    bmmed = torch.bmm(P_pos_r,
                      pos_sub.view(num_ent, mod.embedding_size, 1)).squeeze(2)

    proj_sub = bmmed[:num_ent]
    dist_l1 = torch.sum(torch.abs(proj_sub - pos_sub), 1)
    dist_l2 = torch.sum((proj_sub - pos_sub) ** 2, 1)
    return dist_l1, dist_l2
def set_forward_loss(self, x):
    y_query = torch.from_numpy(np.repeat(range(self.n_way), self.n_query))
    y_query = Variable(to_cuda(y_query))
    scores = self.set_forward(x)
    return self.loss_fn(scores, y_query)
def enc_dec_step(self):
    """
    Encoding / decoding step.
    """
    transformer = self.model
    transformer.train()
    task = self.config.task

    # batch
    (x, len_x), (y, len_y), _ = next(self.dataloader)

    # target words to predict
    alen = torch.arange(len_y.max(), dtype=torch.long, device=len_y.device)
    pred_mask = alen[:, None] < len_y[None] - 1  # do not predict anything given the last target word
    t = y[1:].masked_select(pred_mask[:-1])
    assert len(t) == (len_y - 1).sum().item()

    # cuda
    x, len_x, y, len_y, t = to_cuda(self.config, x, len_x, y, len_y, t)

    # forward / loss
    encoded = transformer(mode='encode', x=x, len_x=len_x)
    decoded = transformer(mode='decode', y=y, len_y=len_y,
                          encoded=encoded.transpose(0, 1), len_enc=len_x)
    _, loss = transformer(mode='predict', tensor=decoded,
                          pred_mask=pred_mask, y=t, get_scores=False)
    self.stats[task].append(loss.item())

    # optimize
    self.optimize(loss)

    # number of processed sequences / words
    self.n_equations += self.config.batch_size
    self.stats['processed_e'] += len_x.size(0)
    self.stats['processed_w'] += (len_x + len_y - 2).sum().item()

    # Deletes data on CUDA to free its memory
    del x, len_x, y, len_y, t, alen, pred_mask
def create_model():
    """
    Initializes the model. Edit the code below if you would like to change
    the model.
    """
    model = nn.Sequential(
        # First convolution
        nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),
        # Second convolution
        nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),
        # Third convolution
        nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),
        # Classifier head
        # Flattens the feature map from shape (batch_size, C, height, width)
        # to (batch_size, C * height * width)
        nn.Flatten(),
        # nn.Linear(4194304, 64),
        nn.Linear(32 * 32 * 2, 64),
        nn.ReLU(),
        # No need to include softmax, as this is already combined in the loss function
        nn.Linear(64, 10)
    )
    # Transfer model to GPU memory if a GPU is available
    model = utils.to_cuda(model)
    return model
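# A quick shape sanity check for the architecture above (a sketch; it assumes
# 32x32 single-channel inputs, which matches the Flatten size 32 * 32 * 2 = 2048,
# and that utils.to_cuda is a no-op on CPU-only machines):
import torch

if __name__ == "__main__":
    model = create_model()
    dummy = utils.to_cuda(torch.zeros(1, 1, 32, 32))  # one 32x32 grayscale image
    logits = model(dummy)
    print(logits.shape)  # expected: torch.Size([1, 10])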
def visualize(self):
    image = plt.imread("horse.jpg")
    image = to_tensor(image)
    image = normalize(image.data, mean, std)
    image = image.view(1, *image.shape)
    image = nn.functional.interpolate(image, size=(256, 256))
    image = to_cuda(image)

    # Save weights visualization
    weights = self.model.model.conv1.weight.data
    torchvision.utils.save_image(weights, "weights_first_layer.png")

    # Save first layer activations visualization
    first_layer_out = self.model.model.conv1(image)
    to_visualize = first_layer_out.view(first_layer_out.shape[1], 1,
                                        *first_layer_out.shape[2:])
    torchvision.utils.save_image(to_visualize, "filters_first_layer.png")

    # Pass the image through all layers but the last two
    for name, child in self.model.model.named_children():
        if name not in ['avgpool', 'fc']:
            image = child(image)

    # Save last conv. layer activations visualization
    to_visualize = image.view(image.shape[1], 1, *image.shape[2:])[:64]
    torchvision.utils.save_image(to_visualize, "filters_last_layer.png")
    return