import logging

import numpy as np
import torch
import torch.optim as optim
from torch.autograd import Variable

import net
import utils


def update_metrics(raw_metrics, input_mu, input_sigma, sample_mu, labels,
                   predict_start, samples=None, relative=False):
    """Accumulate running evaluation metrics for one batch into `raw_metrics`."""
    # ND and RMSE are computed on the prediction window only
    raw_metrics['ND'] = raw_metrics['ND'] + net.accuracy_ND(
        sample_mu, labels[:, predict_start:], relative=relative)
    raw_metrics['RMSE'] = raw_metrics['RMSE'] + net.accuracy_RMSE(
        sample_mu, labels[:, predict_start:], relative=relative)
    # accumulate the conditioning-window loss, weighted by its number of time steps
    input_time_steps = input_mu.numel()
    raw_metrics['test_loss'] = raw_metrics['test_loss'] + [
        net.loss_fn(input_mu, input_sigma, labels[:, :predict_start]) * input_time_steps,
        input_time_steps
    ]
    # quantile (rho-risk) metrics require the full set of sampled trajectories
    if samples is not None:
        raw_metrics['rou90'] = raw_metrics['rou90'] + net.accuracy_ROU(
            0.9, samples, labels[:, predict_start:], relative=relative)
        raw_metrics['rou50'] = raw_metrics['rou50'] + net.accuracy_ROU(
            0.5, samples, labels[:, predict_start:], relative=relative)
        raw_metrics['rou10'] = raw_metrics['rou10'] + net.accuracy_ROU(
            0.1, samples, labels[:, predict_start:], relative=relative)
    return raw_metrics
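# --- Illustration (not part of the original module) --------------------------
# The 'rou*' entries above accumulate a rho-risk (quantile loss). As a
# self-contained reference for what that measures, here is a minimal pinball
# loss at level `rho`, normalized DeepAR-style by the total absolute target;
# the real net.accuracy_ROU may differ in how it aggregates across batches.
def quantile_loss_sketch(rho, pred_quantile, target):
    """2 * summed pinball loss at level rho, divided by sum(|target|)."""
    diff = target - pred_quantile
    pinball = torch.where(diff >= 0, rho * diff, (rho - 1) * diff)
    return 2 * pinball.sum() / target.abs().sum()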
def test_every_layer_updated_after_training(Net, fetch_train_batch):
    """Check that every model parameter is updated after training on one batch."""
    # configure the Adam optimizer; use a large lr so any update is clearly visible
    optimizer = optim.Adam(Net.parameters(), lr=10)
    # make a copy of the network parameters before training
    before = [t.clone() for t in Net.parameters()]
    # train on a single batch
    images, labels = fetch_train_batch
    outputs = Net(images)
    optimizer.zero_grad()
    loss = net.loss_fn(outputs, labels)
    loss.backward()
    optimizer.step()
    # make a copy of the network parameters after training
    after = [t.clone() for t in Net.parameters()]
    # assert that at least one element of every parameter tensor changed
    for i in range(len(before)):
        assert (before[i] != after[i]).any(), "- layer {} not updated".format(i + 1)
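# --- Fixture sketch (an assumption; the real fixtures live in the test suite) --
# The test above expects `Net` to be an instantiated model and
# `fetch_train_batch` to yield one (images, labels) pair. Minimal pytest
# fixtures consistent with that contract could look like this; the shapes and
# the stand-in network are illustrative only.
import pytest
import torch.nn as nn

@pytest.fixture
def Net():
    # stand-in model; the real one comes from the repo's net.py
    return nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))

@pytest.fixture
def fetch_train_batch():
    # random MNIST-shaped batch, sufficient for the parameter-update check
    return torch.randn(8, 1, 28, 28), torch.randint(0, 10, (8,))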
def train(data, model, optimizer, storage, args, params, epoch):
    print("training epoch {}".format(epoch), end="")
    x = Variable(data["x"])
    l = Variable(data["target"])
    out = model(x)
    out_z = out["z"]
    if params.use_weights:
        out_w = out["w"]
    else:
        out_w = torch.ones_like(l)
    # refresh the stored class centres on the first epoch and every
    # c_update_interval epochs thereafter
    if (epoch == 1) or (epoch % params.c_update_interval == 0):
        print(" updating centres", end="")
        c = Variable(out["z"])
        storage["c"] = c
    else:
        # TODO find a way to get this c into global scope between training iterations
        c = storage["c"]
    model.train()
    loss = torch.zeros((1, ))
    for i in range(x.shape[0]):
        # leave-one-out: score sample i against the centres of all other samples
        include = np.delete(np.arange(x.shape[0]), i)
        # p = get_class_probs(out_z[i, :], c[include, :], l[include], out_w[include], params)
        p = get_class_probs(out_z[i, :], c[include, :], l[include], None, params)
        loss += loss_fn(p, l[i], params)
    print(", loss: {}".format(loss.item()))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
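# --- Sketch of get_class_probs (an assumption; it is not defined in this file) --
# The leave-one-out loop above matches an NCA-style soft assignment: the
# probability of each class is the share of negative-squared-distance affinity
# mass held by the centres carrying that label. Everything here is a guess at
# the missing helper; `params.temperature` and `params.n_classes` are assumed names.
def get_class_probs(z, centres, labels, weights, params):
    dists = ((centres - z) ** 2).sum(dim=1)        # squared distance to every centre
    sims = torch.exp(-dists / params.temperature)  # soft neighbour affinities
    if weights is not None:
        sims = sims * weights                      # optional per-sample weights
    probs = torch.zeros(params.n_classes)
    for k in range(params.n_classes):
        probs[k] = sims[labels == k].sum()         # affinity mass assigned to class k
    return probs / probs.sum()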
def evaluate(model, loss_fn, dataloader, metrics, params):
    """Evaluate the model on all batches fetched by `dataloader`.

    Args:
        model: (torch.nn.Module) the neural network
        loss_fn: a function that takes batch_output and batch_labels and
            computes the loss for the batch
        dataloader: (DataLoader) a torch.utils.data.DataLoader object that fetches data
        metrics: (dict) a dictionary of functions that compute a metric using
            the output and labels of each batch
        params: (Params) hyperparameters
    """
    # set model to evaluation mode
    model.eval()

    # summary for the current eval loop
    summ = []

    # compute metrics over the dataset
    for i, (highlight_batch, non_highlight_batch, text_feature_batch,
            user_history_batch) in enumerate(dataloader):
        highlight_batch = highlight_batch.reshape(highlight_batch.shape[0], -1).float()
        non_highlight_batch = non_highlight_batch.reshape(
            non_highlight_batch.shape[0], -1).float()
        user_history_batch = user_history_batch.reshape(
            user_history_batch.shape[0], -1).float()

        positive_batch = torch.cat((highlight_batch, user_history_batch), dim=1)
        negative_batch = torch.cat((non_highlight_batch, user_history_batch), dim=1)

        # move to GPU if available (`async` is a reserved word since Python 3.7;
        # use non_blocking instead)
        if params.cuda:
            device = torch.device("cuda")
            positive_batch = positive_batch.cuda(non_blocking=True)
            negative_batch = negative_batch.cuda(non_blocking=True)
        else:
            device = torch.device("cpu")

        positive_batch, negative_batch = Variable(positive_batch), Variable(negative_batch)

        positive_batch_output = model(positive_batch)
        negative_batch_output = model(negative_batch)

        # target of ones: the positive sample should rank above the negative one
        loss = loss_fn(positive_batch_output, negative_batch_output,
                       torch.ones(positive_batch.shape[0], 1, device=device))

        # compute all metrics on this batch
        summary_batch = {
            metric: metrics[metric](positive_batch_output, negative_batch_output)
            for metric in metrics
        }
        summary_batch['loss'] = loss.item()
        summ.append(summary_batch)

    # average each metric (defined in net.py) over all batches
    metrics_mean = {metric: np.mean([x[metric] for x in summ]) for metric in summ[0]}
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v)
                                for k, v in metrics_mean.items())
    logging.info("- Eval metrics : " + metrics_string)
    return metrics_mean
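# --- Example metrics dict (hypothetical; the real one is defined in net.py) ---
# evaluate() only assumes that each entry maps (positive_output,
# negative_output) to a scalar. A minimal ranking accuracy consistent with
# that contract:
def ranking_accuracy(positive_output, negative_output):
    """Fraction of pairs where the positive sample outscores the negative one."""
    return (positive_output > negative_output).float().mean().item()

example_metrics = {'ranking_accuracy': ranking_accuracy}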
def compute_loss(fetch_train_batch, train_net_batch):
    """Compute the loss between the network outputs and the batch labels."""
    outputs = train_net_batch
    labels = fetch_train_batch[1]
    return net.loss_fn(outputs, labels)
def train(model, optimizer, loss_fn, dataloader, metrics, params):
    """Train the model for one epoch over all batches in `dataloader`.

    Args:
        model: (torch.nn.Module) the neural network
        optimizer: (torch.optim) optimizer for parameters of model
        loss_fn: a function that takes batch_output and batch_labels and
            computes the loss for the batch
        dataloader: (DataLoader) a torch.utils.data.DataLoader object that
            fetches training data
        metrics: (dict) a dictionary of functions that compute a metric using
            the output and labels of each batch
        params: (Params) hyperparameters
    """
    # set model to training mode
    model.train()

    # summary for the current training loop and a running average object for loss
    summary = []
    loss_over_batch = []
    loss_avg = utils.RunningAverage()

    for i, (highlight_batch, highlight_distance_batch, non_highlight_batch,
            non_highlight_distance_batch, text_feature_batch) in enumerate(dataloader):
        # flatten every feature tensor to (batch_size, -1); note the original
        # code reshaped the wrong tensors for the two distance batches
        highlight_batch = highlight_batch.reshape(highlight_batch.shape[0], -1).float()
        highlight_distance_batch = highlight_distance_batch.reshape(
            highlight_distance_batch.shape[0], -1).float()
        non_highlight_batch = non_highlight_batch.reshape(
            non_highlight_batch.shape[0], -1).float()
        non_highlight_distance_batch = non_highlight_distance_batch.reshape(
            non_highlight_distance_batch.shape[0], -1).float()
        text_feature_batch = text_feature_batch.reshape(
            text_feature_batch.shape[0], -1).float()

        positive_batch = torch.cat(
            (highlight_batch, highlight_distance_batch, text_feature_batch), dim=1)
        negative_batch = torch.cat(
            (non_highlight_batch, non_highlight_distance_batch, text_feature_batch),
            dim=1)

        # move to GPU if available (`async` is a reserved word since Python 3.7;
        # use non_blocking instead)
        if params.cuda:
            device = torch.device("cuda")
            positive_batch = positive_batch.cuda(non_blocking=True)
            negative_batch = negative_batch.cuda(non_blocking=True)
        else:
            device = torch.device("cpu")

        positive_batch, negative_batch = Variable(positive_batch), Variable(negative_batch)

        positive_batch_output = model(positive_batch)
        negative_batch_output = model(negative_batch)

        # target of ones: the positive sample should score higher than the negative one
        loss = loss_fn(positive_batch_output, negative_batch_output,
                       torch.ones(positive_batch.shape[0], 1, device=device))

        # clear previous gradients, compute gradients of all variables wrt loss
        optimizer.zero_grad()
        loss.backward()

        # perform updates using the computed gradients
        optimizer.step()

        # evaluate summaries only once in a while
        if i % params.save_summary_steps == 0:
            # compute all metrics on this batch
            summary_batch = {
                metric: metrics[metric](positive_batch_output, negative_batch_output)
                for metric in metrics
            }
            summary_batch['loss'] = loss.item()
            # logging.info("- Batch loss: {}".format(summary_batch['loss']))
            summary.append(summary_batch)

        loss_over_batch.append(loss.item())
        # update the running average loss
        loss_avg.update(loss.item())

    metrics_mean = {metric: np.mean([x[metric] for x in summary])
                    for metric in summary[0]}
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v)
                                for k, v in metrics_mean.items())
    logging.info("- Train metrics: " + metrics_string)
    return np.array(loss_over_batch)
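# --- Driver sketch (an assumption about the surrounding training script) ------
# A minimal loop tying train() and evaluate() together, in the spirit of the
# usual train_and_evaluate helper; `params.num_epochs` and the two dataloader
# names are assumed, not taken from this file.
def train_and_evaluate(model, optimizer, loss_fn, train_dataloader,
                       val_dataloader, metrics, params):
    for epoch in range(params.num_epochs):
        logging.info("Epoch {}/{}".format(epoch + 1, params.num_epochs))
        train(model, optimizer, loss_fn, train_dataloader, metrics, params)
        evaluate(model, loss_fn, val_dataloader, metrics, params)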