def train_batch(model, optimizer, baseline, epoch, batch_id, step, batch, tb_logger, opts):
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    # Evaluate model, get costs and log probabilities
    if opts.input_transform is not None:
        cost, log_likelihood, transform_loss = model(x, return_transform_loss=True)
    else:
        cost, log_likelihood = model(x)

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)

    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss
    if opts.input_transform is not None:
        loss += opts.input_transform * transform_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step,
                   log_likelihood, reinforce_loss, bl_loss, tb_logger, opts)
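# The snippets in this listing rely on two helpers that are never shown: move_to and
# clip_grad_norms. Below is a minimal sketch of how such helpers are commonly written,
# assuming recursive device placement and per-param-group clipping; the exact signatures
# and return values in the original codebase may differ.
import math

import torch
from torch.nn.utils import clip_grad_norm_


def move_to(var, device):
    # Recursively move tensors (possibly nested in dicts or lists) to the given device.
    if isinstance(var, dict):
        return {k: move_to(v, device) for k, v in var.items()}
    if isinstance(var, (list, tuple)):
        return [move_to(v, device) for v in var]
    return var.to(device)


def clip_grad_norms(param_groups, max_norm=math.inf):
    # Clip gradients per optimizer param group; return the pre-clipping norms for logging.
    grad_norms = [
        clip_grad_norm_(group['params'], max_norm if max_norm > 0 else math.inf, norm_type=2)
        for group in param_groups
    ]
    grad_norms_clipped = [min(g_norm, max_norm) for g_norm in grad_norms] if max_norm > 0 else grad_norms
    return grad_norms, grad_norms_clipped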
def train_batch(model, optimizer, baseline, epoch, batch_id, step, batch, tb_logger, opts):
    # vec: (batch_size, graph_size, embed_dim)
    # bl_val: (batch_size) or None
    vec, bl_val = baseline.unwrap_batch(batch)
    vec = move_to(vec, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    # Evaluate model, get costs and log probabilities
    cost, log_likelihood = model(vec)

    # normalize cost
    # cost = (cost - cost.mean()) / (cost.std() + 1e-5)

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(vec, cost) if bl_val is None else (bl_val, 0)

    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step,
                   log_likelihood, reinforce_loss, bl_loss, tb_logger, opts)
def train_batch(model, optimizer, baseline, epoch, batch_id, step, batch, tb_logger, opts, extra):
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    # Evaluate model, get costs, log probabilities and entropy
    cost, log_likelihood, entropy = model(x)
    # print('Cost on train', cost)

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)
    if bl_loss == True and opts.baseline == 'constant':
        extra["updates"] += 1

    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    entropy_loss = entropy.mean()
    loss = reinforce_loss + bl_loss - opts.entropy * entropy_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step, log_likelihood,
                   reinforce_loss, bl_loss, entropy_loss, tb_logger, opts, extra)
def train_batch(model, optimizer, baseline, epoch, batch_id, step, interactions_count, batch, tb_logger, opts):
    start_time = time.time()
    get_inner_model(model).decoder.count_interactions = True
    get_inner_model(baseline.baseline.model).decoder.count_interactions = True

    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    # Evaluate model, get costs and log probabilities
    cost, log_likelihood = model(x, only_encoder=False)

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)

    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    global sum_batch_time
    sum_batch_time += (time.time() - start_time)

    # Logging
    interactions_so_far = interactions_count + \
        2 * get_inner_model(model).decoder.interactions * torch.cuda.device_count()
    if step % int(opts.log_step) == 0:
        # with torch.no_grad():
        #     opt_nll = get_inner_model(model).supervised_log_likelihood(x)
        opt_nll = torch.ones(1)
        log_values(cost, grad_norms, epoch, interactions_so_far, batch_id, step,
                   log_likelihood, opt_nll, reinforce_loss, bl_loss, tb_logger, opts)
        # interactions_so_far = 2 * step * opts.graph_size * opts.batch_size \
        #     if opts.baseline is not None else step * opts.graph_size * opts.batch_size
        if opts.total_interactions > interactions_so_far:
            avg_batch_time = sum_batch_time / step if step > 0 else sum_batch_time
            print('batch time: ', avg_batch_time)
            seconds = (opts.total_interactions - interactions_so_far) * avg_batch_time
            # seconds = (opts.n_epochs * (opts.epoch_size // opts.batch_size) - step) * avg_batch_time
            GetTime(seconds)
            print('============================')

    get_inner_model(model).decoder.count_interactions = False
    get_inner_model(baseline.baseline.model).decoder.count_interactions = False
    if opts.use_cuda:
        torch.cuda.empty_cache()
def solve_all_nn(dataset_path, eval_batch_size=1024, no_cuda=False, dataset_n=None,
                 progress_bar_mininterval=0.1):
    import torch
    from torch.utils.data import DataLoader
    from problems import TSP
    from utils import move_to

    dataloader = DataLoader(
        TSP.make_dataset(filename=dataset_path,
                         num_samples=dataset_n if dataset_n is not None else 1000000),
        batch_size=eval_batch_size)
    device = torch.device("cuda:0" if torch.cuda.is_available() and not no_cuda else "cpu")
    results = []
    for batch in tqdm(dataloader, mininterval=progress_bar_mininterval, ascii=True):
        start = time.time()
        batch = move_to(batch, device)
        lengths, tours = nearest_neighbour(batch)
        lengths_check, _ = TSP.get_costs(batch, tours)
        assert (torch.abs(lengths - lengths_check.data) < 1e-5).all()
        duration = time.time() - start
        results.extend([
            (cost.item(), np.trim_zeros(pi.cpu().numpy(), 'b'), duration)
            for cost, pi in zip(lengths, tours)
        ])
    return results, eval_batch_size
def evaluate(model, test_loader, criterion, entropy_loss_func, opts):
    """Evaluate a single epoch."""
    y_probs = np.zeros((0, len(test_loader.dataset.CLASSES)), np.float64)
    y_trues = np.zeros((0), np.int64)
    losses = []

    # Put model in eval mode
    model.eval()

    for i, (x_low, x_high, label) in enumerate(tqdm(test_loader)):
        x_low, x_high, label = move_to([x_low, x_high, label], opts.device)

        y, attention_map, patches, x_low = model(x_low, x_high)
        entropy_loss = entropy_loss_func(attention_map)
        loss = criterion(y, label) - entropy_loss

        loss_value = loss.item()
        losses.append(loss_value)

        y_prob = F.softmax(y, dim=1)
        y_probs = np.concatenate([y_probs, y_prob.detach().cpu().numpy()])
        y_trues = np.concatenate([y_trues, label.cpu().numpy()])

    test_loss_epoch = np.round(np.mean(losses), 4)
    metrics = calc_cls_measures(y_probs, y_trues)
    return test_loss_epoch, metrics
def train_model(model, train_loader, optimizer, opts):
    n_classes = opts.n_classes
    metric = torch.nn.CrossEntropyLoss()
    y_probs = np.zeros((0, n_classes), np.float64)
    y_trues = np.zeros((0), np.int64)
    losses = []

    model.train()
    for i, (image, label) in enumerate(tqdm(train_loader)):
        optimizer.zero_grad()
        image, label = utils.move_to([image, label], opts.device)

        prediction = model.forward(image.float())
        loss = metric(prediction, label.long())
        loss.backward()
        optimizer.step()

        loss_value = loss.item()
        losses.append(loss_value)

        y_prob = F.softmax(prediction, dim=1)
        y_probs = np.concatenate([y_probs, y_prob.detach().cpu().numpy()])
        y_trues = np.concatenate([y_trues, label.cpu().numpy()])

    metric_collects = utils.calc_multi_cls_measures(y_probs, y_trues)
    train_loss_epoch = np.round(np.mean(losses), 4)
    return train_loss_epoch, metric_collects
def train(model, optimizer, train_loader, criterion, entropy_loss_func, opts):
    """Train for a single epoch."""
    y_probs = np.zeros((0, len(train_loader.dataset.CLASSES)), np.float64)
    y_trues = np.zeros((0), np.int64)
    losses = []

    # Put model in training mode
    model.train()

    for i, (x_low, x_high, label) in enumerate(tqdm(train_loader)):
        x_low, x_high, label = move_to([x_low, x_high, label], opts.device)
        optimizer.zero_grad()

        y, attention_map, patches, x_low = model(x_low, x_high)
        entropy_loss = entropy_loss_func(attention_map)
        loss = criterion(y, label) - entropy_loss
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), opts.clipnorm)
        optimizer.step()

        loss_value = loss.item()
        losses.append(loss_value)

        y_prob = F.softmax(y, dim=1)
        y_probs = np.concatenate([y_probs, y_prob.detach().cpu().numpy()])
        y_trues = np.concatenate([y_trues, label.cpu().numpy()])

    train_loss_epoch = np.round(np.mean(losses), 4)
    metrics = calc_cls_measures(y_probs, y_trues)
    return train_loss_epoch, metrics
def train_batch(model, optimizer, baseline, epoch, batch_id, step, batch, tb_logger, opts):
    # x are states or nodes - (obs in vpg.py)
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    # Evaluate model, get costs and log probabilities.
    # log_likelihood comes from the policy distribution used to pick actions/paths and
    # cost is a vector of tour times (the rewards). Check _compute_policy_entropy in vpg.py.
    cost, log_likelihood = model(x)

    eps = {
        'padded_observations': x,
        'padded_rewards': cost,
        'lengths': 1,
        'observations': x,
        'rewards': cost,
        'actions': None,  # placeholder: actions are not available in this snippet
    }

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)

    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step,
                   log_likelihood, reinforce_loss, bl_loss, tb_logger, opts)
def train_batch(model, optimizer, scaler, baseline, epoch, batch_id, step, batch, opts):
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    if scaler is not None:
        # Mixed-precision path
        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            cost, log_likelihood = model(x)
            bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)
            reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
            loss = reinforce_loss + bl_loss
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
        scaler.step(optimizer)
        scaler.update()
    else:
        # Evaluate model, get costs and log probabilities
        cost, log_likelihood = model(x)

        # Evaluate baseline, get baseline loss if any (only for critic)
        bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)

        # Calculate loss
        reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
        loss = reinforce_loss + bl_loss

        # Perform backward pass and optimization step
        optimizer.zero_grad()
        loss.backward()
        # Clip gradient norms and get (clipped) gradient norms for logging
        grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
        optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0 and torch.distributed.get_rank() == 0:
        log_values(cost, grad_norms, epoch, batch_id, step,
                   log_likelihood, reinforce_loss, bl_loss, opts)
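# The mixed-precision branch above expects the caller to own the GradScaler. A minimal
# sketch of that setup, assuming a conventional outer training loop (the loop itself and
# the variable names are illustrative, not part of the source):
scaler = torch.cuda.amp.GradScaler() if torch.cuda.is_available() else None
# for batch_id, batch in enumerate(training_dataloader):
#     train_batch(model, optimizer, scaler, baseline, epoch, batch_id, step, batch, opts)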
def _eval_dataset(model, dataset, width, softmax_temp, opts, device):
    model.to(device)
    model.eval()
    model.set_decode_type(
        "greedy" if opts.decode_strategy in ('bs', 'greedy') else "sampling",
        temp=softmax_temp)

    dataloader = DataLoader(dataset, batch_size=opts.eval_batch_size)
    results = []
    # print(width, opts.eval_batch_size, opts.max_calc_batch_size)
    for batch in tqdm(dataloader, disable=opts.no_progress_bar):
        batch = move_to(batch, device)
        start = time.time()
        with torch.no_grad():
            if opts.decode_strategy in ('sample', 'greedy'):
                if opts.decode_strategy == 'greedy':
                    assert width == 0, "Do not set width when using greedy"
                    assert opts.eval_batch_size <= opts.max_calc_batch_size, \
                        "eval_batch_size should be smaller than calc batch size"
                    batch_rep = 1
                    iter_rep = 1
                elif width * opts.eval_batch_size > opts.max_calc_batch_size:
                    assert opts.eval_batch_size == 1
                    assert width % opts.max_calc_batch_size == 0
                    batch_rep = opts.max_calc_batch_size
                    iter_rep = width // opts.max_calc_batch_size
                else:
                    batch_rep = width
                    iter_rep = 1
                assert batch_rep > 0
                # This returns (batch_size, iter_rep shape)
                # print(width, opts.eval_batch_size, opts.max_calc_batch_size, batch_rep)
                top_k_sequences, top_k_costs = model.sample_many_top_k(
                    batch, opts.k, batch_rep=batch_rep, iter_rep=iter_rep)
                batch_size = len(top_k_costs)
                ids = torch.arange(batch_size, dtype=torch.int64, device=top_k_costs.device)
            else:
                assert opts.decode_strategy == 'bs'
                cum_log_p, sequences, costs, ids, batch_size = model.beam_search(
                    batch, beam_size=width,
                    compress_mask=opts.compress_mask,
                    max_calc_batch_size=opts.max_calc_batch_size)
        duration = time.time() - start
        for seq, cost in zip(top_k_sequences, top_k_costs):
            results.append((cost, seq, duration))

    pickle.dump(results, open(opts.output_filename, "wb"))
def train_batch_sl(model, optimizer, epoch, batch_id, step, batch, tb_logger, opts):
    # Optionally move Tensors to GPU
    x = move_to(batch['nodes'], opts.device)
    graph = move_to(batch['graph'], opts.device)

    if opts.model == 'nar':
        targets = move_to(batch['tour_edges'], opts.device)
        # Compute class weights for NAR decoder
        _targets = batch['tour_edges'].numpy().flatten()
        class_weights = compute_class_weight("balanced", classes=np.unique(_targets), y=_targets)
        class_weights = move_to(torch.FloatTensor(class_weights), opts.device)
    else:
        class_weights = None
        targets = move_to(batch['tour_nodes'], opts.device)

    # Evaluate model, get costs and loss
    cost, loss = model(x, graph, supervised=True, targets=targets, class_weights=class_weights)

    # Normalize loss for gradient accumulation
    loss = loss / opts.accumulation_steps

    # Perform backward pass
    loss.backward()

    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)

    # Perform optimization step after accumulating gradients
    if step % opts.accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values_sl(cost, grad_norms, epoch, batch_id, step, loss, tb_logger, opts)
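# The gradient-accumulation variant above divides the loss by opts.accumulation_steps and
# only steps the optimizer on every accumulation_steps-th call. A minimal sketch of a
# driving loop that makes this work (train_dataloader and the step counter below are
# assumptions for illustration, not taken from the source):
def train_epoch_sl(model, optimizer, train_dataloader, epoch, tb_logger, opts):
    model.train()
    optimizer.zero_grad()  # start from clean gradients before accumulating
    for batch_id, batch in enumerate(train_dataloader):
        # 1-based global step so optimizer.step() fires after every accumulation_steps batches
        step = epoch * len(train_dataloader) + batch_id + 1
        train_batch_sl(model, optimizer, epoch, batch_id, step, batch, tb_logger, opts)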
def train_batch_sl(model, optimizer, epoch, batch_id, step, batch, tb_logger, opts):
    nodes_coord = move_to(batch['nodes_coord'], opts.device)
    tour_nodes = move_to(batch['tour_nodes'], opts.device)

    cost, loss = model(nodes_coord, supervised_mode=True, targets=tour_nodes)
    loss = loss.mean()  # Take mean of loss across multiple GPUs

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values_sl(cost, grad_norms, epoch, batch_id, step, loss, tb_logger, opts)
def eval_model_bat(bat):
    with torch.no_grad():
        output = model(move_to(bat, opts.device), return_pi=False)
        cost = output[0]
        # print('Dataset on valid', bat)
        # print('Cost on valid', cost)
        if len(output) > 3:
            print()
            print(output[0])
            print(output[-1])
    return cost.data.cpu()
def val_epoch(self, dataloader=None):
    self.callback("before_val_epoch")
    self.model.eval()
    dataloader = dataloader if dataloader is not None else self.val_dataloader
    for batch in dataloader:
        self.state["val_batch"] = move_to(batch, self.device)
        self.callback("before_val_step")
        with torch.no_grad():
            self.val_step(self)
        self.callback("after_val_step")
    self.callback("after_val_epoch")
    return self
def train_batch(model, optimizer, baseline, epoch, batch_id, step, batch, tb_logger, opts):
    # Unwrap baseline
    bat, bl_val = baseline.unwrap_batch(batch)

    # Optionally move Tensors to GPU
    x = move_to(bat['nodes'], opts.device)
    graph = move_to(bat['graph'], opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    # Evaluate model, get costs and log probabilities
    cost, log_likelihood = model(x, graph)

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, graph, cost) if bl_val is None else (bl_val, 0)

    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss

    # Normalize loss for gradient accumulation
    loss = loss / opts.accumulation_steps

    # Perform backward pass
    loss.backward()

    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)

    # Perform optimization step after accumulating gradients
    if step % opts.accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step,
                   log_likelihood, reinforce_loss, bl_loss, tb_logger, opts)
def run_all_tsiligirides(dataset_path, sample, num_samples, eval_batch_size, max_calc_batch_size,
                         no_cuda=False, dataset_n=None, progress_bar_mininterval=0.1, seed=1234):
    import torch
    from torch.utils.data import DataLoader
    from problems.op.problem_op import OP
    from problems.op.tsiligirides import op_tsiligirides
    from utils import move_to, sample_many

    torch.manual_seed(seed)

    dataloader = DataLoader(
        OP.make_dataset(filename=dataset_path,
                        num_samples=dataset_n if dataset_n is not None else 1000000),
        batch_size=eval_batch_size)
    device = torch.device("cuda:0" if torch.cuda.is_available() and not no_cuda else "cpu")
    results = []
    for batch in tqdm(dataloader, mininterval=progress_bar_mininterval):
        start = time.time()
        batch = move_to(batch, device)
        with torch.no_grad():
            if num_samples * eval_batch_size > max_calc_batch_size:
                assert eval_batch_size == 1
                assert num_samples % max_calc_batch_size == 0
                batch_rep = max_calc_batch_size
                iter_rep = num_samples // max_calc_batch_size
            else:
                batch_rep = num_samples
                iter_rep = 1
            sequences, costs = sample_many(
                lambda inp: (None, op_tsiligirides(inp, sample)),
                OP.get_costs, batch, batch_rep=batch_rep, iter_rep=iter_rep)
        duration = time.time() - start
        results.extend([
            (cost.item(), np.trim_zeros(pi.cpu().numpy(), 'b'), duration)
            for cost, pi in zip(costs, sequences)
        ])
    return results, eval_batch_size
def train_epoch(self):
    self.model.train()
    self.callback("before_train_epoch")
    for batch in self.train_dataloader:
        self.state["epoch_no"] = self.state["train_it"] // self.state["epoch_len"] + 1
        if self.state["train_it"] % self.state["epoch_len"] == 0:
            print(f"\nEpoch [{self.state['epoch_no']}]: ")

        self.state["train_batch"] = move_to(batch, self.device)
        self.state["train_it"] += 1

        self.callback("before_train_step")
        self.train_step(self)
        self.callback("after_train_step")

        if self.state["train_it"] % self.state["epoch_len"] == 0:
            # End of training epoch.
            self.callback("after_train_epoch")
            self.val_epoch()
            self.callback("after_epoch")
            self.model.train()
            self.state["progress"].update(1)
            print("\n")

        if "early_stopping" in self.state and self.state["early_stopping"].should_stop(self):
            if self.state.get("min_train_iterations", 0) <= self.state["train_it"]:
                print("Stopping due to early stopping condition.")
                self.state["STOP_TRAINING"] = True
                break
        if self.state.get("max_train_iterations", float("inf")) <= self.state["train_it"]:
            print("Stopping due to max epochs condition.")
            self.state["STOP_TRAINING"] = True
            break
    return self
def rollout(problem, model, x_input, batch, solution, value, opts, T,
            do_sample=False, record=False):
    solutions = solution.clone()
    best_so_far = solution.clone()
    cost = value
    exchange = None
    best_val = cost.clone()

    improvement = []
    reward = []
    solution_history = [best_so_far]

    for t in tqdm(range(T), disable=opts.no_progress_bar, desc='rollout',
                  bar_format='{l_bar}{bar:20}{r_bar}{bar:-20b}'):
        exchange, _ = model(x_input, solutions, exchange, do_sample=do_sample)

        # new solution
        solutions = problem.step(solutions, exchange)
        solutions = move_to(solutions, opts.device)
        obj = problem.get_costs(batch, solutions)

        # calc improve
        improvement.append(cost - obj)
        cost = obj

        # calc reward
        new_best = torch.cat((best_val[None, :], obj[None, :]), 0).min(0)[0]
        r = best_val - new_best
        reward.append(r)

        # update best solution
        best_val = new_best
        best_so_far[(r > 0)] = solutions[(r > 0)]

        # record solutions
        if record:
            solution_history.append(best_so_far.clone())

    return (best_val.view(-1, 1),
            torch.stack(improvement, 1),
            torch.stack(reward, 1),
            None if not record else torch.stack(solution_history, 1))
def train_epoch(model, criterion, optimizer, train_loader, opts, scaler=None):
    # Put model in train mode
    model.train()

    start_time = None
    num_images_processed = 0

    for batch_id, batch in enumerate(tqdm(train_loader)):
        # Skip the first N batches due to data loader initialization
        if batch_id == opts.skip_batches:
            start_time = time.time()

        inputs, label = move_to(batch, opts.device)
        label = label.long()

        optimizer.zero_grad()
        if opts.mixed_precision:
            with autocast():
                prediction = model(inputs)
                loss = criterion(prediction, label)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            prediction = model(inputs)
            loss = criterion(prediction, label)
            loss.backward()
            optimizer.step()

        if start_time is not None:
            num_images_processed += inputs.shape[0]

    end_time = time.time()
    return num_images_processed / (end_time - start_time)
def evaluateMultiResBatches(model, test_loader, criterion, entropy_loss_func, opts):
    """Evaluate a single epoch."""
    y_probs = np.zeros((0, len(test_loader.dataset.CLASSES)), np.float64)
    y_trues = np.zeros((0), np.int64)
    losses = [[] for s in opts.scales]
    metrics = []

    # Put model in eval mode
    model.eval()

    all_patches = []
    all_maps = []
    all_x_low = []
    all_sampled_ats = []

    for i, (x_low, x_high, label) in enumerate(tqdm(test_loader)):
        # high res batch
        x_low, x_high, label = move_to([x_low, x_high, label], opts.device)

        y, attention_map, patches, x_low_out, sampled_attention = model(x_low, x_high)
        if opts.visualize:
            all_patches.append(patches)
            all_maps.append(attention_map)
            all_x_low.append(x_low_out)
            all_sampled_ats.append(sampled_attention)

        entropy_loss = entropy_loss_func(attention_map)
        loss = criterion(y, label) - entropy_loss

        loss_value = loss.item()
        losses[0].append(loss_value)

        y_prob = F.softmax(y, dim=1)
        y_probs = np.concatenate([y_probs, y_prob.detach().cpu().numpy()])
        y_trues = np.concatenate([y_trues, label.cpu().numpy()])
        metric = calc_cls_measures(y_probs, y_trues)
        metrics.append(metric)

        # lower resolution batches for the remaining scales
        for i in range(1, len(opts.scales)):
            s = opts.scales[i]
            x_low_i = F.interpolate(x_low, scale_factor=s, mode='bilinear')
            x_high_i = F.interpolate(x_high, scale_factor=s, mode='bilinear')
            x_low_i, x_high_i = move_to([x_low_i, x_high_i], opts.device)

            y, attention_map, patches, x_low_i_out, sampled_attention = model(x_low_i, x_high_i)
            if opts.visualize:
                all_patches.append(patches)
                all_maps.append(attention_map)
                all_x_low.append(x_low_i_out)
                all_sampled_ats.append(sampled_attention)

            entropy_loss = entropy_loss_func(attention_map)
            loss = criterion(y, label) - entropy_loss

            loss_value = loss.item()
            losses[i].append(loss_value)

            y_prob = F.softmax(y, dim=1)
            y_probs = np.concatenate([y_probs, y_prob.detach().cpu().numpy()])
            y_trues = np.concatenate([y_trues, label.cpu().numpy()])
            metric = calc_cls_measures(y_probs, y_trues)
            metrics.append(metric)

        if opts.visualize:
            all_patches_tensor = torch.cat(all_patches, dim=1)
            # all_maps_tensor = torch.stack(all_maps, dim=1)
            for b in range(patches.shape[0]):
                batch_patches = all_patches_tensor[b]
                batch_maps = [attention_map[b].cpu().numpy() for attention_map in all_maps]
                for ats in batch_maps:
                    print(ats)
                    # print(torch.min())
                batch_imgs = [x_low_i[b] for x_low_i in all_x_low]
                batch_sampled_ats = [
                    sampled_attention[b].cpu().numpy() for sampled_attention in all_sampled_ats
                ]
                print(batch_sampled_ats)
                patchGrid(batch_patches, batch_maps, batch_imgs, (3, 5))
                # mapGrid(batch_maps, batch_imgs, opts.scales)

    test_loss_epoch = [np.round(np.mean(loss_s), 4) for loss_s in losses]
    # metrics = calc_cls_measures(y_probs, y_trues)
    return test_loss_epoch, metrics
def trainMultiResBatches(model, optimizer, train_loader, criterion, entropy_loss_func, opts):
    """Train for a single epoch."""
    y_probs = np.zeros((0, len(train_loader.dataset.CLASSES)), np.float64)
    y_trues = np.zeros((0), np.int64)
    losses = [[] for s in opts.scales]
    metrics = []

    # Put model in training mode
    model.train()

    for i, (x_low, x_high, label) in enumerate(tqdm(train_loader)):
        # high res batch
        x_low, x_high, label = move_to([x_low, x_high, label], opts.device)
        optimizer.zero_grad()

        y, attention_map, patches, x_low_out = model(x_low, x_high)
        entropy_loss = entropy_loss_func(attention_map)
        loss = criterion(y, label) - entropy_loss
        loss.backward()
        # for p in model.parameters():
        #     print(p.grad)
        torch.nn.utils.clip_grad_norm_(model.parameters(), opts.clipnorm)
        optimizer.step()

        loss_value = loss.item()
        losses[0].append(loss_value)

        y_prob = F.softmax(y, dim=1)
        y_probs = np.concatenate([y_probs, y_prob.detach().cpu().numpy()])
        y_trues = np.concatenate([y_trues, label.cpu().numpy()])
        metric = calc_cls_measures(y_probs, y_trues)
        metrics.append(metric)

        # lower resolution batches for the remaining scales
        for i in range(1, len(opts.scales)):
            s = opts.scales[i]
            x_low_i = F.interpolate(x_low, scale_factor=s, mode='bilinear')
            x_high_i = F.interpolate(x_high, scale_factor=s, mode='bilinear')
            x_low_i, x_high_i = move_to([x_low_i, x_high_i], opts.device)
            optimizer.zero_grad()

            y, attention_map, patches, x_low_i_out = model(x_low_i, x_high_i)
            entropy_loss = entropy_loss_func(attention_map)
            loss = criterion(y, label) - entropy_loss
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), opts.clipnorm)
            optimizer.step()

            loss_value = loss.item()
            losses[i].append(loss_value)

            y_prob = F.softmax(y, dim=1)
            y_probs = np.concatenate([y_probs, y_prob.detach().cpu().numpy()])
            y_trues = np.concatenate([y_trues, label.cpu().numpy()])
            metric = calc_cls_measures(y_probs, y_trues)
            metrics.append(metric)

    train_loss_epoch = [np.round(np.mean(loss_s), 4) for loss_s in losses]
    # metrics = calc_cls_measures(y_probs, y_trues)
    return train_loss_epoch, metrics
def evaluateMultiRes(model, test_loader, criterion, entropy_loss_func, opts):
    """Evaluate a single epoch."""
    y_probs = np.zeros((0, len(test_loader.dataset.CLASSES)), np.float64)
    y_trues = np.zeros((0), np.int64)
    losses = []

    # Put model in eval mode
    model.eval()

    for i, (x_lows, x_highs, label) in enumerate(tqdm(test_loader)):
        x_lows, x_highs, label = move_to([x_lows, x_highs, label], opts.device)

        y, attention_maps, patches, x_lows = model(x_lows, x_highs)

        ## visualize
        # for i, (scale, x_low) in enumerate(zip(model.scales, x_lows)):
        #     if type(attention_maps) is list:
        #         ats_map = attention_maps[i]
        #         showPatch()

        if type(attention_maps) is list:
            entropy_loss = torch.tensor([
                entropy_loss_func(attention_map) for attention_map in attention_maps
            ]).sum() / len(opts.scales)
            loss = criterion(y, label) - entropy_loss
        else:
            entropy_loss = entropy_loss_func(attention_maps)
            loss = criterion(y, label) - entropy_loss

        loss_value = loss.item()
        losses.append(loss_value)

        y_prob = F.softmax(y, dim=1)
        y_probs = np.concatenate([y_probs, y_prob.detach().cpu().numpy()])
        y_trues = np.concatenate([y_trues, label.cpu().numpy()])

        if opts.visualize:
            for b in range(patches.shape[0]):
                batch_patches = patches[b]
                # patchGrid(batch_patches, (3, 5))
                if type(attention_maps) is list:
                    batch_maps = [attention_map[b].cpu().numpy() for attention_map in attention_maps]
                    for attention_map in batch_maps:
                        print(np.max(attention_map))
                        print(np.min(attention_map))
                    # batch_maps = [attention_maps[i][b] for i in range(len(model.scales))]
                else:
                    # batch_maps = [attention_maps[b] for i in range(len(model.scales))]
                    batch_maps = [attention_maps[b].cpu().numpy()]
                batch_imgs = [x_lows[i][b] for i in range(len(model.scales))]
                # mapGrid(batch_maps, batch_imgs, model.scales)
                patchGrid(batch_patches, batch_maps, batch_imgs, (3, 5))

    test_loss_epoch = np.round(np.mean(losses), 4)
    metrics = calc_cls_measures(y_probs, y_trues)
    return test_loss_epoch, metrics
def eval_model_bat(bat):
    with torch.no_grad():
        cost, _ = model(move_to(bat, opts.device), only_encoder=False)
    return cost.data.cpu()
def _eval_dataset(model, dataset, width, softmax_temp, opts, device):
    model.to(device)
    model.eval()
    model.set_decode_type(
        "greedy" if opts.decode_strategy in ('bs', 'greedy') else "sampling",
        temp=softmax_temp)

    dataloader = DataLoader(dataset, batch_size=opts.eval_batch_size)
    results = []
    for batch in tqdm(dataloader, disable=opts.no_progress_bar):
        batch = move_to(batch, device)
        start = time.time()
        with torch.no_grad():
            if opts.decode_strategy in ('sample', 'greedy'):
                if opts.decode_strategy == 'greedy':
                    assert width == 0, "Do not set width when using greedy"
                    assert opts.eval_batch_size <= opts.max_calc_batch_size, \
                        "eval_batch_size should be smaller than calc batch size"
                    batch_rep = 1
                    iter_rep = 1
                elif width * opts.eval_batch_size > opts.max_calc_batch_size:
                    assert opts.eval_batch_size == 1
                    assert width % opts.max_calc_batch_size == 0
                    batch_rep = opts.max_calc_batch_size
                    iter_rep = width // opts.max_calc_batch_size
                else:
                    batch_rep = width
                    iter_rep = 1
                assert batch_rep > 0
                # This returns (batch_size, iter_rep shape)
                sequences, costs = model.sample_many(batch, batch_rep=batch_rep, iter_rep=iter_rep)
                batch_size = len(costs)
                ids = torch.arange(batch_size, dtype=torch.int64, device=costs.device)
            else:
                assert opts.decode_strategy == 'bs'
                cum_log_p, sequences, costs, ids, batch_size = model.beam_search(
                    batch, beam_size=width,
                    compress_mask=opts.compress_mask,
                    max_calc_batch_size=opts.max_calc_batch_size)
            if sequences is None:
                sequences = [None] * batch_size
                costs = [math.inf] * batch_size
            else:
                sequences, costs = get_best(
                    sequences.cpu().numpy(), costs.cpu().numpy(),
                    ids.cpu().numpy() if ids is not None else None,
                    batch_size
                )
        duration = time.time() - start
        for seq, cost in zip(sequences, costs):
            if model.problem.NAME == "tsp":
                seq = seq.tolist()  # No need to trim as all are same length
            elif model.problem.NAME in ("cvrp", "sdvrp"):
                seq = np.trim_zeros(seq).tolist() + [0]  # Add depot
            elif model.problem.NAME in ("op", "pctsp"):
                seq = np.trim_zeros(seq)  # We have the convention to exclude the depot
            else:
                assert False, "Unknown problem: {}".format(model.problem.NAME)
            # Note VRP only
            results.append((cost, seq, duration))

    return results
def train_batch(model, optimizer, baseline, epoch, batch_id, step, batch, tb_logger, opts):
    # x are states or nodes - (obs in vpg.py)
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    # Evaluate model, get costs and log probabilities.
    # log_likelihood comes from the policy distribution used to pick actions/paths and
    # cost is a vector of tour times (the rewards). Check _compute_policy_entropy in vpg.py.
    cost, log_likelihood = model(x)

    # Check sizes
    print('---Checking data Sizes---')
    for key_x in x:
        print('Batch ID:', batch_id, '->', key_x, '->', x[key_x].shape)
    print('Batch ID:', batch_id, '-> Cost ->', cost.shape)

    # Synthetic construction of required Garage input
    obs_garage = x['loc'].clone().detach().cpu()
    rewards_garage = cost.clone().detach().cpu().numpy()
    # padded_rewards_garage = pad_tensor(rewards_garage, len(rewards_garage), mode='last')
    padded_rewards_garage = rewards_garage.reshape(rewards_garage.shape[0], 1)
    lens = [(obs_garage.shape[1] - 1) for i in range(obs_garage.shape[0])]
    eps_dict = {
        'padded_observations': obs_garage,
        'padded_rewards': padded_rewards_garage,
        'lengths': lens,
        'observations': obs_garage,
        'rewards': rewards_garage,
        'actions': obs_garage
    }
    eps = SimpleNamespace(**eps_dict)

    env_spec_dict = {'max_episode_length': 20}
    env_spec = SimpleNamespace(**env_spec_dict)
    vpg = VPG(
        env_spec=env_spec,
        policy=None,
        value_function=None,
        sampler=None,
        policy_optimizer=optimizer,
        vf_optimizer=optimizer
    )
    print('VPG run' + str(vpg._train_once(1, eps)))

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)
    # print('VPG run loss: ' + str(vpg._compute_advantage(cost, lens, bl_val)))

    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step,
                   log_likelihood, reinforce_loss, bl_loss, tb_logger, opts)
def move(self, to_path=None):
    utils.move_to(self.path, to_path)
def train_batch(problem, model, optimizer, baseline, epoch, batch_id, step, batch,
                tb_logger, opts, pbar):
    solution = move_to(problem.get_initial_solutions(opts.init_val_met, batch), opts.device)

    if problem.NAME == 'tsp':
        x = batch
    else:
        assert False, "Unsupported problem: {}".format(problem.NAME)

    x_input = move_to(x, opts.device)    # batch_size, graph_size, 2
    batch = move_to(batch, opts.device)  # batch_size, graph_size, 2
    exchange = None

    # update best_so_far
    best_so_far = problem.get_costs(batch, solution)
    initial_cost = best_so_far.clone()

    # params
    gamma = opts.gamma
    n_step = opts.n_step
    T = opts.T_train
    t = 0

    while t < T:
        baseline_val = []
        baseline_val_detached = []
        log_likelihood = []
        reward = []

        t_s = t
        total_cost = 0
        exchange_history = []

        while t - t_s < n_step and not (t == T):
            # get estimated value from baseline
            bl_val_detached, bl_val = baseline.eval(x_input, solution)
            baseline_val_detached.append(bl_val_detached)
            baseline_val.append(bl_val)

            # get model output
            exchange, log_lh = model(x_input, solution, exchange, do_sample=True)
            exchange_history.append(exchange)
            log_likelihood.append(log_lh)

            # state transient
            solution = problem.step(solution, exchange)
            solution = move_to(solution, opts.device)

            # calc reward
            cost = problem.get_costs(batch, solution)
            total_cost = total_cost + cost
            best_for_now = torch.cat((best_so_far[None, :], cost[None, :]), 0).min(0)[0]
            reward.append(best_so_far - best_for_now)
            best_so_far = best_for_now

            # next
            t = t + 1

        # Get discounted R
        Reward = []
        total_cost = total_cost / (t - t_s)
        reward_reversed = reward[::-1]
        next_return, _ = baseline.eval(x_input, solution)
        for r in range(len(reward_reversed)):
            R = next_return * gamma + reward_reversed[r]
            Reward.append(R)
            next_return = R

        Reward = torch.stack(Reward[::-1], 0)
        baseline_val = torch.stack(baseline_val, 0)
        baseline_val_detached = torch.stack(baseline_val_detached, 0)
        log_likelihood = torch.stack(log_likelihood, 0)

        # calculate loss
        criteria = torch.nn.MSELoss()
        baseline_loss = criteria(Reward, baseline_val)
        reinforce_loss = -((Reward - baseline_val_detached) * log_likelihood).mean()
        loss = baseline_loss + reinforce_loss

        # update gradient step
        optimizer.zero_grad()
        loss.backward()
        # Clip gradient norms and get (clipped) gradient norms for logging
        grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
        optimizer.step()

        # Logging to tensorboard
        if not opts.no_tb:
            current_step = int(step * T / n_step + t // n_step)
            if current_step % int(opts.log_step) == 0:
                log_to_tb_train(tb_logger, optimizer, model, baseline, total_cost, grad_norms,
                                reward, exchange_history, reinforce_loss, baseline_loss,
                                log_likelihood, initial_cost, current_step)

        pbar.update(1)
def eval_model_bat(batch):
    with torch.no_grad():
        cost, _ = model(move_to(batch, opts.device))
    return cost.data.cpu()
def validate(problem, model, val_dataset, tb_logger, opts, _id=None):
    # Validate mode
    print('\nValidating...', flush=True)
    model.eval()

    init_value = []
    best_value = []
    improvement = []
    reward = []
    time_used = []

    for batch in tqdm(DataLoader(val_dataset, batch_size=opts.eval_batch_size),
                      disable=opts.no_progress_bar or opts.val_size == opts.eval_batch_size,
                      desc='validate', bar_format='{l_bar}{bar:20}{r_bar}{bar:-20b}'):
        # initial solutions
        initial_solution = move_to(
            problem.get_initial_solutions(opts.init_val_met, batch), opts.device)

        if problem.NAME == 'tsp':
            x = batch
        else:
            assert False, "Unsupported problem: {}".format(problem.NAME)

        x_input = move_to(x, opts.device)    # batch_size, graph_size, 2
        batch = move_to(batch, opts.device)  # batch_size, graph_size, 2

        initial_value = problem.get_costs(batch, initial_solution)
        init_value.append(initial_value)

        # run the model
        s_time = time.time()
        bv, improve, r, _ = rollout(problem, model, x_input, batch, initial_solution,
                                    initial_value, opts, T=opts.T_max, do_sample=True)
        duration = time.time() - s_time

        time_used.append(duration)
        best_value.append(bv.clone())
        improvement.append(improve.clone())
        reward.append(r.clone())

    best_value = torch.cat(best_value, 0)
    improvement = torch.cat(improvement, 0)
    reward = torch.cat(reward, 0)
    init_value = torch.cat(init_value, 0).view(-1, 1)
    time_used = torch.tensor(time_used)

    # log to screen
    log_to_screen(time_used, init_value, best_value, reward, improvement,
                  batch_size=opts.eval_batch_size, dataset_size=len(val_dataset), T=opts.T_max)

    # log to tb
    if not opts.no_tb:
        log_to_tb_val(tb_logger, time_used, init_value, best_value, reward, improvement,
                      batch_size=opts.eval_batch_size, dataset_size=len(val_dataset),
                      T=opts.T_max, epoch=_id)

    # save to file
    if _id is not None:
        torch.save(
            {
                'init_value': init_value,
                'best_value': best_value,
                'improvement': improvement,
                'reward': reward,
                'time_used': time_used,
            },
            os.path.join(opts.save_dir, 'validate-{}.pt'.format(_id)))