def train_batch(model, optimizer, baseline, epoch, batch_id, step, batch, tb_logger, opts):
    """Run one REINFORCE training step on a single batch.

    Unwraps the batch and any precomputed baseline value, evaluates the model,
    forms the baseline-corrected policy-gradient loss, and takes one clipped
    optimizer step. Logs every opts.log_step steps.
    """
    # vec: (batch_size, graph_size, embed_dim); bl_val: (batch_size) or None
    vec, bl_val = baseline.unwrap_batch(batch)
    vec = move_to(vec, opts.device)
    if bl_val is not None:
        bl_val = move_to(bl_val, opts.device)

    # Forward pass: tour cost and log-likelihood of the sampled solution
    cost, log_likelihood = model(vec)

    # normalize cost
    # cost = (cost - cost.mean()) / (cost.std() + 1e-5)

    # Critic-style baselines compute their value (and loss) here; other
    # baselines already supplied bl_val via unwrap_batch
    if bl_val is None:
        bl_val, bl_loss = baseline.eval(vec, cost)
    else:
        bl_loss = 0

    # REINFORCE gradient estimator with baseline
    advantage = cost - bl_val
    reinforce_loss = (advantage * log_likelihood).mean()
    loss = reinforce_loss + bl_loss

    # Backward pass and parameter update
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Periodic logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step, log_likelihood,
                   reinforce_loss, bl_loss, tb_logger, opts)
def train_batch(model, optimizer, baseline, epoch, batch_id, step, batch, tb_logger, opts, extra):
    """Run one entropy-regularized REINFORCE step; `extra` accumulates bookkeeping."""
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    if bl_val is not None:
        bl_val = move_to(bl_val, opts.device)

    # Forward pass: cost, sample log-likelihood, and policy entropy
    cost, log_likelihood, entropy = model(x)
    # print('Cost on train', cost)

    # Critic-style baselines are evaluated here; otherwise bl_val came from unwrap_batch
    if bl_val is None:
        bl_val, bl_loss = baseline.eval(x, cost)
    else:
        bl_loss = 0

    # NOTE(review): `bl_loss == True` looks like it relies on the constant
    # baseline signalling an update via a boolean bl_loss — confirm against
    # that baseline's eval(); kept verbatim to preserve behavior.
    if bl_loss == True and opts.baseline == 'constant':
        extra["updates"] += 1

    # Entropy-regularized REINFORCE loss (entropy term weighted by opts.entropy)
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    entropy_loss = entropy.mean()
    loss = reinforce_loss + bl_loss - opts.entropy * entropy_loss

    # Backward pass and clipped optimizer update
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Periodic logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step, log_likelihood,
                   reinforce_loss, bl_loss, entropy_loss, tb_logger, opts, extra)
def train_batch(model, optimizer, baseline, epoch, batch_id, step, batch, tb_logger, opts):
    """Run one REINFORCE step with an optional input-transform auxiliary loss.

    opts.input_transform doubles as the enable flag and the weight of the
    transform-loss term added to the total loss.
    """
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    # Evaluate model, get costs and log probabilities.
    # Fixed: compare against None with `is not None` instead of `!= None`
    # (PEP 8; avoids invoking custom __eq__).
    if opts.input_transform is not None:
        cost, log_likelihood, transform_loss = model(
            x, return_transform_loss=True)
    else:
        cost, log_likelihood = model(x)

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)

    # Calculate loss: REINFORCE term plus optional weighted transform loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss
    if opts.input_transform is not None:
        loss += opts.input_transform * transform_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step, log_likelihood,
                   reinforce_loss, bl_loss, tb_logger, opts)
def train_batch(model, optimizer, baseline, epoch, batch_id, step,
                interactions_count, batch, tb_logger, opts):
    """REINFORCE step that also counts decoder 'interactions' for budgeted training.

    interactions_count: interactions accumulated before this batch; combined with
    the decoder's running counter to report progress toward opts.total_interactions
    and to print an estimated remaining wall-clock time.
    """
    start_time = time.time()
    # Enable interaction counting on both the policy decoder and the
    # rollout-baseline model's decoder for the duration of this batch
    get_inner_model(model).decoder.count_interactions = True
    get_inner_model(baseline.baseline.model).decoder.count_interactions = True
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None
    # Evaluate model, get costs and log probabilities
    cost, log_likelihood = model(x, only_encoder=False)
    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)
    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss
    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()
    # Accumulate this batch's wall-clock time in a module-level counter
    global sum_batch_time
    sum_batch_time += (time.time() - start_time)
    # Logging
    # Factor 2 presumably accounts for policy + baseline rollouts — TODO confirm
    interactions_so_far = interactions_count +\
        2*get_inner_model(model).decoder.interactions*torch.cuda.device_count()
    if step % int(opts.log_step) == 0:
        #with torch.no_grad():
        #    opt_nll = get_inner_model(model).supervised_log_likelihood(x)
        opt_nll = torch.ones(1)  # placeholder: supervised NLL computation disabled
        log_values(cost, grad_norms, epoch, interactions_so_far, batch_id, step,
                   log_likelihood, opt_nll, reinforce_loss, bl_loss, tb_logger, opts)
        #interactions_so_far = 2 * step * opts.graph_size * opts.batch_size \
        #    if opts.baseline is not None else step * opts.graph_size * opts.batch_size
        if opts.total_interactions > interactions_so_far:
            # Estimate remaining time from the average batch duration so far
            avg_batch_time = sum_batch_time / step if step > 0 else sum_batch_time
            print('batch time: ', avg_batch_time)
            # NOTE(review): this multiplies remaining *interactions* by the
            # per-*batch* time (see the commented per-batch alternative below) —
            # the ETA units look inconsistent; confirm intended
            seconds = (opts.total_interactions - interactions_so_far) * avg_batch_time
            #seconds = (opts.n_epochs*(opts.epoch_size // opts.batch_size) - step) * avg_batch_time
            GetTime(seconds)
            print('============================')
    # Disable counting again and release cached GPU memory
    get_inner_model(model).decoder.count_interactions = False
    get_inner_model(baseline.baseline.model).decoder.count_interactions = False
    if opts.use_cuda:
        torch.cuda.empty_cache()
def train_batch(
        model, optimizer, baseline, epoch, batch_id, step, batch, tb_logger, opts
):
    """Run one REINFORCE training step on a single batch.

    Fixed: removed a half-written, unused `eps` dict whose 'actions' key had no
    value — it made this function a SyntaxError and contributed nothing.
    """
    # x are states/nodes (the `obs` in vpg.py)
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    # Evaluate model, get costs and log probabilities.
    # log_likelihood comes from the policy distribution used to pick actions/paths;
    # cost is a vector of tour times (rewards). See _compute_policy_entropy in vpg.py.
    cost, log_likelihood = model(x)

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)

    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step, log_likelihood,
                   reinforce_loss, bl_loss, tb_logger, opts)
def train_batch(model, optimizer, scaler, baseline, epoch, batch_id, step, batch, opts):
    """One REINFORCE step, under CUDA AMP when a GradScaler is supplied.

    With a scaler: forward/loss under autocast, scaled backward, unscale before
    clipping, then scaler-managed step. Without: plain FP32 path. Only rank 0
    logs under distributed training.
    """
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    if bl_val is not None:
        bl_val = move_to(bl_val, opts.device)

    if scaler is not None:
        # Mixed-precision path
        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            cost, log_likelihood = model(x)
            if bl_val is None:
                bl_val, bl_loss = baseline.eval(x, cost)
            else:
                bl_loss = 0
            reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
            loss = reinforce_loss + bl_loss
        scaler.scale(loss).backward()
        # Gradients must be unscaled before norm clipping
        scaler.unscale_(optimizer)
        grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
        scaler.step(optimizer)
        scaler.update()
    else:
        # Full-precision path
        cost, log_likelihood = model(x)
        if bl_val is None:
            bl_val, bl_loss = baseline.eval(x, cost)
        else:
            bl_loss = 0
        reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
        loss = reinforce_loss + bl_loss
        optimizer.zero_grad()
        loss.backward()
        grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
        optimizer.step()

    # Periodic logging, rank 0 only
    if step % int(opts.log_step) == 0 and torch.distributed.get_rank() == 0:
        log_values(cost, grad_norms, epoch, batch_id, step, log_likelihood,
                   reinforce_loss, bl_loss, opts)
def train_batch(model, optimizer, baseline, epoch, batch_id, step, batch, tb_logger, opts):
    """Run one REINFORCE training step on a single batch.

    Evaluates the model, forms the baseline-corrected policy-gradient loss,
    and takes one clipped optimizer step; logs every opts.log_step steps.
    """
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    # Evaluate model, get costs and log probabilities
    cost, log_likelihood = model(x)

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)

    # Calculate loss (fixed local typo: 'reiforce_loss' -> 'reinforce_loss')
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step, log_likelihood,
                   reinforce_loss, bl_loss, tb_logger, opts)
def train_batch(model, optimizer, baseline, epoch, batch_id, step, batch, tb_logger, opts):
    """REINFORCE step on a graph batch with gradient accumulation.

    The loss is divided by opts.accumulation_steps and the optimizer only steps
    (and zeroes gradients) every opts.accumulation_steps calls.
    """
    # Unwrap baseline
    bat, bl_val = baseline.unwrap_batch(batch)
    # Optionally move Tensors to GPU
    x = move_to(bat['nodes'], opts.device)
    graph = move_to(bat['graph'], opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None
    # Evaluate model, get costs and log probabilities
    cost, log_likelihood = model(x, graph)
    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, graph, cost) if bl_val is None else (bl_val, 0)
    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss
    # Normalize loss for gradient accumulation
    loss = loss / opts.accumulation_steps
    # Perform backward pass
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    # NOTE(review): clipping runs on every sub-batch, so partially accumulated
    # gradients are clipped repeatedly before the optimizer step — confirm intended
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    # Perform optimization step after accumulating gradients
    if step % opts.accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()
    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step, log_likelihood,
                   reinforce_loss, bl_loss, tb_logger, opts)
def train(model, optimizer, train_dataset, val_dataset, tb_logger, opts):
    """Supervised training loop for binary node classification.

    Trains for opts.n_epochs epochs over train_dataset, evaluating on
    val_dataset each epoch and logging (wandb + tensorboard) every
    opts.log_step epochs. Returns a completion message.
    """
    training_dataloader = DataLoader(dataset=train_dataset,
                                     batch_size=opts.batch_size,
                                     shuffle=True,
                                     num_workers=16)
    step = 0
    model.train()
    for epoch in tqdm(range(opts.n_epochs)):
        # Fixed: start_time was previously reset inside the batch loop, so the
        # printed epoch duration only measured the final batch.
        start_time = time.time()
        running_loss = 0.0
        running_accu = 0.0
        for train_batch in training_dataloader:
            # NOTE(review): the .to() return value is discarded — assumes an
            # in-place/graph-batch move; confirm for the batch type in use.
            train_batch.to(opts.device)
            optimizer.zero_grad()
            train_batch_out, _ = model(train_batch, compute_embeddings=False)
            # Node classification with binary labels
            # (earlier MSE-regression / score-binarization variants removed)
            train_batch_loss = F.binary_cross_entropy(train_batch_out, train_batch.y)
            train_batch_accu = accuracy(output=train_batch_out,
                                        target=train_batch.y, threshold=0.5)
            train_batch_loss.backward()
            optimizer.step()
            running_accu += train_batch_accu.item()
            running_loss += train_batch_loss.item()
        # Per-epoch averages (accuracy reported as a percentage)
        train_loss = running_loss / len(training_dataloader)
        train_accu = running_accu / len(training_dataloader) * 100
        # Validation loss and accuracy at each epoch
        val_loss, val_accu = evaluate(model, val_dataset, opts)
        # Logging
        if step % int(opts.log_step) == 0:
            # wandb logging
            wandb.log({
                "train_loss": train_loss,
                "val_loss": val_loss,
                "train_accu": train_accu,
                "val_accu": val_accu
            })
            # Tensorboard logging
            log_values(epoch, step, tb_logger, opts, train_loss, val_loss)
        step += 1
        epoch_duration = time.time() - start_time
        print("Finished epoch {}, took {} s".format(
            epoch, time.strftime('%H:%M:%S', time.gmtime(epoch_duration))))
    return "Training Completed!"
def train_batch(
        model, optimizer, baseline, epoch, batch_id, step, batch, tb_logger, opts
):
    """REINFORCE step instrumented with an experimental garage-VPG cross-check.

    Besides the usual policy-gradient update, this builds a synthetic garage
    episode from the batch and runs VPG._train_once on it, printing debug
    output — presumably to compare the two implementations; experimental code.
    """
    # x are states or nodes - (obs in vpg.py)
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None
    # Evaluate model, get costs and log probabilities.
    # log_likelihood comes from the POLICY distribution used to pick actions/paths;
    # cost is a vector of tour times (rewards). Check _compute_policy_entropy in vpg.py.
    cost, log_likelihood = model(x)
    # Check sizes (debug prints; assumes x is a dict of tensors — TODO confirm)
    print('---Checking data Sizes---')
    for key_x in x:
        print('Batch ID:', batch_id, '->', key_x, '->', x[key_x].shape)
    print('Batch ID:',batch_id,'-> Cost ->',cost.shape)
    # Synthetic construction of required Garage input (detached CPU copies so
    # the VPG call does not touch this model's graph)
    obs_garage = x['loc'].clone().detach().cpu()
    rewards_garage = cost.clone().detach().cpu().numpy()
    #padded_rewards_garage = pad_tensor(rewards_garage, len(rewards_garage), mode='last')
    padded_rewards_garage = rewards_garage.reshape(rewards_garage.shape[0],1)
    # One episode length per batch element: number of steps = nodes - 1
    lens = [(obs_garage.shape[1]-1) for i in range(obs_garage.shape[0])]
    eps_dict = {
        'padded_observations': obs_garage,
        'padded_rewards': padded_rewards_garage,
        'lengths': lens,
        'observations': obs_garage,
        'rewards': rewards_garage,
        # NOTE(review): observations reused as 'actions' — placeholder; verify
        'actions': obs_garage
    }
    eps = SimpleNamespace(**eps_dict)
    env_spec_dict = {
        'max_episode_length': 20
    }
    env_spec = SimpleNamespace(**env_spec_dict)
    # Minimal VPG instance: policy/value-function/sampler left as None, so only
    # code paths exercised by _train_once are expected to work
    vpg = VPG(
        env_spec = env_spec,
        policy = None,
        value_function = None,
        sampler = None,
        policy_optimizer = optimizer,
        vf_optimizer = optimizer
    )
    print('VPG run' + str(vpg._train_once(1, eps)))
    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)
    #print('VPG run loss: ' + str(vpg._compute_advantage(cost, lens, bl_val)))
    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss
    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()
    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step, log_likelihood,
                   reinforce_loss, bl_loss, tb_logger, opts)