Example no. 1
0
def train_batch(model, optimizer, baseline, epoch, batch_id, step, batch,
                tb_logger, opts):
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    # Evaluate model, get costs and log probabilities
    if opts.input_transform is not None:
        cost, log_likelihood, transform_loss = model(
            x, return_transform_loss=True)
    else:
        cost, log_likelihood = model(x)
    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)

    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss
    if opts.input_transform is not None:
        loss += opts.input_transform * transform_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step, log_likelihood,
                   reinforce_loss, bl_loss, tb_logger, opts)
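All of the train_batch variants in these examples rely on two helpers that are not shown: move_to, which moves a tensor (or a dict/list of tensors) to a device, and clip_grad_norms, which clips the gradient norm of each optimizer parameter group and returns the norms for logging. The sketch below is only a plausible reconstruction consistent with the call sites; the exact implementations in the original codebases may differ.

import math
import torch


def move_to(var, device):
    # Recursively move tensors (possibly nested inside dicts or lists) to a device.
    if isinstance(var, dict):
        return {k: move_to(v, device) for k, v in var.items()}
    if isinstance(var, (list, tuple)):
        return [move_to(v, device) for v in var]
    return var.to(device)


def clip_grad_norms(param_groups, max_norm=math.inf):
    # Clip the gradient norm of each parameter group and return both the
    # unclipped and the clipped norms so they can be logged.
    grad_norms = [
        torch.nn.utils.clip_grad_norm_(
            group['params'],
            max_norm if max_norm > 0 else math.inf,  # inf disables clipping but still computes the norm
            norm_type=2,
        )
        for group in param_groups
    ]
    grad_norms_clipped = [min(g, max_norm) for g in grad_norms] if max_norm > 0 else grad_norms
    return grad_norms, grad_norms_clipped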
Example no. 2
0
def train_batch(model, optimizer, baseline, epoch, batch_id, step, batch,
                tb_logger, opts):
    # vec: (batch_size, graph_size, embed_dim)
    # bl_val: (batch_size) or None
    vec, bl_val = baseline.unwrap_batch(batch)
    vec = move_to(vec, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    # Evaluate model, get costs and log probabilities
    cost, log_likelihood = model(vec)
    # normalize cost
    # cost = (cost - cost.mean()) / (cost.std() + 1e-5)

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(vec, cost) if bl_val is None else (bl_val,
                                                                       0)

    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step, log_likelihood,
                   reinforce_loss, bl_loss, tb_logger, opts)
Example no. 3
0
def train_batch(model, optimizer, baseline, epoch, batch_id, step, batch,
                tb_logger, opts, extra):
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    # Evaluate model, get costs and log probabilities
    cost, log_likelihood, entropy = model(x)
    # print('Cost on train', cost)

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)
    if bl_loss == True and opts.baseline == 'constant':
        extra["updates"] += 1

    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    entropy_loss = entropy.mean()
    loss = reinforce_loss + bl_loss - opts.entropy * entropy_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step, log_likelihood,
                   reinforce_loss, bl_loss, entropy_loss, tb_logger, opts,
                   extra)
Example no. 4
0
def train_batch(model, optimizer, baseline, epoch, batch_id, step,
                interactions_count, batch, tb_logger, opts):
    start_time = time.time()
    get_inner_model(model).decoder.count_interactions = True
    get_inner_model(baseline.baseline.model).decoder.count_interactions = True
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    # Evaluate model, get costs and log probabilities

    cost, log_likelihood = model(x, only_encoder=False)

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)
    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    global sum_batch_time
    sum_batch_time += (time.time() - start_time)
    # Logging
    interactions_so_far = interactions_count +\
                          2*get_inner_model(model).decoder.interactions*torch.cuda.device_count()

    if step % int(opts.log_step) == 0:
        #with torch.no_grad():
        #    opt_nll = get_inner_model(model).supervised_log_likelihood(x)
        opt_nll = torch.ones(1)
        log_values(cost, grad_norms, epoch, interactions_so_far, batch_id,
                   step, log_likelihood, opt_nll, reinforce_loss, bl_loss,
                   tb_logger, opts)

        #interactions_so_far = 2 * step * opts.graph_size * opts.batch_size \
        #    if opts.baseline is not None else step * opts.graph_size * opts.batch_size

        if opts.total_interactions > interactions_so_far:
            avg_batch_time = sum_batch_time / step if step > 0 else sum_batch_time
            print('batch time: ', avg_batch_time)

            seconds = (opts.total_interactions -
                       interactions_so_far) * avg_batch_time
            #seconds = (opts.n_epochs*(opts.epoch_size // opts.batch_size) - step) * avg_batch_time
            GetTime(seconds)
            print('============================')

    get_inner_model(model).decoder.count_interactions = False
    get_inner_model(baseline.baseline.model).decoder.count_interactions = False

    if opts.use_cuda:
        torch.cuda.empty_cache()
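Example no. 4 prints an estimate of the remaining training time through a GetTime(seconds) helper that is not included in the snippet. The following stand-in is purely illustrative and only matches the way it is called above.

def GetTime(seconds):
    # Illustrative only: format a duration given in seconds as hours/minutes/seconds.
    hours, rem = divmod(int(seconds), 3600)
    minutes, secs = divmod(rem, 60)
    print('estimated time left: {:d}h {:02d}m {:02d}s'.format(hours, minutes, secs))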
Example no. 5
0
def solve_all_nn(dataset_path,
                 eval_batch_size=1024,
                 no_cuda=False,
                 dataset_n=None,
                 progress_bar_mininterval=0.1):
    import torch
    from torch.utils.data import DataLoader
    from problems import TSP
    from utils import move_to

    dataloader = DataLoader(TSP.make_dataset(
        filename=dataset_path,
        num_samples=dataset_n if dataset_n is not None else 1000000),
                            batch_size=eval_batch_size)
    device = torch.device(
        "cuda:0" if torch.cuda.is_available() and not no_cuda else "cpu")
    results = []
    for batch in tqdm(dataloader,
                      mininterval=progress_bar_mininterval,
                      ascii=True):
        start = time.time()
        batch = move_to(batch, device)

        lengths, tours = nearest_neighbour(batch)
        lengths_check, _ = TSP.get_costs(batch, tours)

        assert (torch.abs(lengths - lengths_check.data) < 1e-5).all()

        duration = time.time() - start
        results.extend([(cost.item(), np.trim_zeros(pi.cpu().numpy(),
                                                    'b'), duration)
                        for cost, pi in zip(lengths, tours)])

    return results, eval_batch_size
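Example no. 5 evaluates a nearest_neighbour heuristic whose definition is not part of the snippet. Below is a minimal batched sketch with the same call signature (tour lengths and tours for a batch of 2D coordinates); the heuristic in the original codebase may differ, for instance in how the start node is chosen.

import torch


def nearest_neighbour(batch):
    # batch: (batch_size, graph_size, 2) node coordinates.
    # Greedy construction: repeatedly move to the closest unvisited node,
    # then close the tour back to the start node (node 0 here).
    batch_size, graph_size, _ = batch.size()
    dist = torch.cdist(batch, batch)  # (batch_size, graph_size, graph_size)
    idx = torch.arange(batch_size, device=batch.device)
    current = torch.zeros(batch_size, dtype=torch.long, device=batch.device)
    visited = torch.zeros(batch_size, graph_size, dtype=torch.bool, device=batch.device)
    visited[:, 0] = True
    tours = [current]
    lengths = torch.zeros(batch_size, device=batch.device)
    for _ in range(graph_size - 1):
        d = dist[idx, current].masked_fill(visited, float('inf'))  # distances to unvisited nodes
        nxt = d.argmin(dim=-1)
        lengths = lengths + d[idx, nxt]
        visited[idx, nxt] = True
        tours.append(nxt)
        current = nxt
    lengths = lengths + dist[idx, current, tours[0]]  # return to the start node
    return lengths, torch.stack(tours, dim=1)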
Example no. 6
0
def evaluate(model, test_loader, criterion, entropy_loss_func, opts):
    """ Evaluate a single epoch """

    y_probs = np.zeros((0, len(test_loader.dataset.CLASSES)), float)
    y_trues = np.zeros((0), int)
    losses = []

    # Put model in eval mode
    model.eval()

    for i, (x_low, x_high, label) in enumerate(tqdm(test_loader)):

        x_low, x_high, label = move_to([x_low, x_high, label], opts.device)

        y, attention_map, patches, x_low = model(x_low, x_high)

        entropy_loss = entropy_loss_func(attention_map)
        loss = criterion(y, label) - entropy_loss

        loss_value = loss.item()
        losses.append(loss_value)

        y_prob = F.softmax(y, dim=1)
        y_probs = np.concatenate([y_probs, y_prob.detach().cpu().numpy()])
        y_trues = np.concatenate([y_trues, label.cpu().numpy()])

    test_loss_epoch = np.round(np.mean(losses), 4)
    metrics = calc_cls_measures(y_probs, y_trues)
    return test_loss_epoch, metrics
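Examples no. 6, 8 and 21-23 subtract an entropy_loss_func(attention_map) term from the classification loss, i.e. they reward attention maps with higher entropy. The helper is not shown; a minimal sketch, assuming each attention map is a per-sample probability distribution over spatial locations:

import torch


def entropy_loss_func(attention_map, eps=1e-8):
    # Mean (over the batch) entropy of each sample's attention distribution.
    # Assumes attention_map sums to 1 over all non-batch dimensions.
    p = attention_map.flatten(start_dim=1)
    return -(p * torch.log(p + eps)).sum(dim=1).mean()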
Example no. 7
0
def train_model(model, train_loader, optimizer, opts):
    n_classes = opts.n_classes
    metric = torch.nn.CrossEntropyLoss()

    y_probs = np.zeros((0, n_classes), float)
    y_trues = np.zeros((0), int)
    losses = []
    model.train()

    for i, (image, label) in enumerate(tqdm(train_loader)):
        optimizer.zero_grad()
        image, label = utils.move_to([image, label], opts.device)

        prediction = model.forward(image.float())
        loss = metric(prediction, label.long())
        loss.backward()
        optimizer.step()

        loss_value = loss.item()
        losses.append(loss_value)
        y_prob = F.softmax(prediction, dim=1)

        y_probs = np.concatenate([y_probs, y_prob.detach().cpu().numpy()])
        y_trues = np.concatenate([y_trues, label.cpu().numpy()])

    metric_collects = utils.calc_multi_cls_measures(y_probs, y_trues)

    train_loss_epoch = np.round(np.mean(losses), 4)
    return train_loss_epoch, metric_collects
Example no. 8
0
def train(model, optimizer, train_loader, criterion, entropy_loss_func, opts):
    """ Train for a single epoch """

    y_probs = np.zeros((0, len(train_loader.dataset.CLASSES)), float)
    y_trues = np.zeros((0), int)
    losses = []

    # Put model in training mode
    model.train()

    for i, (x_low, x_high, label) in enumerate(tqdm(train_loader)):
        x_low, x_high, label = move_to([x_low, x_high, label], opts.device)

        optimizer.zero_grad()
        y, attention_map, patches, x_low = model(x_low, x_high)

        entropy_loss = entropy_loss_func(attention_map)

        loss = criterion(y, label) - entropy_loss
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), opts.clipnorm)
        optimizer.step()

        loss_value = loss.item()
        losses.append(loss_value)

        y_prob = F.softmax(y, dim=1)
        y_probs = np.concatenate([y_probs, y_prob.detach().cpu().numpy()])
        y_trues = np.concatenate([y_trues, label.cpu().numpy()])

    train_loss_epoch = np.round(np.mean(losses), 4)
    metrics = calc_cls_measures(y_probs, y_trues)
    return train_loss_epoch, metrics
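Several of the classification examples summarize each epoch with calc_cls_measures(y_probs, y_trues) (or calc_multi_cls_measures), whose implementation is not included. As a rough illustration it could be as simple as an accuracy computed from argmax predictions; the original helper likely reports additional metrics.

import numpy as np


def calc_cls_measures(y_probs, y_trues):
    # Illustrative stand-in: accuracy from argmax predictions.
    if len(y_trues) == 0:
        return {'accuracy': 0.0}
    y_preds = np.argmax(y_probs, axis=1)
    return {'accuracy': float(np.mean(y_preds == y_trues))}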
Example no. 9
0
def train_batch(
        model,
        optimizer,
        baseline,
        epoch,
        batch_id,
        step,
        batch,
        tb_logger,
        opts
):
    x, bl_val = baseline.unwrap_batch(batch)  # x holds the states/nodes (obs in vpg.py)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None
    
    # Evaluate model, get costs and log probabilities
    cost, log_likelihood = model(x)  # log_likelihood comes from the policy distribution used to pick actions/paths; cost is a vector of tour lengths (can serve as rewards). See _compute_policy_entropy in vpg.py

    # Synthetic episode dict; the 'actions' entry had no value in the original
    # snippet, so a placeholder is used here to keep the code syntactically valid.
    eps = {
        'padded_observations': x,
        'padded_rewards': cost,
        'lengths': 1,
        'observations': x,
        'rewards': cost,
        'actions': None  # placeholder: value missing in the original
    }

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)

    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step,
                   log_likelihood, reinforce_loss, bl_loss, tb_logger, opts)
Example no. 10
0
def train_batch(model, optimizer, scaler, baseline, epoch, batch_id, step,
                batch, opts):
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    if scaler is not None:
        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            cost, log_likelihood = model(x)
            bl_val, bl_loss = baseline.eval(
                x, cost) if bl_val is None else (bl_val, 0)
            reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
            loss = reinforce_loss + bl_loss
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        grad_norms = clip_grad_norms(optimizer.param_groups,
                                     opts.max_grad_norm)
        scaler.step(optimizer)
        scaler.update()

    else:
        # Evaluate model, get costs and log probabilities
        cost, log_likelihood = model(x)

        # Evaluate baseline, get baseline loss if any (only for critic)
        bl_val, bl_loss = baseline.eval(
            x, cost) if bl_val is None else (bl_val, 0)

        # Calculate loss
        reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
        loss = reinforce_loss + bl_loss

        # Perform backward pass and optimization step
        optimizer.zero_grad()
        loss.backward()
        # Clip gradient norms and get (clipped) gradient norms for logging
        grad_norms = clip_grad_norms(optimizer.param_groups,
                                     opts.max_grad_norm)
        optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0 and torch.distributed.get_rank() == 0:
        log_values(cost, grad_norms, epoch, batch_id, step, log_likelihood,
                   reinforce_loss, bl_loss, opts)
Example no. 11
0
def _eval_dataset(model, dataset, width, softmax_temp, opts, device):

    model.to(device)
    model.eval()

    model.set_decode_type(
        "greedy" if opts.decode_strategy in ('bs', 'greedy') else "sampling",
        temp=softmax_temp)

    dataloader = DataLoader(dataset, batch_size=opts.eval_batch_size)

    results = []
    #    print(width, opts.eval_batch_size , opts.max_calc_batch_size)
    for batch in tqdm(dataloader, disable=opts.no_progress_bar):
        batch = move_to(batch, device)

        start = time.time()
        with torch.no_grad():
            if opts.decode_strategy in ('sample', 'greedy'):
                if opts.decode_strategy == 'greedy':
                    assert width == 0, "Do not set width when using greedy"
                    assert opts.eval_batch_size <= opts.max_calc_batch_size, \
                        "eval_batch_size should be smaller than calc batch size"
                    batch_rep = 1
                    iter_rep = 1
                elif width * opts.eval_batch_size > opts.max_calc_batch_size:
                    assert opts.eval_batch_size == 1
                    assert width % opts.max_calc_batch_size == 0
                    batch_rep = opts.max_calc_batch_size
                    iter_rep = width // opts.max_calc_batch_size
                else:
                    batch_rep = width
                    iter_rep = 1
                assert batch_rep > 0
                # This returns (batch_size, iter_rep shape)
                #                print(width, opts.eval_batch_size , opts.max_calc_batch_size, batch_rep)
                top_k_sequences, top_k_costs = model.sample_many_top_k(
                    batch, opts.k, batch_rep=batch_rep, iter_rep=iter_rep)
                batch_size = len(top_k_costs)
                ids = torch.arange(batch_size,
                                   dtype=torch.int64,
                                   device=top_k_costs.device)
            else:
                assert opts.decode_strategy == 'bs'

                cum_log_p, sequences, costs, ids, batch_size = model.beam_search(
                    batch,
                    beam_size=width,
                    compress_mask=opts.compress_mask,
                    max_calc_batch_size=opts.max_calc_batch_size)

        duration = time.time() - start
        for seq, cost in zip(top_k_sequences, top_k_costs):
            results.append((cost, seq, duration))

    pickle.dump(results, open(opts.output_filename, "wb"))
Example no. 12
0
def train_batch_sl(model, optimizer, epoch, batch_id, step, batch, tb_logger,
                   opts):
    # Optionally move Tensors to GPU
    x = move_to(batch['nodes'], opts.device)
    graph = move_to(batch['graph'], opts.device)

    if opts.model == 'nar':
        targets = move_to(batch['tour_edges'], opts.device)
        # Compute class weights for NAR decoder
        _targets = batch['tour_edges'].numpy().flatten()
        class_weights = compute_class_weight("balanced",
                                             classes=np.unique(_targets),
                                             y=_targets)
        class_weights = move_to(torch.FloatTensor(class_weights), opts.device)
    else:
        class_weights = None
        targets = move_to(batch['tour_nodes'], opts.device)

    # Evaluate model, get costs and loss
    cost, loss = model(x,
                       graph,
                       supervised=True,
                       targets=targets,
                       class_weights=class_weights)

    # Normalize loss for gradient accumulation
    loss = loss / opts.accumulation_steps

    # Perform backward pass
    loss.backward()

    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)

    # Perform optimization step after accumulating gradients
    if step % opts.accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values_sl(cost, grad_norms, epoch, batch_id, step, loss, tb_logger,
                      opts)
Example no. 13
0
def train_batch_sl(model, optimizer, epoch, batch_id, step, batch, tb_logger,
                   opts):
    nodes_coord = move_to(batch['nodes_coord'], opts.device)
    tour_nodes = move_to(batch['tour_nodes'], opts.device)

    cost, loss = model(nodes_coord, supervised_mode=True, targets=tour_nodes)
    loss = loss.mean()  # Take mean of loss across multiple GPUs

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values_sl(cost, grad_norms, epoch, batch_id, step, loss, tb_logger,
                      opts)
Example no. 14
0
 def eval_model_bat(bat):
     with torch.no_grad():
         output = model(move_to(bat, opts.device), return_pi=False)
         cost = output[0]
         # print('Dataset on valid', bat)
         # print('Cost on valid', cost)
         if len(output) > 3:
             print()
             print(output[0])
             print(output[-1])
     return cost.data.cpu()
Example no. 15
0
    def val_epoch(self, dataloader=None):
        self.callback("before_val_epoch")
        self.model.eval()
        dataloader = dataloader if dataloader is not None else self.val_dataloader
        for batch in dataloader:
            self.state["val_batch"] = move_to(batch, self.device)
            self.callback("before_val_step")
            with torch.no_grad():
                self.val_step(self)
            self.callback("after_val_step")
        self.callback("after_val_epoch")

        return self
Example no. 16
0
def train_batch(model, optimizer, baseline, epoch, batch_id, step, batch,
                tb_logger, opts):
    # Unwrap baseline
    bat, bl_val = baseline.unwrap_batch(batch)

    # Optionally move Tensors to GPU
    x = move_to(bat['nodes'], opts.device)
    graph = move_to(bat['graph'], opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    # Evaluate model, get costs and log probabilities
    cost, log_likelihood = model(x, graph)

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, graph,
                                    cost) if bl_val is None else (bl_val, 0)

    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss

    # Normalize loss for gradient accumulation
    loss = loss / opts.accumulation_steps

    # Perform backward pass
    loss.backward()

    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)

    # Perform optimization step after accumulating gradients
    if step % opts.accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step, log_likelihood,
                   reinforce_loss, bl_loss, tb_logger, opts)
Example no. 17
0
def run_all_tsiligirides(dataset_path,
                         sample,
                         num_samples,
                         eval_batch_size,
                         max_calc_batch_size,
                         no_cuda=False,
                         dataset_n=None,
                         progress_bar_mininterval=0.1,
                         seed=1234):
    import torch
    from torch.utils.data import DataLoader

    from problems.op.problem_op import OP
    from problems.op.tsiligirides import op_tsiligirides
    from utils import move_to, sample_many
    torch.manual_seed(seed)

    dataloader = DataLoader(OP.make_dataset(
        filename=dataset_path,
        num_samples=dataset_n if dataset_n is not None else 1000000),
                            batch_size=eval_batch_size)
    device = torch.device(
        "cuda:0" if torch.cuda.is_available() and not no_cuda else "cpu")
    results = []
    for batch in tqdm(dataloader, mininterval=progress_bar_mininterval):
        start = time.time()
        batch = move_to(batch, device)

        with torch.no_grad():
            if num_samples * eval_batch_size > max_calc_batch_size:
                assert eval_batch_size == 1
                assert num_samples % max_calc_batch_size == 0
                batch_rep = max_calc_batch_size
                iter_rep = num_samples // max_calc_batch_size
            else:
                batch_rep = num_samples
                iter_rep = 1
            sequences, costs = sample_many(
                lambda inp: (None, op_tsiligirides(inp, sample)),
                OP.get_costs,
                batch,
                batch_rep=batch_rep,
                iter_rep=iter_rep)
            duration = time.time() - start
            results.extend([(cost.item(), np.trim_zeros(pi.cpu().numpy(),
                                                        'b'), duration)
                            for cost, pi in zip(costs, sequences)])
    return results, eval_batch_size
Example no. 18
0
    def train_epoch(self):
        self.model.train()
        self.callback("before_train_epoch")

        for batch in self.train_dataloader:

            self.state["epoch_no"] = self.state["train_it"] // self.state[
                "epoch_len"] + 1
            if self.state["train_it"] % self.state["epoch_len"] == 0:
                print(f"\nEpoch [{self.state['epoch_no']}]:  ")

            self.state["train_batch"] = move_to(batch, self.device)
            self.state["train_it"] += 1
            self.callback("before_train_step")
            self.train_step(self)
            self.callback("after_train_step")

            if self.state["train_it"] % self.state["epoch_len"] == 0:
                # End of training epoch.

                self.callback("after_train_epoch")
                self.val_epoch()
                self.callback("after_epoch")
                self.model.train()

                self.state["progress"].update(1)
                print("\n")

                if "early_stopping" in self.state and self.state[
                        "early_stopping"].should_stop(self):
                    if self.state.get("min_train_iterations",
                                      0) <= self.state["train_it"]:
                        print("Stopping due to early stopping condition.")
                        self.state["STOP_TRAINING"] = True
                        break

            if self.state.get("max_train_iterations",
                              float("inf")) <= self.state["train_it"]:
                print("Stopping due to max epochs condition.")
                self.state["STOP_TRAINING"] = True

                break

        return self
Example no. 19
0
def rollout(problem, model, x_input, batch, solution, value, opts, T, do_sample = False, record = False):
    
    solutions = solution.clone()
    best_so_far = solution.clone()
    cost = value
    
    exchange = None
    best_val = cost.clone()
    improvement = []
    reward = []
    solution_history = [best_so_far]
    
    for t in tqdm(range(T), disable = opts.no_progress_bar, desc = 'rollout', bar_format='{l_bar}{bar:20}{r_bar}{bar:-20b}'):
        
        exchange, _ = model( x_input, 
                             solutions, 
                             exchange, 
                             do_sample = do_sample)
        
        # new solution
        solutions = problem.step(solutions, exchange)        
        solutions = move_to(solutions, opts.device)
        
        obj = problem.get_costs(batch, solutions)
        
        #calc improve
        improvement.append(cost - obj)
        cost = obj
        
        #calc reward
        new_best = torch.cat((best_val[None,:], obj[None,:]),0).min(0)[0]
        r = best_val - new_best
        reward.append(r)        
        
        #update best solution
        best_val = new_best
        best_so_far[(r > 0)] = solutions[(r > 0)]
        
        #record solutions
        if record: solution_history.append(best_so_far.clone())
        
    return best_val.view(-1,1), torch.stack(improvement,1), torch.stack(reward,1), None if not record else torch.stack(solution_history,1)
Example no. 20
0
def train_epoch(model, criterion, optimizer, train_loader, opts, scaler=None):
    # Put model in train mode
    model.train()

    start_time = None
    num_images_processed = 0

    for batch_id, batch in enumerate(tqdm(train_loader)):

        # Skip the first N batches due to data loader initialization
        if batch_id == opts.skip_batches:
            start_time = time.time()

        inputs, label = move_to(batch, opts.device)
        label = label.long()
        optimizer.zero_grad()

        if opts.mixed_precision:
            with autocast():
                prediction = model(inputs)
                loss = criterion(prediction, label)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

        else:
            prediction = model(inputs)
            loss = criterion(prediction, label)

            loss.backward()
            optimizer.step()

        if start_time is not None:
            num_images_processed += inputs.shape[0]

    end_time = time.time()

    return num_images_processed / (end_time - start_time)
Example no. 21
0
def evaluateMultiResBatches(model, test_loader, criterion, entropy_loss_func,
                            opts):
    """ Train for a single epoch """

    y_probs = np.zeros((0, len(test_loader.dataset.CLASSES)), float)
    y_trues = np.zeros((0), int)
    losses = [[] for s in opts.scales]
    metrics = []
    # Put model in eval mode
    model.eval()

    all_patches = []
    all_maps = []
    all_x_low = []
    all_sampled_ats = []
    for i, (x_low, x_high, label) in enumerate(tqdm(test_loader)):
        # high res batch
        x_low, x_high, label = move_to([x_low, x_high, label], opts.device)

        y, attention_map, patches, x_low_out, sampled_attention = model(
            x_low, x_high)
        if opts.visualize:
            all_patches.append(patches)
            all_maps.append(attention_map)
            all_x_low.append(x_low_out)
            all_sampled_ats.append(sampled_attention)
        entropy_loss = entropy_loss_func(attention_map)

        loss = criterion(y, label) - entropy_loss

        loss_value = loss.item()
        losses[0].append(loss_value)

        y_prob = F.softmax(y, dim=1)
        y_probs = np.concatenate([y_probs, y_prob.detach().cpu().numpy()])
        y_trues = np.concatenate([y_trues, label.cpu().numpy()])

        metric = calc_cls_measures(y_probs, y_trues)
        metrics.append(metric)

        # scale-2 low res batch
        for i in range(1, len(opts.scales)):
            s = opts.scales[i]
            x_low_i = F.interpolate(x_low, scale_factor=s, mode='bilinear')
            x_high_i = F.interpolate(x_high, scale_factor=s, mode='bilinear')

            x_low_i, x_high_i = move_to([x_low_i, x_high_i], opts.device)

            y, attention_map, patches, x_low_i_out, sampled_attention = model(
                x_low_i, x_high_i)

            if opts.visualize:
                all_patches.append(patches)
                all_maps.append(attention_map)
                all_x_low.append(x_low_i_out)
                all_sampled_ats.append(sampled_attention)
            entropy_loss = entropy_loss_func(attention_map)

            loss = criterion(y, label) - entropy_loss

            loss_value = loss.item()

            losses[i].append(loss_value)

            y_prob = F.softmax(y, dim=1)
            y_probs = np.concatenate([y_probs, y_prob.detach().cpu().numpy()])
            y_trues = np.concatenate([y_trues, label.cpu().numpy()])

            metric = calc_cls_measures(y_probs, y_trues)
            metrics.append(metric)

        if opts.visualize:
            all_patches_tensor = torch.cat(all_patches, dim=1)
            # all_maps_tensor = torch.stack(all_maps, dim=1)
            for b in range(patches.shape[0]):
                batch_patches = all_patches_tensor[b]
                batch_maps = [
                    attention_map[b].cpu().numpy()
                    for attention_map in all_maps
                ]
                for ats in batch_maps:
                    print(ats)
                    # print(torch.min())
                batch_imgs = [x_low_i[b] for x_low_i in all_x_low]
                batch_sampled_ats = [
                    sampled_attention[b].cpu().numpy()
                    for sampled_attention in all_sampled_ats
                ]
                print(batch_sampled_ats)
                patchGrid(batch_patches, batch_maps, batch_imgs, (3, 5))
                # mapGrid(batch_maps, batch_imgs, opts.scales)

    test_loss_epoch = [np.round(np.mean(loss_s), 4) for loss_s in losses]
    # metrics = calc_cls_measures(y_probs, y_trues)
    return test_loss_epoch, metrics
Example no. 22
0
def trainMultiResBatches(model, optimizer, train_loader, criterion,
                         entropy_loss_func, opts):
    """ Train for a single epoch """

    y_probs = np.zeros((0, len(train_loader.dataset.CLASSES)), float)
    y_trues = np.zeros((0), int)
    losses = [[] for s in opts.scales]
    metrics = []
    # Put model in training mode
    model.train()

    for i, (x_low, x_high, label) in enumerate(tqdm(train_loader)):
        # high res batch
        x_low, x_high, label = move_to([x_low, x_high, label], opts.device)

        optimizer.zero_grad()
        y, attention_map, patches, x_low_out = model(x_low, x_high)

        entropy_loss = entropy_loss_func(attention_map)

        loss = criterion(y, label) - entropy_loss
        loss.backward()
        # for p in model.parameters():
        #     print(p.grad)
        torch.nn.utils.clip_grad_norm_(model.parameters(), opts.clipnorm)
        optimizer.step()

        loss_value = loss.item()
        losses[0].append(loss_value)

        y_prob = F.softmax(y, dim=1)
        y_probs = np.concatenate([y_probs, y_prob.detach().cpu().numpy()])
        y_trues = np.concatenate([y_trues, label.cpu().numpy()])

        metric = calc_cls_measures(y_probs, y_trues)
        metrics.append(metric)

        # scale-2 low res batch
        for i in range(1, len(opts.scales)):
            s = opts.scales[i]
            x_low_i = F.interpolate(x_low, scale_factor=s, mode='bilinear')
            x_high_i = F.interpolate(x_high, scale_factor=s, mode='bilinear')

            x_low_i, x_high_i = move_to([x_low_i, x_high_i], opts.device)

            optimizer.zero_grad()
            y, attention_map, patches, x_low_i_out = model(x_low_i, x_high_i)

            entropy_loss = entropy_loss_func(attention_map)

            loss = criterion(y, label) - entropy_loss

            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), opts.clipnorm)

            optimizer.step()

            loss_value = loss.item()

            losses[i].append(loss_value)

            y_prob = F.softmax(y, dim=1)
            y_probs = np.concatenate([y_probs, y_prob.detach().cpu().numpy()])
            y_trues = np.concatenate([y_trues, label.cpu().numpy()])

            metric = calc_cls_measures(y_probs, y_trues)
            metrics.append(metric)

    train_loss_epoch = [np.round(np.mean(loss_s), 4) for loss_s in losses]
    # metrics = calc_cls_measures(y_probs, y_trues)
    return train_loss_epoch, metrics
Example no. 23
0
def evaluateMultiRes(model, test_loader, criterion, entropy_loss_func, opts):
    """ Evaluate a single epoch """

    y_probs = np.zeros((0, len(test_loader.dataset.CLASSES)), float)
    y_trues = np.zeros((0), int)
    losses = []

    # Put model in eval mode
    model.eval()

    for i, (x_lows, x_highs, label) in enumerate(tqdm(test_loader)):

        x_lows, x_highs, label = move_to([x_lows, x_highs, label], opts.device)

        y, attention_maps, patches, x_lows = model(x_lows, x_highs)

        ## visualize
        # for i, (scale, x_low) in  enumerate(zip(model.scales, x_lows)):
        #     if type(attention_maps) is list:
        #         ats_map = attention_maps[i]
        #         showPatch()

        if type(attention_maps) is list:

            entropy_loss = torch.tensor([
                entropy_loss_func(attention_map)
                for attention_map in attention_maps
            ]).sum() / len(opts.scales)

            loss = criterion(y, label) - entropy_loss
        else:
            entropy_loss = entropy_loss_func(attention_maps)
            loss = criterion(y, label) - entropy_loss

        loss_value = loss.item()
        losses.append(loss_value)

        y_prob = F.softmax(y, dim=1)
        y_probs = np.concatenate([y_probs, y_prob.detach().cpu().numpy()])
        y_trues = np.concatenate([y_trues, label.cpu().numpy()])

        if opts.visualize:
            for b in range(patches.shape[0]):
                batch_patches = patches[b]
                # patchGrid(batch_patches, (3, 5))
                if type(attention_maps) is list:
                    batch_maps = [
                        attention_map[b].cpu().numpy()
                        for attention_map in attention_maps
                    ]
                    for attention_map in batch_maps:
                        print(np.max(attention_map))
                        print(np.min(attention_map))
                    # batch_maps = [attention_maps[i][b] for i in range(len(model.scales))]
                else:
                    # batch_maps = [attention_maps[b] for i in range(len(model.scales))]
                    batch_maps = [attention_maps[b].cpu().numpy()]
                batch_imgs = [x_lows[i][b] for i in range(len(model.scales))]
                # mapGrid(batch_maps, batch_imgs, model.scales)
                patchGrid(batch_patches, batch_maps, batch_imgs, (3, 5))

    test_loss_epoch = np.round(np.mean(losses), 4)
    metrics = calc_cls_measures(y_probs, y_trues)
    return test_loss_epoch, metrics
Example no. 24
0
 def eval_model_bat(bat):
     with torch.no_grad():
         cost, _ = model(move_to(bat, opts.device), only_encoder=False)
     return cost.data.cpu()
Example no. 25
0
def _eval_dataset(model, dataset, width, softmax_temp, opts, device):

    model.to(device)
    model.eval()

    model.set_decode_type(
        "greedy" if opts.decode_strategy in ('bs', 'greedy') else "sampling",
        temp=softmax_temp)

    dataloader = DataLoader(dataset, batch_size=opts.eval_batch_size)

    results = []
    for batch in tqdm(dataloader, disable=opts.no_progress_bar):
        batch = move_to(batch, device)

        start = time.time()
        with torch.no_grad():
            if opts.decode_strategy in ('sample', 'greedy'):
                if opts.decode_strategy == 'greedy':
                    assert width == 0, "Do not set width when using greedy"
                    assert opts.eval_batch_size <= opts.max_calc_batch_size, \
                        "eval_batch_size should be smaller than calc batch size"
                    batch_rep = 1
                    iter_rep = 1
                elif width * opts.eval_batch_size > opts.max_calc_batch_size:
                    assert opts.eval_batch_size == 1
                    assert width % opts.max_calc_batch_size == 0
                    batch_rep = opts.max_calc_batch_size
                    iter_rep = width // opts.max_calc_batch_size
                else:
                    batch_rep = width
                    iter_rep = 1
                assert batch_rep > 0
                # This returns (batch_size, iter_rep shape)
                sequences, costs = model.sample_many(batch, batch_rep=batch_rep, iter_rep=iter_rep)
                batch_size = len(costs)
                ids = torch.arange(batch_size, dtype=torch.int64, device=costs.device)
            else:
                assert opts.decode_strategy == 'bs'

                cum_log_p, sequences, costs, ids, batch_size = model.beam_search(
                    batch, beam_size=width,
                    compress_mask=opts.compress_mask,
                    max_calc_batch_size=opts.max_calc_batch_size
                )

        if sequences is None:
            sequences = [None] * batch_size
            costs = [math.inf] * batch_size
        else:
            sequences, costs = get_best(
                sequences.cpu().numpy(), costs.cpu().numpy(),
                ids.cpu().numpy() if ids is not None else None,
                batch_size
            )
        duration = time.time() - start
        for seq, cost in zip(sequences, costs):
            if model.problem.NAME == "tsp":
                seq = seq.tolist()  # No need to trim as all are same length
            elif model.problem.NAME in ("cvrp", "sdvrp"):
                seq = np.trim_zeros(seq).tolist() + [0]  # Add depot
            elif model.problem.NAME in ("op", "pctsp"):
                seq = np.trim_zeros(seq)  # We have the convention to exclude the depot
            else:
                assert False, "Unkown problem: {}".format(model.problem.NAME)
            # Note VRP only
            results.append((cost, seq, duration))

    return results
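The _eval_dataset in Example no. 25 post-processes sampled or beam-searched solutions with a get_best helper that is not shown. A minimal sketch consistent with how it is called (keep, for every instance id, the candidate with the lowest cost); the original is likely vectorized with NumPy.

import math


def get_best(sequences, costs, ids=None, batch_size=None):
    # Select the cheapest candidate sequence for each instance id.
    if ids is None:
        return sequences, costs
    n = batch_size if batch_size is not None else int(max(ids)) + 1
    best_seqs, best_costs = [None] * n, [math.inf] * n
    for seq, cost, i in zip(sequences, costs, ids):
        if cost < best_costs[i]:
            best_seqs[i], best_costs[i] = seq, cost
    return best_seqs, best_costs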
Example no. 26
0
def train_batch(
        model,
        optimizer,
        baseline,
        epoch,
        batch_id,
        step,
        batch,
        tb_logger,
        opts
):
    x, bl_val = baseline.unwrap_batch(batch)  # x holds the states/nodes (obs in vpg.py)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None
    
    # Evaluate model, get costs and log probabilities
    cost, log_likelihood = model(x)  # log_likelihood comes from the policy distribution used to pick actions/paths; cost is a vector of tour lengths (can serve as rewards). See _compute_policy_entropy in vpg.py
    
    #Check sizes
    print('---Checking data Sizes---')
    for key_x in x:
        print('Batch ID:', batch_id, '->', key_x, '->', x[key_x].shape)

    print('Batch ID:',batch_id,'-> Cost ->',cost.shape)

    #Synthetic construction of required Garage input
    obs_garage = x['loc'].clone().detach().cpu()
    rewards_garage = cost.clone().detach().cpu().numpy()
    #padded_rewards_garage = pad_tensor(rewards_garage, len(rewards_garage), mode='last')
  
    padded_rewards_garage = rewards_garage.reshape(rewards_garage.shape[0],1)

    lens = [(obs_garage.shape[1]-1) for i in range(obs_garage.shape[0])]
    
    eps_dict = {
        'padded_observations': obs_garage,
        'padded_rewards': padded_rewards_garage,
        'lengths': lens,
        'observations': obs_garage,
        'rewards': rewards_garage,
        'actions': obs_garage
    }

    eps = SimpleNamespace(**eps_dict)

    env_spec_dict = {
      'max_episode_length': 20
    }
    env_spec = SimpleNamespace(**env_spec_dict)

    vpg = VPG(
      env_spec = env_spec,
      policy = None,
      value_function = None,
      sampler = None,
      policy_optimizer = optimizer,
      vf_optimizer = optimizer
    )
    print('VPG run' + str(vpg._train_once(1, eps)))
    

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)

    #print('VPG run loss: ' + str(vpg._compute_advantage(cost, lens, bl_val)))

    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step,
                   log_likelihood, reinforce_loss, bl_loss, tb_logger, opts)
Example no. 27
0
 def move(self, to_path=None):
     utils.move_to(self.path, to_path)
Example no. 28
0
def train_batch(
        problem,
        model,
        optimizer,
        baseline,
        epoch,
        batch_id,
        step,
        batch,
        tb_logger,
        opts,
        pbar
):
    
    solution = move_to(
        problem.get_initial_solutions(opts.init_val_met, batch), opts.device)


    if problem.NAME == 'tsp':
        x = batch
        
    else:
        assert False, "Unsupported problem: {}".format(problem.NAME)
    
    x_input = move_to(x, opts.device) # batch_size, graph_size, 2
    batch = move_to(batch, opts.device) # batch_size, graph_size, 2
    exchange = None	
    
    #update best_so_far
    best_so_far = problem.get_costs(batch, solution)
    initial_cost = best_so_far.clone()
    
    # params
    gamma = opts.gamma
    n_step = opts.n_step
    T = opts.T_train
    t = 0
    
    while t < T:
    
        baseline_val = []
        baseline_val_detached = []
        log_likelihood = []
        reward = []
        
        t_s = t
        
        total_cost = 0
        exchange_history = []              
        
        while t - t_s < n_step and not (t == T):
            
            
            # get estimated value from baseline
            bl_val_detached, bl_val = baseline.eval(x_input, solution)
            
            baseline_val_detached.append(bl_val_detached)
            baseline_val.append(bl_val)
            
            # get model output
            exchange, log_lh = model( x_input, 
                                      solution,
                                      exchange, 
                                      do_sample = True)
            
            exchange_history.append(exchange)
            log_likelihood.append(log_lh)
            
            # state transient
            solution = problem.step(solution, exchange)
            solution = move_to(solution, opts.device)
        
            # calc reward
            cost = problem.get_costs(batch, solution)
            total_cost = total_cost + cost
            best_for_now = torch.cat((best_so_far[None,:], cost[None,:]),0).min(0)[0]
            reward.append(best_so_far - best_for_now)
            best_so_far = best_for_now
            
            # next            
            t = t + 1
        
        # Get discounted R
        Reward = []
        
        total_cost = total_cost / (t-t_s)
        
        reward_reversed = reward[::-1]
        next_return, _  = baseline.eval(x_input, solution)

        for r in range(len(reward_reversed)):     
            R = next_return * gamma + reward_reversed[r]            
            Reward.append(R)
            next_return = R       
        
        Reward = torch.stack(Reward[::-1], 0)
        baseline_val = torch.stack(baseline_val,0)
        baseline_val_detached = torch.stack(baseline_val_detached,0)
        log_likelihood = torch.stack(log_likelihood,0)

        # calculate loss
        criteria = torch.nn.MSELoss() 
        baseline_loss = criteria(Reward, baseline_val)
        reinforce_loss = - ((Reward - baseline_val_detached)*log_likelihood).mean()
        loss =  baseline_loss + reinforce_loss
        
        # update gradient step
        optimizer.zero_grad()
        loss.backward()
        
        #Clip gradient norms and get (clipped) gradient norms for logging
        grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
        
        optimizer.step()
    
        # Logging to tensorboard
        if(not opts.no_tb):
            current_step = int(step * T / n_step + t // n_step)
            if current_step % int(opts.log_step) == 0:
                log_to_tb_train(tb_logger, optimizer, model, baseline, total_cost, grad_norms, reward, 
                   exchange_history, reinforce_loss, baseline_loss, log_likelihood, initial_cost, current_step)
        
        pbar.update(1)
Example no. 29
0
 def eval_model_bat(batch):
     with torch.no_grad():
         cost, _ = model(move_to(batch, opts.device))
     return cost.data.cpu()
Example no. 30
0
def validate(problem, model, val_dataset, tb_logger, opts, _id = None):
    # Validate mode
    print('\nValidating...', flush=True)
    model.eval()
    
    init_value = []
    best_value = []
    improvement = []
    reward = []
    time_used = []
    
    for batch in tqdm(DataLoader(val_dataset, batch_size = opts.eval_batch_size), 
                        disable = opts.no_progress_bar or opts.val_size == opts.eval_batch_size, 
                        desc = 'validate', bar_format='{l_bar}{bar:20}{r_bar}{bar:-20b}'):
        
        #initial solutions
        initial_solution = move_to(
                problem.get_initial_solutions(opts.init_val_met, batch), opts.device)
        
        if problem.NAME == 'tsp':
            x = batch
            
        else:
            assert False, "Unsupported problem: {}".format(problem.NAME)
        
        x_input = move_to(x, opts.device) # batch_size, graph_size, 2
        batch = move_to(batch, opts.device) # batch_size, graph_size, 2
        
        initial_value = problem.get_costs(batch, initial_solution)
        init_value.append(initial_value)
        
        # run the model
        s_time = time.time()
        bv, improve, r, _  = rollout(problem, 
                                     model, 
                                     x_input,
                                     batch, 
                                     initial_solution,
                                     initial_value,
                                     opts,
                                     T=opts.T_max,
                                     do_sample = True)
        
       
        duration = time.time() - s_time
        time_used.append(duration)
        best_value.append(bv.clone())
        improvement.append(improve.clone())
        reward.append(r.clone())

    best_value = torch.cat(best_value,0)
    improvement = torch.cat(improvement,0)
    reward = torch.cat(reward,0)
    init_value = torch.cat(init_value,0).view(-1,1)
    time_used = torch.tensor(time_used)
    
    # log to screen
    log_to_screen(time_used, 
                  init_value, 
                  best_value, 
                  reward, 
                  improvement, 
                  batch_size = opts.eval_batch_size, 
                  dataset_size = len(val_dataset), 
                  T = opts.T_max)
    
    # log to tb
    if(not opts.no_tb):
        log_to_tb_val(tb_logger,
                      time_used, 
                      init_value, 
                      best_value, 
                      reward, 
                      improvement, 
                      batch_size = opts.eval_batch_size, 
                      dataset_size = len(val_dataset), 
                      T = opts.T_max,
                      epoch = _id)
    
    # save to file
    if _id is not None:
        torch.save(
        {
            'init_value': init_value,
            'best_value': best_value,
            'improvement': improvement,
            'reward': reward,
            'time_used': time_used,
        },
        os.path.join(opts.save_dir, 'validate-{}.pt'.format(_id)))