Example #1
    def forward(self, s_features, p_features, n_features, margin):
        dist_p = L1Loss()(s_features, p_features)
        dist_n = L1Loss()(s_features, n_features)

        triplet_loss = margin + dist_p - dist_n

        return triplet_loss if triplet_loss > 0 else triplet_loss * 0
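Since margin + dist_p - dist_n is a scalar here (L1Loss reduces with 'mean' by default), the conditional return above is just a hinge; a minimal equivalent sketch using torch.clamp (the shapes and the 0.2 margin are illustrative assumptions):

import torch
from torch.nn import L1Loss

s, p, n = (torch.randn(8, 128) for _ in range(3))
dist_p = L1Loss()(s, p)
dist_n = L1Loss()(s, n)
loss = torch.clamp(0.2 + dist_p - dist_n, min=0)  # same value and gradient as the if/else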
Example #2
def train(num_epochs, model, device, train_loader, val_loader, optimizer,
          lr_scheduler, prediction_dir, print_iter):
    criterion = MAELoss()
    criterion.to(device)
    model.to(device)
    for epoch in range(num_epochs):
        print(epoch)
        count = 0
        for i, datas in enumerate(train_loader):
            datas, labels = datas
            datas, labels = handler(datas, labels, device)
            for j in range(len(datas)):
                pred = model(datas[j])
                loss = criterion(pred, labels[j])

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                if count % print_iter == 0:
                    print('epoch : {} [{}/{}], loss : {}'.format(
                        epoch, count, len(train_loader), loss.item()))
                count += 1
        validation(model, device, val_loader, prediction_dir)
        save_model('{}'.format(epoch), model, optimizer, lr_scheduler)

        lr_scheduler.step()
Example #3
def get_correct_num(sample_features, positive_features, negative_features):
    correct_num = 0
    batch_size = len(sample_features)

    for i in range(batch_size):
        dist_p = L1Loss()(sample_features[i], positive_features[i])
        dist_n = L1Loss()(sample_features[i], negative_features[i])

        if dist_p < dist_n:
            correct_num += 1

    return correct_num
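A vectorized alternative sketch, assuming the three feature sets are stacked into (batch, dim) tensors rather than lists:

import torch

def get_correct_num_vectorized(s, p, n):
    # per-sample mean absolute distance, matching L1Loss's default 'mean' reduction
    dist_p = (s - p).abs().mean(dim=1)
    dist_n = (s - n).abs().mean(dim=1)
    return int((dist_p < dist_n).sum())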
Example #4
def main():
    args = parse_training_args("ESRGAN")
    epochs = args.epochs
    load_path = args.load
    init_path = args.init
    out_path = args.out
    cuda = args.cuda
    device = torch.device(
        'cuda' if torch.cuda.is_available() and cuda else 'cpu')

    g_net = DenseGenerator().to(device)
    g_criterion = PerceptualLoss(
        feature_extractor=TruncatedVgg(with_activation_layer=False),
        content_criterion=L1Loss(),
        adversarial_criterion=BCEWithLogitsLoss(),
    ).to(device)
    g_optimizer = Adam(params=filter(lambda p: p.requires_grad,
                                     g_net.parameters()),
                       lr=1e-4)
    g_scheduler = ReduceLROnPlateau(optimizer=g_optimizer,
                                    factor=0.5,
                                    patience=3,
                                    verbose=True)

    d_net = Discriminator().to(device)
    d_criterion = DiscriminatorLoss(criterion=BCEWithLogitsLoss()).to(device)
    d_optimizer = Adam(params=filter(lambda p: p.requires_grad,
                                     d_net.parameters()),
                       lr=1e-4)
    d_scheduler = ReduceLROnPlateau(optimizer=d_optimizer,
                                    factor=0.5,
                                    patience=3,
                                    verbose=True)

    converter = Converter()
    dataset = ImageNetDataset(json_path='data/train.json', converter=converter)
    data_loader = DataLoader(dataset=dataset,
                             batch_size=4,
                             num_workers=4,
                             pin_memory=True,
                             shuffle=True)

    trainer = ReGANTrainer(g_net=g_net,
                           g_criterion=g_criterion,
                           g_optimizer=g_optimizer,
                           g_scheduler=g_scheduler,
                           d_net=d_net,
                           d_criterion=d_criterion,
                           d_optimizer=d_optimizer,
                           d_scheduler=d_scheduler,
                           data_loader=data_loader,
                           device=device)

    if init_path:
        trainer.load_pretrained_generator(init_path)

    if load_path:
        trainer.load(load_path)

    trainer.train(max_epochs=epochs, save_path=out_path)
Example #5
    def test_edges(self, z, batch):
        r"""Given latent variables :obj:`z`, positive edges
        :obj:`pos_edge_index` and negative edges :obj:`neg_edge_index`,
        computes the L1loss of the predicted edges vs the real edges.
        Args:
            z (Tensor): The latent space :math:`\mathbf{Z}`.
            pos_edge_index (LongTensor): The positive edges to evaluate
                against.
            neg_edge_index (LongTensor): The negative edges to evaluate
                against.
        """
        # Do not include self-loops in negative samples
        pos_edge_index, _ = remove_self_loops(batch.edge_index)
        pos_edge_index, _ = add_self_loops(batch.edge_index)

        neg_edge_index = negative_sampling(batch.edge_index, z.size(0))

        neg_y = z.new_zeros(neg_edge_index.size(1))
        y = torch.cat([batch.edge_attr, neg_y], dim=0)

        pos_pred = self.edge_decoder.get_means(z, batch.edge_index)
        neg_pred = self.edge_decoder.get_means(z, neg_edge_index)

        pred = torch.cat([pos_pred, neg_pred], dim=0)

        y, pred = y.detach().cpu(), pred.detach().cpu()

        # L1Loss is a module: instantiate it, then call it on tensors
        return L1Loss()(pred, y)
Example #6
def create_loss(args):
    name = args.loss.lower()

    if name == 'l1':
        from torch.nn import L1Loss
        loss = L1Loss(reduction='sum')

    elif name == 'l2':
        from torch.nn import MSELoss
        loss = MSELoss(reduction='sum')

    elif name == 'bce':
        from torch.nn import BCELoss
        loss = BCELoss(reduction='sum')

    elif name == 'diceloss':
        loss = DiceLoss()
    else:
        raise ValueError('loss must be one of l1, l2, bce, diceloss')

    return loss


# def dice_loss(probs,target):
#     """
#     input is a torch variable of size BatchxnclassesxHxWxD representing log probabilities for each class
#     target is a 1-hot representation of the ground truth and should have the same size as the input
#     """
#     eps = 1e-6
#     dims = (2,3,4)

#     intersection = th.sum(probs*target,dims)
#     cardinality = th.sum(probs+target,dims)
#     dice_score = 2. * intersection/(cardinality+eps)
#     return th.mean(1-dice_score)
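A quick usage sketch for create_loss; args only needs a .loss attribute, so a bare Namespace works (DiceLoss is assumed to be a project-local class):

from argparse import Namespace

criterion = create_loss(Namespace(loss='l1'))  # -> torch.nn.L1Loss(reduction='sum')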
Example #7
def L1(output, label, reduction="mean", scaling_factor=1):
    """
    Calculate the mean square error between the output variable from the network and the target

    Parameters
    ----------
    output : torch.Tensor
        The output generated usually by the network
    target : torch.Tensor
        The label for the corresponding Tensor for which the output was generated
    reduction : string, optional
        DESCRIPTION. The default is 'mean'.
    scaling_factor : integer, optional
        The scaling factor to multiply the label with

    Returns
    -------
    loss : torch.Tensor
        Computed Mean Squared Error loss for the output and label

    """
    scaling_factor = torch.as_tensor(scaling_factor)
    label = label.float()
    label = label * scaling_factor
    loss_fn = L1Loss(reduction=reduction)
    iflat = output.contiguous().view(-1)
    tflat = label.contiguous().view(-1)
    loss = loss_fn(iflat, tflat)
    return loss
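A minimal call sketch, assuming the L1 wrapper above and torch are in scope (shapes and the 255 factor are illustrative assumptions):

import torch

pred = torch.randn(4, 1, 8, 8)
target = torch.randint(0, 2, (4, 1, 8, 8))
loss = L1(pred, target, reduction='mean', scaling_factor=255)  # scales the label by 255 before the L1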
Example #8
    def __init__(self,
                 hidden_size=100,
                 num_layers=1,
                 num_roads=192,
                 prev_timesteps=6,
                 prediction_steps=6):
        super().__init__(name="Sequence2Sequence")

        self.prev_timesteps = prev_timesteps
        self.num_roads = num_roads
        self.prediction_steps = prediction_steps

        self.num_layers = num_layers
        self.hidden_size = hidden_size

        self.encoder = GRU(num_roads,
                           hidden_size,
                           batch_first=True,
                           num_layers=num_layers)

        self.decoder = GRU(num_roads,
                           hidden_size,
                           batch_first=True,
                           num_layers=num_layers)
        #self.activation = Sig()
        self.decoder_l1 = Linear(hidden_size, num_roads)

        self.criterion = L1Loss()
Example #9
def energy_mad(net, X, y):
    l1_loss = L1Loss(reduction="none")
    energy_pred, _ = net.forward(X)
    device = energy_pred.device
    if not hasattr(X, "scalings"):
        X = X.dataset
    num_atoms = torch.FloatTensor(np.concatenate(y[1::3])).reshape(
        -1, 1).to(device)
    energy_target = torch.tensor(np.concatenate(y[0::3])).to(device).reshape(
        -1, 1)
    if X.scaling_scheme is not "log":
        sd_scaling = X.scalings[0]
        mean_scaling = X.scalings[1]
        raw_preds = (energy_pred * sd_scaling) + mean_scaling
        raw_preds_per_atom = torch.div(raw_preds, num_atoms)
        raw_targets = (energy_target * sd_scaling) + mean_scaling
        target_per_atom = torch.div(raw_targets, num_atoms)
        energy_loss = l1_loss(raw_preds_per_atom, target_per_atom)
        energy_mad_loss = torch.median(energy_loss)
    else:
        raw_preds = torch.exp(energy_pred) - 1
        raw_preds_per_atom = torch.div(raw_preds, num_atoms)
        raw_targets = torch.exp(energy_target) - 1
        target_per_atom = torch.div(raw_targets, num_atoms)
        energy_loss = l1_loss(raw_preds_per_atom, target_per_atom)
        energy_mad_loss = torch.median(energy_loss)
    return energy_mad_loss
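The original guard compared strings with `is not`, which tests object identity rather than equality; it is rewritten above with `!=`. A quick demonstration of why identity checks on strings are unreliable:

a = "log"
b = "".join(["l", "o", "g"])  # equal value, but a distinct object
print(a == b)  # True
print(a is b)  # False: string interning is an implementation detail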
Example #10
    def test_combined_loss(self):
        nodes = [
            Node(Attribute(Tensor([-4., -8.]))),
            Node(Attribute(Tensor([1., 5.]))),
            Node(Attribute(Tensor([4., 4.]))),
            Node(Attribute(Tensor([0., 1., 5.])))
        ]
        edges = [
            Edge(nodes[0], nodes[1], Attribute(Tensor([1., 2., 3.]))),
            Edge(nodes[1], nodes[2], Attribute(Tensor([1., 2.]))),
            Edge(nodes[2], nodes[1], Attribute(Tensor([5.]))),
            Edge(nodes[1], nodes[3], Attribute(Tensor([1., 2., 3., 4.])))
        ]
        u = Attribute(
            Tensor([[1., 2., 4., 3.], [8., 3., 0., 3.], [1., 7., 5., 3.]]))
        g1 = Graph(nodes, edges, attr=u)

        g2 = deepcopy(g1)
        g2.ordered_nodes[0].attr.val = Tensor([-4., -8.1])
        g2.ordered_nodes[1].attr.val = Tensor([2., 6.])
        g2.ordered_nodes[3].attr.val = Tensor([1., 1.5, 5.])
        g2.ordered_edges[0].attr.val = Tensor([2., 3., 4.])
        g2.ordered_edges[1].attr.val = Tensor([5., 10.])
        g2.attr.val = Tensor([[2., 2., 4., 3.], [100, 3., 1., 3.],
                              [1., 14., 5., 3.]])

        loss = GraphLoss(e_fn=MSELoss(), v_fn=L1Loss(), u_fn=MSELoss())
        loss_val = loss(g1, g2).detach().numpy()
        e_loss = (1. + (4**2 + 8**2) / 2) / 4
        v_loss = (.1 / 2 + 2. / 2 + (1 + .5) / 3) / 4
        u_loss = (1 + (8 - 100)**2 + 1 + 7**2) / 12 / 1
        target_loss_val = v_loss + e_loss + u_loss

        self.assertTrue(np.isclose(loss_val, target_loss_val))
Example #11
def train(param, device):
    model = Model(param)
    state_dict = torch.load(CKPT)
    new_dict = model.state_dict().copy()
    for k, v in state_dict.items():
        if k.startswith('t_encoder'):
            new_dict[k] = state_dict[k]
    model.load_state_dict(new_dict)
    for parameter in model.t_encoder.parameters():
        parameter.requires_grad = False
    optimizer = AdamW(model.parameters(), lr=param.lr, eps=1e-8)
    update_steps = MAX_EPOCH * len(train_loader)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=update_steps)
    loss_fn = L1Loss()
    trainer = create_trainer(model, optimizer, scheduler, loss_fn, MAX_GRAD_NORM, device)
    dev_evaluator = create_evaluator(model, val_metrics, device)
    trainer.add_event_handler(Events.ITERATION_COMPLETED(every=10), log_training_loss)
    trainer.add_event_handler(Events.EPOCH_COMPLETED, log_results, *[dev_evaluator, dev_loader, 'Dev'])
    es_handler = EarlyStopping(patience=PATIENCE, score_function=score_fn, trainer=trainer)
    dev_evaluator.add_event_handler(Events.COMPLETED, es_handler)
    ckpt_handler = ModelCheckpoint(SAVE_PATH, f'lr_{param.lr}', score_function=score_fn,
                                   score_name='score', require_empty=True)
    dev_evaluator.add_event_handler(Events.COMPLETED, ckpt_handler, {SAVE_PATH.split("/")[-1]: model})
    print(f'Start running {SAVE_PATH.split("/")[-1]} at device: {DEVICE}\tlr: {param.lr}')
    trainer.run(train_loader, max_epochs=MAX_EPOCH)
Example #12
def model_loss(model, dataset, train=False, optimizer=None):
    performance = L1Loss()
    score_metric = R2Score()

    avg_loss = 0
    avg_score = 0
    avg_mse = 0
    count = 0

    for input, output in iter(dataset):
        predictions = model.feed(input)

        loss = performance(predictions, output)

        score_metric.update([predictions, output])
        score = score_metric.compute()

        mse = mean_squared_error(output.cpu(),
                                 predictions.cpu().detach().numpy())

        if train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        avg_loss += loss.item()
        avg_score += score
        count += 1
        avg_mse += mse

    return avg_loss / count, avg_score / count, avg_mse / count
Example #13
def model_loss(model, dataset, train=False, optimizer=None):
    # cycle through batches and get the average L1 loss
    performance = L1Loss()
    score_metric = R2Score()

    avg_loss = 0
    avg_score = 0
    count = 0
    for input, output in iter(dataset):
        # get the model predictions for the training dataset
        predictions = model.feed(input)
        # get the model loss
        loss = performance(predictions, output)
        # get the model R^2 score
        score_metric.update([predictions, output])
        score = score_metric.compute()

        if train:
            # clear any gradients so they don't accumulate
            optimizer.zero_grad()
            # compute gradients for our optimizer
            loss.backward()
            # use the optimizer to update the model parameters based on the gradients
            optimizer.step()

        avg_loss += loss.item()
        avg_score += score
        count += 1
    return avg_loss / count, avg_score / count
Example #14
    def __init__(self):
        self.log_dir = settings.log_dir
        self.model_dir = settings.model_dir
        ensure_dir(settings.log_dir)
        ensure_dir(settings.model_dir)
        logger.info('set log dir as %s' % settings.log_dir)
        logger.info('set model dir as %s' % settings.model_dir)

        self.net = TFN().cuda()
        self.crit = L1Loss().cuda()
        self.ssim = SSIM().cuda()
        self.msssim = MSSSIM().cuda()

        self.step = 0
        self.perceptual_weight = settings.perceptual_weight
        self.loss_weight = settings.loss_weight
        self.total_variation_weight = settings.total_variation_weight
        self.ssim_loss_weight = settings.ssim_loss_weight
        self.save_steps = settings.save_steps
        self.num_workers = settings.num_workers
        self.batch_size = settings.batch_size
        self.writers = {}
        self.dataloaders = {}

        self.opt = Adam(self.net.parameters(), lr=settings.lr)
        self.sche = MultiStepLR(
            self.opt,
            milestones=[11000, 70000, 90000, 110000, 130000],
            gamma=0.1)
Example #15
def model_loss(model, dataset, train=False, optimizer=None):
    performance = L1Loss()
    score_metric = R2Score()

    avg_loss = 0
    avg_score = 0
    count = 0

    for input, output in iter(dataset):
        prediction = model.feed(input)
        loss = performance(prediction, output)
        score_metric.update([prediction, output])
        score = score_metric.compute()

        if train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        avg_loss += loss.item()
        avg_score += score
        count += 1

    return avg_loss / count, avg_score / count
Example #16
    def _define_loss(self):
        loss_functions = {
            "Generator": BCELoss(),
            "Adversary": BCELoss(),
            "L1": L1Loss()
        }
        return loss_functions
Example #17
    def __init__(self, hparams: AttributeDict):
        super(LitModelLongitudinal, self).__init__()
        self.hparams = hparams
        self.model = UNet(
            in_channels=hparams.in_channels,
            out_classes=1,
            dimensions=3,
            padding_mode="zeros",
            activation=hparams.activation,
            conv_num_in_layer=[1, 2, 3, 3, 3],
            residual=False,
            out_channels_first_layer=16,
            kernel_size=5,
            normalization=hparams.normalization,
            downsampling_type="max",
            use_sigmoid=False,
            use_bias=True,
        )
        self.sigmoid = Sigmoid()
        if self.hparams.loss == "l2":
            self.criterion = MSELoss()
        elif self.hparams.loss == "l1":
            self.criterion = L1Loss()
        elif self.hparams.loss == "smoothl1":
            self.criterion = SmoothL1Loss()
        self.train_log_step = random.randint(1, 500)
        self.val_log_step = random.randint(1, 100)
        self.clip_min = self.hparams.clip_min
        self.clip_max = self.hparams.clip_max
Example #18
def create_loss(name, weight, ignore_index=None, pos_weight=None):
    if name == 'BCEWithLogitsLoss':
        return nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    elif name == 'BCEDiceLoss':
        return BCEDiceLoss(alpha=1, beta=1)
    elif name == 'CrossEntropyLoss':
        if ignore_index is None:
            ignore_index = -100  # use the default 'ignore_index' as defined in the CrossEntropyLoss
        return nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_index)
    elif name == 'WeightedCrossEntropyLoss':
        if ignore_index is None:
            ignore_index = -100  # use the default 'ignore_index' as defined in the CrossEntropyLoss
        return WeightedCrossEntropyLoss(ignore_index=ignore_index)
    elif name == 'PixelWiseCrossEntropyLoss':
        return PixelWiseCrossEntropyLoss(class_weights=weight,
                                         ignore_index=ignore_index)
    elif name == 'GeneralizedDiceLoss':
        return GeneralizedDiceLoss(sigmoid_normalization=False)
    elif name == 'DiceLoss':
        return DiceLoss(weight=weight, sigmoid_normalization=False)
    elif name == 'TagsAngularLoss':
        return TagsAngularLoss()
    elif name == 'MSELoss':
        return MSELoss()
    elif name == 'SmoothL1Loss':
        return SmoothL1Loss()
    elif name == 'L1Loss':
        return L1Loss()
    elif name == 'WeightedSmoothL1Loss':
        return WeightedSmoothL1Loss()
    else:
        raise RuntimeError(
            f"Unsupported loss function: '{name}'. Supported losses: {SUPPORTED_LOSSES}"
        )
Example #19
    def __init__(self,
                 optimizer=Adam,
                 optim_args={},
                 L2_loss=MSELoss(),
                 L1_loss=L1Loss()):
        """Constrcutor for solver class.

        Parameters
        ----------
        optimizer : torch.optim
            Optimizer to use (default: Adam)
        optim_args : dict
            Arguments for optimizer which are merged with default_adam_args
        L2_loss : torch.nn Loss
            L2 loss function to use (default: MSELoss)
        L1_loss : torch.nn Loss
            L1 loss function to use (default: L1Loss), only used for comparison
        """

        optim_args_merged = self.default_adam_args.copy()
        optim_args_merged.update(optim_args)
        self.optim_args = optim_args_merged
        self.optimizer = optimizer
        self.L1_loss = L1_loss
        self.L2_loss = L2_loss

        self._reset_histories()
Example #20
    def __init__(self, env):
        self.env = env
        self.args = env.args
        # initialization
        self.ConvBoundarySeg = FPNSeg()
        self.ConvBoundaryAgent = FPNAgent(self.args.device)
        self.ConvBoundarySeg.to(device=self.args.device)
        self.ConvBoundaryAgent.to(device=self.args.device)
        # tensorboard
        if not self.args.test:
            self.writer = SummaryWriter('./records/tensorboard')
        # ==================== optimizers =======================
        self.optimizer_seg = optim.Adam(list(self.ConvBoundarySeg.parameters()), lr=self.args.lr_rate, weight_decay=self.args.weight_decay)
        self.optimizer_agent = optim.Adam(list(self.ConvBoundaryAgent.parameters()), lr=self.args.lr_rate, weight_decay=self.args.weight_decay)
        # ===================== init losses =======================
        criterion_l1 = L1Loss(reduction='mean')
        criterion_bce = BCEWithLogitsLoss()
        criterion_ce = CrossEntropyLoss()
        self.criterions = {"ce": criterion_ce, "l1": criterion_l1, "bce": criterion_bce, "cos": cos_loss()}
        # ===================== load data ========================
        self.dataset_train = DatasetConvBoundary(self.args, mode='train')
        dataset_valid = DatasetConvBoundary(self.args, mode="valid")
        self.dataloader_train = DataLoader(self.dataset_train, batch_size=1, shuffle=True, collate_fn=self.ConvBoundary_collate)
        self.dataloader_valid = DataLoader(dataset_valid, batch_size=1, shuffle=False, collate_fn=self.ConvBoundary_collate)
        print("Dataset modes -> Train: {} | Valid: {}\n".format(len(self.dataset_train), len(dataset_valid)))
        # ================ bookkeeping: best score and checkpoints ==============
        self.best_f1 = 0
        self.load_checkpoints()
        self._freeze()
Example #21
    def loss_fn(self, output_distr, targets):
        output = output_distr
        l_depth = L1Loss()(output, targets)
        l_ssim = t_clamp((1 - ssim(output, targets, val_range=80.0)) * 0.5,
                         0, 1)
        l_grad = image_gradient_loss(output, targets)
        return (1.0 * l_ssim) + (1.0 * l_grad) + (0.1 * l_depth)
Example #22
def train_model(train_dl, model):
    # define the optimization
    print("training begin")
    criterion = L1Loss()  # check other loss functions as well
    # optimizer = Adam(model.parameters(), lr=0.001, betas=(0.09, 0.999), eps=1e-08, weight_decay=0)  # amsgrad=False
    optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9)  # check if other optimizers work
    # enumerate epochs
    for epoch in range(300):
        # enumerate mini batches
        for i, (inputs, targets) in enumerate(train_dl):
            # clear the gradients
            optimizer.zero_grad()
            # compute the model output
            yhat = model(inputs)
            # calculate loss
            loss = criterion(yhat, targets)
            # credit assignment
            loss.backward()
            # update model weights
            optimizer.step()
        print('epoch {}, loss {}'.format(epoch, loss.item()))
        print(evaluate_model(test_dl, model))  # note: test_dl is assumed to be a global here
Example #23
File: 1116552.py  Project: palakbh18/NLP
def model_loss(model, dataset, train=False, optimizer=None):
    # Cycle through the batches and get the average L1 loss
    performance = L1Loss()
    score_metric = R2Score()
    avg_loss = 0
    avg_score = 0
    count = 0
    for input, output in iter(dataset):
        # Get the model's predictions for the training dataset
        predictions = model.feed(input)
        # Get the model's loss
        loss = performance(predictions, output)
        # Get the model's R^2 score
        score_metric.update([predictions, output])
        score = score_metric.compute()
        if train:
            # Clear any gradients so they don't accumulate
            optimizer.zero_grad()
            # Compute the gradients for our optimizer
            loss.backward()
            # Use the optimizer to update the model's parameters based on the gradients
            optimizer.step()
        # Store the loss and update the counter
        avg_loss += loss.item()
        avg_score += score
        count += 1
    return avg_loss / count, avg_score / count
Example #24
    def __init__(self, cfg=None, mode="train"):
        super().__init__(cfg, mode)

        if mode == "train":
            self.loss = L1Loss()
            self.pixel_loss_param = cfg['CNN'].getfloat('PixelLossParam',
                                                        fallback=1)
Example #25
def model_loss(model, dataset, train=False, optimizer=None):
    performance = L1Loss()
    score_metric = R2Score()
    avg_loss = 0
    avg_score = 0
    count = 0

    for input, output in iter(dataset):
        predictions = model.feed(input)

        loss = performance(predictions, output)

        score_metric.update([predictions, output])
        score = score_metric.compute()

        if train:
            # clear any gradients so they don't accumulate
            optimizer.zero_grad()

            loss.backward()

            # use the optimizer to update the model's parameters based on the gradients
            optimizer.step()

        avg_loss += loss.item()
        avg_score += score
        count += 1

    return avg_loss / count, avg_score / count
Example #26
def update_weights(model, target_model, optimizer, replay_buffer, config):
    batch = ray.get(
        replay_buffer.sample_batch.remote(
            config.num_unroll_steps,
            config.td_steps,
            model=target_model if config.use_target_model else None,
            config=config))
    obs_batch, action_batch, target_reward, target_value, target_policy, indices, weights = batch

    obs_batch = obs_batch.to(config.device)
    action_batch = action_batch.to(config.device).unsqueeze(-1)
    target_reward = target_reward.to(config.device)
    target_value = target_value.to(config.device)
    target_policy = target_policy.to(config.device)
    weights = weights.to(config.device)

    value, _, policy_logits, hidden_state = model.initial_inference(obs_batch)
    predicted_values, predicted_rewards = value, None

    value_loss = config.scalar_loss(value.squeeze(-1), target_value[:, 0])
    new_priority = L1Loss(reduction='none')(
        value.squeeze(-1), target_value[:, 0]).data.cpu().numpy() + 1e-5
    policy_loss = -(torch.log_softmax(policy_logits, dim=1) *
                    target_policy[:, 0]).sum(1)
    reward_loss = torch.zeros(config.batch_size, device=config.device)

    gradient_scale = 1 / config.num_unroll_steps
    for step_i in range(config.num_unroll_steps):
        value, reward, policy_logits, hidden_state = model.recurrent_inference(
            hidden_state, action_batch[:, step_i])
        policy_loss += -(torch.log_softmax(policy_logits, dim=1) *
                         target_policy[:, step_i + 1]).sum(1)
        value_loss += config.scalar_value_loss(value.squeeze(-1),
                                               target_value[:, step_i + 1])
        reward_loss += config.scalar_reward_loss(reward.squeeze(-1),
                                                 target_reward[:, step_i])
        hidden_state.register_hook(lambda grad: grad * 0.5)

        # collected for logging
        predicted_values = torch.cat((predicted_values, value))
        predicted_rewards = reward if predicted_rewards is None else torch.cat(
            (predicted_rewards, reward))

    # optimize
    loss = (policy_loss + config.value_loss_coeff * value_loss + reward_loss)
    weighted_loss = (weights * loss).mean()
    weighted_loss.register_hook(lambda grad: grad * gradient_scale)
    loss = loss.mean()

    optimizer.zero_grad()
    weighted_loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), config.max_grad_norm)
    optimizer.step()

    # update priorities
    replay_buffer.update_priorities.remote(indices, new_priority)

    return weighted_loss.item(), loss.item(), policy_loss.mean().item(), reward_loss.mean().item(), \
           value_loss.mean().item(), target_reward, target_value, predicted_rewards, predicted_values, weights, indices
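The register_hook calls above rescale gradients during backprop (0.5 on the hidden state at each unroll step, gradient_scale on the weighted loss). A minimal standalone sketch of the mechanism:

import torch

x = torch.ones(3, requires_grad=True)
y = (2 * x).sum()
x.register_hook(lambda grad: grad * 0.5)  # halve the gradient flowing into x
y.backward()
print(x.grad)  # tensor([1., 1., 1.]) rather than tensor([2., 2., 2.])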
Example #27
def get_loss_criterion(config):
    """
    Returns the loss function based on provided configuration
    :param config: (dict) a top level configuration object containing the 'loss' key
    :return: an instance of the loss function
    """
    assert 'loss' in config, 'Could not find loss function configuration'
    loss_config = config['loss']
    name = loss_config['name']

    ignore_index = loss_config.get('ignore_index', None)
    weight = loss_config.get('weight', None)

    if weight is not None:
        # convert to cuda tensor if necessary
        weight = torch.tensor(weight).to(config['device'])

    if name == 'BCEWithLogitsLoss':
        skip_last_target = loss_config.get('skip_last_target', False)
        if ignore_index is None and not skip_last_target:
            return nn.BCEWithLogitsLoss()
        else:
            return BCELossWrapper(nn.BCEWithLogitsLoss(),
                                  ignore_index=ignore_index,
                                  skip_last_target=skip_last_target)
    elif name == 'CrossEntropyLoss':
        if ignore_index is None:
            ignore_index = -100  # use the default 'ignore_index' as defined in the CrossEntropyLoss
        return nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_index)
    elif name == 'WeightedCrossEntropyLoss':
        if ignore_index is None:
            ignore_index = -100  # use the default 'ignore_index' as defined in the CrossEntropyLoss
        return WeightedCrossEntropyLoss(weight=weight,
                                        ignore_index=ignore_index)
    elif name == 'PixelWiseCrossEntropyLoss':
        return PixelWiseCrossEntropyLoss(class_weights=weight,
                                         ignore_index=ignore_index)
    elif name == 'GeneralizedDiceLoss':
        return GeneralizedDiceLoss(weight=weight, ignore_index=ignore_index)
    elif name == 'DiceLoss':
        sigmoid_normalization = loss_config.get('sigmoid_normalization', True)
        skip_last_target = loss_config.get('skip_last_target', False)
        return DiceLoss(weight=weight,
                        ignore_index=ignore_index,
                        sigmoid_normalization=sigmoid_normalization,
                        skip_last_target=skip_last_target)
    elif name == 'TagsAngularLoss':
        tags_coefficients = loss_config['tags_coefficients']
        return TagsAngularLoss(tags_coefficients)
    elif name == 'MSEWithLogitsLoss':
        return MSEWithLogitsLoss()
    elif name == 'MSELoss':
        return MSELoss()
    elif name == 'SmoothL1Loss':
        return SmoothL1Loss()
    elif name == 'L1Loss':
        return L1Loss()
    else:
        return None
Example #28
def get_loss_criterion(config):
    """
    Returns the loss function based on provided configuration
    :param config: (dict) a top level configuration object containing the 'loss' key
    :return: an instance of the loss function
    """
    assert 'loss' in config, 'Could not find loss function configuration'
    loss_config = config['loss']
    name = loss_config['name']

    ignore_index = loss_config.get('ignore_index', None)
    weight = loss_config.get('weight', None)

    if weight is not None:
        # convert to cuda tensor if necessary
        weight = torch.tensor(weight).to(config['device'])

    if name == 'BCEWithLogitsLoss':
        skip_last_target = loss_config.get('skip_last_target', False)
        if ignore_index is None and not skip_last_target:
            return nn.BCEWithLogitsLoss()
        else:
            return BCELossWrapper(nn.BCEWithLogitsLoss(), ignore_index=ignore_index, skip_last_target=skip_last_target)
    elif name == 'CrossEntropyLoss':
        if ignore_index is None:
            ignore_index = -100  # use the default 'ignore_index' as defined in the CrossEntropyLoss
        return nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_index)
    elif name == 'WeightedCrossEntropyLoss':
        if ignore_index is None:
            ignore_index = -100  # use the default 'ignore_index' as defined in the CrossEntropyLoss
        return WeightedCrossEntropyLoss(weight=weight, ignore_index=ignore_index)
    elif name == 'PixelWiseCrossEntropyLoss':
        return PixelWiseCrossEntropyLoss(class_weights=weight, ignore_index=ignore_index)
    elif name == 'GeneralizedDiceLoss':
        return GeneralizedDiceLoss(weight=weight, ignore_index=ignore_index)
    elif name == 'DiceLoss':
        sigmoid_normalization = loss_config.get('sigmoid_normalization', True)
        skip_last_target = loss_config.get('skip_last_target', False)
        return DiceLoss(weight=weight, ignore_index=ignore_index, sigmoid_normalization=sigmoid_normalization,
                        skip_last_target=skip_last_target)
    elif name == 'TagsAngularLoss':
        tags_coefficients = loss_config['tags_coefficients']
        return TagsAngularLoss(tags_coefficients)
    elif name == 'MSEWithLogitsLoss':
        return MSEWithLogitsLoss()
    elif name == 'MSELoss':
        return MSELoss()
    elif name == 'SmoothL1Loss':
        return SmoothL1Loss()
    elif name == 'L1Loss':
        return L1Loss()
    elif name == 'ContrastiveLoss':
        return ContrastiveLoss(loss_config['delta_var'], loss_config['delta_dist'], loss_config['norm'],
                               loss_config['alpha'], loss_config['beta'], loss_config['gamma'])
    elif name == 'WeightedSmoothL1Loss':
        return WeightedSmoothL1Loss(threshold=loss_config['threshold'], initial_weight=loss_config['initial_weight'],
                                    apply_below_threshold=loss_config.get('apply_below_threshold', True))
    else:
        raise RuntimeError(f"Unsupported loss function: '{name}'. Supported losses: {SUPPORTED_LOSSES}")
Example #29
    def initialize_criterion(self):
        if self.model_level == 'low':
            if self.is_control:
                self.criterion = L1Loss(reduction='sum')
            else:
                self.criterion = BCEWithLogitsLoss(reduction='sum')
        else:
            self.criterion = CrossEntropyLoss()
Example #30
 def __init__(self, mode: str = "l1", reduction: str = "mean"):
     super().__init__()
     mode = mode.strip().lower()
     self.loss = {
         "l1": L1Loss(reduction=reduction),
         "l2": MSELoss(reduction=reduction),
         "smooth": SmoothL1Loss(reduction=reduction),
     }[mode]
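A self-contained sketch of the same dictionary-dispatch pattern; the wrapper name RegressionLoss is hypothetical:

import torch
from torch.nn import Module, L1Loss, MSELoss, SmoothL1Loss

class RegressionLoss(Module):
    # picks one of three regression criteria by a string key
    def __init__(self, mode: str = "l1", reduction: str = "mean"):
        super().__init__()
        self.loss = {
            "l1": L1Loss(reduction=reduction),
            "l2": MSELoss(reduction=reduction),
            "smooth": SmoothL1Loss(reduction=reduction),
        }[mode.strip().lower()]

    def forward(self, pred, target):
        return self.loss(pred, target)

criterion = RegressionLoss("smooth")
print(criterion(torch.randn(4), torch.randn(4)))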