Example #1
File: train.py Project: aimeng100/pyGAT
def train(epoch):
    t = time.time()
    model.train()
    optimizer.zero_grad()
    output = model(features, adj)
    loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()

    if not args.fastmode:
        # Evaluate validation set performance separately;
        # model.eval() deactivates dropout for the validation run.
        model.eval()
        output = model(features, adj)

    loss_val = F.nll_loss(output[idx_val], labels[idx_val])
    acc_val = accuracy(output[idx_val], labels[idx_val])
    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'loss_val: {:.4f}'.format(loss_val.item()),
          'acc_val: {:.4f}'.format(acc_val.item()),
          'time: {:.4f}s'.format(time.time() - t))

    return loss_val.item()
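
Note: the accuracy helper is not shown on this page. A minimal sketch consistent with how it is called above (log-probability outputs, integer labels); the actual pyGAT utility may differ:

def accuracy(output, labels):
    # output: (N, C) log-probabilities, labels: (N,) class indices
    preds = output.max(1)[1].type_as(labels)
    correct = preds.eq(labels).double().sum()
    return correct / len(labels)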
Example #2
    def forward(self, y_pred, y_true):
        torch.nn.modules.loss._assert_no_grad(y_true)

        y_pred_log = torch.log(y_pred)
        start_loss = F.nll_loss(y_pred_log[:, 0, :], y_true[:, 0])
        end_loss = F.nll_loss(y_pred_log[:, 1, :], y_true[:, 1])
        return start_loss + end_loss
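
Note: torch.nn.modules.loss._assert_no_grad is a private helper that was removed in later PyTorch releases. An equivalent inline guard, assuming y_true is a tensor, would be:

        assert not y_true.requires_grad, "target values should not require gradients"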
Example #3
def train(epoch, model):
    # The final fully connected layer uses a learning rate 10x that of the earlier layers
    LEARNING_RATE = lr / math.pow((1 + 10 * (epoch - 1) / epochs), 0.75)
    print("learning rate:", LEARNING_RATE)
    optimizer_fea = torch.optim.SGD([
        {'params': model.sharedNet.parameters()},
        {'params': model.cls_fc.parameters(), 'lr': LEARNING_RATE},
    ], lr=LEARNING_RATE / 10, momentum=momentum, weight_decay=l2_decay)
    optimizer_critic = torch.optim.SGD([
        {'params': model.domain_fc.parameters(), 'lr': LEARNING_RATE}
    ], lr=LEARNING_RATE, momentum=momentum, weight_decay=l2_decay)

    data_source_iter = iter(source_loader)
    data_target_iter = iter(target_train_loader)
    dlabel_src = Variable(torch.ones(batch_size).long().cuda())
    dlabel_tgt = Variable(torch.zeros(batch_size).long().cuda())
    i = 1
    while i <= len_source_loader:
        model.train()

        source_data, source_label = next(data_source_iter)
        if cuda:
            source_data, source_label = source_data.cuda(), source_label.cuda()
        source_data, source_label = Variable(source_data), Variable(source_label)
        clabel_src, dlabel_pred_src = model(source_data)
        label_loss = F.nll_loss(F.log_softmax(clabel_src, dim=1), source_label)
        critic_loss_src = F.nll_loss(F.log_softmax(dlabel_pred_src, dim=1), dlabel_src)
        confusion_loss_src = 0.5 * (F.nll_loss(F.log_softmax(dlabel_pred_src, dim=1), dlabel_src) + F.nll_loss(F.log_softmax(dlabel_pred_src, dim=1), dlabel_tgt))

        target_data, target_label = next(data_target_iter)
        if i % len_target_loader == 0:
            data_target_iter = iter(target_train_loader)
        if cuda:
            target_data, target_label = target_data.cuda(), target_label.cuda()
        target_data = Variable(target_data)
        clabel_tgt, dlabel_pred_tgt = model(target_data)
        critic_loss_tgt = F.nll_loss(F.log_softmax(dlabel_pred_tgt, dim=1), dlabel_tgt)
        confusion_loss_tgt = 0.5 * (F.nll_loss(F.log_softmax(dlabel_pred_tgt, dim=1), dlabel_src) + F.nll_loss(
            F.log_softmax(dlabel_pred_tgt, dim=1), dlabel_tgt))

        confusion_loss_total = (confusion_loss_src + confusion_loss_tgt) / 2
        fea_loss_total = confusion_loss_total + label_loss
        critic_loss_total = (critic_loss_src + critic_loss_tgt) / 2

        optimizer_fea.zero_grad()
        fea_loss_total.backward(retain_graph=True)
        optimizer_fea.step()
        optimizer_fea.zero_grad()
        optimizer_critic.zero_grad()
        critic_loss_total.backward()
        optimizer_critic.step()

        if i % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tconfusion_Loss: {:.6f}\tlabel_Loss: {:.6f}\tdomain_Loss: {:.6f}'.format(
                epoch, i * len(source_data), len_source_dataset,
                100. * i / len_source_loader, confusion_loss_total.item(), label_loss.item(), critic_loss_total.item()))
        i = i + 1
Example #4
    def get_loss(cls, start_log_probs, end_log_probs, starts, ends):
        """
        Get the loss, $-\log P(s|p,q)P(e|p,q)$.
        The start and end labels are expected to be in span format,
        so that text[start:end] is the answer.
        """

        # Subtracts 1 from the end points, to get the exact indices, not 1
        # after the end.
        loss = nll_loss(start_log_probs, starts) +\
            nll_loss(end_log_probs, ends-1)
        return loss
Example #5
  def train(self, epoch):
    """
    Train one epoch of this model by iterating through mini batches. An epoch
    ends after one pass through the training set, or if the number of mini
    batches exceeds the parameter "batches_in_epoch".
    """

    self.logger.info("epoch: %s", epoch)

    t0 = time.time()
    self.preEpoch()

    self.logger.info("Learning rate: %s",
                     self.learningRate if self.lr_scheduler is None
                     else self.lr_scheduler.get_lr())

    self.model.train()
    for batch_idx, (batch, target) in enumerate(self.train_loader):
      data = batch["input"]
      if self.model_type in ["resnet9", "cnn"]:
        data = torch.unsqueeze(data, 1)
      data, target = data.to(self.device), target.to(self.device)
      self.optimizer.zero_grad()
      output = self.model(data)
      loss = F.nll_loss(output, target)
      loss.backward()
      self.optimizer.step()

      if batch_idx >= self.batches_in_epoch:
        break

    self.postEpoch()

    self.logger.info("training duration: %s", time.time() - t0)
Example #6
File: utils.py Project: Wizaron/LSD-seg
def cross_entropy2d(input, target, weight=None, size_average=True):
    """
    Function to compute pixelwise cross-entropy for 2D image. This is the segmentation loss.
    Args:
        input: input tensor of shape (minibatch x num_channels x h x w)
        target: 2D label map of shape (minibatch x h x w)
        weight (optional): tensor of size 'C' specifying the weights to be given to each class
        size_average (optional): boolean value indicating whether the NLL loss has to be normalized
            by the number of pixels in the image 
    """
    
    # input: (n, c, h, w), target: (n, h, w)
    n, c, h, w = input.size()
    
    # log_p: (n, c, h, w)
    log_p = F.log_softmax(input, dim=1)
    
    # log_p: (n*h*w, c)
    log_p = log_p.transpose(1, 2).transpose(2, 3).contiguous().view(-1, c)
    try:
        log_p = log_p[target.view(n, h, w, 1).repeat(1, 1, 1, c) >= 0]
    except Exception:
        print("Exception: ", target.size())
    log_p = log_p.view(-1, c)
    
    # target: (n*h*w,)
    mask = target >= 0
    target = target[mask]
    target = torch.squeeze(target)
    loss = F.nll_loss(log_p, target, weight=weight, size_average=False)
    if size_average:
        loss /= mask.data.sum()

    return loss
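
For reference, newer PyTorch can express this masked pixelwise loss in a single call. A sketch, assuming the only negative (ignored) label value is -1; it may differ slightly from the code above when class weights are given:

def cross_entropy2d_modern(input, target, weight=None, size_average=True):
    # F.cross_entropy fuses log_softmax and nll_loss; ignore_index skips the
    # pixels that the code above masks out by hand (target < 0)
    return F.cross_entropy(input, target, weight=weight, ignore_index=-1,
                           reduction='mean' if size_average else 'sum')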
Example #7
  def test(self, test_loader=None):
    """
    Test the model using the given loader and return test metrics
    """
    if test_loader is None:
      test_loader = self.test_loader

    self.model.eval()
    test_loss = 0
    correct = 0

    with torch.no_grad():
      for batch, target in test_loader:
        data = batch["input"]
        if self.model_type in ["resnet9", "cnn"]:
          data = torch.unsqueeze(data, 1)
        data, target = data.to(self.device), target.to(self.device)
        output = self.model(data)
        test_loss += F.nll_loss(output, target, reduction='sum').item()
        pred = output.max(1, keepdim=True)[1]
        correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.sampler)
    test_accuracy = 100. * correct / len(test_loader.sampler)

    entropy = self.entropy()
    ret = {
      "total_correct": correct,
      "mean_loss": test_loss,
      "mean_accuracy": test_error,
      "entropy": float(entropy)}

    return ret
Example #8
def train(args, model, device, train_loader, optimizer):
    model.train()
    start_time = time()

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            percentage = 100. * batch_idx / len(train_loader)
            cur_length = int((tracker_length * int(percentage)) / 100)
            bar = '=' * cur_length + '>' + '-' * (tracker_length - cur_length)
            sys.stdout.write('\r{}/{} [{}] - loss: {:.4f}'.format(
                batch_idx * len(data), len(train_loader.dataset),
                bar, loss.item()))
            sys.stdout.flush()

    train_time = time() - start_time
    sys.stdout.write('\r{}/{} [{}] - {:.1f}s {:.1f}us/step - loss: {:.4f}'.format(
        len(train_loader.dataset), len(train_loader.dataset), '=' * tracker_length, 
        train_time, (train_time / len(train_loader.dataset)) * 1000000.0, loss.item()))
    sys.stdout.flush()

    return len(train_loader.dataset), train_time, loss.item()
Example #9
def test(epoch, best_acc):
    slope = get_slope(epoch)

    model.eval()
    test_loss = 0.0
    correct = 0.0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model((data, slope))
        test_loss += F.nll_loss(output, target, size_average=False).item() # sum up batch loss
        pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader.dataset)
    test_acc = correct / len(test_loader.dataset)
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
          test_loss, int(correct), len(test_loader.dataset),
          100. * test_acc))

    if test_acc >= best_acc:
        torch.save(model.state_dict(), os.path.join('models','{}.pth'.format(model_name)))

    return test_loss, test_acc
Example #10
def m_testxxx(epoch):
    # checkpoint = torch.load('checkpoint-1.pth.tar')
    # model.load_state_dict(checkpoint['state_dict'])
    # optimizer.load_state_dict(checkpoint['optimizer'])
    #
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        # x_data = data[0].numpy()
        # x_data = np.reshape(x_data, [28, 28])
        # np.savetxt('./data.csv', x_data)
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target).item()
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        #result = pred.numpy()
        #np.reshape(result, [-1, 1])
        #print(result.shape)
        # print(pred)
        correct += pred.eq(target.data).cpu().sum()

    test_loss /= len(test_loader)  # loss function already averages over batch size
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
Example #11
def train(model, device, train_loader, optimizer, epoch):
    """Train for one epoch on the training set"""
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to train mode
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        # compute output
        output = model(data)
        loss = F.nll_loss(output, target)

        # measure accuracy and record loss
        prec1 = accuracy(output, target, topk=(1,))[0]
        losses.update(loss.item(), data.size(0))
        top1.update(prec1.item(), data.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_idx % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      epoch, batch_idx, len(train_loader), loss=losses, top1=top1))
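
Note: AverageMeter is not defined in this snippet (it also appears in Example #16 below); it is conventionally the running-average helper from the PyTorch ImageNet example, roughly:

class AverageMeter(object):
    """Tracks the latest value and the running average of a metric."""
    def __init__(self):
        self.val, self.avg, self.sum, self.count = 0.0, 0.0, 0.0, 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count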
Example #12
    def get_combined_loss(cls, combined, starts, ends):
        """
        Get the loss, $-\log P(s,e|p,q)$.
        In practice, with:
            1. $\Psi_s(s|p,q)$ the start logits,
            2. $\Psi_e(e|p,q)$ the end logits,
            3. $Z_s = \log\sum_{i}\exp\Psi_s(i|p,q)$, the start partition,
            4. $Z_e = \log\sum_{i}\exp\Psi_e(i|p,q)$, the end partition, and
            5. $Z_c = \log\sum_{i}\sum_{j \geq i}\exp(\Psi_s(i|p,q)+\Psi_e(j|p,q))$,
            the combined partition,
        the default loss is:
            $Z_s + Z_e - \Psi_s(s|p,q) - \Psi_e(e|p,q)$,
        and the combined loss is:
            $Z_c - \Psi_s(s|p,q) - \Psi_e(e|p,q)$.

        The combined loss uses a normalization that ignores invalid end points.
        This is not a major difference, and should only slow things down during
        training.
        This loss is only used to validate and to compare.
        """
        batch_size, num_tokens, _other = combined.size()
        assert num_tokens == _other
        mask = torch.zeros(batch_size, num_tokens, num_tokens).float()
        for start in range(1, num_tokens):
            mask[:, start, :start] = -1e20
        mask = mask.type_as(combined.data)
        combined = combined + Variable(mask)
        combined = combined.view(batch_size, num_tokens*num_tokens)
        combined = nn.functional.log_softmax(combined, dim=1)
        labels = starts * num_tokens + ends
        return nll_loss(combined, labels)
Example #13
def train(epoch, model):
    LEARNING_RATE = lr / math.pow((1 + 10 * (epoch - 1) / epochs), 0.75)
    print('learning rate: {:.4f}'.format(LEARNING_RATE))
    optimizer = torch.optim.SGD([
        {'params': model.sharedNet.parameters()},
        {'params': model.cls_fc.parameters(), 'lr': LEARNING_RATE},
        ], lr=LEARNING_RATE / 10, momentum=momentum, weight_decay=l2_decay)

    model.train()

    iter_source = iter(source_loader)
    iter_target = iter(target_train_loader)
    num_iter = len_source_loader
    for i in range(1, num_iter):
        data_source, label_source = next(iter_source)
        data_target, _ = next(iter_target)
        if i % len_target_loader == 0:
            iter_target = iter(target_train_loader)
        if cuda:
            data_source, label_source = data_source.cuda(), label_source.cuda()
            data_target = data_target.cuda()
        data_source, label_source = Variable(data_source), Variable(label_source)
        data_target = Variable(data_target)

        optimizer.zero_grad()
        label_source_pred, loss_mmd = model(data_source, data_target)
        loss_cls = F.nll_loss(F.log_softmax(label_source_pred, dim=1), label_source)
        gamma = 2 / (1 + math.exp(-10 * (epoch) / epochs)) - 1
        loss = loss_cls + gamma * loss_mmd
        loss.backward()
        optimizer.step()
        if i % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tsoft_Loss: {:.6f}\tmmd_Loss: {:.6f}'.format(
                epoch, i * len(data_source), len_source_dataset,
                100. * i / len_source_loader, loss.item(), loss_cls.item(), loss_mmd.item()))
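
The gamma factor above is the common DANN-style ramp-up with p = epoch / epochs: it rises smoothly from 0 toward 1, phasing in the MMD term as training progresses. A quick check of the schedule:

import math

for p in (0.0, 0.25, 0.5, 1.0):
    print(round(2 / (1 + math.exp(-10 * p)) - 1, 3))  # 0.0, 0.848, 0.987, 1.0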
Example #14
def evaluate():
    should_stop = False
    model.eval()

    for name, loader in [('train', train_loader), ('test', test_loader)]:
        loss = 0
        correct = 0
        for data, target in loader:
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            if isinstance(model, MLP):
                data = data.view(-1, 784)
            data, target = Variable(data, volatile=True), Variable(target)
            output = model(data)
            loss += F.nll_loss(output, target, size_average=False).item()
            # get the index of the max log-probability
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()

        loss /= len(loader.dataset)
        print('{} -- Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'
              .format(name.ljust(5), loss, correct, len(loader.dataset),
                      100. * correct / len(loader.dataset)))
        if name == 'test':
            scheduler.step(loss)
            should_stop = should_stop or correct == len(loader.dataset)
    return should_stop or optimizer.param_groups[0]['lr'] < args.lr / 1e2
Example #15
def train(epoch):

    slope = get_slope(epoch)

    print('# Epoch : {} - Slope : {}'.format(epoch, slope))

    model.train()
    train_loss = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model((data, slope))
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_loss /= len(train_loader)

    print('Training Loss : {}'.format(train_loss))

    return train_loss
Example #16
def test(model, device, test_loader, epoch):
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    for batch_idx, (data, target) in enumerate(test_loader):
        data, target = data.to(device), target.to(device)

        # compute output
        with torch.no_grad():
            output = model(data)
        loss = F.nll_loss(output, target)

        # measure accuracy and record loss
        prec1 = accuracy(output, target, topk=(1,))[0]
        losses.update(loss.item(), data.size(0))
        top1.update(prec1.item(), data.size(0))

        if batch_idx % args.print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      batch_idx, len(test_loader), loss=losses,
                      top1=top1))

    print(' * Prec@1 {top1.avg:.3f}'.format(top1=top1))
    return top1.avg
Example #17
def train(args, epoch, net, trainLoader, optimizer, trainF):
    net.train()
    nProcessed = 0
    nTrain = len(trainLoader.dataset)
    for batch_idx, (data, target) in enumerate(trainLoader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = net(data)
        loss = F.nll_loss(output, target)
        # make_graph.save('/tmp/t.dot', loss.creator); assert(False)
        loss.backward()
        optimizer.step()
        nProcessed += len(data)
        pred = output.data.max(1)[1] # get the index of the max log-probability
        incorrect = pred.ne(target.data).cpu().sum()
        err = 100.*incorrect/len(data)
        partialEpoch = epoch + batch_idx / len(trainLoader) - 1
        print('Train Epoch: {:.2f} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tError: {:.6f}'.format(
            partialEpoch, nProcessed, nTrain, 100. * batch_idx / len(trainLoader),
            loss.item(), err))

        trainF.write('{},{},{}\n'.format(partialEpoch, loss.item(), err))
        trainF.flush()
Example #18
  def _test_pytorch(self, model):
    """
    Test pre-trained pytorch model using MNIST Dataset
    :param model: Pre-trained PytorchMNIST model
    :return: tuple(loss, accuracy)
    """
    data_loader = torch.utils.data.DataLoader(
      datasets.MNIST(self.dataDir, train=False, download=True,
                     transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))])),
      batch_size=BATCH_SIZE, shuffle=True)

    model.eval()
    loss = 0.0
    num_correct = 0.0
    with torch.no_grad():
      for data, target in data_loader:
        data = data.view(-1, 28 * 28)
        output = model(data)
        loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
        pred = output.max(1, keepdim=True)[1]  # get the index of the max log-probability
        num_correct += pred.eq(target.view_as(pred)).sum().item()

    loss /= len(data_loader.dataset)
    accuracy = num_correct / len(data_loader.dataset)

    return (loss, accuracy)
Example #19
def train(**kwargs):
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    #step1: config model
    model = getattr(Nets,opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        model.to(device)

    #step2: data
    train_data = imageSentiment(opt.train_path,train = True) # training set
    val_data = imageSentiment(opt.train_path,train = False) # validation set
    train_dataloader = DataLoader(train_data,batch_size = opt.batch_size,shuffle=True,num_workers = opt.num_workers)
    val_dataloader = DataLoader(val_data,batch_size = opt.batch_size,shuffle=False,num_workers = opt.num_workers)

    #step3: define the loss function and optimizer
    # criterion = nn.CrossEntropyLoss() # cross-entropy loss; with this loss the network needs no final softmax
    lr = opt.lr
    # optimizer = Optim.Adam(model.parameters(),lr = lr,weight_decay= opt.weight_decay)
    optimizer = Optim.SGD(model.parameters(),lr = 0.001,momentum=0.9,nesterov=True)
    #step4: metrics (average loss and confusion matrix)
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(7)
    previous_loss = 1e100

    # training loop
    for i in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        total_loss = 0.
        for ii,(label,data) in tqdm(enumerate(train_dataloader),total=len(train_dataloader)):
            if opt.use_gpu:
                label,data = label.to(device),data.to(device)

            optimizer.zero_grad()
            score = model(data)
            # note: when using nll_loss or CrossEntropyLoss for multi-class classification, targets are index labels; no one-hot conversion is needed
            loss = F.nll_loss(score,label)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()

            # update running metrics and visualization
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data,label.data)

            if ii%opt.print_freq==opt.print_freq-1:
                vis.plot('loss',loss_meter.value()[0])

        vis.plot('mach avgloss', total_loss/len(train_dataloader))
        model.save()

        # compute metrics on the validation set
        val_accuracy = val(model,val_dataloader)

        vis.plot('val_accuracy',val_accuracy)
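
To illustrate the note in the training loop above: nll_loss, like CrossEntropyLoss, expects integer class indices as targets, never one-hot vectors. A self-contained example:

import torch
import torch.nn.functional as F

log_probs = F.log_softmax(torch.randn(4, 7), dim=1)  # 4 samples, 7 classes
labels = torch.tensor([0, 3, 6, 2])                  # index labels, no one-hot needed
loss = F.nll_loss(log_probs, labels)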
Example #20
File: train.py Project: aimeng100/pyGAT
def compute_test():
    model.eval()
    output = model(features, adj)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.data[0]),
          "accuracy= {:.4f}".format(acc_test.data[0]))
Example #21
File: model.py Project: UriSha/sigmorphon
 def loss(self, predict, target):
     '''
     compute loss
     '''
     return F.nll_loss(
         predict.view(-1, self.trg_vocab_size),
         target.view(-1),
         ignore_index=PAD_IDX)
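
A small usage sketch (PAD_IDX below is a hypothetical padding id): target positions equal to ignore_index contribute nothing to the loss or its gradient, which is how padded sequence positions are skipped above:

import torch
import torch.nn.functional as F

PAD_IDX = 0  # hypothetical padding index
predict = F.log_softmax(torch.randn(2, 5, 10), dim=-1)  # (batch, seq, vocab)
target = torch.tensor([[4, 2, PAD_IDX, PAD_IDX, PAD_IDX],
                       [1, 7, 3, 9, PAD_IDX]])
loss = F.nll_loss(predict.view(-1, 10), target.view(-1), ignore_index=PAD_IDX)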
Example #22
 def part_loss(pred_part, gt_seg_part, gt_seg_object, object_label, valid):
     mask_object = (gt_seg_object == object_label)
     loss = F.nll_loss(pred_part, gt_seg_part * mask_object.long(), reduction='none')
     loss = loss * mask_object.float()
     loss = torch.sum(loss.view(loss.size(0), -1), dim=1)
     nr_pixel = torch.sum(mask_object.view(mask_object.shape[0], -1), dim=1)
     sum_pixel = (nr_pixel * valid).sum()
     loss = (loss * valid.float()).sum() / torch.clamp(sum_pixel, 1).float()
     return loss
Example #23
 def _train_iteration(self):
     self.model.train()
     for batch_idx, (data, target) in enumerate(self.train_loader):
         if self.args.cuda:
             data, target = data.cuda(), target.cuda()
         self.optimizer.zero_grad()
         output = self.model(data)
         loss = F.nll_loss(output, target)
         loss.backward()
         self.optimizer.step()
Example #24
 def train(epoch):
     model.train()
     for batch_idx, (data, target) in enumerate(train_loader):
         if args.cuda:
             data, target = data.cuda(), target.cuda()
         optimizer.zero_grad()
         output = model(data)
         loss = F.nll_loss(output, target)
         loss.backward()
         optimizer.step()
Example #25
 def train_(self, input_img, input_qst, label):
     self.optimizer.zero_grad()
     output = self(input_img, input_qst)
     loss = F.nll_loss(output, label)
     loss.backward()
     self.optimizer.step()
     pred = output.data.max(1)[1]
     correct = pred.eq(label.data).cpu().sum()
     accuracy = correct * 100. / len(label)
     return accuracy
Example #26
    def forward(self, batch):
        context, context_lengths, context_limited    = batch.context,  batch.context_lengths,  batch.context_limited
        question, question_lengths, question_limited = batch.question, batch.question_lengths, batch.question_limited
        answer, answer_lengths, answer_limited       = batch.answer,   batch.answer_lengths,   batch.answer_limited
        oov_to_limited_idx, limited_idx_to_full_idx  = batch.oov_to_limited_idx, batch.limited_idx_to_full_idx

        def map_to_full(x):
            return limited_idx_to_full_idx[x]
        self.map_to_full = map_to_full

        context_embedded = self.encoder_embeddings(context)
        question_embedded = self.encoder_embeddings(question)

        context_encoded = self.bilstm_before_coattention(context_embedded, context_lengths)[0]
        question_encoded = self.bilstm_before_coattention(question_embedded, question_lengths)[0]

        context_padding = context.data == self.pad_idx
        question_padding = question.data == self.pad_idx

        coattended_context = self.coattention(context_encoded, question_encoded, context_padding, question_padding)

        context_summary = torch.cat([coattended_context, context_encoded, context_embedded], -1)
        condensed_context, _ = self.context_bilstm_after_coattention(context_summary, context_lengths)
        self_attended_context = self.self_attentive_encoder_context(condensed_context, padding=context_padding)
        final_context, (context_rnn_h, context_rnn_c) = self.bilstm_context(self_attended_context[-1], context_lengths)
        context_rnn_state = [self.reshape_rnn_state(x) for x in (context_rnn_h, context_rnn_c)]

        context_indices = context_limited if context_limited is not None else context
        answer_indices = answer_limited if answer_limited is not None else answer

        pad_idx = self.field.decoder_stoi[self.field.pad_token]
        context_padding = context_indices.data == pad_idx

        self.dual_ptr_rnn_decoder.applyMasks(context_padding)

        if self.training:
            answer_padding = answer_indices.data == pad_idx
            answer_embedded = self.decoder_embeddings(answer)
            self_attended_decoded = self.self_attentive_decoder(answer_embedded[:, :-1].contiguous(), self_attended_context, context_padding=context_padding, answer_padding=answer_padding[:, :-1], positional_encodings=True)
            decoder_outputs = self.dual_ptr_rnn_decoder(self_attended_decoded, 
                final_context, hidden=context_rnn_state)
            rnn_output, context_attention, context_alignment, vocab_pointer_switch, rnn_state = decoder_outputs

            probs = self.probs(self.out, rnn_output, vocab_pointer_switch, 
                context_attention, 
                context_indices, 
                oov_to_limited_idx)

            probs, targets = mask(answer_indices[:, 1:].contiguous(), probs.contiguous(), pad_idx=pad_idx)
            loss = F.nll_loss(probs.log(), targets)
            return loss, None
        else:
            return None, self.greedy(self_attended_context, final_context, 
                context_indices,
                oov_to_limited_idx, rnn_state=context_rnn_state).data
Example #27
def evaluate(net, dataloader, num_ens=1):
    """Calculate ensemble accuracy and NLL"""
    accs = []
    nlls = []
    for i, (inputs, labels) in enumerate(dataloader):
        inputs, labels = Variable(inputs.cuda(non_blocking=True)), Variable(labels.cuda(non_blocking=True))
        outputs = torch.zeros(inputs.shape[0], net.num_classes, num_ens).cuda()
        for j in range(num_ens):
            outputs[:, :, j] = F.log_softmax(net(inputs), dim=1).data
        accs.append(metrics.logit2acc(logmeanexp(outputs, dim=2), labels))
        nlls.append(F.nll_loss(Variable(logmeanexp(outputs, dim=2)), labels, size_average=False).data.cpu().numpy())
    return np.mean(accs), np.sum(nlls)
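
Note: logmeanexp is not defined in this snippet. A numerically stable sketch consistent with its use above (averaging ensemble probabilities in log space) could be:

import math
import torch

def logmeanexp(x, dim):
    # log(mean(exp(x), dim)), computed stably via logsumexp
    return torch.logsumexp(x, dim=dim) - math.log(x.size(dim))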
Example #28
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
Example #29
File: mnist_cnn.py Project: sunzuolei/ml
def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                       100. * batch_idx / len(train_loader), loss.item()))
Example #30
File: train.py Project: Madhu612/examples
def train_epoch(epoch, args, model, data_loader, optimizer):
    model.train()
    pid = os.getpid()
    for batch_idx, (data, target) in enumerate(data_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('{}\tTrain Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                pid, epoch, batch_idx * len(data), len(data_loader.dataset),
                100. * batch_idx / len(data_loader), loss.item()))
Example #31
def fit(epoch,
        model,
        data_loader,
        phase='training',
        volatile=False,
        is_cuda=False):

    optimizer = optim.SGD(model.parameters(), lr=Leaning_Rate, momentum=0.5)

    if phase == 'training':
        model.train()
    if phase == 'validation':
        model.eval()
        volatile = True

    running_loss = 0.0
    running_correct = 0

    for batch_idx, (data, target) in enumerate(data_loader):

        # data, target = data.cuda(), target.cuda()

        #data, target = Variable(data, volatile), Variable(target)
        if phase == 'training':
            optimizer.zero_grad()

        # print("data shape:{}".format(data.shape))
        output = model(data)

        loss = F.nll_loss(output, target)
        running_loss += F.nll_loss(output, target, size_average=False).item()

        preds = output.data.max(dim=1, keepdim=True)[1]

        gound_truth = target.data

        # print("preds:{}".format(preds))

        answer = preds.squeeze()

        # print("gound_truth:{}".format(gound_truth))
        # print("answer:{}".format(answer))

        a = gound_truth.data.detach().cpu().numpy()
        b = answer.data.detach().cpu().numpy()

        gound_truth_list.append(a)
        answer_list.append(b)

        # print("ground_truth numpy:{}".format(a))
        # print("answer numpy:{}".format(b))

        running_correct += preds.eq(target.data.view_as(preds)).cpu().sum()

        if phase == 'training':
            loss.backward()
            optimizer.step()

    loss = running_loss / len(data_loader.dataset)
    accuracy = 100. * running_correct.item() / len(data_loader.dataset)
    print(
        f'{phase} loss is {loss:{5}.{2}} and {phase} accuracy is {running_correct}/{len(data_loader.dataset)}{accuracy:{10}.{4}}'
    )

    # print("gound_truth_list:{}".format(gound_truth_list))
    # print("answer_list:{}".format(answer_list))

    return loss, accuracy
Example #32
    for batch_X, batch_y in iterator.get_batches(train_set, shuffle=False):
        net_in = np_to_var(batch_X)
        if cuda:
            net_in = net_in.cuda()
        net_target = np_to_var(batch_y)
        if cuda:
            net_target = net_target.cuda()
        # Remove gradients of last backward pass from all parameters
        optimizer.zero_grad()
        outputs = model(net_in)
        # Mean predictions across trial
        # Note that this will give identical gradients to computing
        # a per-prediction loss (at least for the combination of log softmax activation
        # and negative log likelihood loss which we are using here)
        outputs = th.mean(outputs, dim=2)[:, :, 0]
        loss = F.nll_loss(outputs, net_target)
        loss.backward()
        optimizer.step()

    # Print some statistics each epoch
    model.eval()
    for setname, dataset in (('Train', train_set), ('Test', test_set)):
        # Collect all predictions and losses
        all_preds = []
        all_losses = []
        batch_sizes = []
        for batch_X, batch_y in iterator.get_batches(dataset, shuffle=False):
            net_in = np_to_var(batch_X)
            if cuda:
                net_in = net_in.cuda()
            net_target = np_to_var(batch_y)
Example #33
    def forward(
            self,  # type: ignore
            question: Dict[str, torch.LongTensor],
            passage: Dict[str, torch.LongTensor],
            span_start: torch.IntTensor = None,
            span_end: torch.IntTensor = None,
            metadata: List[Dict[str, Any]] = None,
            store_metrics: bool = True,
            valid_output_mask: torch.LongTensor = None,
            sent_targets: torch.Tensor = None,
            stance: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        question : Dict[str, torch.LongTensor]
            From a ``TextField``.
        passage : Dict[str, torch.LongTensor]
            From a ``TextField``.  The model assumes that this passage contains the answer to the
            question, and predicts the beginning and ending positions of the answer within the
            passage.
        span_start : ``torch.IntTensor``, optional
            From an ``IndexField``.  This is one of the things we are trying to predict - the
            beginning position of the answer with the passage.  This is an `inclusive` token index.
            If this is given, we will compute a loss that gets included in the output dictionary.
        span_end : ``torch.IntTensor``, optional
            From an ``IndexField``.  This is one of the things we are trying to predict - the
            ending position of the answer with the passage.  This is an `inclusive` token index.
            If this is given, we will compute a loss that gets included in the output dictionary.
        metadata : ``List[Dict[str, Any]]``, optional
            If present, this should contain the question ID, original passage text, and token
            offsets into the passage for each instance in the batch.  We use this for computing
            official metrics using the official SQuAD evaluation script.  The length of this list
            should be the batch size, and each dictionary should have the keys ``id``,
            ``original_passage``, and ``token_offsets``.  If you only want the best span string and
            don't care about official metrics, you can omit the ``id`` key.
        store_metrics : bool
            If true, stores metrics (if applicable) within model metric tracker.
            If false, returns resulting metrics immediately, without updating the model metric tracker.
        valid_output_mask: ``torch.LongTensor``, optional
            The locations for a valid answer. Used to limit the model's output space.

        Returns
        -------
        An output dictionary consisting of:
        span_start_logits : torch.FloatTensor
            A tensor of shape ``(batch_size, passage_length)`` representing unnormalized log
            probabilities of the span start position.
        span_start_probs : torch.FloatTensor
            The result of ``softmax(span_start_logits)``.
        span_end_logits : torch.FloatTensor
            A tensor of shape ``(batch_size, passage_length)`` representing unnormalized log
            probabilities of the span end position (inclusive).
        span_end_probs : torch.FloatTensor
            The result of ``softmax(span_end_logits)``.
        best_span : torch.IntTensor
            The result of a constrained inference over ``span_start_logits`` and
            ``span_end_logits`` to find the most probable span.  Shape is ``(batch_size, 2)``
            and each offset is a token index.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.
        best_span_str : List[str]
            If sufficient metadata was provided for the instances in the batch, we also return the
            string from the original passage that the model thinks is the best answer to the
            question.
        """
        embedded_question = self._highway_layer(
            self._text_field_embedder(question))
        embedded_passage = self._highway_layer(
            self._text_field_embedder(passage))
        batch_size = embedded_question.size(0)
        passage_length = embedded_passage.size(1)
        question_mask = util.get_text_field_mask(question).float()
        passage_mask = util.get_text_field_mask(passage).float()
        question_lstm_mask = question_mask if self._mask_lstms else None
        passage_lstm_mask = passage_mask if self._mask_lstms else None

        encoded_question = self._dropout(
            self._phrase_layer(embedded_question, question_lstm_mask))
        encoded_passage = self._dropout(
            self._phrase_layer(embedded_passage, passage_lstm_mask))
        encoding_dim = encoded_question.size(-1)

        # Shape: (batch_size, passage_length, question_length)
        passage_question_similarity = self._matrix_attention(
            encoded_passage, encoded_question)
        # Shape: (batch_size, passage_length, question_length)
        passage_question_attention = util.masked_softmax(
            passage_question_similarity, question_mask)
        # Shape: (batch_size, passage_length, encoding_dim)
        passage_question_vectors = util.weighted_sum(
            encoded_question, passage_question_attention)

        # We replace masked values with something really negative here, so they don't affect the
        # max below.
        masked_similarity = util.replace_masked_values(
            passage_question_similarity, question_mask.unsqueeze(1), -1e7)
        # Shape: (batch_size, passage_length)
        question_passage_similarity = masked_similarity.max(
            dim=-1)[0].squeeze(-1)
        # Shape: (batch_size, passage_length)
        question_passage_attention = util.masked_softmax(
            question_passage_similarity, passage_mask)
        # Shape: (batch_size, encoding_dim)
        question_passage_vector = util.weighted_sum(
            encoded_passage, question_passage_attention)
        # Shape: (batch_size, passage_length, encoding_dim)
        tiled_question_passage_vector = question_passage_vector.unsqueeze(
            1).expand(batch_size, passage_length, encoding_dim)

        # Shape: (batch_size, passage_length, encoding_dim * 4)
        final_merged_passage = torch.cat([
            encoded_passage, passage_question_vectors,
            encoded_passage * passage_question_vectors,
            encoded_passage * tiled_question_passage_vector
        ],
                                         dim=-1)

        # Debate: Conditioning on whose turn it is (A/B)
        if not self.is_judge:
            turn_film_params = self._turn_film_gen(
                stance.to(final_merged_passage).unsqueeze(1))
            turn_gammas, turn_betas = torch.split(
                turn_film_params, self._modeling_layer.get_input_dim(), dim=-1)
            final_merged_passage_mask = (
                final_merged_passage !=
                0).float()  # NOTE: Using heuristic to get mask
            final_merged_passage = self._film(
                final_merged_passage, 1. + turn_gammas,
                turn_betas) * final_merged_passage_mask
        modeled_passage = self._dropout(
            self._modeling_layer(final_merged_passage, passage_lstm_mask))
        modeling_dim = modeled_passage.size(-1)

        # Shape: (batch_size, passage_length, encoding_dim * 4 + modeling_dim))
        span_start_input_full = torch.cat(
            [final_merged_passage, modeled_passage], dim=-1)
        span_start_input = self._dropout(span_start_input_full)
        if not self.is_judge:
            value_head_input = (span_start_input_full.detach()
                                if self._detach_value_head else span_start_input_full)
            # Shape: (batch_size)
            tokenwise_values = self._value_head(value_head_input).squeeze(-1)
            value, value_loc = util.replace_masked_values(
                tokenwise_values, passage_mask, -1e7).max(-1)
        # Shape: (batch_size, passage_length)
        span_start_logits = self._span_start_predictor(
            span_start_input).squeeze(-1)
        valid_output_mask = passage_mask if valid_output_mask is None else valid_output_mask
        # Shape: (batch_size, passage_length)
        span_start_probs = util.masked_softmax(span_start_logits,
                                               valid_output_mask)

        # Shape: (batch_size, modeling_dim)
        span_start_representation = util.weighted_sum(modeled_passage,
                                                      span_start_probs)
        # Shape: (batch_size, passage_length, modeling_dim)
        tiled_start_representation = span_start_representation.unsqueeze(
            1).expand(batch_size, passage_length, modeling_dim)

        # Shape: (batch_size, passage_length, encoding_dim * 4 + modeling_dim * 3)
        span_end_representation = torch.cat([
            final_merged_passage, modeled_passage, tiled_start_representation,
            modeled_passage * tiled_start_representation
        ],
                                            dim=-1)
        # Shape: (batch_size, passage_length, encoding_dim)
        encoded_span_end = self._dropout(
            self._span_end_encoder(span_end_representation, passage_lstm_mask))
        # Shape: (batch_size, passage_length, encoding_dim * 4 + span_end_encoding_dim)
        span_end_input = self._dropout(
            torch.cat([final_merged_passage, encoded_span_end], dim=-1))
        span_end_logits = self._span_end_predictor(span_end_input).squeeze(-1)
        span_end_probs = util.masked_softmax(span_end_logits,
                                             valid_output_mask)
        span_start_logits = util.replace_masked_values(span_start_logits,
                                                       valid_output_mask, -1e7)
        span_end_logits = util.replace_masked_values(span_end_logits,
                                                     valid_output_mask, -1e7)
        best_span = self.get_best_span(span_start_logits, span_end_logits)

        output_dict = {
            "passage_question_attention":
            passage_question_attention,
            "span_start_logits":
            span_start_logits,
            "span_start_probs":
            span_start_probs,
            "span_end_logits":
            span_end_logits,
            "span_end_probs":
            span_end_probs,
            "best_span":
            best_span,
            "value":
            value if not self.is_judge else None,
            "prob":
            torch.tensor([
                span_start_probs[i, span_start[i]]
                if span_start[i] < span_start_probs.size(1) else 0.
                for i in range(batch_size)
            ]) if self.is_judge else None,  # prob(true answer)
            "prob_dist":
            span_start_probs,
        }

        # Compute the loss for training.
        if (span_start is not None) and self.is_judge:
            span_start[span_start >= passage_mask.size(
                1)] = -100  # NB: Hacky. Don't add to loss if span not in input
            loss = nll_loss(
                util.masked_log_softmax(span_start_logits, valid_output_mask),
                span_start.squeeze(-1))
            if store_metrics:
                self._span_start_accuracy(span_start_logits,
                                          span_start.squeeze(-1))
            span_end[span_end >= passage_mask.size(
                1)] = -100  # NB: Hacky. Don't add to loss if span not in input
            loss += nll_loss(
                util.masked_log_softmax(span_end_logits, valid_output_mask),
                span_end.squeeze(-1))
            if store_metrics:
                self._span_end_accuracy(span_end_logits, span_end.squeeze(-1))
                self._span_accuracy(best_span,
                                    torch.stack([span_start, span_end], -1))
            output_dict["loss"] = loss
        elif not self.is_judge:  # Debate SL
            if self.reward_method == 'sl':  # sent_targets should be a vector of target indices
                output_dict["loss"] = nll_loss(
                    util.masked_log_softmax(span_start_logits,
                                            valid_output_mask),
                    sent_targets.squeeze(-1))
                if store_metrics:
                    self._span_start_accuracy(span_start_logits,
                                              sent_targets.squeeze(-1))
            elif self.reward_method.startswith('sl-sents'):
                # sent_targets should be a matrix of target values (non-zero only in EOS indices)
                sent_targets = util.replace_masked_values(
                    sent_targets, valid_output_mask, -1e7)
                output_dict["loss"] = util.masked_mean(
                    ((span_start_logits - sent_targets)**2), valid_output_mask,
                    1)
                if store_metrics:
                    self._span_start_accuracy(span_start_logits,
                                              sent_targets.max(-1)[1])

        # Compute the EM and F1 on SQuAD and add the tokenized input to the output.
        batch_ems = []
        batch_f1s = []
        if metadata is not None:
            output_dict['best_span_str'] = []
            question_tokens = []
            passage_tokens = []
            for i in range(batch_size):
                question_tokens.append(metadata[i]['question_tokens'])
                passage_tokens.append(metadata[i]['passage_tokens'])
                passage_str = metadata[i]['original_passage']
                offsets = metadata[i]['token_offsets']
                predicted_span = tuple(best_span[i].detach().cpu().numpy())
                start_offset = offsets[predicted_span[0]][0]
                end_offset = offsets[predicted_span[1]][1]
                best_span_string = passage_str[start_offset:end_offset]
                output_dict['best_span_str'].append(best_span_string)
                answer_texts = metadata[i].get('answer_texts', [])
                if answer_texts:
                    self._squad_metrics(best_span_string, answer_texts)
                    sample_squad_metrics = SquadEmAndF1()
                    sample_squad_metrics(best_span_string, answer_texts)
                    sample_em, sample_f1 = sample_squad_metrics.get_metric(
                        reset=True)
                    batch_ems.append(sample_em)
                    batch_f1s.append(sample_f1)
            output_dict['question_tokens'] = question_tokens
            output_dict['passage_tokens'] = passage_tokens
            output_dict['em'] = torch.tensor(batch_ems)
            output_dict['f1'] = torch.tensor(batch_f1s)
        return output_dict
Example #34
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))]),
)
indices = np.isin(mnist_dataset.targets, keep_labels).astype("uint8")
selected_data = (th.native_masked_select(mnist_dataset.data.transpose(0, 2),
                                         th.tensor(indices)).view(
                                             28, 28, -1).transpose(2, 0))
selected_targets = th.native_masked_select(mnist_dataset.targets,
                                           th.tensor(indices))
dataset = sy.BaseDataset(data=selected_data,
                         targets=selected_targets,
                         transform=mnist_dataset.transform)

trainloader = th.utils.data.DataLoader(dataset, batch_size=64, shuffle=True)

optimizer = optim.SGD(model.parameters(), lr=0.001)

start_time = time.time()
for batch_idx, (inputs, targets) in enumerate(trainloader):
    inputs, targets = inputs.to(device), targets.to(device)
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = F.nll_loss(outputs, targets)
    loss.backward()
    optimizer.step()
print("[PROF]", "LocalTraining", "duration", time.time() - start_time)
Example #35
def loss_fn(predictions, targets):
    return F.nll_loss(predictions, targets)
Example #36
 def validation_step(self, val_batch, batch_idx):
     x, y = val_batch
     logits = self.forward(x)
     loss = F.nll_loss(logits, y)
     acc = self.accuracy(logits, y)
     return {"val_loss": loss, "val_accuracy": acc}
Example #37
def test(model, device, test_loader, epsilon):

    # Accuracy counter
    correct = 0
    adv_examples = []

    # Loop over all examples in test set
    for data, target in test_loader:

        # Send the data and label to the device
        data, target = data.to(device), target.to(device)

        # Set requires_grad attribute of tensor. Important for Attack
        data.requires_grad = True

        # Forward pass the data through the model
        output = model(data)
        init_pred = output.max(
            1, keepdim=True)[1]  # get the index of the max log-probability

        # If the initial prediction is wrong, don't bother attacking, just move on
        if init_pred.item() != target.item():
            continue

        # Calculate the loss
        loss = F.nll_loss(output, target)

        # Zero all existing gradients
        model.zero_grad()

        # Calculate gradients of model in backward pass
        loss.backward()

        # Collect datagrad
        data_grad = data.grad.data

        # Call FGSM Attack
        perturbed_data = fgsm_attack(data, epsilon, data_grad)

        # Re-classify the perturbed image
        output = model(perturbed_data)

        # Check for success
        final_pred = output.max(
            1, keepdim=True)[1]  # get the index of the max log-probability
        if final_pred.item() == target.item():
            correct += 1
            # Special case for saving 0 epsilon examples
            if (epsilon == 0) and (len(adv_examples) < 5):
                adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
                adv_examples.append(
                    (init_pred.item(), final_pred.item(), adv_ex))
        else:
            # Save some adv examples for visualization later
            if len(adv_examples) < 5:
                adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
                adv_examples.append(
                    (init_pred.item(), final_pred.item(), adv_ex))

    # Calculate final accuracy for this epsilon
    final_acc = correct / float(len(test_loader))
    print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(
        epsilon, correct, len(test_loader), final_acc))

    # Return the accuracy and an adversarial example
    return final_acc, adv_examples
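
Note: fgsm_attack is not defined here. This function mirrors the PyTorch adversarial-example tutorial, where the attack step looks like the sketch below, assuming inputs normalized to [0, 1]:

import torch

def fgsm_attack(image, epsilon, data_grad):
    # Step the image in the direction of the gradient's sign
    perturbed_image = image + epsilon * data_grad.sign()
    # Clip to keep pixel values in the valid [0, 1] range
    return torch.clamp(perturbed_image, 0, 1)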
Example #38
File: cnn.py Project: caiwd/PCCNN
 def forward(self, preds, target):
     n = preds.size()[-1]
     log_preds = F.log_softmax(preds, dim=-1)
     loss = reduce_loss(-log_preds.sum(dim=-1), self.reduction)
     nll = F.nll_loss(log_preds, target, reduction=self.reduction)
     return linear_combination(loss/n, nll, self.epsilon)
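
Note: reduce_loss and linear_combination are small helpers in the fastai style of this label-smoothing loss. Minimal sketches consistent with their use above:

def reduce_loss(loss, reduction='mean'):
    if reduction == 'mean':
        return loss.mean()
    return loss.sum() if reduction == 'sum' else loss

def linear_combination(x, y, epsilon):
    # Blend the uniform-smoothing term x with the standard NLL term y
    return epsilon * x + (1 - epsilon) * y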
Example #39
 def training_step(self, batch, batch_nb):
     data, target = batch
     output = self.forward(data)
     loss = F.nll_loss(output, target)
     return {"loss": loss}
Example #40
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors,
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob)
    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(), args.lr,
                               weight_decay=args.l2_wd)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward
                log_p1, log_p2 = model(cw_idxs, qw_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch,
                                         NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR',
                               optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
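The loss above is the standard two-head span objective; a toy sketch with stand-in tensors:

import torch
import torch.nn.functional as F

batch_size, ctx_len = 4, 50
log_p1 = F.log_softmax(torch.randn(batch_size, ctx_len), dim=1)  # start head
log_p2 = F.log_softmax(torch.randn(batch_size, ctx_len), dim=1)  # end head
y1 = torch.randint(ctx_len, (batch_size,))                       # gold starts
y2 = torch.randint(ctx_len, (batch_size,))                       # gold ends

loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)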
Example #41
def loss_fn(pred, target):
    return F.nll_loss(input=pred, target=target)
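This only behaves like a cross-entropy loss if pred already holds log-probabilities; a quick sanity check of the equivalence:

import torch
import torch.nn.functional as F

logits = torch.randn(8, 5)            # raw scores: 8 samples, 5 classes
target = torch.randint(5, (8,))

# nll_loss over log_softmax is numerically the same as cross_entropy on logits
a = F.nll_loss(F.log_softmax(logits, dim=1), target)
b = F.cross_entropy(logits, target)
assert torch.allclose(a, b)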
Example #42
    def forward(
            self,
            question: Dict[str, torch.LongTensor],
            passage: Dict[str, torch.LongTensor],
            span_start: torch.IntTensor = None,
            span_end: torch.IntTensor = None,
            yesno_list: torch.IntTensor = None,
            metadata: List[Dict[str, Any]] = None) -> Dict[str, torch.Tensor]:

        batch_size, max_qa_count, max_q_len, _ = question[
            'token_characters'].size()
        total_qa_count = batch_size * max_qa_count
        qa_mask = torch.ge(yesno_list, 0).view(total_qa_count)

        embedded_question = self._text_field_embedder(question,
                                                      num_wrapping_dims=1)
        # total_qa_count * max_q_len * encoding_dim
        embedded_question = embedded_question.reshape(
            total_qa_count, max_q_len,
            self._text_field_embedder.get_output_dim())
        embedded_passage = self._text_field_embedder(passage)

        word_emb_ques, elmo_ques, ques_feat = torch.split(embedded_question,
                                                          [200, 1024, 40],
                                                          dim=2)
        word_emb_pass, elmo_pass, pass_feat = torch.split(embedded_passage,
                                                          [200, 1024, 40],
                                                          dim=2)
        embedded_question = torch.cat([word_emb_ques, elmo_ques], dim=2)
        embedded_passage = torch.cat([word_emb_pass, elmo_pass], dim=2)

        embedded_question = self._variational_dropout(embedded_question)
        embedded_passage = self._variational_dropout(embedded_passage)
        passage_length = embedded_passage.size(1)

        question_mask = util.get_text_field_mask(question,
                                                 num_wrapping_dims=1).float()
        question_mask = question_mask.reshape(total_qa_count, max_q_len)
        passage_mask = util.get_text_field_mask(passage).float()

        repeated_passage_mask = passage_mask.unsqueeze(1).repeat(
            1, max_qa_count, 1)
        repeated_passage_mask = repeated_passage_mask.view(
            total_qa_count, passage_length)

        encode_passage = self._phrase_layer(embedded_passage, passage_mask)
        projected_passage = self.relu(
            self.projected_layer(torch.cat([encode_passage, elmo_pass],
                                           dim=2)))

        encode_question = self._phrase_layer(embedded_question, question_mask)
        projected_question = self.relu(
            self.projected_layer(torch.cat([encode_question, elmo_ques],
                                           dim=2)))

        encoded_passage = self._variational_dropout(projected_passage)
        repeated_encoded_passage = encoded_passage.unsqueeze(1).repeat(
            1, max_qa_count, 1, 1)
        repeated_encoded_passage = repeated_encoded_passage.view(
            total_qa_count, passage_length, self._encoding_dim)
        repeated_pass_feat = (pass_feat.unsqueeze(1).repeat(
            1, max_qa_count, 1, 1)).view(total_qa_count, passage_length, 40)
        encoded_question = self._variational_dropout(projected_question)

        # total_qa_count * max_q_len * passage_length
        # cnt * m * n
        s = torch.bmm(encoded_question,
                      repeated_encoded_passage.transpose(2, 1))
        alpha = util.masked_softmax(s,
                                    question_mask.unsqueeze(2).expand(
                                        s.size()),
                                    dim=1)
        # cnt * n * h
        aligned_p = torch.bmm(alpha.transpose(2, 1), encoded_question)

        # cnt * m * n
        beta = util.masked_softmax(s,
                                   repeated_passage_mask.unsqueeze(1).expand(
                                       s.size()),
                                   dim=2)
        # cnt * m * h
        un_aligned_q = torch.bmm(beta, repeated_encoded_passage)

        # flow
        # (b * qa) * m * h -> (b * m) * qa * h
        un_aligned_q = un_aligned_q.reshape(
            batch_size, max_qa_count, max_q_len,
            -1).transpose(2, 1).reshape(batch_size * max_q_len, max_qa_count,
                                        -1)
        tmp_q_mask = question_mask.reshape(batch_size, max_qa_count,
                                           max_q_len).transpose(2, 1).reshape(
                                               batch_size * max_q_len,
                                               max_qa_count)
        aligned_q = self.flow(un_aligned_q, tmp_q_mask).reshape(
            batch_size, max_q_len, max_qa_count,
            -1).transpose(2, 1).reshape(total_qa_count, max_q_len,
                                        self._encoding_dim)

        fused_p = self.fuse(repeated_encoded_passage, aligned_p)
        fused_q = self.fuse(encoded_question, aligned_q)

        # add manual features here
        q_aware_p = self.projected_lstm(
            torch.cat([fused_p, repeated_pass_feat], dim=2),
            repeated_passage_mask)

        # cnt * n * n
        self_p = torch.bmm(q_aware_p, q_aware_p.transpose(2, 1))
        for i in range(passage_length):
            self_p[:, i, i] = 0
        lamb = util.masked_softmax(self_p,
                                   repeated_passage_mask.unsqueeze(1).expand(
                                       self_p.size()),
                                   dim=2)
        # cnt * n * h
        self_aligned_p = torch.bmm(lamb, q_aware_p)

        # cnt * n * h
        fused_self_p = self.fuse(q_aware_p, self_aligned_p)
        contextual_p = self.contextual_layer_p(fused_self_p,
                                               repeated_passage_mask)

        contextual_q = self.contextual_layer_q(fused_q, question_mask)
        # cnt * m
        gamma = util.masked_softmax(
            self.linear_self_align(contextual_q).squeeze(2),
            question_mask,
            dim=1)
        # cnt * h
        weighted_q = torch.bmm(gamma.unsqueeze(1), contextual_q).squeeze(1)

        span_start_logits = self.bilinear_layer_s(weighted_q, contextual_p)
        span_end_logits = self.bilinear_layer_e(weighted_q, contextual_p)

        # cnt * n * 1  cnt * 1 * h
        span_yesno_logits = self.yesno_predictor(
            torch.bmm(span_end_logits.unsqueeze(2), weighted_q.unsqueeze(1)))

        span_start_logits = util.replace_masked_values(span_start_logits,
                                                       repeated_passage_mask,
                                                       -1e7)
        span_end_logits = util.replace_masked_values(span_end_logits,
                                                     repeated_passage_mask,
                                                     -1e7)

        best_span = self._get_best_span_yesno_followup(span_start_logits,
                                                       span_end_logits,
                                                       span_yesno_logits,
                                                       self._max_span_length)

        output_dict: Dict[str, Any] = {}

        # Compute the loss for training

        if span_start is not None:
            loss = nll_loss(util.masked_log_softmax(span_start_logits,
                                                    repeated_passage_mask),
                            span_start.view(-1),
                            ignore_index=-1)
            self._span_start_accuracy(span_start_logits,
                                      span_start.view(-1),
                                      mask=qa_mask)
            loss += nll_loss(util.masked_log_softmax(span_end_logits,
                                                     repeated_passage_mask),
                             span_end.view(-1),
                             ignore_index=-1)
            self._span_end_accuracy(span_end_logits,
                                    span_end.view(-1),
                                    mask=qa_mask)
            self._span_accuracy(best_span[:, 0:2],
                                torch.stack([span_start, span_end],
                                            -1).view(total_qa_count, 2),
                                mask=qa_mask.unsqueeze(1).expand(-1, 2).long())
            # add a select for the right span to compute loss
            gold_span_end_loc = []
            span_end = span_end.view(
                total_qa_count).squeeze().data.cpu().numpy()
            for i in range(0, total_qa_count):
                gold_span_end_loc.append(
                    max(span_end[i] * 3 + i * passage_length * 3, 0))
                gold_span_end_loc.append(
                    max(span_end[i] * 3 + i * passage_length * 3 + 1, 0))
                gold_span_end_loc.append(
                    max(span_end[i] * 3 + i * passage_length * 3 + 2, 0))
            gold_span_end_loc = span_start.new(gold_span_end_loc)
            pred_span_end_loc = []
            for i in range(0, total_qa_count):
                pred_span_end_loc.append(
                    max(best_span[i][1] * 3 + i * passage_length * 3, 0))
                pred_span_end_loc.append(
                    max(best_span[i][1] * 3 + i * passage_length * 3 + 1, 0))
                pred_span_end_loc.append(
                    max(best_span[i][1] * 3 + i * passage_length * 3 + 2, 0))
            predicted_end = span_start.new(pred_span_end_loc)

            _yesno = span_yesno_logits.view(-1).index_select(
                0, gold_span_end_loc).view(-1, 3)
            loss += nll_loss(torch.nn.functional.log_softmax(_yesno, dim=-1),
                             yesno_list.view(-1),
                             ignore_index=-1)

            _yesno = span_yesno_logits.view(-1).index_select(
                0, predicted_end).view(-1, 3)
            self._span_yesno_accuracy(_yesno,
                                      yesno_list.view(-1),
                                      mask=qa_mask)

            output_dict["loss"] = loss

        # Compute the EM and F1 on SQuAD and add the tokenized input to the output.
        output_dict['best_span_str'] = []
        output_dict['qid'] = []
        output_dict['yesno'] = []
        best_span_cpu = best_span.detach().cpu().numpy()
        for i in range(batch_size):
            passage_str = metadata[i]['original_passage']
            offsets = metadata[i]['token_offsets']
            f1_score = 0.0
            per_dialog_best_span_list = []
            per_dialog_yesno_list = []
            per_dialog_query_id_list = []
            for per_dialog_query_index, (iid, answer_texts) in enumerate(
                    zip(metadata[i]["instance_id"],
                        metadata[i]["answer_texts_list"])):
                predicted_span = tuple(best_span_cpu[i * max_qa_count +
                                                     per_dialog_query_index])
                start_offset = offsets[predicted_span[0]][0]
                end_offset = offsets[predicted_span[1]][1]
                yesno_pred = predicted_span[2]
                per_dialog_yesno_list.append(yesno_pred)
                per_dialog_query_id_list.append(iid)
                best_span_string = passage_str[start_offset:end_offset]
                per_dialog_best_span_list.append(best_span_string)
                if answer_texts:
                    if len(answer_texts) > 1:
                        t_f1 = []
                        # Compute F1 over N-1 human references and average the scores.
                        for answer_index in range(len(answer_texts)):
                            idxes = list(range(len(answer_texts)))
                            idxes.pop(answer_index)
                            refs = [answer_texts[z] for z in idxes]
                            t_f1.append(
                                squad_eval.metric_max_over_ground_truths(
                                    squad_eval.f1_score, best_span_string,
                                    refs))
                        f1_score = 1.0 * sum(t_f1) / len(t_f1)
                    else:
                        f1_score = squad_eval.metric_max_over_ground_truths(
                            squad_eval.f1_score, best_span_string,
                            answer_texts)
                self._official_f1(100 * f1_score)
            output_dict['qid'].append(per_dialog_query_id_list)
            output_dict['best_span_str'].append(per_dialog_best_span_list)
            output_dict['yesno'].append(per_dialog_yesno_list)
        return output_dict
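The ignore_index=-1 pattern above lets no-answer rows drop out of the loss; a small sketch that emulates util.masked_log_softmax with plain torch (the -1e7 fill value mirrors the masking used elsewhere in this snippet):

import torch
import torch.nn.functional as F

logits = torch.randn(6, 30)                # 6 QA pairs over a 30-token passage
mask = torch.ones(6, 30)
mask[:, 20:] = 0                           # last 10 positions are padding

masked_logits = logits.masked_fill(mask == 0, -1e7)
log_probs = F.log_softmax(masked_logits, dim=1)

# Gold starts; -1 marks "no answer" rows, which ignore_index skips entirely.
span_start = torch.tensor([3, 7, -1, 0, 19, -1])
loss = F.nll_loss(log_probs, span_start, ignore_index=-1)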
Example #43
def train():
    min_loss = 1e10
    max_acc = 0
    patience_cnt = 0
    val_loss_values = []
    val_acc_values = []
    best_epoch = 0

    t = time.time()
    print(count_parameters(model) / 1000)  # parameter count in thousands
    model.train()
    step = 0
    for epoch in range(args.epochs):
        torch.cuda.empty_cache()
        loss_train = 0.0
        correct = 0
        for i, data in enumerate(train_loader):
            optimizer.zero_grad()
            data = data.to(args.device)
            out = model(data)
            loss = F.nll_loss(out, data.y)
            if torch.isnan(loss):
                print('Warning: NaN loss')
            loss.backward()
            step += 1
            optimizer.step()
            loss_train += loss.item()
            pred = out.max(dim=1)[1]
            correct += pred.eq(data.y).sum().item()
        nni.report_intermediate_result(time.time() - t)

    t2 = time.time()
    time_cost = t2 - t
    print('{:.2f}'.format(time_cost))
    nni.report_final_result(time_cost)
    #     acc_train = correct / len(train_loader.dataset)
    #     acc_val, loss_val = compute_test(val_loader)

    #     # # client_send(gpuid, 1)
    #     # # if epoch>5:
    #     # #     client_send(gpuid, 1)

    #     outs='Epoch: {:04d}'.format(epoch + 1)+'\tloss_train: {:.6f}'.format(loss_train)+\
    #           '\tacc_train: {:.6f}'.format(acc_train)+ '\tloss_val: {:.6f}'.format(loss_val)+\
    #           '\tacc_val: {:.6f}'.format(acc_val)+'\ttime: {:.6f}s'.format(time.time() - t)
    #     nni.report_intermediate_result(-loss_val) 
    #     print(outs)
    #     logging.info(outs)

    #     val_loss_values.append(loss_val)
    #     val_acc_values.append(acc_val)
    #     torch.save(model.state_dict(), res/'{}.pth'.format(epoch))
    #     if val_loss_values[-1] < min_loss:
    #         min_loss = val_loss_values[-1]
    #         best_epoch = epoch
    #         patience_cnt = 0
    #     else:
    #         patience_cnt += 1

    #     # if val_acc_values[-1] > max_acc:
    #     #     max_acc = val_acc_values[-1]
    #     #     best_epoch=epoch
    #     #     patience_cnt = 0
    #     # else:
    #     #     patience_cnt +=1

    #     if patience_cnt == args.patience:
    #         break

    #     files = glob.glob(res.as_posix()+'/*.pth')
    #     for f in files:
    #         epoch_nb = int(f.split('/')[-1].split('.')[0])
    #         if epoch_nb < best_epoch:
    #             os.remove(f)

    # files = glob.glob(res.as_posix()+'/*.pth')
    # for f in files:
    #     epoch_nb = int(f.split('/')[-1].split('.')[0])
    #     if epoch_nb > best_epoch:
    #         os.remove(f)
    # outs='Optimization Finished! Total time elapsed: {:.6f}'.format(time.time() - t)
    # print(outs)

    return best_epoch
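The prediction bookkeeping in the loop above relies on out.max(dim=1)[1], which is just argmax; a toy illustration:

import torch

out = torch.randn(16, 7)             # toy log-probabilities: 16 graphs, 7 classes
y = torch.randint(7, (16,))

pred = out.max(dim=1)[1]             # equivalent to out.argmax(dim=1)
correct = pred.eq(y).sum().item()
accuracy = correct / y.size(0)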
Example #44
model = CNNModel().to(device)

learning_rate = 0.01
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Train model
iter = 0 
for epoch in range(num_epochs):
    for i, (images,labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        output = model(images)
        
        loss = F.nll_loss(output, labels)
        loss.backward()
        optimizer.step()
        iter = iter + 1
        if (i + 1) % 100 == 0:
            print('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f'
                  % (epoch + 1, num_epochs, i + 1, len(train_dataset) // batch_size, loss.item()))

model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Test Accuracy: %.2f %%' % (100.0 * correct / total))
Example #45
    def forward(self,
                question: Dict[str, torch.LongTensor],
                passage: Dict[str, torch.LongTensor],
                #passages_length: torch.LongTensor = None,
                #correct_passage: torch.LongTensor = None,
                span_start: torch.IntTensor = None,
                span_end: torch.IntTensor = None,
                metadata = None) -> Dict[str, torch.Tensor]:
     
        # shape: B x Tq x E
        embedded_question = self._embedder(question)
        embedded_passage = self._embedder(passage)

        batch_size = embedded_question.size(0)
        total_passage_length = embedded_passage.size(1)

        question_mask = util.get_text_field_mask(question)
        passage_mask = util.get_text_field_mask(passage)

        # shape: B x T x 2H
        encoded_question = self._dropout(self._question_encoder(embedded_question, question_mask))
        encoded_passage = self._dropout(self._passage_encoder(embedded_passage, passage_mask))
        passage_mask = passage_mask.float()
        question_mask = question_mask.float()

        encoding_dim = encoded_question.size(-1)

        # shape: B x 2H
        if encoded_passage.is_cuda:
            cuda_device = encoded_passage.get_device()
            gru_hidden = Variable(torch.zeros(batch_size, encoding_dim).cuda(cuda_device))
        else:
            gru_hidden = Variable(torch.zeros(batch_size, encoding_dim))

        question_awared_passage = []
        for timestep in range(total_passage_length):
            # shape: B x Tq = attention(B x 2H, B x Tq x 2H)
            attn_weights = self._question_attention_for_passage(encoded_passage[:, timestep, :], encoded_question, question_mask)
            # shape: B x 2H = weighted_sum(B x Tq x 2H, B x Tq)
            attended_question = util.weighted_sum(encoded_question, attn_weights)
            # shape: B x 4H
            passage_question_combined = torch.cat([encoded_passage[:, timestep, :], attended_question], dim=-1)
            # shape: B x 4H
            gate = torch.sigmoid(self._gate(passage_question_combined))
            gru_input = gate * passage_question_combined
            # shape: B x 2H
            gru_hidden = self._dropout(self._gru_cell(gru_input, gru_hidden))
            question_awared_passage.append(gru_hidden)

        # shape: B x T x 2H
        # question aware passage representation v_P
        question_awared_passage = torch.stack(question_awared_passage, dim=1)

        # compute question vector r_Q
        # shape: B x T = attention(B x 2H, B x T x 2H)
        v_r_Q_tiled = self._v_r_Q.unsqueeze(0).expand(batch_size, encoding_dim)
        attn_weights = self._question_attention_for_question(v_r_Q_tiled, encoded_question, question_mask)
        # shape: B x 2H
        r_Q = util.weighted_sum(encoded_question, attn_weights)
        # shape: B x T = attention(B x 2H, B x T x 2H)
        span_start_logits = self._passage_attention_for_answer(r_Q, question_awared_passage, passage_mask)
        span_start_logits = util.replace_masked_values(span_start_logits, passage_mask, -1e7)
        span_start_probs = util.masked_softmax(span_start_logits, passage_mask)
        span_start_log_probs = util.masked_log_softmax(span_start_logits, passage_mask)
        # shape: B x 2H
        c_t = util.weighted_sum(question_awared_passage, span_start_probs)
        # shape: B x 2H
        h_1 = self._dropout(self._answer_net(c_t, r_Q))

        span_end_logits = self._passage_attention_for_answer(h_1, question_awared_passage, passage_mask)
        span_end_logits = util.replace_masked_values(span_end_logits, passage_mask, -1e7)
        span_end_probs = util.masked_softmax(span_end_logits, passage_mask)
        span_end_log_probs = util.masked_log_softmax(span_end_logits, passage_mask)
        
        best_span = self.get_best_span(span_start_logits, span_end_logits)

        #num_passages = passages_length.size(1)
        #acc = Variable(torch.zeros(batch_size, num_passages + 1)).cuda(cuda_device).long()

        #acc[:, 1:num_passages+1] = torch.cumsum(passages_length, dim=1)

        #g_batch = []
        #for b in range(batch_size):
        #    g = []
        #    for i in range(num_passages):
        #        if acc[b, i+1].data[0] > acc[b, i].data[0]:
        #            attn_weights = self._passage_attention_for_ranking(r_Q[b:b+1], question_awared_passage[b:b+1, acc[b, i].data[0]: acc[b, i+1].data[0], :], passage_mask[b:b+1, acc[b, i].data[0]: acc[b, i+1].data[0]])
        #            r_P = util.weighted_sum(question_awared_passage[b:b+1, acc[b, i].data[0]:acc[b, i+1].data[0], :], attn_weights)
        #            question_passage_combined = torch.cat([r_Q[b:b+1], r_P], dim=-1)
        #            gi = self._dropout(self._match_layer_2(F.tanh(self._dropout(self._match_layer_1(question_passage_combined)))))
        #            g.append(gi)
        #        else:
        #            g.append(Variable(torch.zeros(1, 1)).cuda(cuda_device))
        #    g = torch.cat(g, dim=1)
        #    g_batch.append(g)
        
        #t2 = time.time()
        #g = torch.cat(g_batch, dim=0)
        #passage_log_probs = F.log_softmax(g, dim=-1)

        output_dict = {}
        if span_start is not None:
            AP_loss = F.nll_loss(span_start_log_probs, span_start.squeeze(-1)) +\
                F.nll_loss(span_end_log_probs, span_end.squeeze(-1))
            #PR_loss = F.nll_loss(passage_log_probs, correct_passage.squeeze(-1))
            #loss = self._r * AP_loss + self._r * PR_loss
            self._span_start_accuracy(span_start_logits, span_start.squeeze(-1))
            self._span_end_accuracy(span_end_logits, span_end.squeeze(-1))
            self._span_accuracy(best_span, torch.stack([span_start, span_end], -1))
            output_dict['loss'] = AP_loss

        _, max_start = torch.max(span_start_probs, dim=1)
        _, max_end = torch.max(span_end_probs, dim=1)
        #t3 = time.time()
        output_dict['span_start_idx'] = max_start
        output_dict['span_end_idx'] = max_end
        #t4 = time.time()
        #global ITE
        #ITE += 1
        #if (ITE % 100 == 0):
        #    print(" gold %i:%i|predicted %i:%i" %(span_start.squeeze(-1)[0], span_end.squeeze(-1)[0], max_start.data[0], max_end.data[0]))
        if metadata is not None:
             output_dict['best_span_str'] = []
             question_tokens = []
             passage_tokens = []
             for i in range(batch_size): 
                 question_tokens.append(metadata[i]['question_tokens'])
                 passage_tokens.append(metadata[i]['passage_tokens']) 
                 passage_str = metadata[i]['original_passage']
                 offsets = metadata[i]['token_offsets']
                 predicted_span = tuple(best_span[i].data.cpu().numpy())
                 start_offset = offsets[predicted_span[0]][0]
                 end_offset = offsets[predicted_span[1]][1]
                 best_span_string = passage_str[start_offset:end_offset]
                 output_dict['best_span_str'].append(best_span_string)
                 answer_texts = metadata[i].get('answer_texts', [])
                 if answer_texts:
                     self._squad_metrics(best_span_string, answer_texts)
             output_dict['question_tokens'] = question_tokens
             output_dict['passage_tokens'] = passage_tokens
        
        #t5 = time.time()
        #print("Total: %.5f" % (t5-t0))
        #print("Batch processing 1: %.5f" % (t2-t1))
        #print("Batch processing 2: %.5f" % (t4-t3))
        return output_dict
Example #46
num_batch = len(dataset) // opt.batchSize
miou_list = list()
for epoch in range(opt.nepoch):
    for i, data in enumerate(dataloader, 0):
        points, target = data
        points, target = Variable(points), Variable(target)
        points = points.transpose(2, 1)
        points, target = points.cuda(), target.cuda()
        optimizer.zero_grad()
        classifier = classifier.train()
        pred = classifier(points)
        pred = pred.view(-1, num_classes)
        target = target.view(-1, 1)[:, 0] - 1
        #print(pred.size(), target.size())
        loss = F.nll_loss(pred, target)
        loss.backward()
        optimizer.step()
        pred_choice = pred.data.max(1)[1]
        correct = pred_choice.eq(target.data).cpu().sum()
        print('[%d: %d/%d] train loss: %f accuracy: %f' %
              (epoch, i, num_batch, loss.item(),
               correct.item() / float(opt.batchSize * opt.num_points)))

        if i % 100 == 0:

            j, data = next(enumerate(testdataloader, 0))
            points, target = data
            points, target = Variable(points), Variable(target)
            points = points.transpose(2, 1)
            points, target = points.cuda(), target.cuda()
Example #47
def get_cls_loss(pred, label, select):
    if len(select.size()) == 0 or select.size() == torch.Size([0]):
        return 0
    pred = torch.index_select(pred, 0, select)
    label = torch.index_select(label, 0, select)
    return F.nll_loss(pred, label)
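A hypothetical usage sketch for the helper above: pred holds log-probabilities and select picks the rows that actually carry a label (the size checks guard the empty-selection case):

import torch
import torch.nn.functional as F

log_probs = F.log_softmax(torch.randn(10, 2), dim=1)  # 10 anchors, 2 classes
labels = torch.randint(2, (10,))
select = torch.tensor([0, 2, 5, 7])                   # rows with valid labels

loss = get_cls_loss(log_probs, labels, select)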
Example #48
def mnist_tutorial(nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   train_end=-1, test_end=-1, learning_rate=LEARNING_RATE):
  """
  MNIST cleverhans tutorial
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :return: an AccuracyReport object
  """
  # Train a pytorch MNIST model
  torch_model = PytorchMnistModel()
  if torch.cuda.is_available():
    torch_model = torch_model.cuda()
  report = AccuracyReport()

  train_loader = torch.utils.data.DataLoader(
      datasets.MNIST('data', train=True, download=True,
                     transform=transforms.ToTensor()),
      batch_size=batch_size, shuffle=True)
  test_loader = torch.utils.data.DataLoader(
      datasets.MNIST('data', train=False, transform=transforms.ToTensor()),
      batch_size=batch_size)

  # Truncate the datasets so that our test run more quickly
  train_loader.dataset.train_data = train_loader.dataset.train_data[
      :train_end]
  test_loader.dataset.test_data = test_loader.dataset.test_data[:test_end]

  # Train our model
  optimizer = optim.Adam(torch_model.parameters(), lr=learning_rate)
  train_loss = []

  total = 0
  correct = 0
  step = 0
  for _epoch in range(nb_epochs):
    for xs, ys in train_loader:
      xs, ys = Variable(xs), Variable(ys)
      if torch.cuda.is_available():
        xs, ys = xs.cuda(), ys.cuda()
      optimizer.zero_grad()
      preds = torch_model(xs)
      loss = F.nll_loss(preds, ys)
      loss.backward()  # calc gradients
      train_loss.append(loss.data.item())
      optimizer.step()  # update gradients

      preds_np = preds.data.cpu().numpy()
      correct += (np.argmax(preds_np, axis=1) == ys.cpu().numpy()).sum()
      total += len(xs)
      step += 1
      if total % 1000 == 0:
        acc = float(correct) / total
        print('[%s] Training accuracy: %.2f%%' % (step, acc * 100))
        total = 0
        correct = 0

  # Evaluate on clean data
  total = 0
  correct = 0
  for xs, ys in test_loader:
    xs, ys = Variable(xs), Variable(ys)
    if torch.cuda.is_available():
      xs, ys = xs.cuda(), ys.cuda()

    preds = torch_model(xs)
    preds_np = preds.data.cpu().numpy()

    correct += (np.argmax(preds_np, axis=1) == ys.cpu().numpy()).sum()
    total += len(xs)

  acc = float(correct) / total
  report.clean_train_clean_eval = acc
  print('[%s] Clean accuracy: %.2f%%' % (step, acc * 100))

  # We use tf for evaluation on adversarial data
  sess = tf.Session()
  x_op = tf.placeholder(tf.float32, shape=(None, 1, 28, 28,))

  # Convert pytorch model to a tf_model and wrap it in cleverhans
  tf_model_fn = convert_pytorch_model_to_tf(torch_model)
  cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

  # Create an FGSM attack
  fgsm_op = FastGradientMethod(cleverhans_model, sess=sess)
  fgsm_params = {'eps': 0.3,
                 'clip_min': 0.,
                 'clip_max': 1.}
  adv_x_op = fgsm_op.generate(x_op, **fgsm_params)
  adv_preds_op = tf_model_fn(adv_x_op)

  # Run an evaluation of our model against fgsm
  total = 0
  correct = 0
  for xs, ys in test_loader:
    adv_preds = sess.run(adv_preds_op, feed_dict={x_op: xs})
    correct += (np.argmax(adv_preds, axis=1) == ys.numpy()).sum()
    total += len(xs)

  acc = float(correct) / total
  print('Adv accuracy: {:.3f}'.format(acc * 100))
  report.clean_train_adv_eval = acc
  return report
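The numpy-based accuracy bookkeeping above mixes numpy arrays and torch tensors; a torch-native sketch of the same clean-data evaluation, assuming the torch_model and test_loader defined in this tutorial:

import torch

correct = 0
total = 0
with torch.no_grad():
    for xs, ys in test_loader:
        if torch.cuda.is_available():
            xs, ys = xs.cuda(), ys.cuda()
        preds = torch_model(xs)
        correct += (preds.argmax(dim=1) == ys).sum().item()
        total += ys.size(0)
print('Clean accuracy: %.2f%%' % (100.0 * correct / total))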
Example #49
    def forward(self,
                question: Dict[str, torch.LongTensor],
                passage: Dict[str, torch.LongTensor],
                list_,
                passages_length: torch.LongTensor = None,
                correct_passage: torch.LongTensor = None,
                span_start: torch.IntTensor = None,
                span_end: torch.IntTensor = None) -> Dict[str, torch.Tensor]:
        # shape: B x N x T x E
        embedded_passage_list = self._embedder(list_)
        # unpack B, N, T, E
        (batch_size, num_passages, max_p, embedding_size) = embedded_passage_list.size()
     
        # shape: B x Tq x E
        embedded_question = self._embedder(question)
        embedded_passage = embedded_passage_list.view(batch_size, -1, embedding_size)
        # embedded_passage = self._embedder(passage)

        # batch_size = embedded_question.size(0)
        total_passage_length = embedded_passage.size(1)

        question_mask = util.get_text_field_mask(question)
        # passage_mask = util.get_text_field_mask(passage)
        passage_list_mask = util.get_text_field_mask(list_, 1)
        passage_mask = passage_list_mask.view(batch_size, -1)

        # shape: B x T x 2H
        encoded_question = self._dropout(self._question_encoder(embedded_question, question_mask))
        encoded_passage = self._dropout(self._passage_encoder(embedded_passage, passage_mask))
        passage_mask = passage_mask.float()
        question_mask = question_mask.float()

        encoding_dim = encoded_question.size(-1)
        #encoded_passage_list = self._dropout(self._passage_encoder(embedded_passage_list, passage_list_mask))

        # shape: B x 2H
        if encoded_passage.is_cuda:
            cuda_device = encoded_passage.get_device()
            gru_hidden = Variable(torch.zeros(batch_size, encoding_dim).cuda(cuda_device))
        else:
            gru_hidden = Variable(torch.zeros(batch_size, encoding_dim))

        question_awared_passage = []
        for timestep in range(total_passage_length):
            u_t_P = encoded_passage[:, timestep, :]
            # shape: B x Tq = attention(B x 2H, B x Tq x 2H)
            attn_weights = self._question_attention_for_passage(encoded_passage[:, timestep, :], encoded_question, question_mask)
            # shape: B x 2H = weighted_sum(B x Tq x 2H, B x Tq)
            attended_question = util.weighted_sum(encoded_question, attn_weights)
            # shape: B x 4H
            passage_question_combined = torch.cat([encoded_passage[:, timestep, :], attended_question], dim=-1)
            # shape: B x 4H
            gate = torch.sigmoid(self._gate(passage_question_combined))
            gru_input = gate * passage_question_combined
            # shape: B x 2H
            gru_hidden = self._dropout(self._gru_cell(gru_input, gru_hidden))
            question_awared_passage.append(gru_hidden)

        # shape: B x T x 2H
        # question aware passage representation v_P
        question_awared_passage = torch.stack(question_awared_passage, dim=1)

        # compute question vector r_Q
        # shape: B x T = attention(B x 2H, B x T x 2H)
        v_r_Q_tiled = self._v_r_Q.unsqueeze(0).expand(batch_size, encoding_dim)
        attn_weights = self._question_attention_for_question(v_r_Q_tiled, encoded_question, question_mask)
        # shape: B x 2H
        r_Q = util.weighted_sum(encoded_question, attn_weights)
        # shape: B x T = attention(B x 2H, B x T x 2H)
        span_start_logits = self._passage_attention_for_answer(r_Q, question_awared_passage, passage_mask)
        span_start_logits = util.replace_masked_values(span_start_logits, passage_mask, -1e7)
        span_start_probs = util.masked_softmax(span_start_logits, passage_mask)
        span_start_log_probs = util.masked_log_softmax(span_start_logits, passage_mask)
        # shape: B x 2H
        c_t = util.weighted_sum(question_awared_passage, span_start_probs)
        # shape: B x 2H
        h_1 = self._dropout(self._answer_net(c_t, r_Q))

        span_end_logits = self._passage_attention_for_answer(h_1, question_awared_passage, passage_mask)
        span_end_logits = util.replace_masked_values(span_end_logits, passage_mask, -1e7)
        span_end_probs = util.masked_softmax(span_end_logits, passage_mask)
        span_end_log_probs = util.masked_log_softmax(span_end_logits, passage_mask)

        #num_passages = passages_length.size(1)
        #cum_passages = torch.cumsum(passages_length, dim=1)
        g = []
        for i in range(num_passages):
            attn_weights = self._passage_attention_for_ranking(r_Q, question_awared_passage[:, i*max_p: (i + 1)*max_p, :], passage_mask[:, i*max_p: (i + 1)*max_p])
            r_P = util.weighted_sum(question_awared_passage[:, i*max_p: (i + 1)*max_p, :], attn_weights)
            question_passage_combined = torch.cat([r_Q, r_P], dim=-1)
            gi = self._dropout(self._match_layer_2(torch.tanh(self._match_layer_1(question_passage_combined))))
            g.append(gi)
        # compute r_P
        # shape: B x T = attention(B x 2H, B x T x 2H)
        #attn_weights = self._passage_attention_for_ranking(r_Q, question_awared_passage, passage_mask)
        # shape: B x 2H
        #r_P = util.weighted_sum(question_awared_passage, attn_weights)
        # shape: B x 4H
        #question_passage_combined = torch.cat([r_Q, r_P], dim=-1)
        # shape: B x 10
        #g = self._dropout(self._match_layer_2(F.tanh(self._match_layer_1(question_passage_combined))))
        #cum_passages = torch.cumsum(passages_length, dim=1)
        #for b in range(batch_size):
        #    for i in range(num_passages):
        #        attn_weights = self._passage_attention_for_ranking(r_Q[b], question_awared_passage
        
        padded_span_start = span_start.clone()
        padded_span_end = span_end.clone()
        cumsum = torch.cumsum(passage_mask.long(), dim=1)
        for b in range(batch_size):
             padded_span_start[b] = (cumsum[b] == span_start[b] + 1).nonzero()[0][0]
             padded_span_end[b] = (cumsum[b] == span_end[b] + 1).nonzero()[0][0]
            
        g = torch.cat(g, dim=1)
        passage_log_probs = F.log_softmax(g, dim=-1)

        output_dict = {}
        if span_start is not None:
            AP_loss = F.nll_loss(span_start_log_probs, padded_span_start.squeeze(-1)) +\
                F.nll_loss(span_end_log_probs, padded_span_end.squeeze(-1))
            PR_loss = F.nll_loss(passage_log_probs, correct_passage.squeeze(-1))
            loss = self._r * AP_loss + self._r * PR_loss
            output_dict['loss'] = loss

        _, max_start = torch.max(span_start_probs, dim=1)
        _, max_end = torch.max(span_end_probs, dim=1)
        #max_start = max_start.cpu().data[0]
        #max_end = max_end.cpu().data[0]
        #unpad
        for b in range(batch_size):
            max_start.data[b] = cumsum.data[b, max_start.data[b]] - 1
            max_end.data[b] = cumsum.data[b, max_end.data[b]] - 1
        output_dict['span_start_idx'] = max_start
        output_dict['span_end_idx'] = max_end

        self._num_iter += 1
        if (self._num_iter % 50 == 0):
            print(" gold %i:%i|predicted %i:%i" %(span_start.squeeze(-1)[0], span_end.squeeze(-1)[0], max_start.cpu().data[0], max_end.cpu().data[0]))

        return output_dict
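The cumsum/nonzero trick above maps an index over the concatenated real tokens to its padded position; a toy illustration:

import torch

# 1 = real token, 0 = padding between passages (toy mask for one row)
mask = torch.tensor([1, 1, 1, 0, 0, 1, 1, 0, 1])
cumsum = torch.cumsum(mask, dim=0)   # [1, 2, 3, 3, 3, 4, 5, 5, 6]

# The k-th real token (0-based) sits where the running count first hits k + 1.
k = 4
padded_pos = (cumsum == k + 1).nonzero()[0][0]
print(padded_pos.item())             # -> 6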
Example #50
    def test(self):
        print("Starting attack on", self.modelName, "...")
        # Accuracy counter
        allAdversarialExamples = []
        allAccuracies = []

        for epsilon in self.epsilons:
            correct = 0
            loss = 0
            currentExamples = []
            # Loop over all examples in test set
            for batch in tqdm(self.model.testDL):

                # Get image data and true classification from batch
                data, target = batch

                # Send the data and label to the device
                data, target = data.to(self.device), target.to(self.device)

                # Set requires_grad attribute of tensor. Important for Attack
                data.requires_grad = True

                # Forward pass the data through the model
                probabilities, prediction = self.predict(data)

                # Calculate the loss
                currentLoss = F.nll_loss(probabilities, target)

                # Zero all existing gradients
                self.model.zero_grad()

                # Calculate gradients of model in backward pass
                currentLoss.backward(retain_graph=True)

                # Call FGSM Attack
                dataGrad = data.grad
                perturbedData = self.attack(data, dataGrad, epsilon)

                # Re-classify the perturbed image
                probabilities, finalPred = self.predict(perturbedData)

                for i in range(len(data)):

                    # If initial prediction was incorrect, skip image
                    if prediction[i].item() != target[i].item():
                        continue

                    # Save adversarial example
                    adv_ex = perturbedData.squeeze().detach().cpu().numpy()
                    npProb = probabilities.detach().cpu().numpy()

                    # Get one image from batch
                    if self.model.batchSize == 64:
                        adv_ex = adv_ex[i]
                        npProb = npProb[i]
                    # Check for success
                    if finalPred[i].item() == target[i].item():
                        correct += 1
                        # Special case for saving 0 epsilon examples
                        if (epsilon == 0) and (len(currentExamples) < 5):
                            currentExamples.append((prediction[i].item(), finalPred[i].item(), adv_ex, npProb))
                    else:
                        # Save some adv examples for visualization later
                        if len(currentExamples) < 5:
                            currentExamples.append((prediction[i].item(), finalPred[i].item(), adv_ex, npProb))

            # Calculate final accuracy for this epsilon
            total = len(self.model.testDL.dataset)
            accuracy = correct / total
            print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(
                epsilon, correct, total, accuracy))
            # Sleep for console output
            sleep(0.1)
            # Append results from current epsilon to output
            allAccuracies.append(accuracy)
            allAdversarialExamples.append(currentExamples)
        return allAccuracies, allAdversarialExamples
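self.attack above presumably implements the classic FGSM step; a minimal sketch under that assumption (the [0, 1] clamp assumes unnormalized image pixels):

import torch

def fgsm_attack(data, data_grad, epsilon):
    # Move each input element by epsilon in the direction that
    # increases the loss, then clamp back to the valid pixel range.
    perturbed = data + epsilon * data_grad.sign()
    return torch.clamp(perturbed, 0.0, 1.0)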
Example #51
    def train(self,
              model,
              optimizer,
              data_loader,
              batch_size,
              num_epochs,
              batches_per_epoch,
              save_every,
              print_every,
              check_every,
              seeds,
              test_max_len=50,
              test_temperature=1.0):
        num_tokens = data_loader.get_vocab_size()

        for ind_epoch in range(num_epochs):
            epoch_start = datetime.datetime.now()
            epoch_losses = []
            for ind_batch in range(batches_per_epoch):
                batch = data_loader.get_random_train_batch(batch_size)

                model.zero_grad()
                probas, _ = model(batch)

                loss = F.nll_loss(
                    probas[:, :-1].contiguous().view(-1, num_tokens),
                    batch[:, 1:].contiguous().view(-1))

                epoch_losses.append(loss.item())

                loss.backward()

                optimizer.step()

            epoch_loss = np.mean(epoch_losses)
            validation_loss = self.calc_validation_loss(
                model, data_loader, batch_size)
            optimizer.update(validation_loss)

            if ind_epoch % save_every == 0:
                self.save_checkpoint(ind_epoch, data_loader, model, optimizer)

            if ind_epoch % print_every == 0:
                epoch_seconds = round(
                    (datetime.datetime.now() - epoch_start).total_seconds(), 2)

                print("Epoch:", ind_epoch + 1, "lr: %.2E" % optimizer.get_lr(),
                      "seconds:", epoch_seconds, "train loss:", epoch_loss,
                      "valid loss:", validation_loss)

            if ind_epoch % check_every == 0:
                for seed in seeds:
                    out = self.generate_sample(model, data_loader, seed,
                                               test_max_len, test_temperature)

                    out = re.sub("_PAD_", "", out).strip()
                    out = re.sub("_SEP_", " # " * 10, out)

                    print(out)
                    print()

                print()
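The slicing in the loss above is the standard next-token objective; a toy sketch (probas must already hold log-probabilities for nll_loss, which is why the model would end in log_softmax):

import torch
import torch.nn.functional as F

batch, seq_len, num_tokens = 2, 6, 100
probas = F.log_softmax(torch.randn(batch, seq_len, num_tokens), dim=-1)
tokens = torch.randint(num_tokens, (batch, seq_len))

# Predict token t+1 from position t: drop the last prediction,
# drop the first target, then flatten both for nll_loss.
loss = F.nll_loss(
    probas[:, :-1].contiguous().view(-1, num_tokens),
    tokens[:, 1:].contiguous().view(-1))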
Example #52
def train_adv(args, epoch, model, trainLoader, optimizer, trainF, config,
              scheduler):
    model.train()
    attack = FGSM_Attack(model, F.nll_loss)
    nProcessed = 0
    nTrain = len(trainLoader.dataset)
    # nIter_per_epoch = nTrain // batch_size
    # dice_loss = dloss.DiceLoss(nclass=2)
    for batch_idx, (data, target) in enumerate(trainLoader):
        if args.cuda:
            data, target = data.cuda(), target.type(torch.LongTensor).cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()

        _, output_adv, output = attack.fgsm(data,
                                            target,
                                            softmax=F.log_softmax)

        output = output.permute(0, 2, 3, 4, 1).contiguous()
        output = output.view(output.numel() // 3, 3)  # 3 labels
        # output = F.log_softmax(output, dim=-1)  # dim marked
        output_adv = output_adv.permute(0, 2, 3, 4, 1).contiguous()
        output_adv = output_adv.view(output_adv.numel() // 3, 3)  # 3 labels

        target = target.view(target.numel())

        # add CrossEntropyLoss
        loss = F.nll_loss(output, target)
        adv_loss = F.nll_loss(output_adv, target)

        # loss.backward() becomes:
        # with amp.scale_loss(loss, optimizer) as scaled_loss:
        #     scaled_loss.backward()

        loss.backward()
        adv_loss.backward()
        optimizer.step()
        # update learning rate
        scheduler(optimizer, i=batch_idx, epoch=epoch)

        nProcessed += len(data)

        # get the index of the max log-probability
        pred = torch.argmax(output, dim=-1)
        # print(output.size(), pred.size(), target.size())
        dice = evaluate_dice(pred, target, cpu=True)

        incorrect = pred.ne(target.data).cpu().sum()
        partialEpoch = int(epoch + batch_idx / len(trainLoader))
        loss_data = loss.detach().data.cpu().numpy()
        adv_loss_data = adv_loss.detach().data.cpu().numpy()
        print(
            'Train Epoch: {} [{}/{} ({:.0f}%)], Loss: {:.4f}, Kidney_Dice: {:.6f}, Tumor_Dice: {:.6f}, Adv_loss: {:.4f}'
            .format(partialEpoch, nProcessed, nTrain,
                    100. * batch_idx / len(trainLoader), loss_data, dice[0],
                    dice[1], adv_loss_data))
        #
        trainF.write('{},{},{},{},{}\n'.format(partialEpoch, loss_data,
                                               dice[0], dice[1],
                                               adv_loss_data))
        trainF.flush()
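The permute/view dance above flattens a 3-D segmentation volume into (N, C) rows; a toy sketch showing it matches nll_loss applied to the K-dimensional output directly:

import torch
import torch.nn.functional as F

B, C, D, H, W = 2, 3, 4, 4, 4                     # 3 labels, toy volume
output = F.log_softmax(torch.randn(B, C, D, H, W), dim=1)
target = torch.randint(C, (B, D, H, W))

# Same reshaping as above: channels last, then flatten to (N, C) vs (N,).
flat_out = output.permute(0, 2, 3, 4, 1).contiguous().view(-1, C)
loss = F.nll_loss(flat_out, target.view(-1))

# nll_loss also accepts (B, C, d1, d2, ...) inputs directly.
assert torch.allclose(loss, F.nll_loss(output, target))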
Example #53
    def forward(
            self,  # type: ignore
            question: Dict[str, torch.LongTensor],
            passage: Dict[str, torch.LongTensor],
            span_start: torch.IntTensor = None,
            span_end: torch.IntTensor = None,
            metadata: List[Dict[str, Any]] = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        question : Dict[str, torch.LongTensor]
            From a ``TextField``.
        passage : Dict[str, torch.LongTensor]
            From a ``TextField``.  The model assumes that this passage contains the answer to the
            question, and predicts the beginning and ending positions of the answer within the
            passage.
        span_start : ``torch.IntTensor``, optional
            From an ``IndexField``.  This is one of the things we are trying to predict - the
            beginning position of the answer with the passage.  This is an `inclusive` token index.
            If this is given, we will compute a loss that gets included in the output dictionary.
        span_end : ``torch.IntTensor``, optional
            From an ``IndexField``.  This is one of the things we are trying to predict - the
            ending position of the answer with the passage.  This is an `inclusive` token index.
            If this is given, we will compute a loss that gets included in the output dictionary.
        metadata : ``List[Dict[str, Any]]``, optional
            If present, this should contain the question ID, original passage text, and token
            offsets into the passage for each instance in the batch.  We use this for computing
            official metrics using the official SQuAD evaluation script.  The length of this list
            should be the batch size, and each dictionary should have the keys ``id``,
            ``original_passage``, and ``token_offsets``.  If you only want the best span string and
            don't care about official metrics, you can omit the ``id`` key.

        Returns
        -------
        An output dictionary consisting of:
        span_start_logits : torch.FloatTensor
            A tensor of shape ``(batch_size, passage_length)`` representing unnormalized log
            probabilities of the span start position.
        span_start_probs : torch.FloatTensor
            The result of ``softmax(span_start_logits)``.
        span_end_logits : torch.FloatTensor
            A tensor of shape ``(batch_size, passage_length)`` representing unnormalized log
            probabilities of the span end position (inclusive).
        span_end_probs : torch.FloatTensor
            The result of ``softmax(span_end_logits)``.
        best_span : torch.IntTensor
            The result of a constrained inference over ``span_start_logits`` and
            ``span_end_logits`` to find the most probable span.  Shape is ``(batch_size, 2)``
            and each offset is a token index.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.
        best_span_str : List[str]
            If sufficient metadata was provided for the instances in the batch, we also return the
            string from the original passage that the model thinks is the best answer to the
            question.
        """

        embedded_question = self._highway_layer(
            self._text_field_embedder(question))
        embedded_passage = self._highway_layer(
            self._text_field_embedder(passage))
        batch_size = embedded_question.size(0)
        passage_length = embedded_passage.size(1)
        question_mask = util.get_text_field_mask(question).float()
        passage_mask = util.get_text_field_mask(passage).float()
        question_lstm_mask = question_mask if self._mask_lstms else None
        passage_lstm_mask = passage_mask if self._mask_lstms else None

        encoded_question = self._dropout(
            self._phrase_layer(embedded_question, question_lstm_mask))
        encoded_passage = self._dropout(
            self._phrase_layer(embedded_passage, passage_lstm_mask))
        encoding_dim = encoded_question.size(-1)

        #New Question SA encoding
        sa_encoded_question = self._self_attention_layer(
            embedded_question, question_lstm_mask)
        sa_encoded_passage = self._self_attention_layer(
            embedded_passage, passage_lstm_mask)

        # Shape: (batch_size, passage_length, question_length)
        passage_question_similarity = self._matrix_attention(
            encoded_passage, encoded_question)
        sa_passage_question_similarity = self._sa_matrix_attention(
            sa_encoded_passage, sa_encoded_question)

        # Shape: (batch_size, passage_length, question_length)
        #print(passage_question_similarity.size())
        question_mask_temp = question_mask
        question_mask_temp = question_mask_temp.unsqueeze_(1)
        # print(question_mask_temp.size())

        # print(question_mask_temp.size())
        passage_question_attention = util.masked_softmax(
            passage_question_similarity, question_mask_temp)
        sa_passage_question_attention = util.masked_softmax(
            sa_passage_question_similarity, question_mask_temp)

        # Shape: (batch_size, passage_length, encoding_dim)
        passage_question_vectors = util.weighted_sum(
            encoded_question, passage_question_attention)
        sa_passage_question_vectors = util.weighted_sum(
            sa_encoded_question, sa_passage_question_attention)

        # We replace masked values with something really negative here, so they don't affect the
        # max below.
        masked_similarity = util.replace_masked_values(
            passage_question_similarity, question_mask, -1e7)
        # Shape: (batch_size, passage_length)
        question_passage_similarity = masked_similarity.max(
            dim=-1)[0].squeeze(-1)
        # Shape: (batch_size, passage_length)
        question_passage_attention = util.masked_softmax(
            question_passage_similarity, passage_mask)
        # Shape: (batch_size, encoding_dim)
        question_passage_vector = util.weighted_sum(
            encoded_passage, question_passage_attention)
        # Shape: (batch_size, passage_length, encoding_dim)
        tiled_question_passage_vector = question_passage_vector.unsqueeze(
            1).expand(batch_size, passage_length, encoding_dim)

        #print("Shape of SA Encoded:",sa_encoded_question.size(),sa_encoded_passage.size())
        #print("Required Shape of Encoded Passage:",encoded_passage.size(),passage_question_vectors.size())

        # Shape: (batch_size, passage_length, encoding_dim * 4 + 2*sa_dim )
        final_merged_passage = torch.cat([
            encoded_passage, sa_encoded_passage, sa_passage_question_vectors,
            passage_question_vectors,
            encoded_passage * passage_question_vectors,
            encoded_passage * tiled_question_passage_vector
        ],
                                         dim=-1)

        modeled_passage = self._dropout(
            self._modeling_layer(final_merged_passage, passage_lstm_mask))
        modeling_dim = modeled_passage.size(-1)

        # Shape: (batch_size, passage_length, encoding_dim * 4 + modeling_dim + 2*selfattention_dim))
        span_start_input = self._dropout(
            torch.cat([final_merged_passage, modeled_passage], dim=-1))
        # Shape: (batch_size, passage_length)
        span_start_logits = self._span_start_predictor(
            span_start_input).squeeze(-1)
        # Shape: (batch_size, passage_length)
        span_start_probs = util.masked_softmax(span_start_logits, passage_mask)

        # Shape: (batch_size, modeling_dim)
        span_start_representation = util.weighted_sum(modeled_passage,
                                                      span_start_probs)
        # Shape: (batch_size, passage_length, modeling_dim)
        tiled_start_representation = span_start_representation.unsqueeze(
            1).expand(batch_size, passage_length, modeling_dim)

        # Shape: (batch_size, passage_length, encoding_dim * 4 + modeling_dim * 3)
        span_end_representation = torch.cat([
            final_merged_passage, modeled_passage, tiled_start_representation,
            modeled_passage * tiled_start_representation
        ],
                                            dim=-1)
        # Shape: (batch_size, passage_length, encoding_dim)
        encoded_span_end = self._dropout(
            self._span_end_encoder(span_end_representation, passage_lstm_mask))
        # Shape: (batch_size, passage_length, encoding_dim * 4 + span_end_encoding_dim)
        span_end_input = self._dropout(
            torch.cat([final_merged_passage, encoded_span_end], dim=-1))
        span_end_logits = self._span_end_predictor(span_end_input).squeeze(-1)
        span_end_probs = util.masked_softmax(span_end_logits, passage_mask)
        span_start_logits = util.replace_masked_values(span_start_logits,
                                                       passage_mask, -1e7)
        span_end_logits = util.replace_masked_values(span_end_logits,
                                                     passage_mask, -1e7)
        best_span = self.get_best_span(span_start_logits, span_end_logits)

        span_start_logits_do = self._dropout(span_start_logits)
        na_logits_start = self._na_dense(passage_length)(span_start_logits_do)
        span_end_logits_do = self._dropout(span_end_logits)
        na_logits_end = self._na_dense(passage_length)(span_end_logits_do)
        na_logits = Softmax(dim=1)(na_logits_start) * Softmax(
            dim=1)(na_logits_end)
        na_probs = Softmax(dim=1)(na_logits)
            na_gt = (span_start == -1)
            na_inv = (1.0 - na_gt.float())

        output_dict = {
            "passage_question_attention": passage_question_attention,
            "span_start_logits": span_start_logits,
            "span_start_probs": span_start_probs,
            "span_end_logits": span_end_logits,
            "span_end_probs": span_end_probs,
            "best_span": best_span,
            "na_logits": na_logits,
            "na_probs": na_probs
        }

        # Compute the loss for training.
        if span_start is not None:
            loss = 0.0

            # calculate loss for answer existance
            loss += CrossEntropyLoss()(na_probs.type(torch.cuda.FloatTensor),
                                       na_gt.squeeze(-1).type(
                                           torch.cuda.LongTensor))
            self._na_accuracy(na_probs.type(torch.cuda.FloatTensor),
                              na_gt.squeeze(-1).type(torch.cuda.FloatTensor))

            # calculate loss if there is answer
            # loss for start
            preds_start = (
                na_inv.type(torch.cuda.FloatTensor) * util.masked_log_softmax(
                    span_start_logits.type(torch.cuda.FloatTensor),
                    passage_mask.type(torch.cuda.FloatTensor))).type(
                        torch.cuda.FloatTensor)
            y_start = (
                na_inv.squeeze(-1).type(torch.cuda.ByteTensor) *
                span_start.squeeze(-1).type(torch.cuda.ByteTensor)).type(
                    torch.cuda.LongTensor)
            loss += nll_loss(preds_start, y_start)

            # accuracy for start
            acc_p_start = na_inv.type(
                torch.cuda.FloatTensor) * span_start_logits.type(
                    torch.cuda.FloatTensor)
            acc_y_start = na_inv.squeeze(-1).type(
                torch.cuda.FloatTensor) * span_start.squeeze(-1).type(
                    torch.cuda.FloatTensor)
            self._span_start_accuracy(acc_p_start, acc_y_start)

            # loss for end
            preds_end = (na_inv.type(torch.cuda.FloatTensor) *
                         util.masked_log_softmax(
                             span_end_logits.type(torch.cuda.FloatTensor),
                             passage_mask.type(torch.cuda.FloatTensor))).type(
                                 torch.cuda.FloatTensor)
            y_end = (na_inv.squeeze(-1).type(torch.cuda.ByteTensor) *
                     span_end.squeeze(-1).type(torch.cuda.ByteTensor)).type(
                         torch.cuda.LongTensor)
            loss += nll_loss(preds_end, y_end)

            # accuracy for end
            acc_p_end = na_inv.type(
                torch.cuda.FloatTensor) * span_end_logits.type(
                    torch.cuda.FloatTensor)
            acc_y_end = na_inv.squeeze(-1).type(
                torch.cuda.FloatTensor) * span_end.squeeze(-1).type(
                    torch.cuda.FloatTensor)
            self._span_end_accuracy(acc_p_end, acc_y_end)

            # accuracy for span
            acc_p = na_inv.type(torch.cuda.FloatTensor) * best_span.type(
                torch.cuda.FloatTensor)
            acc_y = na_inv.type(torch.cuda.FloatTensor) * torch.cat([
                span_start.type(torch.cuda.FloatTensor),
                span_end.type(torch.cuda.FloatTensor)
            ], -1)
            self._span_accuracy(acc_p, acc_y)

            output_dict["loss"] = loss

        # Compute the EM and F1 on SQuAD and add the tokenized input to the output.
        if metadata is not None:
            output_dict['best_span_str'] = []
            question_tokens = []
            passage_tokens = []
            for i in range(batch_size):
                question_tokens.append(metadata[i]['question_tokens'])
                passage_tokens.append(metadata[i]['passage_tokens'])
                passage_str = metadata[i]['original_passage']
                offsets = metadata[i]['token_offsets']
                predicted_span = tuple(best_span[i].detach().cpu().numpy())
                start_offset = offsets[predicted_span[0]][0]
                end_offset = offsets[predicted_span[1]][1]
                best_span_string = passage_str[start_offset:end_offset]
                output_dict['best_span_str'].append(best_span_string)
                answer_texts = metadata[i].get('answer_texts', [])
                if answer_texts:
                    self._squad_metrics(best_span_string, answer_texts)
            output_dict['question_tokens'] = question_tokens
            output_dict['passage_tokens'] = passage_tokens
        return output_dict
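Example #54
0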
def train(dataset,
          dataset_folder,
          task,
          number_of_points,
          batch_size,
          epochs,
          learning_rate,
          output_folder,
          number_of_workers,
          model_checkpoint):
    train_dataset = DATASETS[dataset](dataset_folder,
                                      task=task,
                                      number_of_points=number_of_points)
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   num_workers=number_of_workers)
    test_dataset = DATASETS[dataset](dataset_folder,
                                     task=task,
                                     train=False,
                                     number_of_points=number_of_points)
    test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  num_workers=number_of_workers)

    if task == 'classification':
        model = ClassificationPointNet(num_classes=train_dataset.NUM_CLASSIFICATION_CLASSES,
                                       point_dimension=train_dataset.POINT_DIMENSION)
    elif task == 'segmentation':
        model = SegmentationPointNet(num_classes=train_dataset.NUM_SEGMENTATION_CLASSES,
                                     point_dimension=train_dataset.POINT_DIMENSION)
    else:
        raise ValueError('Unknown task: %s' % task)

    if torch.cuda.is_available():
        model.cuda()
    if model_checkpoint:
        model.load_state_dict(torch.load(model_checkpoint))

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    mb = master_bar(range(epochs))

    if not os.path.isdir(output_folder):
        os.mkdir(output_folder)

    with open(os.path.join(output_folder, 'training_log.csv'), 'w+') as fid:
        fid.write('epoch,train_loss,test_loss,train_accuracy,test_accuracy\n')

    train_loss = []
    test_loss = []
    train_acc = []
    test_acc = []
    for epoch in mb:
        epoch_train_loss = []
        epoch_train_acc = []
        batch_number = 0
        for data in progress_bar(train_dataloader, parent=mb):
            batch_number += 1
            points, targets = data
            if torch.cuda.is_available():
                points, targets = points.cuda(), targets.cuda()
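            # Skip batches with a single sample: BatchNorm layers cannot
            # compute statistics over a batch of one in training mode.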
            if points.shape[0] <= 1:
                continue
            optimizer.zero_grad()
            model = model.train()
            preds, feature_transform = model(points)
            if task == 'segmentation':
                preds = preds.view(-1, train_dataset.NUM_SEGMENTATION_CLASSES)
                targets = targets.view(-1)

            identity = torch.eye(feature_transform.shape[-1])
            if torch.cuda.is_available():
                identity = identity.cuda()
            # PointNet feature-transform regularizer: keep the learned feature
            # transform close to orthogonal (T @ T^T ≈ I).
            regularization_loss = torch.norm(
                identity - torch.bmm(feature_transform, feature_transform.transpose(2, 1))
            )
            loss = F.nll_loss(preds, targets) + 0.001 * regularization_loss
            epoch_train_loss.append(loss.cpu().item())
            loss.backward()
            optimizer.step()
            preds = preds.data.max(1)[1]
            corrects = preds.eq(targets.data).cpu().sum()
            if task == 'classification':
                accuracy = corrects.item() / float(batch_size)
            elif task == 'segmentation':
                accuracy = corrects.item() / float(batch_size*number_of_points)
            epoch_train_acc.append(accuracy)
            mb.child.comment = 'train loss: %f, train accuracy: %f' % (np.mean(epoch_train_loss),
                                                                       np.mean(epoch_train_acc))
        epoch_test_loss = []
        epoch_test_acc = []
        for batch_number, data in enumerate(test_dataloader):
            points, targets = data
            if torch.cuda.is_available():
                points, targets = points.cuda(), targets.cuda()
            model = model.eval()
            preds, feature_transform = model(points)
            if task == 'segmentation':
                preds = preds.view(-1, train_dataset.NUM_SEGMENTATION_CLASSES)
                targets = targets.view(-1)
            loss = F.nll_loss(preds, targets)
            epoch_test_loss.append(loss.cpu().item())
            preds = preds.data.max(1)[1]
            corrects = preds.eq(targets.data).cpu().sum()
            if task == 'classification':
                accuracy = corrects.item() / float(batch_size)
            elif task == 'segmentation':
                accuracy = corrects.item() / float(batch_size*number_of_points)
            epoch_test_acc.append(accuracy)

        mb.write('Epoch %s: train loss: %f, val loss: %f, train accuracy: %f, val accuracy: %f'
                 % (epoch,
                    np.mean(epoch_train_loss),
                    np.mean(epoch_test_loss),
                    np.mean(epoch_train_acc),
                    np.mean(epoch_test_acc)))
        if not test_acc or np.mean(epoch_test_acc) > np.max(test_acc):
            torch.save(model.state_dict(), os.path.join(output_folder, 'shapenet_%s_model.pth' % task))

        with open(os.path.join(output_folder, 'training_log.csv'), 'a') as fid:
            fid.write('%s,%s,%s,%s,%s\n' % (epoch,
                                            np.mean(epoch_train_loss),
                                            np.mean(epoch_test_loss),
                                            np.mean(epoch_train_acc),
                                            np.mean(epoch_test_acc)))
        train_loss.append(np.mean(epoch_train_loss))
        test_loss.append(np.mean(epoch_test_loss))
        train_acc.append(np.mean(epoch_train_acc))
        test_acc.append(np.mean(epoch_test_acc))

    plot_losses(train_loss, test_loss, save_to_file=os.path.join(output_folder, 'loss_plot.png'))
    plot_accuracies(train_acc, test_acc, save_to_file=os.path.join(output_folder, 'accuracy_plot.png'))
Example #55
0
File: model.py Project: vyap2000/DrQA
            inputs = [
                e if e is None else Variable(e.cuda(non_blocking=True))
                for e in ex[:5]
            ]
            target_s = Variable(ex[5].cuda(non_blocking=True))
            target_e = Variable(ex[6].cuda(non_blocking=True))
        else:
            inputs = [e if e is None else Variable(e) for e in ex[:5]]
            target_s = Variable(ex[5])
            target_e = Variable(ex[6])

        # Run forward
        score_s, score_e = self.network(*inputs)

        # Compute loss and accuracies
        loss = F.nll_loss(score_s, target_s) + F.nll_loss(score_e, target_e)

        # Clear gradients and run backward
        self.optimizer.zero_grad()
        loss.backward()

        # Clip gradients
        torch.nn.utils.clip_grad_norm_(self.network.parameters(),
                                       self.args.grad_clipping)

        # Update parameters
        self.optimizer.step()
        self.updates += 1

        # Reset any partially fixed parameters (e.g. rare words)
        self.reset_parameters()
Example #56
0
    def forward(self, x, targets=None, img_dim=None):

        # Tensors for cuda support
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
        ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor
        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)
        
        #print ('in models: x size ', x.size())
        prediction = (
            x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
            .permute(0, 1, 3, 4, 2)
            .contiguous()
        )
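        # prediction: (num_samples, num_anchors, grid_size, grid_size, num_classes + 5)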

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
        pred_cls = prediction[..., 5:]  # Cls pred.
        if self.loss_type == "bce":
            pred_cls = torch.sigmoid(pred_cls)
        elif self.loss_type == "hierarchical_loss":
            pred_cls = self.logsoftmax(pred_cls)
        # If grid size does not match current we compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # Add offset and scale with anchors
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h
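        # pred_boxes holds decoded (cx, cy, w, h) in grid-cell units; the
        # concatenation below scales them by self.stride back to input pixels.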

        output = torch.cat(
            (
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        if targets is None:
            return output, 0
        else:
            iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
                pred_boxes=pred_boxes,
                pred_cls=pred_cls,
                target=targets,
                anchors=self.scaled_anchors,
                ignore_thres=self.ignore_thres,
            )
            if any(t is None for t in (iou_scores, class_mask, obj_mask, noobj_mask,
                                       tx, ty, tw, th, tcls)):
                print('Exception in build targets')
                return None, None
            # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
            try:
                loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
                loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
                loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
                loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
                loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
                loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
                loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
                if self.loss_type == "bce":
                    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
                elif self.loss_type == "ce":
                    loss_cls = self.ce_loss(pred_cls[obj_mask], torch.argmax(tcls, 4)[obj_mask])
                elif self.loss_type == "hierarchical_ce":
                    # Sum each class's log-probability with its parent's before
                    # the NLL loss, i.e. the log joint probability in the hierarchy.
                    pred_cls_obj_mask = pred_cls[obj_mask]
                    pred_cls_obj_mask_level2 = pred_cls_obj_mask[..., self.class_hierarchy[:, 0]]
                    pred_cls_obj_mask_level1 = pred_cls_obj_mask[..., self.class_hierarchy[:, 1]]
                    pred_cls_obj_mask = pred_cls_obj_mask_level2 + pred_cls_obj_mask_level1
                    loss_cls = F.nll_loss(pred_cls_obj_mask, torch.argmax(tcls, 4)[obj_mask])
                total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
                if math.isnan(total_loss):
                    return None, None
            except Exception:
                print('Exception in loss computation')
                return None, None
            # Metrics
            #print ('class_mask[obj_mask] ', class_mask[obj_mask])
            cls_acc = 100 * class_mask[obj_mask].mean()
            conf_obj = pred_conf[obj_mask].mean()
            conf_noobj = pred_conf[noobj_mask].mean()
            conf50 = (pred_conf > 0.5).float()
            iou50 = (iou_scores > 0.5).float()
            iou75 = (iou_scores > 0.75).float()
            detected_mask = conf50 * class_mask * tconf
            precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
            recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
            recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

            self.metrics = {
                "loss": to_cpu(total_loss).item(),
                "x": to_cpu(loss_x).item(),
                "y": to_cpu(loss_y).item(),
                "w": to_cpu(loss_w).item(),
                "h": to_cpu(loss_h).item(),
                "conf": to_cpu(loss_conf).item(),
                "cls": to_cpu(loss_cls).item(),
                "cls_acc": to_cpu(cls_acc).item(),
                "recall50": to_cpu(recall50).item(),
                "recall75": to_cpu(recall75).item(),
                "precision": to_cpu(precision).item(),
                "conf_obj": to_cpu(conf_obj).item(),
                "conf_noobj": to_cpu(conf_noobj).item(),
                "grid_size": grid_size,
            }

            return output, total_loss
Example #57
0
def train():
    model.train()
    optimizer.zero_grad()
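    # Full-batch training step: the NLL loss is evaluated only on the nodes
    # selected by data.train_mask (typical PyTorch Geometric pattern).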
    F.nll_loss(model()[data.train_mask], data.y[data.train_mask]).backward()
    optimizer.step()
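Example #58
0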
    def expected_loss(self, target, forward_result):
        (a2, logprobs_out) = forward_result
        return F.nll_loss(logprobs_out, target)
Example #59
0
def train(epoch):
    n_iter = 0
    correct = 0
    network.train()

    # set optimiser to only calculate gradients for parameters to be updated
    optimizer1 = optim.SGD([{
        'params': network.conv1.parameters()
    }, {
        'params': network.conv2.parameters()
    }],
                           lr=learning_rate,
                           momentum=momentum)
    optimizer2 = optim.SGD([{
        'params': network.fc1.parameters()
    }, {
        'params': network.fc2.parameters()
    }],
                           lr=learning_rate,
                           momentum=momentum)

    for batch_idx, (data, target) in enumerate(train_loader):
        for i in range(2):  # for each of the loss functions
            n_iter += 1
            # Freeze/unfreeze parameters so each optimiser only updates its own
            # layers: i == 0 trains the conv layers with the FP loss, i == 1
            # trains the fully connected layers with the NLL loss.
            conv_grad = (i == 0)
            for module in (network.conv1, network.conv2):
                for p in module.parameters():
                    p.requires_grad = conv_grad
            for module in (network.fc1, network.fc2, network.fc3):
                for p in module.parameters():
                    p.requires_grad = not conv_grad

            preds, out = network(data)  # forward propagation

            if (i == 0):
                optimizer1.zero_grad()  # zero out gradients
                layers = [
                    data.data for data in network.parameters()
                ]  #Get weights matrices and biases of network as a list
                hi = torch.cat([layers[6], layers[7].unsqueeze(1)], 1)
                ones = torch.ones(target.numel()).unsqueeze(1)
                xj = torch.cat([out, ones], 1)
                loss = fp_loss(hi, xj)
                train_losses_fp.append(loss.item())  #Add loss to the list
                loss.backward()  # back propagate loss
                optimizer1.step()  # adjust weights matrices
            else:
                optimizer2.zero_grad()
                loss = F.nll_loss(preds, target)
                train_losses_ce.append(loss.item())  #Add loss to the list
                loss.backward()  # back propagate loss
                optimizer2.step()  # adjust weights matrices

            pred = preds.data.max(1, keepdim=True)[1]
            correct = pred.eq(target.data.view_as(pred)).sum().item()
            # (num correct, batch size, percent); the batch size is hard-coded to 64
            train_accuracy = (correct, 64, 100. * correct / 64)
            train_accuracies.append(train_accuracy)

            if batch_idx % log_interval == 0:
                if i == 0:
                    string = "FP"
                else:
                    string = "CE"
                print(
                    string,
                    'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        epoch, batch_idx * len(data),
                        len(train_loader.dataset),
                        100. * batch_idx / len(train_loader), loss.item()))
                print(
                    string, 'Train set: Accuracy: {}/{} ({:.0f}%)\n'.format(
                        correct, 64, 100. * correct / 64))
Example #60
0
cudnn.benchmark = True
optimizer = optim.Adam(model.parameters())

print("Model Training Starts:")
for epochs in range(num_echos):
    #training part
    print("Training Epochs ", epochs)
    model.train()
    train_accu = []
    for index, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        output = model(data)
        optimizer.zero_grad()
        loss = F.nll_loss(output, target)
        loss.backward()
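        # Cap Adam's internal step counter after the first few epochs,
        # presumably to keep the bias-correction terms numerically stable
        # over long runs.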
        if (epochs > 5):
            for group in optimizer.param_groups:
                for p in group['params']:
                    state = optimizer.state[p]
                    if (state['step'] >= 1024):
                        state['step'] = 1000
        optimizer.step()
        prediction = output.data.max(1)[1]
        accuracy = (float(prediction.eq(target.data).sum()) /
                    float(batch_size_train)) * 100.0
        train_accu.append(accuracy)
        #if(index%100 == 1):
        #    print("Step:", index, " Training Accuracy: ",accuracy)
    accu_train = np.mean(train_accu)