Example #1
def train(model, device, train_loader, optimizer, epoch):
    """Train for one epoch on the training set"""
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to train mode
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        # compute output
        output = model(data)
        loss = F.nll_loss(output, target)

        # measure accuracy and record loss
        prec1 = accuracy(output, target, topk=(1,))[0]
        losses.update(loss.item(), data.size(0))
        top1.update(prec1.item(), data.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_idx % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      epoch, batch_idx, len(train_loader), loss=losses, top1=top1))
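Examples #1, #2 and #6 rely on an `AverageMeter` class and an `accuracy` helper that are not shown in the snippets. A minimal sketch of what they typically look like (an assumption, modeled on the widely used PyTorch ImageNet-example utilities):

class AverageMeter(object):
    """Tracks the current value, running sum, count and average."""
    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Computes the top-k accuracy (in percent) for each k in topk."""
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res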
Example #2
def test(model, device, test_loader, epoch):
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    for batch_idx, (data, target) in enumerate(test_loader):
        data, target = data.to(device), target.to(device)

        # compute output
        with torch.no_grad():
            output = model(data)
        loss = F.nll_loss(output, target)

        # measure accuracy and record loss
        prec1 = accuracy(output, target, topk=(1,))[0]
        losses.update(loss.item(), data.size(0))
        top1.update(prec1.item(), data.size(0))

        if batch_idx % args.print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      batch_idx, len(test_loader), loss=losses,
                      top1=top1))

    print(' * Prec@1 {top1.avg:.3f}'.format(top1=top1))
    return top1.avg
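A typical driver that wires Examples #1 and #2 together might look like the following sketch; the `args` namespace (with `lr`, `epochs`, `print_freq`), `model` and the two loaders are assumed to exist elsewhere:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)

best_prec1 = 0.0
for epoch in range(1, args.epochs + 1):
    train(model, device, train_loader, optimizer, epoch)
    prec1 = test(model, device, test_loader, epoch)
    best_prec1 = max(best_prec1, prec1)  # keep the best top-1 accuracy seen so far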
Example #3
def train(epoch):
    model.train()
    train_loss = 0

    start_time = datetime.datetime.now()
    prefix = 'vanilla'

    for batch_idx, (data, labels) in enumerate(train_loader):
        data = data.to(device)
        # print(labels)
        labels = classes_to_one_hot(labels)
        # print(labels)
        labels = labels.to(device)
        optimizer.zero_grad()
        recon_batch, mu, logvar = model(data)
        loss = loss_function(recon_batch, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()

        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                loss.item() / len(data)))

    print('====> Epoch: {} Average loss: {:.4f}'.format(
          epoch, train_loss / len(train_loader.dataset)))
    torch.save(model, f'checkpoints/{prefix}_{str(start_time)}_{epoch}.pt')
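Examples #3, #5 and #7 call a `loss_function(recon_batch, data, mu, logvar)` that is not defined in the snippets. For MNIST-style VAEs it is usually the summed reconstruction error plus the KL divergence; a sketch under that assumption:

def loss_function(recon_x, x, mu, logvar):
    # reconstruction term: binary cross-entropy summed over pixels and batch
    BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784), reduction='sum')
    # KL divergence between N(mu, sigma^2) and the standard normal prior
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD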
Example #4
def test(model, test_loader, device):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.max(1, keepdim=True)[1]  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    logger.info('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
Example #5
def test(epoch):
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for i, (data, _) in enumerate(test_loader):
            data = data.to(device)
            recon_batch, mu, logvar = model(data)
            test_loss += loss_function(recon_batch, data, mu, logvar).item()
            if i == 0:
                n = min(data.size(0), 8)
                comparison = torch.cat([data[:n],
                                      recon_batch.view(args.batch_size, 1, 28, 28)[:n]])
                save_image(comparison.cpu(),
                         'results/reconstruction_' + str(epoch) + '.png', nrow=n)

    test_loss /= len(test_loader.dataset)
    print('====> Test set loss: {:.4f}'.format(test_loss))
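The evaluation step above is often paired with decoding random latent codes to inspect samples. A hedged sketch, assuming the model exposes a `decode` method and a 20-dimensional latent space:

with torch.no_grad():
    sample = torch.randn(64, 20).to(device)  # 20 is the assumed latent dimension
    sample = model.decode(sample).cpu()
    save_image(sample.view(64, 1, 28, 28),
               'results/sample_' + str(epoch) + '.png')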
Example #6
def train_transient(model, device, train_loader, optimizer, epoch, track=False):
    """Train for one epoch on the training set"""
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to train mode
    model.train()
    epoch_stats = []

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        # compute output
        output = model(data)
        losses_ = F.nll_loss(output, target, reduction='none')

        if track:
            indices = [batch_idx*train_loader.batch_size + i for i in range(len(data))]
            batch_stats = []
            for i, l in zip(indices, losses_):
                batch_stats.append([i, l.item()])
            epoch_stats.append(batch_stats)

        loss = losses_.mean()

        # measure accuracy and record loss
        prec1 = accuracy(output, target, topk=(1,))[0]
        losses.update(loss.item(), data.size(0))
        top1.update(prec1.item(), data.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_idx % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      epoch, batch_idx, len(train_loader), loss=losses, top1=top1))
    if track:
        return epoch_stats
    return None
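When `track=True`, Example #6 returns one `[index, loss]` pair per sample, grouped by batch. One possible way to consume those statistics, e.g. to list the hardest samples of the epoch:

epoch_stats = train_transient(model, device, train_loader, optimizer, epoch, track=True)
flat = [pair for batch in epoch_stats for pair in batch]       # [[sample_index, loss], ...]
hardest = sorted(flat, key=lambda p: p[1], reverse=True)[:10]  # ten largest per-sample losses
print(hardest)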
Example #7
def train(epoch):
    model.train()
    train_loss = 0
    for batch_idx, (data, _) in enumerate(train_loader):
        data = data.to(device)
        optimizer.zero_grad()
        recon_batch, mu, logvar = model(data)
        loss = loss_function(recon_batch, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                loss.item() / len(data)))

    print('====> Epoch: {} Average loss: {:.4f}'.format(
          epoch, train_loss / len(train_loader.dataset)))
Example #8
                               num_workers=4)

seed_everything(42)

n_epochs = 30

for epoch in tqdm(range(1, n_epochs + 1)):
    # monitor training loss
    train_loss = 0.0
    ###################
    # train the model #
    ###################
    for data, _ in train_loader:
        # _ stands in for labels, which this autoencoder does not use
        # no need to flatten images
        data = data.to(device)
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        outputs = model(data)
        # calculate the loss
        loss = criterion(outputs, data)
        #print(loss.item())
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update running training loss
        train_loss += loss.item() * data.size(0)

    # print avg training statistics
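    # The snippet is cut off here; a plausible completion of the announced
    # statistics print (an assumption, matching the loop above) would be:
    train_loss = train_loss / len(train_loader.dataset)
    print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch, train_loss))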
Example #9
def one_train(loader, model, criterion, optimizer, epoch):
    print("LOG : training phase , epoch = ", epoch)
    # initialize running-average meters
    cos_losses = AverageMeter()
    if opts.semantic_reg:
        img_losses = AverageMeter()
        rec_losses = AverageMeter()
    data_num = len(loader.dataset)  # total number of samples
    pbar = tqdm(total=int(data_num / opts.batch_size))  # set up the progress bar
    # start training
    model.train()  # switch the model to training mode
    for batch, (inputs, targets) in enumerate(loader):
        # move the data to the device: (image, inst, len(inst), ingr, len(ingr)), [target, img_class, rec_class]
        input_var = [data.to(DEVICE, non_blocking=True) for data in inputs]
        target_var = [data.to(DEVICE, non_blocking=True) for data in targets]
        outputs = model(input_var[0], input_var[1], input_var[2], input_var[3],
                        input_var[4])  # get the model outputs

        # compute the loss, with or without the semantic (category) classification terms
        if opts.semantic_reg:
            cos_loss = criterion[0](outputs[0], outputs[1],
                                    target_var[0].float())
            img_loss = criterion[1](outputs[2], target_var[1])
            rec_loss = criterion[1](outputs[3], target_var[2])
            # combined loss
            loss =  opts.cos_weight * cos_loss +\
                    opts.cls_weight * img_loss +\
                    opts.cls_weight * rec_loss

            # measure performance and record losses
            cos_losses.update(cos_loss.item(), inputs[0].size(0))
            img_losses.update(img_loss.item(), inputs[0].size(0))
            rec_losses.update(rec_loss.item(), inputs[0].size(0))
        else:
            loss = criterion(outputs[0], outputs[1], target_var[0])
            # measure performance and record loss
            cos_losses.update(loss.item(), inputs[0].size(0))

        optimizer.zero_grad()  # reset the gradients
        loss.backward()  # compute the gradients
        optimizer.step()  # update the parameters
        pbar.update(1)
    pbar.close()
    if opts.semantic_reg:
        print('Epoch: {0}\t'
              'cos loss:{cos_loss.val:.4f} ({cos_loss.avg:.4f}) '
              'img Loss:{img_loss.val:.4f} ({img_loss.avg:.4f}) '
              'rec loss:{rec_loss.val:.4f} ({rec_loss.avg:.4f}) '
              'vision_lr:({visionLR})-recipe_lr:({recipeLR}) '.format(
                  epoch,
                  cos_loss=cos_losses,
                  img_loss=img_losses,
                  rec_loss=rec_losses,
                  visionLR=optimizer.param_groups[1]['lr'],
                  recipeLR=optimizer.param_groups[0]['lr']))
    else:
        print('Epoch: {0}\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'vision ({visionLR}) - recipe ({recipeLR})\t'.format(
                  epoch,
                  loss=cos_losses,
                  visionLR=optimizer.param_groups[1]['lr'],
                  recipeLR=optimizer.param_groups[0]['lr']))
    if opts.semantic_reg:
        return cos_losses.val, img_losses.val, rec_losses.val
    # img/rec losses are only tracked when semantic regularization is enabled
    return cos_losses.val, None, None
Example #10
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    result = {}
    cur = 0
    total = len(test_loader.dataset)
    # total = 900
    with torch.no_grad():
        for data, target, img_path in test_loader:
            if cur >= total:
                break
            data, target = data.to(device), target.to(device,
                                                      dtype=torch.int64)
            output = model(data)
            loss1 = F.nll_loss(output[0], target)
            loss2 = F.nll_loss(output[1], target)
            loss3 = F.nll_loss(output[2], target)
            loss = loss1 + loss2 + 0.1 * loss3
            test_loss += loss.item()  # sum up batch loss
            output_merge = output[0] * 0.5 + output[1] * 0.4 + output[2] * 0.1
            pred = output_merge.argmax(
                dim=1,
                keepdim=True)  # get the index of the max log-probability
            # correct += pred.eq(target.view_as(pred)).sum().item()
            pred = pred.view(pred.shape[0])  # pred.shape = [batch_size]
            for i in range(len(pred)):
                if img_path[i] not in result:
                    result[img_path[i]] = [0, 0]
                result[img_path[i]][0] += target[i].item()  # [label, res]
                result[img_path[i]][1] += pred[i].item()  # [label, res]
            print(len(result))
            del loss1, loss2, loss3, loss
            cur += 30

    # 1. compute the number of correct predictions
    # def isTrue(a):
    #     target = a[0]
    #     res = a[1]
    #     if (target + res == 0) or (target > 0 and res > 0):  # target is only 0/3, res can be 0/1/2/3
    #         return 1
    #     return 0
    # print(len(result))
    # new_result = list(map(isTrue, result.values()))
    # correct = sum(new_result)

    # 2. helper to compute accuracy, precision and recall
    def Accuracy(res):
        TP = 0  # predicted tampered and actually tampered
        TN = 0  # predicted original and actually original
        FP = 0
        FN = 0
        for key, value in res.items():
            if value[1] == 0:
                if value[0] == 0:
                    TN += 1
                else:
                    FN += 1
            else:
                if value[0] == 0:
                    FP += 1
                else:
                    TP += 1
        return TP, TN, FP, FN

    TP, TN, FP, FN = Accuracy(result)
    accuracy = (TP + TN) / (TP + TN + FP + FN)
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)

    print(TP, TN, FP, FN)
    print(accuracy, precision, recall)

    test_loss /= total

    # print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    #     test_loss, correct, len(test_loader.dataset),
    #     100. * correct / len(test_loader.dataset)))
    print(
        '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, TP + TN, total / 3, 300. * (TP + TN) / total))
    print('--- Accuracy: {} --  Precision: {}  --- Recall: {}'.format(
        accuracy, precision, recall))
    with open("accuracy.txt", "a+") as file:
        file.write(
            '--- Accuracy: {} --  Precision: {}  --- Recall: {}\n'.format(
                accuracy, precision, recall))
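Note that `precision` and `recall` above divide by `TP + FP` and `TP + FN`, either of which can be zero on a small test set. A defensive variant of those two lines:

precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0
recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0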
Example #11
    def train_epoch(self,train_loader,task_id):
        self.adjust_learning_rate(self.epoch)

        self.model.train()

        if task_id > 0 and self.args.experiment.xai_memory:
            for idx, (data, target, sal, tt, _) in enumerate(self.saliency_loaders):

                x = data.to(device=self.device, dtype=torch.float)
                s = sal.to(device=self.device, dtype=torch.float)

                explanations, self.model, _, _ = self.explainer(x, self.model, task_id)

                self.saliency_size = explanations.size()

                # To make predicted explanations (Bx7x7) same as ground truth ones (Bx1x7x7)
                sal_loss = self.sal_loss(explanations.view_as(s), s)
                sal_loss *= self.args.saliency.regularizer

                if self.args.wandb.log:
                    wandb.log({"Saliency loss": sal_loss.item()})

                try:
                    sal_loss.requires_grad = True
                except:
                    continue

                self.optimizer_explanations.zero_grad()
                sal_loss.backward(retain_graph=True)
                self.optimizer_explanations.step()

        # Loop batches
        for batch_idx, (x, y, tt) in enumerate(train_loader):

            images = x.to(device=self.device, dtype=torch.float)
            targets = y.to(device=self.device, dtype=torch.long)
            tt = tt.to(device=self.device, dtype=torch.long)
            # Forward
            if self.args.architecture.multi_head:
                output = self.model.forward(images, tt)
            else:
                output = self.model.forward(images)

            loss = self.criterion(output, targets)

            # L1 regularize
            if self.args.train.l1_reg:
                reg_loss = self.l1_regularizer()
                factor = self.args.train.l1_reg_factor
                loss += factor * reg_loss

            loss *= self.args.train.task_loss_reg


            # Backward
            self.optimizer.zero_grad()
            loss.backward(retain_graph=True)

            # Apply step
            # torch.nn.utils.clip_grad_norm_(self.model.parameters(),self.clipgrad)
            self.optimizer.step()
Example #12
def to_device(data, device):
    """Move tensor(s) to chosen device"""
    if isinstance(data, (list,tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)
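A short usage sketch for `to_device`, assuming a hypothetical `model` and a standard `(images, labels)` batch from a DataLoader:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for images, labels in train_loader:
    images, labels = to_device([images, labels], device)  # both tensors end up on the same device
    output = model(images)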
Example #13
def cluster_for_instance(dataloader, args):
    use_cuda = not args.ngpu and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Extracts scene features from the entire image
    arch = 'resnet18'
    model_file = '%s_places365.pth.tar' % arch
    model = models.__dict__[arch](num_classes=365).to(device)
    checkpoint = torch.load(model_file,
                            map_location=lambda storage, loc: storage)
    state_dict = {
        str.replace(k, 'module.', ''): v
        for k, v in checkpoint['state_dict'].items()
    }
    model.load_state_dict(state_dict)
    model.eval()

    scene_classifier = model.fc
    new_classifier = nn.Sequential()
    model.fc = new_classifier

    categories = dataloader.dataset.categories
    scene_features = [[[], []] for i in range(len(categories))]
    instance_features = [[[], []] for i in range(len(categories))]
    scene_filepaths = [[[], []] for i in range(len(categories))]

    # Extracts features of just the cropped object
    model_file = 'cifar_resnet110.th'
    small_model = resnet110()
    checkpoint = torch.load(model_file,
                            map_location=lambda storage, loc: storage)
    state_dict = {
        str.replace(k, 'module.', ''): v
        for k, v in checkpoint['state_dict'].items()
    }
    small_model.load_state_dict(state_dict)
    small_model.to(device)
    small_model.eval()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    for i, (data, target) in enumerate(tqdm(dataloader)):
        gender = target[1]
        anns = target[0]
        if len(gender) > 1:
            data = data.to(device)
            data = normalize(data)
            big_data = F.interpolate(data.unsqueeze(0),
                                     size=224,
                                     mode='bilinear').to(device)
            this_features = model.forward(big_data)
            logit = scene_classifier.forward(this_features)
            h_x = F.softmax(logit, 1).data.squeeze()
            probs, idx = h_x.sort(0, True)
            pred = idx[0]

            size = list(data.size())[1:]
            scene_added = []

            for ann in anns:
                index = categories.index(ann['label'])
                bbox = np.array([
                    ann['bbox'][0] * size[1], ann['bbox'][1] * size[1],
                    ann['bbox'][2] * size[0], ann['bbox'][3] * size[0]
                ]).astype(int)
                instance = data[:, bbox[2]:bbox[3], bbox[0]:bbox[1]]
                if 0 in list(instance.size()):
                    continue
                small_data = F.interpolate(instance.unsqueeze(0),
                                           size=32,
                                           mode='bilinear').to(device)
                this_small_features = small_model.features(small_data)
                if len(scene_features[index][
                        gender[0] - 1]) < 500 and index not in scene_added:
                    scene_added.append(index)
                    scene_features[index][gender[0] - 1].extend(
                        this_features.data.cpu().numpy())
                    scene_filepaths[index][gender[0] - 1].append(
                        (target[3], pred))
                if len(instance_features[index][gender[0] - 1]) < 500:
                    instance_features[index][gender[0] - 1].extend(
                        this_small_features.data.cpu().numpy())
    stats = {}
    stats['instance'] = instance_features
    stats['scene'] = scene_features
    stats['scene_filepaths'] = scene_filepaths
    pickle.dump(stats, open("results/{}/4.pkl".format(args.folder), "wb"))
Example #14
def train(args):
    is_distributed = len(args.hosts) > 1 and args.backend is not None
    logger.debug("Distributed training - {}".format(is_distributed))
    use_cuda = (args.processor == 'gpu') or (args.num_gpus > 0)
    logger.debug("Number of gpus available - {}".format(args.num_gpus))
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    device = torch.device("cuda" if use_cuda else "cpu")

    if is_distributed:
        # Initialize the distributed environment.
        world_size = len(args.hosts)
        os.environ['WORLD_SIZE'] = str(world_size)
        host_rank = args.hosts.index(args.current_host)
        os.environ['RANK'] = str(host_rank)
        dist.init_process_group(backend=args.backend,
                                rank=host_rank,
                                world_size=world_size)
        logger.info(
            'Initialized the distributed environment: \'{}\' backend on {} nodes. '
            .format(args.backend, dist.get_world_size()) +
            'Current host rank is {}. Number of gpus: {}'.format(
                dist.get_rank(), args.num_gpus))

    # set the seed for generating random numbers
    torch.manual_seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed(args.seed)

    train_loader = _get_train_data_loader(args.batch_size, args.data_dir,
                                          is_distributed, **kwargs)
    test_loader = _get_test_data_loader(args.test_batch_size, args.data_dir,
                                        **kwargs)

    # TODO: assert the logs when we move to the SDK local mode
    logger.debug("Processes {}/{} ({:.0f}%) of train data".format(
        len(train_loader.sampler), len(train_loader.dataset),
        100. * len(train_loader.sampler) / len(train_loader.dataset)))

    logger.debug("Processes {}/{} ({:.0f}%) of test data".format(
        len(test_loader.sampler), len(test_loader.dataset),
        100. * len(test_loader.sampler) / len(test_loader.dataset)))

    model = Net()
    if is_distributed and use_cuda:
        # multi-machine multi-gpu case
        logger.debug("Multi-machine multi-gpu: using DistributedDataParallel.")
        # establish host rank and set device on this node
        torch.cuda.set_device(host_rank)
        model.cuda(host_rank)
        # for multiprocessing distributed, the DDP constructor should always set
        # the single device scope. otherwise, DDP will use all available devices.
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[host_rank], output_device=host_rank)
    elif use_cuda:
        # single-machine multi-gpu case
        logger.debug("Single-machine multi-gpu: using DataParallel().cuda().")
        model = model.to(device)
        model = torch.nn.DataParallel(model).to(device)
    else:
        # single-machine or multi-machine cpu case
        logger.debug("Single-machine/multi-machine cpu: using DataParallel.")
        model = model.to(device)
        model = torch.nn.DataParallel(model)

    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum)

    for epoch in range(1, args.epochs + 1):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader, 1):
            if is_distributed and use_cuda:
                # multi-machine multi-gpu case - allow asynchronous GPU copies of the data
                data, target = data.cuda(non_blocking=True), target.cuda(
                    non_blocking=True)
            else:
                data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            if is_distributed and not use_cuda:
                # average gradients manually for multi-machine cpu case only
                _average_gradients(model)
            optimizer.step()
            if batch_idx % args.log_interval == 0:
                logger.debug(
                    'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        epoch, batch_idx * len(data),
                        len(train_loader.sampler),
                        100. * batch_idx / len(train_loader), loss.item()))
        test(model, test_loader, device)
    save_model(model, args.model_dir)

    if (is_distributed and host_rank == 0) or not is_distributed:
        assert_can_track_sagemaker_experiments()
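Examples #14, #21 and #23 call an `_average_gradients` helper for the multi-machine CPU case that is not shown. A common implementation (an assumption, mirroring the usual SageMaker MNIST example) all-reduces each gradient and divides by the world size:

import torch.distributed as dist

def _average_gradients(model):
    # average gradients across all workers after backward(), before optimizer.step()
    world_size = float(dist.get_world_size())
    for param in model.parameters():
        if param.grad is not None:
            dist.all_reduce(param.grad.data, op=dist.ReduceOp.SUM)
            param.grad.data /= world_size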
Example #15
    def eval_epoch(self, epoch):
        #model_pth = '%s/model_epoch_%04d.pth' % (osp.join(self.save_path, 'models'), epoch)
        #self.model.load_state_dict(torch.load(model_pth))        # are these two lines impactful? unclear
        self.model.eval()
        if not os.path.exists(self.csv_path):
            os.mkdir(self.csv_path)
        eval_csv = os.path.join(self.csv_path, 'eval.csv')
        pred_list, target_list, loss_list, pos_list = [], [], [], []
        for batch_idx, item in enumerate(self.val_loader):
            if self.cfig['model_name'] in ['disrnn', 'trnn']:
                data, target, dist = item
                data, target, dist = data.to(self.device), target.to(
                    self.device), dist.to(self.device)
                if batch_idx == 0: print(dist.shape)
            else:
                data, target, ID = item
                data, target = data.to(self.device), target.to(self.device)

            if self.cfig['model_name'][-3:] == 'rnn':
                data = data.permute([1, 0, 2, 3, 4])
            #data = pack_padded_sequence(data, [3] * self.cfig['batch_size'])   # if use cell, we don't need this
            self.optim.zero_grad()
            if self.cfig['model_name'] in ['disrnn', 'trnn']:
                pred = self.model(data, dist)
            else:
                pred = self.model(data)
            pred_prob = F.softmax(pred, dim=1)
            #loss = self.criterion(pred, target)
            loss = nn.CrossEntropyLoss()(pred, target)

            pred_cls = pred.data.max(1)[1]  # not tested yet
            pos_list += pred_prob[:, 1].data.cpu().numpy().tolist()
            pred_list += pred_cls.data.cpu().numpy().tolist()
            target_list += target.data.cpu().numpy().tolist()
            loss_list.append(loss.data.cpu().numpy().tolist())

        accuracy = accuracy_score(target_list, pred_list)
        print(confusion_matrix(target_list, pred_list))
        fpr, tpr, threshold = metrics.roc_curve(target_list, pos_list)
        roc_auc = metrics.auc(fpr, tpr)
        #-------------------------save to csv -----------------------#
        if not os.path.exists(eval_csv):
            csv_info = ['epoch', 'loss', 'auc', 'accuracy']
            init_csv = pd.DataFrame()
            for key in csv_info:
                init_csv[key] = []
            init_csv.to_csv(eval_csv)
        df = pd.read_csv(eval_csv)
        data = pd.DataFrame()
        tmp_epoch = df['epoch'].tolist()
        tmp_epoch.append(epoch)

        #print ('------------------', tmp_epoch)
        tmp_loss = df['loss'].tolist()
        tmp_loss.append(np.mean(loss_list))
        tmp_auc = df['auc'].tolist()
        tmp_auc.append(roc_auc)
        tmp_acc = df['accuracy'].tolist()
        tmp_acc.append(accuracy)

        data['epoch'], data['loss'], data['auc'], data[
            'accuracy'] = tmp_epoch, tmp_loss, tmp_auc, tmp_acc
        data.to_csv(eval_csv)
        print('val accuracy: ', accuracy, 'val auc: ', roc_auc)
        print('max val auc at: ', max(tmp_auc), tmp_auc.index(max(tmp_auc)))
Example #16
real_label = 1
fake_label = 0

# setup optimizer
optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

for epoch in range(opt.niter):
    for i, data_map in enumerate(dataloader, 0):
        data = data_map['image']
        encodings = data_map['encoding']

        # (1) Update D network: maximize log(D(x, h)) + (log(1 - D(G(z, h)) + log(1 - D(z, h')))/2
        # train with real image right caption
        netD.zero_grad()
        real_cpu = data.to(device)
        batch_size = real_cpu.size(0)
        label = torch.full((batch_size, ), real_label, device=device)

        output = netD(real_cpu, encodings)
        errD_real = criterion(output, label)
        errD_real.backward()
        D_x = output.mean().item()

        # real image wrong caption
        noise = torch.randn(batch_size, nz, 1, 1, device=device)
        encoded_noise = torch.randn(batch_size, 4800, device=device)
        label.fill_(fake_label)

        output = netD(real_cpu, encoded_noise)
        errD_real_h = criterion(output, label)
Example #17
def predict_fn(input_data, model):
    print('Inferring sentiment of input data.')

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if model.word_dict is None:
        raise Exception('Model has not been loaded properly, no word_dict.')

    # TODO: Process input_data so that it is ready to be sent to our model.
    #       You should produce two variables:
    #         data_X   - A sequence of length 500 which represents the converted review
    #         data_len - The length of the review

    data_X = None
    data_len = None
    input_data_words = review_to_words(input_data)
    data_X, data_len = convert_and_pad(model.word_dict, input_data_words)
    # Using data_X and data_len we construct an appropriate input tensor. Remember
    # that our model expects input data of the form 'len, review[500]'.
    data_pack = np.hstack((data_len, data_X))
    data_pack = data_pack.reshape(1, -1)

    data = torch.from_numpy(data_pack)
    data = data.to(device)

    # Make sure to put the model into evaluation mode
    model.eval()

    # TODO: Compute the result of applying the model to the input data. The variable `result` should
    #       be a numpy array which contains a single integer which is either 1 or 0

    with torch.no_grad():
        output = model.forward(data)

    # Move the result to cpu (this is a must if GPU was used)

    output = output.cpu()

    result = int(np.round(output.numpy()))
    # result = predictor.predict(data.values)
    print(result)

    return result


# def review_to_words(review):
#     nltk.download("stopwords", quiet=True)
#     stemmer = PorterStemmer()

#     text = BeautifulSoup(review, "html.parser").get_text() # Remove HTML tags
#     text = re.sub(r"[^a-zA-Z0-9]", " ", text.lower()) # Convert to lower case
#     words = text.split() # Split string into words
#     words = [w for w in words if w not in stopwords.words("english")] # Remove stopwords
#     words = [PorterStemmer().stem(w) for w in words] # stem

#     return words

# def convert_and_pad(word_dict, sentence, pad=500):
#     NOWORD = 0 # We will use 0 to represent the 'no word' category
#     INFREQ = 1 # and we use 1 to represent the infrequent words, i.e., words not appearing in word_dict

#     working_sentence = [NOWORD] * pad

#     for word_index, word in enumerate(sentence[:pad]):
#         if word in word_dict:
#             working_sentence[word_index] = word_dict[word]
#         else:
#             working_sentence[word_index] = INFREQ

#     return working_sentence, min(len(sentence), pad)

# def convert_and_pad_data(word_dict, data, pad=500):
#     result = []
#     lengths = []

#     for sentence in data:
#         converted, leng = convert_and_pad(word_dict, sentence, pad)
#         result.append(converted)
#         lengths.append(leng)

#     return np.array(result), np.array(lengths)
Example #18
def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_dir,
          save_file):
    """
    This is the training method that is called by the PyTorch training script. The parameters
    passed are as follows:
    model        - The PyTorch model that we wish to train.
    train_loader - The PyTorch DataLoader that should be used during training.
    epochs       - The total number of epochs to train for.
    criterion    - The loss function used for training. 
    optimizer    - The optimizer to use during training.
    device       - Where the model and data should be loaded (gpu or cpu).
    """

    loss_list = []
    acc_list = []

    # training loop is provided
    valid_loss_min = np.Inf
    total_step = len(loaders['Training'])

    for epoch in range(1, n_epochs + 1):
        # initialize variables to monitor training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        print('Started epoch')
        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['Training']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            ## find the loss and update the model parameters accordingly
            ## record the average training loss, using something like

            optimizer.zero_grad()
            # Get output
            output = model(data)
            # Calculate loss
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            train_loss = train_loss + (1 / (batch_idx + 1)) * (loss.data -
                                                               train_loss)

            # Track the accuracy
            total = target.size(0)
            _, predicted = torch.max(output.data, 1)
            correct = (predicted == target).sum().item()
            acc_list.append(correct / total)

            if (batch_idx + 1) % 100 == 0:
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
                    .format(epoch, n_epochs, batch_idx + 1, total_step,
                            loss.item(), (correct / total) * 100))

        ######################
        # validate the model #
        ######################
        model.eval()
        for batch_idx, (data, target) in enumerate(loaders['Validation']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            loss = criterion(output, target)
            valid_loss = valid_loss + (1 / (batch_idx + 1)) * (loss.data -
                                                               valid_loss)

        # print training/validation statistics
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.
              format(epoch, train_loss, valid_loss))

        ## TODO: save the model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print(
                'Saving model: {} \tNew Valid Loss: {:.6f} \tPrevious Valid Loss: {:.6f}'
                .format(epoch, valid_loss, valid_loss_min))
            torch.save(model.state_dict(), os.path.join(save_dir, save_file))

            valid_loss_min = valid_loss
Example #19
def train(epoch):
    iters = 0
    # For each batch in the dataloader
    stats = adl.Accumulator()
    for i, data in enumerate(dataloader, 0):
        data = data[0]
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        ## Train with all-real batch
        netD.zero_grad()
        # Format batch
        real_cpu = data.to(device)
        b_size = real_cpu.size(0)
        label = torch.full((b_size, ), real_label, device=device)
        # Forward pass real batch through D
        output = netD(real_cpu).view(-1)
        # Calculate loss on all-real batch
        errD_real = criterion(output, label)
        # Calculate gradients for D in backward pass
        errD_real.backward()
        D_x = output.mean().item()

        ## Train with all-fake batch
        # Generate batch of latent vectors
        noise = torch.randn(b_size, nz, 1, 1, device=device)
        # Generate fake image batch with G
        fake = netG(noise)
        label.fill_(fake_label)
        # Classify all fake batch with D
        output = netD(fake.detach()).view(-1)
        # Calculate D's loss on the all-fake batch
        errD_fake = criterion(output, label)
        # Calculate the gradients for this batch
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        # Add the gradients from the all-real and all-fake batches
        errD = errD_real + errD_fake
        # Update D
        optimizerD.step()

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        netG.zero_grad()
        label.fill_(real_label)  # fake labels are real for generator cost
        # Since we just updated D, perform another forward pass of all-fake batch through D
        output = netD(fake).view(-1)
        # Calculate G's loss based on this output
        errG = criterion(output, label)
        # Calculate gradients for G
        errG.backward()
        D_G_z2 = output.mean().item()
        # Update G
        optimizerG.step()

        # Save Losses for plotting later
        G_losses.append(errG.item())
        D_losses.append(errD.item())

        stats["g_loss_sum"] += errG.item()
        stats["d_loss_sum"] += errD.item()
    stats["norm"] += metrics._metrics_state().grad_params[0]
    stats["var"] += metrics._metrics_state().grad_params[1]
    stats["replicas"] += 1.0
    scheduleD.step()
    scheduleG.step()

    with stats.synchronized():
        with SummaryWriter(adaptdl.get_tensorboard_dir()) as writer:
            writer.add_scalar("Loss/G",
                              stats["g_loss_sum"] / stats["replicas"], epoch)
            writer.add_scalar("Loss/D",
                              stats["d_loss_sum"] / stats["replicas"], epoch)
            writer.add_scalar("Performance/GlobalBatchsize",
                              b_size * stats["replicas"], epoch)
            writer.add_scalar("Performance/Replicas", stats["replicas"], epoch)
            writer.add_scalar("Stats/Variance",
                              stats["norm"] / stats["replicas"], epoch)
            writer.add_scalar("Stats/Norm", stats["var"] / stats["replicas"],
                              epoch)
Example #20
def train(epoch, model, train_loader, device, optimizer, args, writer):
    model.train()
    train_loss = 0
    for batch_idx, (data, _) in enumerate(train_loader):
        data = data.to(device)
        optimizer.zero_grad()
        storch.reset()

        # Denote the minibatch dimension as being independent
        data = storch.denote_independent(data.view(-1, 784), 0, "data")
        recon_batch, KLD, z = model(data)
        storch.add_cost(loss_function(recon_batch, data), "reconstruction")
        cost = backward()
        train_loss += cost.item()

        optimizer.step()

        cond_log = batch_idx % args.log_interval == 0

        if cond_log:
            step = 100.0 * batch_idx / len(train_loader)
            global_step = 100 * (epoch - 1) + step

            # Variance of expect method is 0 by definition.
            variances = {}
            if args.method != "expect" and args.variance_samples > 1:
                _consider_param = "probs"
                if args.latents < 3:
                    old_method = model.sampling_method
                    model.sampling_method = Expect("z")
                    optimizer.zero_grad()
                    recon_batch, _, z = model(data)
                    storch.add_cost(loss_function(recon_batch, data),
                                    "reconstruction")
                    backward()
                    expect_grad = storch.reduce_plates(
                        z.grad[_consider_param]).detach_tensor()

                    optimizer.zero_grad()
                    model.sampling_method = old_method
                grads = {n: [] for n in z.grad}

                for i in range(args.variance_samples):
                    optimizer.zero_grad()
                    recon_batch, _, z = model(data)
                    storch.add_cost(loss_function(recon_batch, data),
                                    "reconstruction")
                    backward()

                    for param_name, grad in z.grad.items():
                        # Make sure to reduce the data dimension and detach, for memory reasons.
                        grads[param_name].append(
                            storch.reduce_plates(grad).detach_tensor())

                variances = {}
                for param_name, gradz in grads.items():
                    # Create a new independent dimension for the different gradient samples
                    grad_samples = storch.gather_samples(gradz, "variance")
                    # Compute the variance over this independent dimension
                    variances[param_name] = storch.variance(
                        grad_samples, "variance")._tensor
                    if param_name == _consider_param and args.latents < 3:
                        mean = storch.reduce_plates(grad_samples, "variance")
                        mse = storch.reduce_plates(
                            (grad_samples - expect_grad)**2).sum()
                        bias = (storch.reduce_plates(
                            (mean - expect_grad)**2)).sum()
                        print("mse", mse._tensor.item())
                        # Should approach 0 when increasing variance_samples for unbiased estimators.
                        print("bias", bias._tensor.item())
                        writer.add_scalar("train/probs_bias", bias._tensor,
                                          global_step)
                        writer.add_scalar("train/probs_mse", mse._tensor,
                                          global_step)

            print(
                "Train Epoch: {} [{}/{} ({:.0f}%)]\tCost: {:.6f}\t Logits var {}"
                .format(
                    epoch,
                    batch_idx * len(data),
                    len(train_loader.dataset),
                    step,
                    cost.item(),
                    variances,
                ))
            writer.add_scalar("train/ELBO", cost, global_step)
            for param_name, var in variances.items():
                writer.add_scalar("train/variance/" + param_name, var,
                                  global_step)
    avg_train_loss = train_loss / (batch_idx + 1)
    print("====> Epoch: {} Average loss: {:.4f}".format(epoch, avg_train_loss))
    return avg_train_loss
Example #21
def train(args):
    is_distributed = len(args.hosts) > 1 and args.backend is not None
    logger.debug("Distributed training - {}".format(is_distributed))
    use_cuda = args.num_gpus > 0
    logger.debug("Number of gpus available - {}".format(args.num_gpus))
    kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}
    device = torch.device("cuda" if use_cuda else "cpu")

    if is_distributed:
        # Initialize the distributed environment.
        world_size = len(args.hosts)
        os.environ["WORLD_SIZE"] = str(world_size)
        host_rank = args.hosts.index(args.current_host)
        os.environ["RANK"] = str(host_rank)
        dist.init_process_group(backend=args.backend,
                                rank=host_rank,
                                world_size=world_size)
        logger.info(
            "Initialized the distributed environment: '{}' backend on {} nodes. "
            .format(args.backend, dist.get_world_size()) +
            "Current host rank is {}. Number of gpus: {}".format(
                dist.get_rank(), args.num_gpus))

    # set the seed for generating random numbers
    torch.manual_seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed(args.seed)

    train_loader = _get_train_data_loader(args.batch_size, args.data_dir,
                                          is_distributed, **kwargs)
    test_loader = _get_test_data_loader(args.test_batch_size, args.data_dir,
                                        **kwargs)

    logger.debug("Processes {}/{} ({:.0f}%) of train data".format(
        len(train_loader.sampler),
        len(train_loader.dataset),
        100.0 * len(train_loader.sampler) / len(train_loader.dataset),
    ))

    logger.debug("Processes {}/{} ({:.0f}%) of test data".format(
        len(test_loader.sampler),
        len(test_loader.dataset),
        100.0 * len(test_loader.sampler) / len(test_loader.dataset),
    ))

    model = Net().to(device)
    if is_distributed and use_cuda:
        # multi-machine multi-gpu case
        model = torch.nn.parallel.DistributedDataParallel(model)
    else:
        # single-machine multi-gpu case or single-machine or multi-machine cpu case
        model = torch.nn.DataParallel(model)

    wandb.watch(model)
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum)

    for epoch in range(1, args.epochs + 1):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader, 1):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            if is_distributed and not use_cuda:
                # average gradients manually for multi-machine cpu case only
                _average_gradients(model)
            optimizer.step()
            wandb.log({"training/loss": loss.item()})
            if batch_idx % args.log_interval == 0:
                logger.info(
                    "Train Epoch: {} [{}/{} ({:.0f}%)] Loss: {:.6f}".format(
                        epoch,
                        batch_idx * len(data),
                        len(train_loader.sampler),
                        100.0 * batch_idx / len(train_loader),
                        loss.item(),
                    ))
        test(model, test_loader, device)
    save_model(model, args.model_dir)
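`save_model` is called by Examples #14, #21 and #23 but never defined. A plausible implementation for these SageMaker-style scripts (an assumption), unwrapping DataParallel/DDP before saving:

import os
import torch

def save_model(model, model_dir):
    path = os.path.join(model_dir, 'model.pth')
    # DataParallel / DistributedDataParallel wrap the network in .module
    state = model.module.state_dict() if hasattr(model, 'module') else model.state_dict()
    torch.save(state, path)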
Example #22
def main(args):
    def get_model_type(model_name):
        model_type = {
            'models/modelA': 0,
            'models/modelA_adv': 0,
            'models/modelA_ens': 0,
            'models/modelB': 1,
            'models/modelB_adv': 1,
            'models/modelB_ens': 1,
            'models/modelC': 2,
            'models/modelC_adv': 2,
            'models/modelC_ens': 2,
            'models/modelD': 3,
            'models/modelD_adv': 3,
            'models/modelD_ens': 3,
        }
        if model_name not in model_type.keys():
            raise ValueError('Unknown model: {}'.format(model_name))
        return model_type[model_name]

    torch.manual_seed(args.seed)
    device = torch.device('cuda' if args.cuda else 'cpu')
    '''
    Preprocess MNIST dataset
    '''
    kwargs = {'num_workers': 20, 'pin_memory': True} if args.cuda else {}
    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../attack_mnist',
        train=True,
        download=True,
        transform=transforms.ToTensor()),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../attack_mnist', train=False, transform=transforms.ToTensor()),
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              **kwargs)

    eps = args.eps

    # if src_models is not None, we train on adversarial examples that come
    # from multiple models
    adv_model_names = args.adv_models
    adv_models = [None] * len(adv_model_names)
    for i in range(len(adv_model_names)):
        type = get_model_type(adv_model_names[i])
        adv_models[i] = load_model(adv_model_names[i], type=type).to(device)

    model = model_mnist(type=args.type).to(device)
    optimizer = optim.Adam(model.parameters())

    # Train on MNIST model
    x_advs = [None] * (len(adv_models) + 1)
    for epoch in range(args.epochs):
        for batch_idx, (data, labels) in enumerate(train_loader):
            data, labels = data.to(device), labels.to(device)
            for i, m in enumerate(adv_models + [model]):
                grad = gen_grad(data, m, labels, loss='training')
                x_advs[i] = symbolic_fgs(data, grad, eps=eps)
            train(epoch,
                  batch_idx,
                  model,
                  data,
                  labels,
                  optimizer,
                  x_advs=x_advs)

    # Finally print the result
    correct = 0
    with torch.no_grad():
        for (data, labels) in test_loader:
            data, labels = data.to(device), labels.to(device)
            correct += test(model, data, labels)
    test_error = 100. - 100. * correct / len(test_loader.dataset)
    print('Test Set Error Rate: {:.2f}%'.format(test_error))

    torch.save(model.state_dict(), args.model + '.pkl')
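`gen_grad` and `symbolic_fgs` in Example #22 are not shown; they generate FGSM-style adversarial examples. A hedged sketch of what they might do, assuming the models return log-probabilities (as the use of `F.nll_loss` suggests):

def gen_grad(x, model, labels, loss='training'):
    # gradient of the training loss with respect to the input
    x = x.clone().detach().requires_grad_(True)
    F.nll_loss(model(x), labels).backward()
    return x.grad.detach()

def symbolic_fgs(x, grad, eps=0.3, clipping=True):
    # fast gradient sign step, optionally clipped to the valid pixel range
    adv_x = x + eps * grad.sign()
    if clipping:
        adv_x = torch.clamp(adv_x, 0.0, 1.0)
    return adv_x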
Example #23
def train(args):
    world_size = len(args.hosts)
    is_distributed = world_size > 1
    logger.debug('Number of hosts {}. Distributed training - {}'.format(world_size, is_distributed))
    use_cuda = args.num_gpus > 0
    logger.debug('Number of gpus available - {}'.format(args.num_gpus))
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    device = torch.device('cuda' if use_cuda else 'cpu')

    if is_distributed:
        # Initialize the distributed environment.
        backend = 'gloo'
        os.environ['WORLD_SIZE'] = str(world_size)
        host_rank = args.hosts.index(args.current_host)
        dist.init_process_group(backend=backend, rank=host_rank, world_size=world_size)
        logger.info('Initialized the distributed environment: \'{}\' backend on {} nodes. '.format(
            backend, dist.get_world_size()) + 'Current host rank is {}. Is cuda available: {}. Number of gpus: {}'.format(
            dist.get_rank(), torch.cuda.is_available(), args.num_gpus))

    # set the seed for generating random numbers
    seed = 1
    torch.manual_seed(seed)
    if use_cuda:
        torch.cuda.manual_seed(seed)

    train_sampler, train_loader = _get_train_data_loader(args.data_dir, is_distributed, args.batch_size, **kwargs)
    test_loader = _get_test_data_loader(args.data_dir, **kwargs)

    logger.debug('Processes {}/{} ({:.0f}%) of train data'.format(
        len(train_loader.sampler), len(train_loader.dataset),
        100. * len(train_loader.sampler) / len(train_loader.dataset)
    ))

    logger.debug('Processes {}/{} ({:.0f}%) of test data'.format(
        len(test_loader.sampler), len(test_loader.dataset),
        100. * len(test_loader.sampler) / len(test_loader.dataset)
    ))

    model = Net().to(device)
    if is_distributed and use_cuda:
        # multi-machine multi-gpu case
        logger.debug('Multi-machine multi-gpu: using DistributedDataParallel.')
        model = torch.nn.parallel.DistributedDataParallel(model)
    elif use_cuda:
        # single-machine multi-gpu case
        logger.debug('Single-machine multi-gpu: using DataParallel().cuda().')
        model = torch.nn.DataParallel(model)
    else:
        # single-machine or multi-machine cpu case
        logger.debug('Single-machine/multi-machine cpu: using DataParallel.')
        model = torch.nn.DataParallel(model)

    optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.5)

    log_interval = 100
    for epoch in range(1, args.epochs + 1):
        if is_distributed:
            train_sampler.set_epoch(epoch)
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader, 1):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            if is_distributed and not use_cuda:
                # average gradients manually for multi-machine cpu case only
                _average_gradients(model)
            optimizer.step()
            if batch_idx % log_interval == 0:
                logger.debug('Train Epoch: {} [{}/{} ({:.0f}%)] Loss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.sampler),
                    100. * batch_idx / len(train_loader), loss.item()))
        accuracy = test(model, test_loader, device)
    save_model(model, args.model_dir)

    logger.debug('Overall test accuracy: {}'.format(accuracy))
Example #24
    def train_epoch(self, epoch):
        self.model.train()
        if not os.path.exists(self.csv_path):
            os.mkdir(self.csv_path)
        train_csv = os.path.join(self.csv_path, 'train.csv')
        pred_list, target_list, loss_list, pos_list = [], [], [], []
        print('epoch: ', epoch)
        #print (self.model.dislstmcell.a)

        for batch_idx, item in enumerate(self.train_loader):
            if self.cfig['model_name'] in ['disrnn', 'trnn']:
                data, target, dist = item
                data, target, dist = data.to(self.device), target.to(
                    self.device), dist.to(self.device)
            else:
                data, target, ID = item
                data, target = data.to(self.device), target.to(self.device)

            if self.cfig['model_name'][-3:] == 'rnn':
                data = data.permute([1, 0, 2, 3, 4])
            #print ('data shape', data.shape, self.cfig['batch_size'])
            #data = pack_padded_sequence(data, [3] * self.cfig['batch_size'])   # if use cell, we don't need it.
            self.optim.zero_grad()
            #print ('=================',data.shape)
            if self.cfig['model_name'] in ['disrnn', 'trnn']:
                pred = self.model(data, dist)
            else:
                pred = self.model(data)  # be careful here
            pred_prob = F.softmax(pred, dim=1)
            #loss = self.criterion(pred, target)
            #print (pred.shape, target.shape)
            if batch_idx == 0:
                print('data.shape', data.shape)
                print('pred.shape', pred.shape)
                print('Epoch: ', epoch)
            loss = nn.CrossEntropyLoss()(pred, target)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 4)
            self.optim.step()
            print_str = 'train epoch=%d, batch_idx=%d/%d, loss=%.4f\n' % (
                epoch, batch_idx, len(self.train_loader), loss.item())
            #print(print_str)
            pred_cls = pred.data.max(1)[1]
            pos_list += pred_prob[:, 1].data.cpu().numpy().tolist()
            pred_list += pred_cls.data.cpu().numpy().tolist()
            target_list += target.data.cpu().numpy().tolist()
            loss_list.append(loss.data.cpu().numpy().tolist())

        print(confusion_matrix(target_list, pred_list))
        accuracy = accuracy_score(target_list, pred_list)
        fpr, tpr, threshold = metrics.roc_curve(target_list, pos_list)
        roc_auc = metrics.auc(fpr, tpr)
        #-------------------------save to csv -----------------------#
        if not os.path.exists(train_csv):
            csv_info = ['epoch', 'loss', 'auc', 'accuracy']
            init_csv = pd.DataFrame()
            for key in csv_info:
                init_csv[key] = []
            init_csv.to_csv(train_csv)
        df = pd.read_csv(train_csv)
        data = pd.DataFrame()
        tmp_epoch = df['epoch'].tolist()
        tmp_epoch.append(epoch)
        tmp_auc = df['auc'].tolist()
        tmp_auc.append(roc_auc)
        #print('------------------', tmp_epoch)
        tmp_loss = df['loss'].tolist()
        tmp_loss.append(np.mean(loss_list))

        tmp_acc = df['accuracy'].tolist()
        tmp_acc.append(accuracy)

        data['epoch'], data['loss'], data['auc'], data[
            'accuracy'] = tmp_epoch, tmp_loss, tmp_auc, tmp_acc
        print('train accuracy: ', accuracy, 'train auc: ', roc_auc)
        data.to_csv(train_csv)
Example #25
def eval_unpadded_loss(data, target, model, vocab, device, target_scale):
    data, target = data.to(device), target.to(device)
    with torch.no_grad():
        # Variable wrappers are deprecated no-ops in modern PyTorch, so the tensors are used directly
        output = model(data, vocab, device)
        return output, loss_function(output, target, target_scale)
Example #26
def main():
    args, save_dir = parse_arguments()

    ngpu = args.ngpu
    z_dim = args.z_dim
    batchSize = args.batch_size
    imageSize = args.image_size
    nepoch = args.nepoch
    data_dir = args.data_dir
    outf_img = check_folder(os.path.join(save_dir, 'img'))
    beta1 = 0.0
    cuda = True
    cudnn.benchmark = True
    device = torch.device("cuda:0" if cuda else "cpu")

    ## set seed
    manualSeed = random.randint(1, 10000)
    print("Random Seed: ", manualSeed)
    random.seed(manualSeed)
    torch.manual_seed(manualSeed)

    ### load data
    dataset = CelebaDataseat(data_dir=data_dir, resolution=imageSize)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batchSize,
                                             shuffle=True,
                                             num_workers=10,
                                             drop_last=True)
    dataloader_iterator = iter(dataloader)
    print('Size of the training set: ', len(dataset))

    ### set up models
    netE = Encoder(z_dim=z_dim).to(device)
    netE.apply(weights_init)
    netG = Generator(z_dim=z_dim).to(device)
    netDl = DiscriminatorL(z_dim=z_dim, ngpu=ngpu).to(
        device)  # discriminator(on the latent variable)
    netD = Discriminator().to(device)  # discriminator(on the image)

    ### define the losses
    criterion = nn.BCELoss()
    real_label = 1.  # float labels so torch.full builds float tensors for nn.BCELoss
    fake_label = 0.
    fixed_noise = torch.randn(batchSize, z_dim, 1, 1, device=device)

    ### setup optimizer
    optimizerD = optim.Adam(netD.parameters(), lr=0.0004, betas=(beta1, 0.9))
    optimizerDl = optim.Adam(netDl.parameters(), lr=0.0002, betas=(beta1, 0.9))
    optimizerG = optim.Adam(netG.parameters(), lr=0.0001, betas=(beta1, 0.9))
    optimizerE = optim.Adam(netE.parameters(), lr=0.0001, betas=(beta1, 0.9))

    ### load previous trained model
    if_load, counter, checkpoint = load(save_dir)
    if if_load:
        netG.load_state_dict(checkpoint['netG_state_dict'])
        netE.load_state_dict(checkpoint['netE_state_dict'])
        netD.load_state_dict(checkpoint['netD_state_dict'])
        netDl.load_state_dict(checkpoint['netDl_state_dict'])
        optimizerG.load_state_dict(checkpoint['optG_state_dict'])
        optimizerE.load_state_dict(checkpoint['optE_state_dict'])
        optimizerD.load_state_dict(checkpoint['optD_state_dict'])
        optimizerDl.load_state_dict(checkpoint['optDl_state_dict'])

    for epoch in range(counter, nepoch):
        for i in range(5000 // (batchSize)):
            ############################
            # (1) Update the latent discriminator Dl: tell prior noise apart from encoder outputs
            ###########################
            for _ in range(1):
                netDl.zero_grad()

                # the encoder output on real images is the "fake" sample for the latent discriminator
                try:
                    data = next(dataloader_iterator)
                except StopIteration:
                    dataloader_iterator = iter(dataloader)
                    data = next(dataloader_iterator)
                real_ = data.to(device)
                label = torch.full((batchSize, ), fake_label, device=device)

                output = netE(real_)
                output = netDl(output)
                errDl_real = criterion(output, label)
                errDl_real.backward()
                errDl = errDl_real

                # noise drawn from the prior is the "real" sample for the latent discriminator
                noise = torch.randn(batchSize, z_dim, device=device)
                label = torch.full((batchSize, ), real_label, device=device)
                output = netDl(noise)
                errDl_noise = criterion(output, label)
                errDl_noise.backward()
                errDl += errDl_noise

                optimizerDl.step()

            ############################
            # (2) Update D network: Hinge loss
            ###########################
            for _ in range(2):
                netD.zero_grad()

                # train with real
                try:
                    data = next(dataloader_iterator)
                except StopIteration:
                    dataloader_iterator = iter(dataloader)
                    data = next(dataloader_iterator)
                real_ = data.to(device)
                out_real = netD(real_)

                noise = torch.randn(batchSize, z_dim, 1, 1, device=device)
                fake = netG(noise)
                out_fake = netD(fake.detach())

                errD_real = (nn.ReLU()(0.5 + out_real)).mean()
                errD_real.backward()
                errD_fake = (nn.ReLU()(0.5 - out_fake)).mean()
                errD_fake.backward()
                errD = errD_real + errD_fake

                optimizerD.step()

            ############################
            # (3) Update G & E network: maximize log(D(G(z)))
            ###########################
            for _ in range(1):
                netG.zero_grad()
                netE.zero_grad()

                try:
                    data = next(dataloader_iterator)
                except StopIteration:
                    dataloader_iterator = iter(dataloader)
                    data = next(dataloader_iterator)

                real_ = data.to(device)
                real_ = real_.unsqueeze(1).repeat(1, 1, 1, 1, 1)
                real_ = real_.view(batchSize * 1, 3, imageSize, imageSize)

                encoded = netE(real_)
                fake_noise = encoded
                encoded = encoded.view(batchSize * 1, z_dim, 1, 1)

                rec_fake = netG(encoded)
                output = netD(rec_fake)
                outputN = netDl(fake_noise)

                label = torch.full((batchSize * 1, ),
                                   real_label,
                                   device=device)
                errG = criterionG(output, label, real_, rec_fake, outputN,
                                  batchSize)
                errG.backward()
                optimizerG.step()
                optimizerE.step()

            if i % 100 == 0:
                print(
                    '[%d/%d][%d] Loss_D: %.4f, Loss_Dfake: %.4f, Loss_Dreal: %.4f, Loss_Dl: %.4f, Loss_G: %.4f'
                    % (epoch, nepoch, i, errD.item(), errD_fake.item(),
                       errD_real.item(), errDl.item(), errG.item()))

        if epoch % 20 == 0:
            noise = fixed_noise
            fake = netG(noise)
            fake = fake.view(batchSize, 3, imageSize, imageSize)

            vutils.save_image(fake.detach(),
                              '%s/epoch_%04d.png' % (outf_img, epoch),
                              normalize=True)
            vutils.save_image(real_.detach(),
                              '%s/real_%04d.png' % (outf_img, epoch),
                              normalize=True)
            vutils.save_image(rec_fake.detach(),
                              '%s/reconst_%04d.png' % (outf_img, epoch),
                              normalize=True)

        if epoch % 10 == 0:
            save_dict = {
                'steps': epoch,
                'netE_state_dict': netE.state_dict(),
                'netG_state_dict': netG.state_dict(),
                'netD_state_dict': netD.state_dict(),
                'netDl_state_dict': netDl.state_dict(),
                'optD_state_dict': optimizerD.state_dict(),
                'optDl_state_dict': optimizerDl.state_dict(),
                'optG_state_dict': optimizerG.state_dict(),
                'optE_state_dict': optimizerE.state_dict()
            }

            torch.save(save_dict, os.path.join(save_dir, 'checkpoint.pkl'))
            torch.save(netE, os.path.join(save_dir, 'netE.pt'))
            torch.save(netG, os.path.join(save_dir, 'netG.pt'))
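criterionG is defined elsewhere in this script. Judging only from the arguments it receives (the image-discriminator score on the reconstruction, real labels, the real and reconstructed images, and the latent-discriminator output), it presumably mixes an adversarial term with a reconstruction term. The sketch below is a hedged guess at such a composite loss, not the author's definition:

import torch.nn as nn

def criterionG(output, label, real_, rec_fake, outputN, batch_size):
    # Assumed composite generator/encoder objective; batch_size is accepted
    # only to match the call site above and is not used here.
    # netD above is trained to push real scores negative, so the generator
    # minimizes the score of its reconstructions.
    adv_img = output.mean()
    adv_latent = nn.BCELoss()(outputN, label)  # fool the latent discriminator
    recon = nn.L1Loss()(rec_fake, real_)       # pixel-wise reconstruction
    return adv_img + adv_latent + recon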
Ejemplo n.º 27
0
def train(args):
    is_distributed = len(args.hosts) > 1 and args.backend is not None
    logger.debug("Distributed training - {}".format(is_distributed))
    use_cuda = args.num_gpus > 0
    logger.debug("Number of gpus available - {}".format(args.num_gpus))
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    device = torch.device("cuda" if use_cuda else "cpu")

    if is_distributed:
        # Initialize the distributed environment.
        world_size = len(args.hosts)
        os.environ['WORLD_SIZE'] = str(world_size)
        host_rank = args.hosts.index(args.current_host)
        dist.init_process_group(backend=args.backend, rank=host_rank, world_size=world_size)
        logger.info('Initialized the distributed environment: \'{}\' backend on {} nodes. '.format(
            args.backend, dist.get_world_size()) + 'Current host rank is {}. Number of gpus: {}'.format(
            dist.get_rank(), args.num_gpus))

    # set the seed for generating random numbers
    torch.manual_seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed(args.seed)

    train_loader = _get_train_data_loader(args.batch_size, args.data_dir, is_distributed, **kwargs)
    test_loader = _get_test_data_loader(args.test_batch_size, args.data_dir, **kwargs)

    logger.debug("Processes {}/{} ({:.0f}%) of train data".format(
        len(train_loader.sampler), len(train_loader.dataset),
        100. * len(train_loader.sampler) / len(train_loader.dataset)
    ))

    logger.debug("Processes {}/{} ({:.0f}%) of test data".format(
        len(test_loader.sampler), len(test_loader.dataset),
        100. * len(test_loader.sampler) / len(test_loader.dataset)
    ))

    model = Net().to(device)
    if is_distributed and use_cuda:
        # multi-machine multi-gpu case
        model = torch.nn.parallel.DistributedDataParallel(model)
    else:
        # single-machine multi-gpu case or single-machine or multi-machine cpu case
        model = torch.nn.DataParallel(model)

    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

    for epoch in range(1, args.epochs + 1):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader, 1):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            if is_distributed and not use_cuda:
                # average gradients manually for multi-machine cpu case only
                _average_gradients(model)
            optimizer.step()
            if batch_idx % args.log_interval == 0:
                logger.info('Train Epoch: {} [{}/{} ({:.0f}%)] Loss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.sampler),
                    100. * batch_idx / len(train_loader), loss.item()))
        test(model, test_loader, device)
    save_model(model, args.model_dir)
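_average_gradients is called above for the multi-machine CPU path but its body is not included in this excerpt. The usual pattern, sketched here as an assumption rather than the script's actual helper, is an all-reduce over each parameter's gradient followed by division by the world size:

import torch.distributed as dist

def _average_gradients(model):
    # Assumed helper: sum each parameter's gradient across workers, then average.
    world_size = float(dist.get_world_size())
    for param in model.parameters():
        dist.all_reduce(param.grad.data, op=dist.ReduceOp.SUM)
        param.grad.data /= world_size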
Ejemplo n.º 28
0
def train(opt, vocab):
    # record starting time of the program
    start_time = time.time()

    torch.manual_seed(1)

    # load training data
    train_src_loc = opt.train_src
    train_tgt_loc = opt.train_tgt
    valid_src_loc = opt.valid_src
    valid_tgt_loc = opt.valid_tgt
    EMBEDDING_DIM = opt.embedding_dim
    HIDDEN_DIM = opt.hidden_dim
    BATCH_SIZE = opt.batch_size
    EPOCHS = opt.epochs
    device = opt.device
    train_from = opt.train_from
    LOAD_MODEL = len(train_from) > 0
    train_data_limit = opt.train_data_limit
    valid_data_limit = opt.valid_data_limit
    save_model_loc = opt.save_model
    save_checkpoint_epochs = opt.save_checkpoint_epochs

    training_data = []

    target_scale = 1
    #scaler = MinMaxScaler(feature_range=(0, 1))

    # word_to_ix = {}
    # word_to_ix['PAD'] = 0
    # word_to_ix['UNK'] = 1
    # word_to_ix['BOS'] = 2
    # word_to_ix['EOS'] = 3

    if train_data_limit > 0:
        print('Limited the training data to the first {} samples'.format(
            train_data_limit))
    else:
        print('Using the full training dataset.')
    print('Loading the training dataset...')

    with open(train_src_loc,
              'r') as train_src_file, open(train_tgt_loc,
                                           'r') as train_tgt_file:
        for train_src_line, train_tgt_line in zip(train_src_file,
                                                  train_tgt_file):
            sent, ratio = (train_src_line.strip().split(),
                           float(train_tgt_line))
            # for word in sent:
            #     if word not in word_to_ix:
            #         word_to_ix[word] = len(word_to_ix)
            sent = prepare_sequence(sent, vocab)
            training_data.append((sent, ratio))
            train_data_limit -= 1
            if train_data_limit == 0:
                break
    # x_max = max([v for (k,v) in training_data])
    # x_min = min([v for (k,v) in training_data])
    # scaled_training_data = []
    # for item in training_data:
    #     scaled_training_data.append(item[0], item[1]/(x_max - x_min))
    print('Successfully loaded the training dataset.')
    print("EMBEDDING_DIM = {}\nHIDDEN_DIM = {}\nBATCH_SIZE = {}\nEPOCHS = {}".
          format(EMBEDDING_DIM, HIDDEN_DIM, BATCH_SIZE, EPOCHS))

    # Train the model:
    model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, vocab, device).to(device)

    if LOAD_MODEL:
        model.load_state_dict(torch.load(train_from))
        print('Resuming the model from {0}'.format(train_from))
        model.train()

    optimizer = optim.Adam(model.parameters(), lr=1.0e-6)

    print("Model's state_dict:")
    for param_tensor in model.state_dict():
        print("\t", param_tensor, "\t",
              model.state_dict()[param_tensor].size())

    # Print optimizer's state_dict
    print("Optimizer's state_dict:")
    for var_name in optimizer.state_dict():
        print("\t", var_name, "\t", optimizer.state_dict()[var_name])

    valid_data = []
    if valid_data_limit > 0:
        print('Limited the validation data to the first {} samples'.format(
            valid_data_limit))
    else:
        print('Using the full validation dataset.')
    print('Loading the validation dataset...')
    with open(valid_src_loc,
              'r') as valid_src_file, open(valid_tgt_loc,
                                           'r') as valid_tgt_file:
        for valid_src_line, valid_tgt_line in zip(valid_src_file,
                                                  valid_tgt_file):
            sent, tag = (valid_src_line.strip().split(), float(valid_tgt_line))
            sent = prepare_sequence(sent, vocab)
            valid_data.append((sent, tag))
            valid_data_limit -= 1
            if valid_data_limit == 0:
                break
    #valid_data = scaler.transform(valid_data)
    print('Successfully loaded the validation dataset.')

    my_collator = MyCollator(vocab)
    # collate also does the normalization of lengths
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=BATCH_SIZE,
                                               shuffle=True,
                                               num_workers=8,
                                               collate_fn=my_collator)

    # calculate total MSE loss on the validation dataset before training the model
    initial_total_loss = 0
    i = 0
    for batch_idx, (data, target) in enumerate(valid_loader):
        _, loss = eval_unpadded_loss(data, target, model, vocab, device,
                                     target_scale)
        initial_total_loss += loss
        i += 1
    initial_total_loss /= i
    print('Total MSE loss before training: {}'.format(initial_total_loss))

    for epoch in range(EPOCHS):

        print("Starting epoch {}/{}...".format(epoch + 1, EPOCHS))
        # this is a batch
        # collate also does the normalization of lengths
        train_loader = torch.utils.data.DataLoader(training_data,
                                                   batch_size=BATCH_SIZE,
                                                   shuffle=True,
                                                   num_workers=8,
                                                   collate_fn=my_collator)

        for batch_idx, (data, target) in enumerate(train_loader):
            # print('Train batch id: {}/{}'.format(batch_idx, train_loader.__len__()))
            data, target = data.to(device), target.to(device)
            # Inputs are already tensors of word indices; autograd.Variable is deprecated and not needed.

            # Remember that Pytorch accumulates gradients.
            # We need to clear them out before each instance
            model.zero_grad()

            # Also, we need to clear out the hidden state of the LSTM,
            # detaching it from its history on the last instance.
            # model.hidden = model.init_hidden()
            # ? why is the line above commented out?

            # Run our forward pass.
            output = model(data, vocab, device)
            loss = loss_function(output, target, target_scale)
            loss.backward()
            optimizer.step()
            # if divmod(batch_idx, 100)[1] == 0:
            #     gc.collect()
            #     print('Collected garbage.')
        # calculate total MSE loss on the validation dataset after each training epoch
        total_loss = 0
        i = 0
        for batch_idx, (data, target) in enumerate(valid_loader):
            _, loss = eval_unpadded_loss(data, target, model, vocab, device,
                                         target_scale)
            total_loss += loss
            i += 1
        total_loss /= i
        print('Total MSE loss after training on epoch {}: {}'.format(
            epoch + 1, total_loss))
        if (epoch + 1) % save_checkpoint_epochs == 0:
            save_model_on_epoch_n = '{}_epoch_{}.pt'.format(save_model_loc, epoch + 1)
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'total_loss': total_loss,
                    'opt': opt
                }, save_model_on_epoch_n)
            print('Saved the model to {0}'.format(save_model_on_epoch_n))

    print('Training completed!')
    # show the time consumed by the program
    print("Total run time: {}".format(str(time.time() - start_time)))
Ejemplo n.º 29
0
    img_list = []
    G_losses = []
    D_losses = []
    iters = 0

    for epoch in range(num_epochs):
        for i, data in enumerate(train_loader):

            ############################
            # (1) Update D network
            ###########################

            net_d.zero_grad()

            # create training data
            real_img = data.to(device)
            b_size = real_img.size(0)
            real_label = torch.full((b_size, ),
                                    real_idx,
                                    device=device,
                                    dtype=torch.float)

            noise = torch.randn(b_size,
                                nz,
                                1,
                                1,
                                device=device,
                                dtype=torch.float)
            fake_img = net_g(noise)
            fake_label = torch.full((b_size, ),
                                    fake_idx,
Ejemplo n.º 30
0
                        lr=lr,
                        betas=(beta1, 0.999),
                        weight_decay=1e-5)  # for the discriminator D
optimizerG = optim.Adam(netG.parameters(),
                        lr=lr,
                        betas=(beta1, 0.999),
                        weight_decay=1e-5)  # for the generator G

fixed_noise = torch.randn(1, 1, batch_size, 128, device=device)  # fixed noise used to monitor progress

# In[7]:

# training loop
for epoch in range(n_epoch):
    for itr, data in enumerate(dataloader):
        real_image = data.to(device)  # real images
        sample_size = real_image.size(0)  # number of images in the batch
        # one noise tensor of shape sample_size x 128
        noise = torch.randn(1, 1, sample_size, 128,
                            device=device)  # noise drawn from a normal distribution

        real_target = torch.full((sample_size, ), 1.,
                                 device=device)  # discriminator target "1" for real images
        fake_target = torch.full((sample_size, ), 0.,
                                 device=device)  # discriminator target "0" for fake images

        ############################
        # Update discriminator D
        ###########################
        netD.zero_grad()  # reset gradients
Ejemplo n.º 31
0
    discriminator = Discriminator(args.classes).to(device)
    discriminator.apply(weights_init)
    if args.discriminator is not None:
        discriminator.load_state_dict(torch.load(args.discriminator, map_location=device_name), strict=False)

    loss = nn.BCELoss()

    optimizerG = optim.Adam(generator.parameters(), lr=args.learning_rate)
    optimizerD = optim.Adam(discriminator.parameters(), lr=args.learning_rate)

    labels = torch.LongTensor([args.class_label]).repeat(args.batch_size).to(device)
    fixed_noise = generator.build_input(args.batch_size, labels)

    for epoch in range(1, args.epochs + 1):
        for i, (data, class_labels) in enumerate(dataloader, 0):
            real_data = data.to(device)
            batch_size = real_data.size(0)

            discriminator.zero_grad()

            input_data = discriminator.build_input(real_data, class_labels)
            output = discriminator(input_data)

            label = torch.full((batch_size,), 1., device=device)  # float labels for nn.BCELoss
            loss_with_real = loss(output, label)
            loss_with_real.backward()
            D_x = output.mean().item()

            generator_input = generator.build_input(args.batch_size, class_labels)
            fake_data = generator(generator_input)
            fake_data = discriminator.build_input(fake_data, class_labels)
Ejemplo n.º 32
0
model.fc = nn.Linear(2048, 16)
model.aux_logits = False
model = model.to(device)
print('params to update:')
params_to_update = []
for name, param in model.named_parameters():
    if param.requires_grad:
        params_to_update.append(param)
        print('\t', name)
optimizer = torch.optim.Adam(params_to_update, lr=opt.lr, betas=(opt.beta1, opt.beta2), eps=opt.eps)

print('|  - training...')
for epoch in range(1, opt.epochs + 1):
    model.train()
    for idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if idx % opt.log_freq == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, idx * len(data), len(train_loader.dataset),
                100. * idx / len(train_loader), loss.item()))
    model.eval()
    test_loss, correct = 0, 0
    batch_size = test_loader.batch_size
    error, i = [], 0
    with torch.no_grad():
        for data, target in test_loader: