def __init__(self):
    super(seg_loss, self).__init__()
    self.loss = nn.CrossEntropyLoss()
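Only the constructor of this wrapper is shown above; a minimal self-contained sketch of what the full module might look like follows, where the forward method is an assumption rather than part of the original example:

import torch.nn as nn

class seg_loss(nn.Module):
    # Thin wrapper around CrossEntropyLoss for segmentation logits.
    # Only __init__ appears in the original snippet; forward is assumed.
    def __init__(self):
        super(seg_loss, self).__init__()
        self.loss = nn.CrossEntropyLoss()

    def forward(self, logits, target):
        # logits: (N, C, H, W) scores, target: (N, H, W) integer class ids
        return self.loss(logits, target)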
Example 2
    def train(self, device):
        optimizer = optim.Adam(self.parameters(), lr=0.0001)
        path = 'FCNN_module.tar'
        initepoch = 0

        if not os.path.exists(path):
            loss = nn.CrossEntropyLoss()
            # optimizer = optim.SGD(self.parameters(),lr=0.01)

        else:
            checkpoint = torch.load(path)
            self.load_state_dict(checkpoint['model_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            initepoch = checkpoint['epoch']
            loss = checkpoint['loss']
        lossss = []
        accccc = []

        for epoch in range(initepoch,
                           100):  # loop over the dataset multiple times
            timestart = time.time()

            running_loss = 0.0
            total = 0
            correct = 0
            for i, data in enumerate(self.trainloader, 0):
                # get the inputs
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs = self(inputs)
                l = loss(outputs, labels)
                l.backward()
                optimizer.step()

                # print statistics
                running_loss += l.item()
                # print("i ",i)
                if i % 500 == 0:  # print every 500 mini-batches
                    print('[%d, %5d] loss: %.4f' %
                          (epoch, i, running_loss / 500))
                    lossss.append(running_loss / 500)

                    running_loss = 0.0
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
                    print(
                        'Accuracy of the network on the %d train images: %.3f %%'
                        % (total, 100.0 * correct / total))
                    accccc.append(100.0 * correct / total)
                    total = 0
                    correct = 0
                    torch.save(
                        {
                            'epoch': epoch,
                            'model_state_dict': self.state_dict(),
                            'optimizer_state_dict': optimizer.state_dict(),
                            'loss': loss
                        }, path)

            print('epoch %d cost %.3f sec' % (epoch, time.time() - timestart))
        with open('./fcnnAns.csv', 'w') as fw:
            for i in range(len(lossss)):
                fw.write(str(lossss[i]) + ',' + str(accccc[i]) + '\n')
        print('Finished Training')
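The resume logic above relies on saving a dict of model, optimizer, and bookkeeping state each time. A self-contained sketch of the same save/resume pattern on a throwaway model (the file name and tiny model are placeholders, not from the example):

import os
import torch
import torch.nn as nn
import torch.optim as optim

path = 'demo_checkpoint.tar'              # placeholder path
model = nn.Linear(8, 3)                   # stand-in for the network above
optimizer = optim.Adam(model.parameters(), lr=0.0001)
loss_fn = nn.CrossEntropyLoss()
initepoch = 0

if os.path.exists(path):
    checkpoint = torch.load(path)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    initepoch = checkpoint['epoch']

for epoch in range(initepoch, 3):
    inputs = torch.randn(16, 8)
    labels = torch.randint(0, 3, (16,))
    optimizer.zero_grad()
    l = loss_fn(model(inputs), labels)
    l.backward()
    optimizer.step()
    torch.save({'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()}, path)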
Example 3
def main():
    global best_acc

    if not os.path.isdir(args.out):
        mkdir_p(args.out)

    # Data
    print('==> Preparing cifar10')
    transform_train = transforms.Compose([
        dataset.RandomPadandCrop(32),
        dataset.RandomFlip(),
        dataset.ToTensor(),
    ])

    transform_val = transforms.Compose([
        dataset.ToTensor(),
    ])

    train_labeled_set, train_unlabeled_set, val_set, test_set = dataset.get_cifar10(
        './data',
        args.n_labeled,
        transform_train=transform_train,
        transform_val=transform_val)
    labeled_trainloader = data.DataLoader(train_labeled_set,
                                          batch_size=args.batch_size,
                                          shuffle=True,
                                          num_workers=0,
                                          drop_last=True)
    unlabeled_trainloader = data.DataLoader(train_unlabeled_set,
                                            batch_size=args.batch_size,
                                            shuffle=True,
                                            num_workers=0,
                                            drop_last=True)
    val_loader = data.DataLoader(val_set,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=0)
    test_loader = data.DataLoader(test_set,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=0)

    # Model
    print("==> creating WRN-28-2")

    def create_model(ema=False):
        model = models.WideResNet(num_classes=10)
        model = model.cuda()

        if ema:
            for param in model.parameters():
                param.detach_()

        return model

    model = create_model()
    ema_model = create_model(ema=True)

    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    train_criterion = SemiLoss()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    ema_optimizer = WeightEMA(model, ema_model, alpha=args.ema_decay)
    start_epoch = 0

    # Resume
    title = 'noisy-cifar-10'
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint file found!'
        args.out = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        ema_model.load_state_dict(checkpoint['ema_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.out, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.out, 'log.txt'), title=title)
        logger.set_names([
            'Train Loss', 'Train Loss X', 'Train Loss U', 'Valid Loss',
            'Valid Acc.', 'Test Loss', 'Test Acc.'
        ])

    writer = SummaryWriter(args.out)
    step = 0
    test_accs = []
    # Train and val
    for epoch in range(start_epoch, args.epochs):

        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, state['lr']))

        train_loss, train_loss_x, train_loss_u = train(
            labeled_trainloader, unlabeled_trainloader, model, optimizer,
            ema_optimizer, train_criterion, epoch, use_cuda)
        _, train_acc = validate(labeled_trainloader,
                                ema_model,
                                criterion,
                                epoch,
                                use_cuda,
                                mode='Train Stats')
        val_loss, val_acc = validate(val_loader,
                                     ema_model,
                                     criterion,
                                     epoch,
                                     use_cuda,
                                     mode='Valid Stats')
        test_loss, test_acc = validate(test_loader,
                                       ema_model,
                                       criterion,
                                       epoch,
                                       use_cuda,
                                       mode='Test Stats ')

        step = args.val_iteration * (epoch + 1)

        writer.add_scalar('losses/train_loss', train_loss, step)
        writer.add_scalar('losses/valid_loss', val_loss, step)
        writer.add_scalar('losses/test_loss', test_loss, step)

        writer.add_scalar('accuracy/train_acc', train_acc, step)
        writer.add_scalar('accuracy/val_acc', val_acc, step)
        writer.add_scalar('accuracy/test_acc', test_acc, step)

        # append logger file
        logger.append([
            train_loss, train_loss_x, train_loss_u, val_loss, val_acc,
            test_loss, test_acc
        ])

        # save model
        is_best = val_acc > best_acc
        best_acc = max(val_acc, best_acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'ema_state_dict': ema_model.state_dict(),
                'acc': val_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            }, is_best)
        test_accs.append(test_acc)
    logger.close()
    writer.close()

    print('Best acc:')
    print(best_acc)

    print('Mean acc:')
    print(np.mean(test_accs[-20:]))
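WeightEMA is defined elsewhere in that repository; its job is to keep ema_model as an exponential moving average of the student weights (note that the EMA model's parameters are detached above). A hedged, self-contained sketch of that update, not the project's actual class:

import copy
import torch
import torch.nn as nn

model = nn.Linear(4, 2)
ema_model = copy.deepcopy(model)
for param in ema_model.parameters():
    param.detach_()                      # EMA weights are never trained directly

def ema_update(model, ema_model, alpha=0.999):
    # ema = alpha * ema + (1 - alpha) * current student weights
    with torch.no_grad():
        for ema_p, p in zip(ema_model.parameters(), model.parameters()):
            ema_p.mul_(alpha).add_(p, alpha=1 - alpha)

ema_update(model, ema_model)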
Example 4
def train_correspondence_block(json_file, cls, gpu, synthetic, epochs=50, batch_size=64, val_ratio=0.2,
                               save_model=True, iter_print=10):
    """
    Training a UNet for each class using real and/or synthetic training data
    Args:
        json_file: .txt file which stores the directory of the training images
        cls: the class to train on, from 1 to 6
        gpu: gpu id to use
        synthetic: whether to use synthetic data or not
        epochs: number of epochs to train
        batch_size: batch size
        val_ratio: validation ratio during training
        save_model: save model or not
        iter_print: print training results per iter_print iterations

    """
    train_data = NOCSDataset(json_file, cls, synthetic=synthetic, resize=64,
                             transform=transforms.Compose([transforms.ColorJitter(brightness=(0.6, 1.4),
                                                                                  contrast=(0.8, 1.2),
                                                                                  saturation=(0.8, 1.2),
                                                                                  hue=(-0.01, 0.01)),
                                                           AddGaussianNoise(10 / 255)]))
    print('Size of trainset ', len(train_data))
    indices = list(range(len(train_data)))
    np.random.shuffle(indices)

    num_train = len(indices)
    split = int(np.floor(num_train * val_ratio))
    train_idx, valid_idx = indices[split:], indices[:split]

    # define samplers for obtaining training and validation batches
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    # prepare data loaders (combine dataset and sampler)
    num_workers = 4
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                               sampler=train_sampler, num_workers=num_workers)
    val_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                             sampler=valid_sampler, num_workers=num_workers)
    device = torch.device(f"cuda:{gpu}" if torch.cuda.is_available() else "cpu")
    print("device: ", f"cuda:{gpu}" if torch.cuda.is_available() else "cpu")
    # architecture for correspondence block - 13 objects + background = 14 channels for ID masks
    correspondence_block = UNet()
    correspondence_block = correspondence_block.to(device)

    # custom loss function and optimizer
    criterion_x = nn.CrossEntropyLoss()
    criterion_y = nn.CrossEntropyLoss()
    criterion_z = nn.CrossEntropyLoss()

    # specify optimizer
    optimizer = optim.Adam(correspondence_block.parameters(), lr=3e-4, weight_decay=3e-5)

    # training loop
    val_loss_min = np.Inf
    save_path = model_save_path(cls)
    writer = SummaryWriter(save_path.parent / save_path.stem / datetime.now().strftime("%d%H%M"))

    for epoch in range(epochs):
        t0 = time.time()
        train_loss = 0
        val_loss = 0
        print("------ Epoch ", epoch, " ---------")
        correspondence_block.train()
        print("training")
        for batch_idx, (rgb, xmask, ymask, zmask, adr_rgb) in enumerate(train_loader):

            rgb = rgb.to(device)
            xmask = xmask.to(device)
            ymask = ymask.to(device)
            zmask = zmask.to(device)

            optimizer.zero_grad()
            xmask_pred, ymask_pred, zmask_pred = correspondence_block(rgb)

            loss_x = criterion_x(xmask_pred, xmask)
            loss_y = criterion_y(ymask_pred, ymask)
            loss_z = criterion_z(zmask_pred, zmask)

            loss = loss_x + loss_y + loss_z

            loss.backward()
            optimizer.step()
            train_loss += loss.item()

            if batch_idx % iter_print == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(rgb), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))

        correspondence_block.eval()

        print("validating")
        for rgb, xmask, ymask, zmask, _ in val_loader:
            rgb = rgb.to(device)
            xmask = xmask.to(device)
            ymask = ymask.to(device)
            zmask = zmask.to(device)

            xmask_pred, ymask_pred, zmask_pred = correspondence_block(rgb)

            loss_x = criterion_x(xmask_pred, xmask)
            loss_y = criterion_y(ymask_pred, ymask)
            loss_z = criterion_z(zmask_pred, zmask)

            loss = loss_x + loss_y + loss_z
            val_loss += loss.item()

        # calculate average losses
        train_loss = train_loss / len(train_loader.sampler)
        val_loss = val_loss / len(val_loader.sampler)
        t_end = time.time()
        print(f'{t_end - t0} seconds')
        writer.add_scalar('train loss', train_loss, epoch)
        writer.add_scalar('val loss', val_loss, epoch)
        writer.add_scalar('epoch time', t_end - t0, epoch)

        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch, train_loss, val_loss))

        # save model if validation loss has decreased
        if val_loss <= val_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
                val_loss_min,
                val_loss))
            if save_model:
                torch.save(correspondence_block.state_dict(), save_path)
            val_loss_min = val_loss
    writer.close()
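The three criteria above are plain CrossEntropyLoss instances applied per pixel: nn.CrossEntropyLoss accepts (N, C, H, W) logits together with (N, H, W) integer targets. A tiny standalone illustration with dummy tensors (the number of classes is arbitrary here):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
num_classes = 32                                                    # arbitrary for the sketch
logits = torch.randn(2, num_classes, 64, 64, requires_grad=True)   # (N, C, H, W)
target = torch.randint(0, num_classes, (2, 64, 64))                # (N, H, W)
loss = criterion(logits, target)
loss.backward()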

Example 5
model = model.stl10(n_channel=args.channel)
model = torch.nn.DataParallel(model, device_ids= range(args.ngpu))
if args.cuda:
    print('USING CUDA')
    model.cuda()

# optimizer
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd)
decreasing_lr = list(map(int, args.decreasing_lr.split(',')))
print('decreasing_lr: ' + str(decreasing_lr))
best_acc, old_file = 0, None
t_begin = time.time()
crit0 = nn.CrossEntropyLoss()
crit1 = OLELoss(lambda_=args.lambda_)



# ready to go
for epoch in range(args.epochs):
    model.train()
    if epoch in decreasing_lr:
        optimizer.param_groups[0]['lr'] *= 0.1
    for batch_idx, (data, target) in enumerate(train_loader):
        indx_target = target.clone()

        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
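This snippet decays the learning rate by hand whenever the epoch hits one of the --decreasing_lr milestones. The same schedule can be written with torch.optim.lr_scheduler.MultiStepLR; a sketch with a stand-in model (the milestone string is illustrative):

import torch.nn as nn
import torch.optim as optim

model = nn.Linear(16, 10)                 # stand-in for model.stl10(...)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)

decreasing_lr = list(map(int, '80,120'.split(',')))    # same parsing as above
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=decreasing_lr, gamma=0.1)

for epoch in range(150):
    # ... forward/backward over the training set ...
    optimizer.step()                      # placeholder for the real update
    scheduler.step()                      # lr is multiplied by 0.1 after epochs 80 and 120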
Example 6
def main():
    global args
    args = parser.parse_args()

    # create Light CNN for face recognition
    if args.model == 'LightCNN-9':
        model = LightCNN_9Layers(num_classes=args.num_classes)
    elif args.model == 'LightCNN-29':
        model = LightCNN_29Layers(num_classes=args.num_classes)
    elif args.model == 'LightCNN-29v2':
        model = LightCNN_29Layers_v2(num_classes=args.num_classes)
    else:
        print('Error: unknown model type\n')

    if args.cuda:
        model = torch.nn.DataParallel(model).cuda()

    print(model)

    # large lr for last fc parameters
    params = []
    for name, value in model.named_parameters():
        if 'bias' in name:
            if 'fc2' in name:
                params += [{
                    'params': value,
                    'lr': 20 * args.lr,
                    'weight_decay': 0
                }]
            else:
                params += [{
                    'params': value,
                    'lr': 2 * args.lr,
                    'weight_decay': 0
                }]
        else:
            if 'fc2' in name:
                params += [{'params': value, 'lr': 10 * args.lr}]
            else:
                params += [{'params': value, 'lr': 1 * args.lr}]

    optimizer = torch.optim.SGD(params,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    #load image
    train_loader = torch.utils.data.DataLoader(ImageList(
        root=args.root_path,
        fileList=args.train_list,
        transform=transforms.Compose([
            transforms.RandomCrop(128),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(ImageList(
        root=args.root_path,
        fileList=args.val_list,
        transform=transforms.Compose([
            transforms.CenterCrop(128),
            transforms.ToTensor(),
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function and optimizer
    criterion = nn.CrossEntropyLoss()

    if args.cuda:
        criterion.cuda()

    validate(val_loader, model, criterion)

    for epoch in range(args.start_epoch, args.epochs):

        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        save_name = args.save_path + 'lightCNN_' + str(
            epoch + 1) + '_checkpoint.pth.tar'
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'prec1': prec1,
            }, save_name)
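The loop above hand-builds optimizer parameter groups so that biases and the final fc2 layer train with larger learning rates and no weight decay. The same idea in a compact, self-contained form (the layer names and multipliers here are illustrative, not LightCNN's):

import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(128, 64), nn.ReLU(), nn.Linear(64, 10))
base_lr = 0.01

params = []
for name, value in model.named_parameters():
    lr_scale = 10.0 if name.startswith('2.') else 1.0   # boost the last layer
    if name.endswith('bias'):
        params += [{'params': value, 'lr': 2 * lr_scale * base_lr, 'weight_decay': 0}]
    else:
        params += [{'params': value, 'lr': lr_scale * base_lr}]

optimizer = torch.optim.SGD(params, base_lr, momentum=0.9, weight_decay=1e-4)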
Example 7
def __init__(self, weight=None):
    super(DLPCNNLoss, self).__init__()
    self.loss = nn.CrossEntropyLoss(weight=weight)
    self.lamda = 0.003
    self.k = 20
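The weight argument passed to CrossEntropyLoss here rescales each class's contribution, which is useful for imbalanced data. A standalone illustration with three classes where the last one is up-weighted:

import torch
import torch.nn as nn

class_weights = torch.tensor([1.0, 1.0, 5.0])     # rare class counts five times as much
criterion = nn.CrossEntropyLoss(weight=class_weights)

logits = torch.randn(8, 3, requires_grad=True)
labels = torch.randint(0, 3, (8,))
loss = criterion(logits, labels)
loss.backward()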
Example 8
def main():
    global args, best_prec1
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if not os.path.exists(args.save):
        os.makedirs(args.save)

    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if args.gpu is not None:
        model = model.cuda(args.gpu)
    elif args.distributed:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)
    else:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch)

        #####################################################################################################
        num_parameters = get_conv_zero_param(model)
        print('Zero parameters: {}'.format(num_parameters))
        num_parameters = sum(
            [param.nelement() for param in model.parameters()])
        print('Parameters: {}'.format(num_parameters))
        #####################################################################################################

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            checkpoint=args.save)
    return
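get_conv_zero_param is a helper from the pruning codebase and is not shown here; one plausible implementation, assumed rather than taken from the source, simply counts convolution weights that pruning has zeroed out:

import torch
import torch.nn as nn

def get_conv_zero_param(model):
    # Assumed sketch: count Conv2d weight entries that are exactly zero.
    total = 0
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            total += torch.sum(m.weight.data.eq(0)).item()
    return total

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Conv2d(8, 8, 3))
print(get_conv_zero_param(model))   # 0 for a freshly initialized, unpruned model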
Example 9
def main():
  if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)

  np.random.seed(args.seed)
  torch.cuda.set_device(args.gpu)
  cudnn.benchmark = True
  torch.manual_seed(args.seed)
  cudnn.enabled=True
  torch.cuda.manual_seed(args.seed)
  logging.info('gpu device = %d' % args.gpu)
  logging.info("args = %s", args)

  genotype = eval("genotypes.%s" % args.arch)
  model = Network(args.init_channels, CLASSES, args.layers, args.auxiliary, genotype)

  if args.parallel:
    model = nn.DataParallel(model).cuda()
  else:
    model = model.cuda()

  #input = torch.randn(1,3,224,224).cuda()
  #macs, params = profile(model, inputs=(input,))
  #print('flops: {}, params: {}'.format(macs, params)) #arch2vec_bo: 580M, 5.18M; arch2vec_rl: 533M, 4.82M
  #print("param size = %fMB", utils.count_parameters_in_MB(model))
  #exit()

  logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

  criterion = nn.CrossEntropyLoss()
  criterion = criterion.cuda()
  criterion_smooth = CrossEntropyLabelSmooth(CLASSES, args.label_smooth)
  criterion_smooth = criterion_smooth.cuda()

  optimizer = torch.optim.SGD(
    model.parameters(),
    args.learning_rate,
    momentum=args.momentum,
    weight_decay=args.weight_decay
    )

  traindir = os.path.join(args.data, 'train')
  validdir = os.path.join(args.data, 'val')
  normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
  train_data = dset.ImageFolder(
    traindir,
    transforms.Compose([
      transforms.RandomResizedCrop(224),
      transforms.RandomHorizontalFlip(),
      transforms.ColorJitter(
        brightness=0.4,
        contrast=0.4,
        saturation=0.4,
        hue=0.2),
      transforms.ToTensor(),
      normalize,
    ]))
  valid_data = dset.ImageFolder(
    validdir,
    transforms.Compose([
      transforms.Resize(256),
      transforms.CenterCrop(224),
      transforms.ToTensor(),
      normalize,
    ]))

  train_queue = torch.utils.data.DataLoader(
    train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=4)

  valid_queue = torch.utils.data.DataLoader(
    valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=4)

  scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.decay_period, gamma=args.gamma)

  best_acc_top1 = 0
  for epoch in range(args.epochs):
    logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
    model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

    train_acc, train_obj = train(train_queue, model, criterion_smooth, optimizer)
    logging.info('train_acc %f', train_acc)

    valid_acc_top1, valid_acc_top5, valid_obj = infer(valid_queue, model, criterion)
    logging.info('valid_acc_top1 %f', valid_acc_top1)
    logging.info('valid_acc_top5 %f', valid_acc_top5)

    is_best = False
    if valid_acc_top1 > best_acc_top1:
      best_acc_top1 = valid_acc_top1
      is_best = True

    utils.save_checkpoint({
      'epoch': epoch + 1,
      'state_dict': model.state_dict(),
      'best_acc_top1': best_acc_top1,
      'optimizer' : optimizer.state_dict(),
      }, is_best, args.save)

    scheduler.step()
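CrossEntropyLabelSmooth is a custom criterion from this codebase; PyTorch 1.10 and later expose the same technique directly through the label_smoothing argument of nn.CrossEntropyLoss. A minimal equivalent sketch (assuming a recent PyTorch):

import torch
import torch.nn as nn

criterion_smooth = nn.CrossEntropyLoss(label_smoothing=0.1)   # needs PyTorch >= 1.10

logits = torch.randn(4, 1000, requires_grad=True)   # e.g. CLASSES = 1000
target = torch.randint(0, 1000, (4,))
loss = criterion_smooth(logits, target)
loss.backward()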
Example 10
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))

        if args.my_model == 'zerocenter':
            import my_models.zerocenter as my_model
            model = my_model.__dict__[args.arch]()
            prefix = args.my_model
        elif args.my_model == 'zerocenter2':
            import my_models.zerocenter2 as my_model
            model = my_model.__dict__[args.arch]()
            prefix = args.my_model
        elif args.my_model == 'doublenorm':
            import my_models.doublenorm as my_model
            model = my_model.__dict__[args.arch]()
            prefix = args.my_model
        elif args.my_model == 'doublenorm2':
            import my_models.doublenorm2 as my_model
            model = my_model.__dict__[args.arch]()
            prefix = args.my_model
        else:
            model = models.__dict__[args.arch]()
            prefix = 'normal'

        model_full_name = prefix + '_' + args.arch

        print("=> model is ", model_full_name)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                if isinstance(best_acc1, float):
                    best_acc1 = torch.tensor(best_acc1)
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train_stats = train(train_loader, model, criterion, optimizer, epoch,
                            args)

        # evaluate on validation set
        val_stats = validate(val_loader, model, criterion, args)
        acc1 = val_stats[1]

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'train_stats': train_stats,
                    'val_stats': val_stats,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                filename=model_full_name + '_' + str(epoch + 1))
Example 11
def main():
    setup_default_logging()
    args, args_text = _parse_args()
    
    if args.log_wandb:
        if has_wandb:
            wandb.init(project=args.experiment, config=args)
        else: 
            _logger.warning("You've requested to log metrics to wandb but package not found. "
                            "Metrics not being logged to wandb, try `pip install wandb`")
             
    args.prefetcher = not args.no_prefetcher
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
    args.device = 'cuda:0'
    args.world_size = 1
    args.rank = 0  # global rank
    if args.distributed:
        args.device = 'cuda:%d' % args.local_rank
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        args.world_size = torch.distributed.get_world_size()
        args.rank = torch.distributed.get_rank()
        _logger.info('Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'
                     % (args.rank, args.world_size))
    else:
        _logger.info('Training with a single process on 1 GPU.')
    assert args.rank >= 0

    # resolve AMP arguments based on PyTorch / Apex availability
    use_amp = None
    if args.amp:
        # `--amp` chooses native amp before apex (APEX ver not actively maintained)
        if has_native_amp:
            args.native_amp = True
        elif has_apex:
            args.apex_amp = True
    if args.apex_amp and has_apex:
        use_amp = 'apex'
    elif args.native_amp and has_native_amp:
        use_amp = 'native'
    elif args.apex_amp or args.native_amp:
        _logger.warning("Neither APEX or native Torch AMP is available, using float32. "
                        "Install NVIDA apex or upgrade to PyTorch 1.6")

    random_seed(args.seed, args.rank)

    model = create_model(
        args.model,
        pretrained=args.pretrained,
        num_classes=args.num_classes,
        drop_rate=args.drop,
        drop_connect_rate=args.drop_connect,  # DEPRECATED, use drop_path
        drop_path_rate=args.drop_path,
        drop_block_rate=args.drop_block,
        global_pool=args.gp,
        bn_tf=args.bn_tf,
        bn_momentum=args.bn_momentum,
        bn_eps=args.bn_eps,
        scriptable=args.torchscript,
        checkpoint_path=args.initial_checkpoint)
    if args.num_classes is None:
        assert hasattr(model, 'num_classes'), 'Model must have `num_classes` attr if not set on cmd line/config.'
        args.num_classes = model.num_classes  # FIXME handle model default vs config num_classes more elegantly

    if args.local_rank == 0:
        _logger.info(
            f'Model {safe_model_name(args.model)} created, param count:{sum([m.numel() for m in model.parameters()])}')

    data_config = resolve_data_config(vars(args), model=model, verbose=args.local_rank == 0)

    # setup augmentation batch splits for contrastive loss or split bn
    num_aug_splits = 0
    if args.aug_splits > 0:
        assert args.aug_splits > 1, 'A split of 1 makes no sense'
        num_aug_splits = args.aug_splits

    # enable split bn (separate bn stats per batch-portion)
    if args.split_bn:
        assert num_aug_splits > 1 or args.resplit
        model = convert_splitbn_model(model, max(num_aug_splits, 2))

    # move model to GPU, enable channels last layout if set
    model.cuda()
    if args.channels_last:
        model = model.to(memory_format=torch.channels_last)

    # setup synchronized BatchNorm for distributed training
    if args.distributed and args.sync_bn:
        assert not args.split_bn
        if has_apex and use_amp != 'native':
            # Apex SyncBN preferred unless native amp is activated
            model = convert_syncbn_model(model)
        else:
            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
        if args.local_rank == 0:
            _logger.info(
                'Converted model to use Synchronized BatchNorm. WARNING: You may have issues if using '
                'zero initialized BN layers (enabled by default for ResNets) while sync-bn enabled.')

    if args.torchscript:
        assert not use_amp == 'apex', 'Cannot use APEX AMP with torchscripted model'
        assert not args.sync_bn, 'Cannot use SyncBatchNorm with torchscripted model'
        model = torch.jit.script(model)

    optimizer = create_optimizer_v2(model, **optimizer_kwargs(cfg=args))

    # setup automatic mixed-precision (AMP) loss scaling and op casting
    amp_autocast = suppress  # do nothing
    loss_scaler = None
    if use_amp == 'apex':
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        loss_scaler = ApexScaler()
        if args.local_rank == 0:
            _logger.info('Using NVIDIA APEX AMP. Training in mixed precision.')
    elif use_amp == 'native':
        amp_autocast = torch.cuda.amp.autocast
        loss_scaler = NativeScaler()
        if args.local_rank == 0:
            _logger.info('Using native Torch AMP. Training in mixed precision.')
    else:
        if args.local_rank == 0:
            _logger.info('AMP not enabled. Training in float32.')

    # optionally resume from a checkpoint
    resume_epoch = None
    if args.resume:
        resume_epoch = resume_checkpoint(
            model, args.resume,
            optimizer=None if args.no_resume_opt else optimizer,
            loss_scaler=None if args.no_resume_opt else loss_scaler,
            log_info=args.local_rank == 0)

    # setup exponential moving average of model weights, SWA could be used here too
    model_ema = None
    if args.model_ema:
        # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper
        model_ema = ModelEmaV2(
            model, decay=args.model_ema_decay, device='cpu' if args.model_ema_force_cpu else None)
        if args.resume:
            load_checkpoint(model_ema.module, args.resume, use_ema=True)

    # setup distributed training
    if args.distributed:
        if has_apex and use_amp != 'native':
            # Apex DDP preferred unless native amp is activated
            if args.local_rank == 0:
                _logger.info("Using NVIDIA APEX DistributedDataParallel.")
            model = ApexDDP(model, delay_allreduce=True)
        else:
            if args.local_rank == 0:
                _logger.info("Using native Torch DistributedDataParallel.")
            model = NativeDDP(model, device_ids=[args.local_rank])  # can use device str in Torch >= 1.1
        # NOTE: EMA model does not need to be wrapped by DDP

    # setup learning rate schedule and starting epoch
    lr_scheduler, num_epochs = create_scheduler(args, optimizer)
    start_epoch = 0
    if args.start_epoch is not None:
        # a specified start_epoch will always override the resume epoch
        start_epoch = args.start_epoch
    elif resume_epoch is not None:
        start_epoch = resume_epoch
    if lr_scheduler is not None and start_epoch > 0:
        lr_scheduler.step(start_epoch)

    if args.local_rank == 0:
        _logger.info('Scheduled epochs: {}'.format(num_epochs))

    # create the train and eval datasets
    dataset_train = create_dataset(
        args.dataset,
        root=args.data_dir, split=args.train_split, is_training=True,
        batch_size=args.batch_size, repeats=args.epoch_repeats)
    dataset_eval = create_dataset(
        args.dataset, root=args.data_dir, split=args.val_split, is_training=False, batch_size=args.batch_size)

    # setup mixup / cutmix
    collate_fn = None
    mixup_fn = None
    mixup_active = args.mixup > 0 or args.cutmix > 0. or args.cutmix_minmax is not None
    if mixup_active:
        mixup_args = dict(
            mixup_alpha=args.mixup, cutmix_alpha=args.cutmix, cutmix_minmax=args.cutmix_minmax,
            prob=args.mixup_prob, switch_prob=args.mixup_switch_prob, mode=args.mixup_mode,
            label_smoothing=args.smoothing, num_classes=args.num_classes)
        if args.prefetcher:
            assert not num_aug_splits  # collate conflict (need to support deinterleaving in collate mixup)
            collate_fn = FastCollateMixup(**mixup_args)
        else:
            mixup_fn = Mixup(**mixup_args)

    # wrap dataset in AugMix helper
    if num_aug_splits > 1:
        dataset_train = AugMixDataset(dataset_train, num_splits=num_aug_splits)

    # create data loaders w/ augmentation pipeline
    train_interpolation = args.train_interpolation
    if args.no_aug or not train_interpolation:
        train_interpolation = data_config['interpolation']
    loader_train = create_loader(
        dataset_train,
        input_size=data_config['input_size'],
        batch_size=args.batch_size,
        is_training=True,
        use_prefetcher=args.prefetcher,
        no_aug=args.no_aug,
        re_prob=args.reprob,
        re_mode=args.remode,
        re_count=args.recount,
        re_split=args.resplit,
        scale=args.scale,
        ratio=args.ratio,
        hflip=args.hflip,
        vflip=args.vflip,
        color_jitter=args.color_jitter,
        auto_augment=args.aa,
        num_aug_repeats=args.aug_repeats,
        num_aug_splits=num_aug_splits,
        interpolation=train_interpolation,
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        collate_fn=collate_fn,
        pin_memory=args.pin_mem,
        use_multi_epochs_loader=args.use_multi_epochs_loader
    )

    loader_eval = create_loader(
        dataset_eval,
        input_size=data_config['input_size'],
        batch_size=args.validation_batch_size or args.batch_size,
        is_training=False,
        use_prefetcher=args.prefetcher,
        interpolation=data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        crop_pct=data_config['crop_pct'],
        pin_memory=args.pin_mem,
    )

    # setup loss function
    if args.jsd_loss:
        assert num_aug_splits > 1  # JSD only valid with aug splits set
        train_loss_fn = JsdCrossEntropy(num_splits=num_aug_splits, smoothing=args.smoothing)
    elif mixup_active:
        # smoothing is handled with mixup target transform which outputs sparse, soft targets
        if args.bce_loss:
            train_loss_fn = nn.BCEWithLogitsLoss()
        else:
            train_loss_fn = SoftTargetCrossEntropy()
    elif args.smoothing:
        if args.bce_loss:
            train_loss_fn = DenseBinaryCrossEntropy(smoothing=args.smoothing)
        else:
            train_loss_fn = LabelSmoothingCrossEntropy(smoothing=args.smoothing)
    else:
        train_loss_fn = nn.CrossEntropyLoss()
    train_loss_fn = train_loss_fn.cuda()
    validate_loss_fn = nn.CrossEntropyLoss().cuda()

    # setup checkpoint saver and eval metric tracking
    eval_metric = args.eval_metric
    best_metric = None
    best_epoch = None
    saver = None
    output_dir = None
    if args.rank == 0:
        if args.experiment:
            exp_name = args.experiment
        else:
            exp_name = '-'.join([
                datetime.now().strftime("%Y%m%d-%H%M%S"),
                safe_model_name(args.model),
                str(data_config['input_size'][-1])
            ])
        output_dir = get_outdir(args.output if args.output else './output/train', exp_name)
        decreasing = True if eval_metric == 'loss' else False
        saver = CheckpointSaver(
            model=model, optimizer=optimizer, args=args, model_ema=model_ema, amp_scaler=loss_scaler,
            checkpoint_dir=output_dir, recovery_dir=output_dir, decreasing=decreasing, max_history=args.checkpoint_hist)
        with open(os.path.join(output_dir, 'args.yaml'), 'w') as f:
            f.write(args_text)

    try:
        for epoch in range(start_epoch, num_epochs):
            if args.distributed and hasattr(loader_train.sampler, 'set_epoch'):
                loader_train.sampler.set_epoch(epoch)

            train_metrics = train_one_epoch(
                epoch, model, loader_train, optimizer, train_loss_fn, args,
                lr_scheduler=lr_scheduler, saver=saver, output_dir=output_dir,
                amp_autocast=amp_autocast, loss_scaler=loss_scaler, model_ema=model_ema, mixup_fn=mixup_fn)

            if args.distributed and args.dist_bn in ('broadcast', 'reduce'):
                if args.local_rank == 0:
                    _logger.info("Distributing BatchNorm running means and vars")
                distribute_bn(model, args.world_size, args.dist_bn == 'reduce')

            eval_metrics = validate(model, loader_eval, validate_loss_fn, args, amp_autocast=amp_autocast)

            if model_ema is not None and not args.model_ema_force_cpu:
                if args.distributed and args.dist_bn in ('broadcast', 'reduce'):
                    distribute_bn(model_ema, args.world_size, args.dist_bn == 'reduce')
                ema_eval_metrics = validate(
                    model_ema.module, loader_eval, validate_loss_fn, args, amp_autocast=amp_autocast, log_suffix=' (EMA)')
                eval_metrics = ema_eval_metrics

            if lr_scheduler is not None:
                # step LR for next epoch
                lr_scheduler.step(epoch + 1, eval_metrics[eval_metric])

            if output_dir is not None:
                update_summary(
                    epoch, train_metrics, eval_metrics, os.path.join(output_dir, 'summary.csv'),
                    write_header=best_metric is None, log_wandb=args.log_wandb and has_wandb)

            if saver is not None:
                # save proper checkpoint with eval metric
                save_metric = eval_metrics[eval_metric]
                best_metric, best_epoch = saver.save_checkpoint(epoch, metric=save_metric)

    except KeyboardInterrupt:
        pass
    if best_metric is not None:
        _logger.info('*** Best metric: {0} (epoch {1})'.format(best_metric, best_epoch))
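When use_amp == 'native', the training step above runs inside torch.cuda.amp.autocast with a gradient scaler (timm wraps the scaler in NativeScaler). The bare PyTorch pattern this corresponds to looks roughly like the sketch below, assuming a CUDA device is available:

import torch
import torch.nn as nn

model = nn.Linear(32, 10).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
loss_fn = nn.CrossEntropyLoss().cuda()
scaler = torch.cuda.amp.GradScaler()

x = torch.randn(8, 32, device='cuda')
y = torch.randint(0, 10, (8,), device='cuda')

optimizer.zero_grad()
with torch.cuda.amp.autocast():
    loss = loss_fn(model(x), y)
scaler.scale(loss).backward()      # scale the loss to avoid fp16 gradient underflow
scaler.step(optimizer)             # unscales the gradients, then calls optimizer.step()
scaler.update()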
Example 12
for param in recyclingNet.features.parameters():
    param.requires_grad = False


data_dir = './trash_data/Garbage classification/Garbage classification'

transform = transforms.Compose([transforms.Resize(225), transforms.ToTensor(), transforms.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])])


data = datasets.ImageFolder(data_dir, transform=transform)

data_loader = torch.utils.data.DataLoader(data, batch_size=30, num_workers=0, shuffle=True )

if use_cuda:
    criterion = nn.CrossEntropyLoss().cuda()
else:
    criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(recyclingNet.parameters(), lr=.003, momentum=0.9)

def train(epochs, loader, model, optimize, criter, use_cuda, save_path):

  min_loss = np.Inf

  for ii in range(1, epochs+1):
    current_loss = 0

    for batch_idx, (data, target) in enumerate(loader):
      if use_cuda:
          data, target = data.cuda(), target.cuda()

Example 13
def main():
    title = 'Plant-' + args.arch
    best_acc = 0
    cudnn.benchmark = True
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch
    num_classes = 100
    '''
    num_classes = dset.hierarchy.get_class_level_size(0)
    parent_num_classes = dset.hierarchy.get_class_level_size(2)
    hierarchy_matrix = dset.hierarchy.get_hierarchy_mask(2, 0)
    if use_cuda:
        hierarchy_matrix = torch.FloatTensor(hierarchy_matrix)
    else:
        hierarchy_matrix = torch.FloatTensor(hierarchy_matrix)
    '''

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    model = chooser.create_model(chooser.predefined_model(args, cuda=use_cuda), num_classes, cuda=use_cuda)

    #criterion = CrossEntropyLossTSoftmax(hierarchy_matrix=hierarchy_matrix)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint file found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc. Top 1', 'Train Acc. Top 5', 'Valid Acc. Top 1', 'Valid Acc. Top 5', 'USM Alpha'])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(dataloaders['val'], model, criterion, start_epoch, use_cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args)

        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc, train_acc5 = train(dataloaders['train'], model, criterion, optimizer, epoch, use_cuda)
        test_loss, test_acc, test_acc5 = test(dataloaders['val'], model, criterion, epoch, use_cuda)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_acc, train_acc5, test_acc, test_acc5, float(model.filter.alpha.detach().data)])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer' : optimizer.state_dict(),
            }, is_best, checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
Example 14
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--word-dim', type=int, default=300, help='size of word embeddings')
    parser.add_argument('--hidden-dim', type=int, default=300, help='number of hidden units per layer')
    parser.add_argument('--num-layers', type=int, default=1, help='number of layers in BiLSTM')
    parser.add_argument('--att-dim', type=int, default=350, help='number of attention unit')
    parser.add_argument('--att-hops', type=int, default=4, help='number of attention hops, for multi-hop attention model')
    parser.add_argument('--clf-hidden-dim', type=int, default=512, help='hidden (fully connected) layer size for classifier MLP')
    parser.add_argument('--clip', type=float, default=0.5, help='gradient clipping threshold to prevent exploding gradients in the LSTM')
    parser.add_argument('--lr', type=float, default=.001, help='initial learning rate')
    parser.add_argument('--weight-decay', type=float, default=1e-5, help='weight decay rate per batch')
    parser.add_argument('--dropout', type=float, default=0.3)
    parser.add_argument('--max-epoch', type=int, default=8)
    parser.add_argument('--seed', type=int, default=666)
    parser.add_argument('--cuda', action='store_true', default=True)
    parser.add_argument('--optimizer', default='adam', choices=['adam', 'sgd'])
    parser.add_argument('--batch-size', type=int, default=32, help='batch size for training')
    parser.add_argument('--penalization-coeff', type=float, default=0.1, help='the penalization coefficient')
    parser.add_argument('--fix-word-embedding', action='store_true')


    parser.add_argument('--model-type', required=True, choices=['sa', 'avgblock', 'hard'])
    parser.add_argument('--data-type', required=True, choices=['age2', 'dbpedia', 'yahoo'])
    parser.add_argument('--data', required=True, help='pickle file obtained by dataset dump')
    parser.add_argument('--save-dir', type=str, required=True, help='path to save the final model')
    parser.add_argument('--block-size', type=int, default=-1, help='block size only when model-type is avgblock')
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    random.seed(args.seed)
    if torch.cuda.is_available():
        if not args.cuda:
            print("WARNING: You have a CUDA device, so you should probably run with --cuda")
        else:
            torch.cuda.manual_seed(args.seed)
    #######################################
    # a simple log file, the same content as stdout
    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)-8s %(message)s')
    logFormatter = logging.Formatter('%(asctime)s %(levelname)-8s %(message)s')
    rootLogger = logging.getLogger()
    fileHandler = logging.FileHandler(os.path.join(args.save_dir, 'stdout.log'))
    fileHandler.setFormatter(logFormatter)
    rootLogger.addHandler(fileHandler)
    ########################################
    for k, v in vars(args).items():
        logging.info(k+':'+str(v))

    #####################################################################
    if args.data_type == 'age2':
        data = AGE2(datapath=args.data, batch_size=args.batch_size)
        num_classes = 5
    elif args.data_type == 'dbpedia':
        data = DBpedia(datapath=args.data, batch_size=args.batch_size)
        num_classes = 14
    elif args.data_type == 'yahoo':
        data = Yahoo(datapath=args.data, batch_size=args.batch_size)
        num_classes = 10
    else:
        raise Exception('Invalid argument data-type')
    #####################################################################
    if args.model_type == 'avgblock':
        assert args.block_size > 0
    #####################################################################


    tic = time.time()
    model = Classifier(
        dictionary=data,
        dropout=args.dropout,
        num_words=data.num_words,
        num_layers=args.num_layers,
        hidden_dim=args.hidden_dim,
        word_dim=args.word_dim,
        att_dim=args.att_dim,
        att_hops=args.att_hops,
        clf_hidden_dim=args.clf_hidden_dim,
        num_classes=num_classes,
        model_type=args.model_type,
        block_size=args.block_size,
    )
    print('It takes %.2f sec to build the model.' % (time.time() - tic))
    logging.info(model)

    model.word_embedding.weight.data.set_(data.weight)
    if args.fix_word_embedding:
        model.word_embedding.weight.requires_grad = False
    if args.cuda:
        model = model.cuda()
    ''' count parameters
    num_params = sum(np.prod(p.size()) for p in model.parameters())
    num_embedding_params = np.prod(model.word_embedding.weight.size())
    print('# of parameters: %d' % num_params)
    print('# of word embedding parameters: %d' % num_embedding_params)
    print('# of parameters (excluding word embeddings): %d' % (num_params - num_embedding_params))
    '''
    if args.optimizer == 'adam':
        optimizer_class = optim.Adam
    elif args.optimizer == 'sgd':
        optimizer_class = optim.SGD
    else:
        raise Exception('Unsupported optimizer; supported options are Adam and SGD.')
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optimizer_class(params=params, lr=args.lr, weight_decay=args.weight_decay)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, mode='max', factor=0.5, patience=10, verbose=True)
    criterion = nn.CrossEntropyLoss()
    # Identity matrix for each batch
    I = Variable(torch.eye(args.att_hops).unsqueeze(0).expand(args.batch_size, -1, -1))
    if args.cuda:
        I = I.cuda()
    trpack = {
            'model': model,
            'params': params, 
            'criterion': criterion, 
            'optimizer': optimizer,
            'I': I,
            }

    train_summary_writer = tensorboard.FileWriter(
        logdir=os.path.join(args.save_dir, 'log', 'train'), flush_secs=10)
    valid_summary_writer = tensorboard.FileWriter(
        logdir=os.path.join(args.save_dir, 'log', 'valid'), flush_secs=10)
    tsw, vsw = train_summary_writer, valid_summary_writer

    logging.info('number of train batches: %d' % data.train_num_batch)
    validate_every = data.train_num_batch // 10
    best_valid_accuracy = 0
    iter_count = 0
    tic = time.time()

    for epoch_num in range(args.max_epoch):
        for batch_iter, train_batch in enumerate(data.train_minibatch_generator()):
            progress = epoch_num + batch_iter / data.train_num_batch 
            iter_count += 1

            train_loss, train_accuracy = train_iter(args, train_batch, **trpack)
            add_scalar_summary(tsw, 'loss', train_loss, iter_count)
            add_scalar_summary(tsw, 'acc', train_accuracy, iter_count)

            if (batch_iter + 1) % (data.train_num_batch // 100) == 0:
                tac = (time.time() - tic) / 60
                print('   %.2f minutes\tprogress: %.2f' % (tac, progress))
            if (batch_iter + 1) % validate_every == 0:
                correct_sum = 0
                for valid_batch in data.dev_minibatch_generator():
                    correct, supplements = eval_iter(args, model, valid_batch)
                    correct_sum += unwrap_scalar_variable(correct)
                valid_accuracy = correct_sum / data.dev_size 
                scheduler.step(valid_accuracy)
                add_scalar_summary(vsw, 'acc', valid_accuracy, iter_count)
                logging.info('Epoch %.2f: valid accuracy = %.4f' % (progress, valid_accuracy))
                if valid_accuracy > best_valid_accuracy:
                    correct_sum = 0
                    for test_batch in data.test_minibatch_generator():
                        correct, supplements = eval_iter(args, model, test_batch)
                        correct_sum += unwrap_scalar_variable(correct)
                    test_accuracy = correct_sum / data.test_size
                    best_valid_accuracy = valid_accuracy
                    model_filename = ('model-%.2f-%.4f-%.4f.pkl' % (progress, valid_accuracy, test_accuracy))
                    model_path = os.path.join(args.save_dir, model_filename)
                    torch.save(model.state_dict(), model_path)
                    print('Saved the new best model to %s' % model_path)
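The identity matrix `I` and the `--penalization-coeff` option above are typically used for the attention-penalization term of structured self-attention; a minimal sketch of how such a term is commonly computed, assuming `att` holds the attention weights with shape (batch, hops, seq_len):

import torch

def attention_penalty(att, I, coeff):
    # Frobenius norm of (A A^T - I), encouraging the attention hops to differ.
    AAt = torch.bmm(att, att.transpose(1, 2))            # (batch, hops, hops)
    frob = torch.sqrt(((AAt - I) ** 2).sum(dim=(1, 2)) + 1e-10)
    return coeff * frob.mean()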
Esempio n. 15
0
lr_change_epoch = 20
momentum = 0.9
verbose = True
check_period = 750
loss_rise_threshold = 20
# net = network.LeNet()
# net = pickle.load(open("best_net_unfreezed1.pickle", "rb"))
# net = network.ResNet()
net = network.ResNet34()
# net = network.ResNet18_extended()

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
train_loader, test_loader, classes = data_loading.prepare_data(batch_size, num_workers)
net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum)
loss_function = nn.CrossEntropyLoss()

train_loss = checks.test_loss(train_loader, net, device, loss_function)
train_loss_list = [train_loss]
test_loss = checks.test_loss(test_loader, net, device, loss_function)
test_loss_list = [test_loss]
accuracy = checks.accuracy(test_loader, net, device)
accuracy_list = [accuracy]
if verbose:
    print(f'[epoch {0}] train loss = {train_loss:.3f}, '
          f'test loss = {test_loss:.3f}, accuracy = {accuracy * 100:.2f}%')

prev_loss = float("inf")
lowest_loss = float("inf")
        TP, TN, FP, FN = 0, 0, 0, 0
        for batch_index, data in enumerate(dataloader, 0):
            points, target = data
            points, target = Variable(points), Variable(target)
            points = points.transpose(2, 1)
            points, target = points.cuda(), target.cuda()
            optimizer.zero_grad()
            pred, trans = net(points)

            pred = pred.view(-1, 2)
            target = target.view(-1)

            weight = np.array([1.0, 1.0])
            weight = torch.from_numpy(weight)
            weight = weight.float().cuda()
            criterion = nn.CrossEntropyLoss(weight=weight)

            loss = criterion(pred, target) * 10
            norm = torch.matmul(trans, trans.transpose(2, 1))
            I = Variable(torch.eye(64).repeat(trans.size()[0], 1, 1)).cuda()   # one identity matrix per sample in the batch
            L2_loss = torch.norm(norm - I, 2)                                  # L2 loss: norm(I - A*(AT)); A: feature alignment matrix
            loss += L2_loss

            loss.backward()
            optimizer.step()

            pred_choice = pred.data.max(1)[1]                                  # index of the most probable class

            correct += pred_choice.eq(target.data).cpu().sum()                 # count the correct predictions

            # TP (true positive): predict = 1, label = 1
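            # A minimal sketch (assumed, for illustration) of how these counters are
            # typically updated from pred_choice and target in the binary case:
            TP += int(((pred_choice == 1) & (target.data == 1)).cpu().sum())
            TN += int(((pred_choice == 0) & (target.data == 0)).cpu().sum())
            FP += int(((pred_choice == 1) & (target.data == 0)).cpu().sum())
            FN += int(((pred_choice == 0) & (target.data == 1)).cpu().sum())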
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.model(x)
        loss = nn.CrossEntropyLoss()(y_hat, y)
        logs = {'train_loss': loss}
        return {'loss': loss, 'log': logs}
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)

    print("=> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch](pretrained=True)
    model.fc = torch.nn.Linear(512, 100)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # Define advnet resnet
    # advnet = ResNet(
    #     epsilon=args.advnet_epsilon, 
    #     advnet_norm_factor=args.advnet_norm_factor
    # ).cuda()
    # advnet = torch.nn.DataParallel(advnet).cuda()
    advnet = ParallelResNet(
        epsilon=args.advnet_epsilon, 
        advnet_norm_factor=args.advnet_norm_factor
    )
    advnet.load_state_dict(torch.load(args.resume_advnet)['advnet_state_dict'])
    advnet.eval()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay, nesterov=True)
    

    # optionally resume from a checkpoint
    args.start_epoch = 0
    if False:  # args.resume: (checkpoint resuming disabled)
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            print('Start epoch:', args.start_epoch)
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    if args.data_standard is None:
        print("No Standard Data! Only using --data-distorted datasets")

    if args.data_distorted is not None:
        if args.symlink_distorted_data_dirs:
            print("Mixing together data directories: ", args.data_distorted)

            train_dataset = torch.utils.data.ConcatDataset([
                CombinedDistortedDatasetFolder(
                    args.data_distorted,
                    transform=transforms.Compose([
                        transforms.RandomResizedCrop(224),
                        transforms.RandomHorizontalFlip(),
                        transforms.ToTensor(),
                        normalize,
                    ])
                ),
                ImageNetSubsetDataset(
                    args.data_standard,
                    transform=transforms.Compose([
                        transforms.RandomResizedCrop(224),
                        transforms.RandomHorizontalFlip(),
                        transforms.ToTensor(),
                        normalize,
                    ])
                ) if args.data_standard is not None else []
            ])
        else:
            print(f"Concatenating Datasets {args.data_standard} and {args.data_distorted}")

            datasets = [
                # args.data_standard
                ImageNetSubsetDataset(
                    args.data_standard,
                    transform=transforms.Compose([
                        transforms.RandomResizedCrop(224),
                        transforms.RandomHorizontalFlip(),
                        transforms.ToTensor(),
                        normalize,
                    ])
                ) if args.data_standard is not None else []
            ]

            for distorted_data_dir in args.data_distorted:
                datasets.append(
                    ImageNetSubsetDataset(
                        distorted_data_dir,
                        transform=transforms.Compose([
                            transforms.RandomResizedCrop(224),
                            transforms.RandomHorizontalFlip(),
                            transforms.ToTensor(),
                            normalize,
                        ])
                    )
                )

            train_dataset = torch.utils.data.ConcatDataset(datasets)
    else:
        print(f"Only using Dataset {args.data_standard}")
        train_dataset = ImageNetSubsetDataset(
            args.data_standard,
            transform=transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ])
        )


    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(
        ImageNetSubsetDataset(
            args.data_val, 
            transform=transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ])
        ),
        batch_size=args.batch_size_val, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    def cosine_annealing(step, total_steps, lr_max, lr_min):
        return lr_min + (lr_max - lr_min) * 0.5 * (
                1 + np.cos(step / total_steps * np.pi))

    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lr_lambda=lambda step: cosine_annealing(
            step,
            args.epochs * len(train_loader),
            1,  # since lr_lambda computes multiplicative factor
            1e-6 / (args.lr * args.batch_size / 256.)))
        
    if args.start_epoch != 0:
        scheduler.step(args.start_epoch * len(train_loader))

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    ###########################################################################
    ##### Main Training Loop
    ###########################################################################

    with open(os.path.join(args.save, 'training_log.csv'), 'w') as f:
        f.write('epoch,train_loss,train_acc1,train_acc5,val_loss,val_acc1,val_acc5\n')

    with open(os.path.join(args.save, 'command.txt'), 'w') as f:
        import pprint
        pprint.pprint(vars(args), stream=f) 

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # train for one epoch
        train_losses_avg, train_top1_avg, train_top5_avg = train(train_loader, model, advnet, criterion, optimizer, scheduler, epoch, args)

        print("Evaluating on validation set")

        # evaluate on validation set
        val_losses_avg, val_top1_avg, val_top5_avg = validate(val_loader, model, criterion, args)

        print("Finished Evaluating on validation set")

        # Save results in log file
        with open(os.path.join(args.save, 'training_log.csv'), 'a') as f:
            f.write('%03d,%0.5f,%0.5f,%0.5f,%0.5f,%0.5f,%0.5f\n' % (
                (epoch + 1),
                train_losses_avg, train_top1_avg, train_top5_avg,
                val_losses_avg, val_top1_avg, val_top5_avg
            ))

        # remember best acc@1 and save checkpoint
        is_best = val_top1_avg > best_acc1
        best_acc1 = max(val_top1_avg, best_acc1)

        if not args.multiprocessing_distributed or (args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer' : optimizer.state_dict(),
                'advnet_state_dict' : advnet.state_dict(),
            }, is_best)
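For reference, the cosine_annealing lambda above acts as a multiplicative factor on the optimizer's base learning rate via LambdaLR; a minimal standalone sketch of its shape (illustrative values, not the arguments used above):

import math

def cosine_annealing(step, total_steps, lr_max, lr_min):
    return lr_min + (lr_max - lr_min) * 0.5 * (1 + math.cos(step / total_steps * math.pi))

total_steps = 10
for step in range(total_steps + 1):
    # The factor decays smoothly from lr_max (1.0) at step 0 to lr_min at the final step.
    print(step, round(cosine_annealing(step, total_steps, 1.0, 1e-3), 4))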
Esempio n. 19
0
def train_net(net,
              device,
              epochs=5,
              batch_size=1,
              lr=0.1,
              img_scale=0.4,
              save_cp=True,
              startepoch=0):

    logging.info(f'''Starting training:
        Epochs:          {epochs}
        Batch size:      {batch_size}
        Learning rate:   {lr}
        Checkpoints:     {save_cp}
        Device:          {device.type}
        Images scaling:  {img_scale}
    ''')

    n_train = 18
    n_val = 1

    val = val_imgs_and_masks(img_scale)

    val_score = eval_net(net, val, device, n_val)
    if net.n_classes > 1:
        logging.info('Validation cross entropy: {}'.format(val_score))

    else:
        logging.info('Validation Dice Coeff: {}'.format(val_score))

    optimizer = optim.Adam(net.parameters(), lr=lr)
    if net.n_classes > 1:
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = nn.BCEWithLogitsLoss()

    for epoch in range(epochs):
        net.train()

        # reset the generators
        train = train_imgs_and_masks(img_scale)
        val = val_imgs_and_masks(img_scale)

        epoch_loss = 0
        with tqdm(total=n_train,
                  desc=f'Epoch {epoch + startepoch + 1}/{epochs + startepoch}',
                  unit='img') as pbar:
            for b in batch(train, batch_size):
                imgs = np.array([i[0] for i in b]).astype(np.float32)
                true_masks = np.array([i[1] for i in b]).astype(np.float32)

                imgs = torch.from_numpy(imgs)
                true_masks = torch.from_numpy(true_masks)

                imgs = imgs.to(device=device)
                true_masks = true_masks.to(device=device)

                masks_pred = net(imgs)

                masks_pred = masks_pred[true_masks != 255]
                true_masks = true_masks[true_masks != 255]

                loss = criterion(masks_pred, true_masks)
                epoch_loss += loss.item()

                pbar.set_postfix(**{'loss (batch)': loss.item()})

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                pbar.update(batch_size)

            pbar.set_postfix(**{'loss (epoch)': epoch_loss})

        if save_cp and ((epoch + 1) % 1 == 0 or (epoch + 1) == epochs):
            try:
                os.mkdir(dir_checkpoint)
                logging.info('Created checkpoint directory')
            except OSError:
                pass
            torch.save(
                net.state_dict(),
                dir_checkpoint + f'CP_epoch{epoch + startepoch + 1}.pth')
            logging.info(f'Checkpoint {epoch + startepoch + 1} saved !')

        if (epoch + 1) % 50 == 0:
            val_score = eval_net(net, val, device, n_val)
            if net.n_classes > 1:
                logging.info('Validation cross entropy: {}'.format(val_score))

            else:
                logging.info('Validation Dice Coeff: {}'.format(val_score))
def run(args):
    if not args.save:
        print("This experiment requires an expid.")
        quit()

    ## Random Seed and Device ##
    torch.manual_seed(args.seed)
    device = load.device(args.gpu)

    ## Data ##
    input_shape, num_classes = load.dimension(args.dataset)
    prune_loader = load.dataloader(
        args.dataset,
        args.prune_batch_size,
        True,
        args.workers,
        args.prune_dataset_ratio * num_classes,
    )

    ## Model, Loss, Optimizer ##
    model = load.model(args.model, args.model_class)(
        input_shape, num_classes, args.dense_classifier, args.pretrained
    ).to(device)
    loss = nn.CrossEntropyLoss()

    ## Compute Layer Name and Inv Size ##
    def layer_names(model):
        names = []
        inv_size = []
        for name, module in model.named_modules():
            if isinstance(module, (layers.Linear, layers.Conv2d)):
                num_elements = np.prod(module.weight.shape)
                if module.bias is not None:
                    num_elements += np.prod(module.bias.shape)
                names.append(name)
                inv_size.append(1.0 / num_elements)
        return names, inv_size

    ## Compute Average Layer Score ##
    def average_layer_score(model, scores):
        average_scores = []
        for name, module in model.named_modules():
            if isinstance(module, (layers.Linear, layers.Conv2d)):
                W = module.weight
                W_score = scores[id(W)].detach().cpu().numpy()
                score_sum = W_score.sum()
                num_elements = np.prod(W.shape)

                if module.bias is not None:
                    b = module.bias
                    b_score = scores[id(b)].detach().cpu().numpy()
                    score_sum += b_score.sum()
                    num_elements += np.prod(b.shape)

                average_scores.append(np.abs(score_sum / num_elements))
        return average_scores

    ## Loop through Pruners and Save Data ##
    names, inv_size = layer_names(model)
    average_scores = []
    unit_scores = []
    for i, p in enumerate(args.pruner_list):
        pruner = load.pruner(p)(
            generator.masked_parameters(
                model, args.prune_bias, args.prune_batchnorm, args.prune_residual
            )
        )
        sparsity = 10 ** (-float(args.compression))
        prune_loop(
            model,
            loss,
            pruner,
            prune_loader,
            device,
            sparsity,
            args.compression_schedule,
            args.mask_scope,
            args.prune_epochs,
            args.reinitialize,
        )
        average_score = average_layer_score(model, pruner.scores)
        average_scores.append(average_score)
        np.save("{}/{}".format(args.result_dir, p), np.array(average_score))
    np.save("{}/{}".format(args.result_dir, "inv-size"), inv_size)
Esempio n. 21
0
def main(model_id, use_element, is_save):
    config = Config()
    print("epoch num: ", config.epoch_num)
    config.use_element = use_element
    print("loading data...")
    # Raw data, split into three lists: id, data, label
    ids, data, labels = bd.load_data(config.data_path)
    train_ids, valid_ids = bd.split_data(ids, radio=0.7)
    train_data, valid_data = bd.split_data(data, radio=0.7)
    train_labels, valid_labels = bd.split_data(labels, radio=0.7)

    # Count the total vocabulary size of the data
    total_vocab_size = sd.count_vocab_size(data)
    print("total vocab size", total_vocab_size)
    print("load word2index")
    dict_word2index = bpe.load_pickle(config.word2index_path)
    # print(len(dict_word2index))

    train_ids, train_X, train_y = bd.build_dataset(
        train_ids,
        train_data,
        train_labels,
        dict_word2index,
        max_text_len=config.max_text_len)
    print(train_ids[0:4])
    print(train_X[0:4])
    print(train_y[0:4])
    valid_ids, valid_X, valid_y = bd.build_dataset(
        valid_ids,
        valid_data,
        valid_labels,
        dict_word2index,
        max_text_len=config.max_text_len)
    print("trainset size:", len(train_ids))
    print("validset size:", len(valid_ids))

    dataset_train = MingLueData(train_ids, train_X, train_y)
    dataset_valid = MingLueData(valid_ids, valid_X, valid_y)

    batch_size = config.batch_size
    train_loader = DataLoader(dataset=dataset_train,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=config.num_workers)

    valid_loader = DataLoader(dataset=dataset_valid,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=config.num_workers)

    config.vocab_size = len(dict_word2index)
    print('config vocab size:', config.vocab_size)
    model = model_selector(config, model_id, use_element)
    if config.has_cuda:
        model = model.cuda()

    loss_weight = torch.FloatTensor(config.loss_weight_value)
    loss_weight = loss_weight + 1 - loss_weight.mean()
    print("loss weight:", loss_weight)
    loss_fun = nn.CrossEntropyLoss(loss_weight.cuda())
    optimizer = model.get_optimizer(config.learning_rate,
                                    config.learning_rate2, config.weight_decay)
    print("training...")
    weight_count = 0
    max_score = 0
    total_loss_weight = torch.FloatTensor(torch.zeros(8))
    for epoch in range(config.epoch_num):
        print("lr:", config.learning_rate, "lr2:", config.learning_rate2)
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            ids, texts, labels = data
            if config.has_cuda:
                inputs, labels = Variable(texts.cuda()), Variable(
                    labels.cuda())
            else:
                inputs, labels = Variable(texts), Variable(labels)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_fun(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.data[0]

            if i % config.step == config.step - 1:
                if epoch % config.epoch_step == config.epoch_step - 1:
                    _, predicted = torch.max(outputs.data, 1)
                    predicted = predicted.cpu().numpy().tolist()
                    running_acc = accuracy(predicted,
                                           labels.data.cpu().numpy())
                    print('[%d, %5d] loss: %.3f, acc: %.3f' %
                          (epoch + 1, i + 1, running_loss / config.step,
                           running_acc))
                running_loss = 0.0

        if is_save != 'y' and epoch % config.epoch_step == config.epoch_step - 1:
            print("predicting...")
            loss_weight, score = do_eval(valid_loader, model, model_id,
                                         config.has_cuda)
            if score >= 0.478 and score > max_score:
                max_score = score
                save_path = config.model_path + "." + str(
                    score) + "." + config.model_names[model_id]
                torch.save(model.state_dict(), save_path)

            if epoch >= 3:
                weight_count += 1
                total_loss_weight += loss_weight
                print("avg_loss_weight:", total_loss_weight / weight_count)

        if epoch >= config.begin_epoch - 1:
            if epoch >= config.begin_epoch and config.learning_rate2 == 0:
                config.learning_rate2 = 2e-4
            elif config.learning_rate2 > 0:
                config.learning_rate2 *= config.lr_decay
                if config.learning_rate2 <= 1e-5:
                    config.learning_rate2 = 1e-5
            config.learning_rate = config.learning_rate * config.lr_decay
            optimizer = model.get_optimizer(config.learning_rate,
                                            config.learning_rate2,
                                            config.weight_decay)
Esempio n. 22
0
    # Initialize the network
    # with profiler.profile() as prof:  # profile network_initialization
    #     with profiler.record_function("network_initialization"):
    network_model = config.model_name
    pretrained = config.pretrained
    model = SemanticSegmentation(
        get_model(network_model, device, (len(config.classes) + 1),
                  pretrained), device)
    if torch.cuda.device_count() > 1:
        print(f"Lets use {torch.cuda.device_count()}, GPUs!")
        model = nn.DataParallel(model).to(device)
    model.to(device)

    # prof.export_chrome_trace(os.path.join(tb_path, "network_trace.json"))
    # Initialize the loss function
    cross_entropy_loss_fn = nn.CrossEntropyLoss()
    # setup the optimizer
    sgd_optimizer = torch.optim.SGD(model.parameters(), config.learning_rate)
    exp_lr_scheduler = lr_scheduler.StepLR(
        sgd_optimizer,
        step_size=config.lr_scheduler_step_size,
        gamma=config.lr_depreciation)
    # Train the model
    # TODO: Profiler is here but it is too big, try and profile individual processes once
    # with profiler.profile() as prof:  # profile training and validation process
    #     with profiler.record_function("learning"):
    model_pipeline(config, model, cross_entropy_loss_fn, sgd_optimizer,
                   exp_lr_scheduler)
    # prof.export_chrome_trace(os.path.join(tb_path, "learning_trace.json"))
    # Close the tensorboard summary writer
    writer.close()
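As configured above, StepLR multiplies the learning rate by `config.lr_depreciation` every `config.lr_scheduler_step_size` epochs; a minimal standalone sketch of that behavior (illustrative values, not the original config):

import torch

param = torch.nn.Parameter(torch.zeros(1))
opt = torch.optim.SGD([param], lr=0.1)
sched = torch.optim.lr_scheduler.StepLR(opt, step_size=2, gamma=0.5)
for epoch in range(6):
    opt.step()
    sched.step()
    # lr is multiplied by gamma (0.5) after every step_size (2) scheduler steps
    print(epoch, opt.param_groups[0]['lr'])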
Esempio n. 23
0
    def train_net(self,
                  epochs=5,
                  batch_size=1,
                  lr=0.001,
                  val_percent=0.1,
                  save_cp=True,
                  img_scale=0.5,
                  dir_checkpoint='checkpoints/'):
        """Runs training based on paramaters on the data

        Args:
            epochs (int, optional): Number of epochs to run the model. Defaults to 5.
            batch_size (int, optional): Batchsize number to be taken from model. Defaults to 1.
            lr (float, optional): Learning rate for stepping. Defaults to 0.001.
            val_percent (float, optional): Percentage of data to be taken for validation. Defaults to 0.1.
            save_cp (bool, optional): Save the weights or not. Defaults to True.
            img_scale (float, optional): Scale percentage of the original image to use. Defaults to 0.5.
            dir_checkpoint (str, optional): path to save the trained weights. Defaults to 'checkpoints/'.

        Returns:
            int: best validation score recorded in one training
        """

        device = self.device
        net = self.net
        mode = self.mode

        # Randomly determines the training and validation dataset
        file_list = [
            os.path.splitext(file)[0] for file in os.listdir(self.dir_img)
            if not file.startswith('.')
        ]
        random.shuffle(file_list)
        n_val = int(len(file_list) * val_percent)
        n_train = len(file_list) - n_val
        train_list = file_list[:n_train]
        val_list = file_list[n_train:]
        dataset_train = BasicDataset(train_list, self.dir_img, self.dir_mask,
                                     epochs, img_scale, 'train', mode)
        dataset_val = BasicDataset(val_list, self.dir_img, self.dir_mask,
                                   epochs, img_scale, 'val', mode)
        train_loader = DataLoader(dataset_train,
                                  batch_size=batch_size,
                                  shuffle=False,
                                  num_workers=8,
                                  pin_memory=True)
        val_loader = DataLoader(dataset_val,
                                batch_size=batch_size,
                                shuffle=False,
                                num_workers=8,
                                pin_memory=True,
                                drop_last=True)

        # Tensorboard initialization
        writer = SummaryWriter(
            comment=f'LR_{lr}_BS_{batch_size}_SCALE_{img_scale}')
        global_step = 0
        val_score_list = []
        logging.info(f'''Starting training:
            Epochs:          {epochs}
            Batch size:      {batch_size}
            Learning rate:   {lr}
            Training size:   {n_train}
            Validation size: {n_val}
            Checkpoints:     {save_cp}
            Device:          {self.device.type}
            Images scaling:  {img_scale}
        ''')

        # Gradient descent method
        optimizer = optim.RMSprop(net.parameters(),
                                  lr=lr,
                                  weight_decay=1e-8,
                                  momentum=0.9)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, 'min' if net.n_classes > 1 else 'max', patience=2)
        if net.n_classes > 1:
            criterion = nn.CrossEntropyLoss()
        else:
            criterion = nn.BCEWithLogitsLoss()

        for epoch in range(epochs):
            net.train()
            epoch_loss = 0

            # Progress bar shown on the terminal
            with tqdm(total=n_train,
                      desc=f'Epoch {epoch + 1}/{epochs}',
                      unit='img') as pbar:
                for batch in train_loader:
                    imgs = batch['image']
                    true_masks = batch['mask']
                    assert imgs.shape[1] == net.n_channels, \
                        f'Network has been defined with {net.n_channels} input channels, ' \
                        f'but loaded images have {imgs.shape[1]} channels. Please check that ' \
                        'the images are loaded correctly.'

                    imgs = imgs.to(device=device, dtype=torch.float32)
                    mask_type = torch.float32 if net.n_classes == 1 else torch.long
                    true_masks = true_masks.to(device=device, dtype=mask_type)

                    masks_pred = net(imgs)
                    loss = criterion(masks_pred, true_masks)
                    epoch_loss += loss.item()
                    writer.add_scalar('Loss/train', loss.item(), global_step)

                    pbar.set_postfix(**{'loss (batch)': loss.item()})

                    optimizer.zero_grad()
                    loss.backward()
                    nn.utils.clip_grad_value_(net.parameters(), 0.1)
                    optimizer.step()

                    pbar.update(imgs.shape[0])
                    global_step += 1

                    # Validation phase
                    if global_step % (n_train // (10 * batch_size)) == 0:
                        for tag, value in net.named_parameters():
                            tag = tag.replace('.', '/')
                            writer.add_histogram('weights/' + tag,
                                                 value.data.cpu().numpy(),
                                                 global_step)
                            writer.add_histogram('grads/' + tag,
                                                 value.grad.data.cpu().numpy(),
                                                 global_step)
                        val_score = eval_net(net, val_loader, device)
                        val_score_list.append(val_score)
                        scheduler.step(val_score)
                        writer.add_scalar('learning_rate',
                                          optimizer.param_groups[0]['lr'],
                                          global_step)

                        if net.n_classes > 1:
                            logging.info('Validation cross entropy: {}'.format(
                                val_score))
                            writer.add_scalar('Loss/test', val_score,
                                              global_step)
                        else:
                            logging.info(
                                'Validation Dice Coeff: {}'.format(val_score))
                            writer.add_scalar('Dice/test', val_score,
                                              global_step)
                        # If temporal, the images can't be added to Tensorboard
                        if mode != 'temporal' and mode != 'temporal_augmentation':
                            writer.add_images('images', imgs, global_step)
                        if net.n_classes == 1:
                            writer.add_images('masks/true', true_masks,
                                              global_step)
                            writer.add_images('masks/pred',
                                              torch.sigmoid(masks_pred) > 0.5,
                                              global_step)

            if save_cp:  #saves the trained weights
                try:
                    os.mkdir(dir_checkpoint)
                    logging.info('Created checkpoint directory')
                except OSError:
                    pass
                torch.save(net.state_dict(),
                           dir_checkpoint + f'CP_epoch{epoch + 1}.pth')
                logging.info(f'Checkpoint {epoch + 1} saved !')

        writer.close()
        return max(val_score_list)
Esempio n. 24
0
def train(args,device,net,train_iter,test_iter):
    best_test_acc = 0
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    print("模型开始训练")
    for epoch in range(args.num_epochs):
        start = time.time()
        train_loss, test_loss = 0, 0
        train_acc, test_acc = 0, 0
        n, m = 0, 0
        net.train()
        for x, y, mask in train_iter:
            pred = []
            x = x.to(device)
            y = y.to(device)
            mask = mask.to(device)
            n += 1
            feats = net(x)
            feats = feats.to(device)
            path_score, best_path = net.crf(feats, mask.bool())
            pred.extend([t for t in best_path])
            # print(feats)
            loss = net.loss(feats, mask, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # print(pred[0:2])
            # print(y[0:2])
            acc = acc_mask(pred, y, mask)
            train_loss += loss
            train_acc += acc
        with torch.no_grad():
            net.eval()
            for x, y, mask in test_iter:
                pred = []
                x = x.to(device)
                y = y.to(device)
                mask = mask.to(device)
                m += 1
                feats = net(x)
                feats = feats.to(device)
                path_score, best_path = net.crf(feats, mask.bool())
                pred.extend([t for t in best_path])
                # print(feats)
                loss = net.loss(feats, mask, y)
                # print(pred[0:2])
                # print(y[0:2])
                acc = acc_mask(pred, y, mask)
                test_loss += loss
                test_acc += acc

        end = time.time()
        runtime = end - start
        print(
            'epoch: %d, train loss: %.4f, train acc: %.5f, test loss: %.4f, test acc: %.5f, best test acc: %.5f, time: %.4f \n' % (
                epoch, train_loss.data / n, train_acc / n, test_loss.data / m, test_acc / m, best_test_acc,
                runtime))
        if best_test_acc < test_acc / m and test_acc / m > 0.8:
            best_test_acc = test_acc / m
            torch.save(net, args.save_path)
Esempio n. 25
0
cnn.cuda()      # Moves all model parameters and buffers to the GPU.

# The following function (plot_with_labels) is for visualization and can be ignored if you are not interested.
# from matplotlib import cm
# try: from sklearn.manifold import TSNE; HAS_SK = True
# except: HAS_SK = False; print('Please install sklearn for layer visualization')
# def plot_with_labels(lowDWeights, labels):
#     plt.cla()
#     X, Y = lowDWeights[:, 0], lowDWeights[:, 1]
#     for x, y, s in zip(X, Y, labels):
#         c = cm.rainbow(int(255 * s / 9)); plt.text(x, y, s, backgroundcolor=c, fontsize=9)
#     plt.xlim(X.min(), X.max()); plt.ylim(Y.min(), Y.max()); plt.title('Visualize last layer'); plt.show(); plt.pause(0.01)
# plt.ion()

optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)
loss_func = nn.CrossEntropyLoss()

for epoch in range(EPOCH):
    for step, (x, y) in enumerate(train_loader):

        # !!!!!!!! Change in here !!!!!!!!! #
        b_x = x.cuda()    # Tensor on GPU
        b_y = y.cuda()    # Tensor on GPU

        output = cnn(b_x)[0]
        loss = loss_func(output, b_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 50 == 0:
Esempio n. 26
0
def main():
  cfg = Config()

  # Redirect logs to both console and file.
  #if cfg.log_to_file:
  #  ReDirectSTD(cfg.stdout_file, 'stdout', False)
  #  ReDirectSTD(cfg.stderr_file, 'stderr', False)

  # Lazily create SummaryWriter
  writer = None

  TVT, TMO = set_devices(cfg.sys_device_ids)

  if cfg.seed is not None:
    set_seed(cfg.seed)

  # Dump the configurations to log.
  import pprint
  print('-' * 60, file=sys.stderr)
  print('cfg.__dict__', file=sys.stderr)
  pprint.pprint(cfg.__dict__)
  print('-' * 60)

  ###########
  # Dataset #
  ###########

  train_set = create_dataset(**cfg.train_set_kwargs)

  test_sets = []
  test_set_names = []
  if cfg.dataset == 'combined':
    for name in ['market1501', 'duke']: #, 'cuhk03', 'duke']:
      cfg.test_set_kwargs['name'] = name
      test_sets.append(create_dataset(**cfg.test_set_kwargs))
      test_set_names.append(name)
  else:
    test_sets.append(create_dataset(**cfg.test_set_kwargs))
    test_set_names.append(cfg.dataset)

  ###########
  # Models  #
  ###########

  model = Model(local_conv_out_channels=cfg.local_conv_out_channels,
                num_classes=len(train_set.ids2labels))
  # Model wrapper
  model_w = DataParallel(model)

  #############################
  # Criteria and Optimizers   #
  #############################

  id_criterion = nn.CrossEntropyLoss()
  g_tri_loss = TripletLoss(margin=cfg.global_margin, margin_in=cfg.intra_global_margin)
  l_tri_loss = TripletLoss(margin=cfg.local_margin, margin_in=cfg.intra_local_margin)

  optimizer = optim.Adam(model.parameters(),
                         lr=cfg.base_lr,
                         weight_decay=cfg.weight_decay)

  # Bind them together just to save some codes in the following usage.
  modules_optims = [model, optimizer]

  ################################
  # May Resume Models and Optims #
  ################################

  if cfg.resume:
    resume_ep, scores = load_ckpt(modules_optims, cfg.ckpt_file)

  # May transfer models and optims to the specified device. The optimizer is
  # transferred as well to handle loading a checkpoint onto a new device.
  TMO(modules_optims)

  ########
  # Test #
  ########

  def test(load_model_weight=False):
    if load_model_weight:
      if cfg.model_weight_file != '':
        map_location = (lambda storage, loc: storage)
        sd = torch.load(cfg.model_weight_file, map_location=map_location)
        if 'state_dicts' in sd:
          sd = sd['state_dicts'][0]
        load_state_dict(model, sd)
        print('Loaded model weights from {}'.format(cfg.model_weight_file))
      else:
        load_ckpt(modules_optims, cfg.ckpt_file)

    use_local_distance = (cfg.l_loss_weight > 0) \
                         and cfg.local_dist_own_hard_sample

    for test_set, name in zip(test_sets, test_set_names):
      test_set.set_feat_func(ExtractFeature(model_w, TVT))
      print('\n=========> Test on dataset: {} <=========\n'.format(name))
      test_set.eval(
        normalize_feat=cfg.normalize_feature,
        use_local_distance=use_local_distance)

  if cfg.only_test:
    test(load_model_weight=True)
    return

  ############
  # Training #
  ############

  start_ep = resume_ep if cfg.resume else 0
  for ep in range(start_ep, cfg.total_epochs):

    # Adjust Learning Rate
    if cfg.lr_decay_type == 'exp':
      adjust_lr_exp(
        optimizer,
        cfg.base_lr,
        ep + 1,
        cfg.total_epochs,
        cfg.exp_decay_at_epoch)
    else:
      adjust_lr_staircase(
        optimizer,
        cfg.base_lr,
        ep + 1,
        cfg.staircase_decay_at_epochs,
        cfg.staircase_decay_multiply_factor)

    may_set_mode(modules_optims, 'train')

    g_prec_meter = AverageMeter()
    g_m_meter = AverageMeter()
    g_dist_ap_meter = AverageMeter()
    g_dist_an_meter = AverageMeter()
    g_loss_meter = AverageMeter()

    l_prec_meter = AverageMeter()
    l_m_meter = AverageMeter()
    l_dist_ap_meter = AverageMeter()
    l_dist_an_meter = AverageMeter()
    l_loss_meter = AverageMeter()

    id_loss_meter = AverageMeter()

    loss_meter = AverageMeter()

    ep_st = time.time()
    step = 0
    epoch_done = False
    while not epoch_done:

      step += 1
      step_st = time.time()

      ims, im_names, labels, mirrored, epoch_done = train_set.next_batch()

      ims_var = Variable(TVT(torch.from_numpy(ims).float()))
      labels_t = TVT(torch.from_numpy(labels).long())
      labels_var = Variable(labels_t)

      global_feat, local_feat, logits = model_w(ims_var)

      g_loss, p_inds, n_inds, g_dist_ap, g_dist_an, g_dist_mat = global_loss(
        g_tri_loss, global_feat, labels_t,
        normalize_feature=cfg.normalize_feature)

      if cfg.l_loss_weight == 0:
        l_loss = 0
      elif cfg.local_dist_own_hard_sample:
        # Let local distance find its own hard samples.
        l_loss, l_dist_ap, l_dist_an, _ = local_loss(
          l_tri_loss, local_feat, None, None, labels_t,
          normalize_feature=cfg.normalize_feature)
      else:
        l_loss, l_dist_ap, l_dist_an = local_loss(
          l_tri_loss, local_feat, p_inds, n_inds, labels_t,
          normalize_feature=cfg.normalize_feature)

      id_loss = 0
      if cfg.id_loss_weight > 0:
        id_loss = id_criterion(logits, labels_var)

      loss = g_loss * cfg.g_loss_weight \
             + l_loss * cfg.l_loss_weight \
             + id_loss * cfg.id_loss_weight

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      ############
      # Step Log #
      ############

      # precision
      g_prec = (g_dist_an > g_dist_ap).data.float().mean()
      # the proportion of triplets that satisfy margin
      g_m = (g_dist_an > g_dist_ap + cfg.global_margin).data.float().mean()
      g_d_ap = g_dist_ap.data.mean()
      g_d_an = g_dist_an.data.mean()

      g_prec_meter.update(g_prec)
      g_m_meter.update(g_m)
      g_dist_ap_meter.update(g_d_ap)
      g_dist_an_meter.update(g_d_an)
      g_loss_meter.update(to_scalar(g_loss))

      if cfg.l_loss_weight > 0:
        # precision
        l_prec = (l_dist_an > l_dist_ap).data.float().mean()
        # the proportion of triplets that satisfy margin
        l_m = (l_dist_an > l_dist_ap + cfg.local_margin).data.float().mean()
        l_d_ap = l_dist_ap.data.mean()
        l_d_an = l_dist_an.data.mean()

        l_prec_meter.update(l_prec)
        l_m_meter.update(l_m)
        l_dist_ap_meter.update(l_d_ap)
        l_dist_an_meter.update(l_d_an)
        l_loss_meter.update(to_scalar(l_loss))

      if cfg.id_loss_weight > 0:
        id_loss_meter.update(to_scalar(id_loss))

      loss_meter.update(to_scalar(loss))

      if step % cfg.log_steps == 0:
        time_log = '\tStep {}/Ep {}, {:.2f}s'.format(
          step, ep + 1, time.time() - step_st, )

        if cfg.g_loss_weight > 0:
          g_log = (', gp {:.2%}, gm {:.2%}, '
                   'gd_ap {:.4f}, gd_an {:.4f}, '
                   'gL {:.4f}'.format(
            g_prec_meter.val, g_m_meter.val,
            g_dist_ap_meter.val, g_dist_an_meter.val,
            g_loss_meter.val, ))
        else:
          g_log = ''

        if cfg.l_loss_weight > 0:
          l_log = (', lp {:.2%}, lm {:.2%}, '
                   'ld_ap {:.4f}, ld_an {:.4f}, '
                   'lL {:.4f}'.format(
            l_prec_meter.val, l_m_meter.val,
            l_dist_ap_meter.val, l_dist_an_meter.val,
            l_loss_meter.val, ))
        else:
          l_log = ''

        if cfg.id_loss_weight > 0:
          id_log = (', idL {:.4f}'.format(id_loss_meter.val))
        else:
          id_log = ''

        total_loss_log = ', loss {:.4f}'.format(loss_meter.val)

        log = time_log + \
              g_log + l_log + id_log + \
              total_loss_log
        print(log)

    #############
    # Epoch Log #
    #############

    time_log = 'Ep {}, {:.2f}s'.format(ep + 1, time.time() - ep_st, )

    if cfg.g_loss_weight > 0:
      g_log = (', gp {:.2%}, gm {:.2%}, '
               'gd_ap {:.4f}, gd_an {:.4f}, '
               'gL {:.4f}'.format(
        g_prec_meter.avg, g_m_meter.avg,
        g_dist_ap_meter.avg, g_dist_an_meter.avg,
        g_loss_meter.avg, ))
    else:
      g_log = ''

    if cfg.l_loss_weight > 0:
      l_log = (', lp {:.2%}, lm {:.2%}, '
               'ld_ap {:.4f}, ld_an {:.4f}, '
               'lL {:.4f}'.format(
        l_prec_meter.avg, l_m_meter.avg,
        l_dist_ap_meter.avg, l_dist_an_meter.avg,
        l_loss_meter.avg, ))
    else:
      l_log = ''

    if cfg.id_loss_weight > 0:
      id_log = (', idL {:.4f}'.format(id_loss_meter.avg))
    else:
      id_log = ''

    total_loss_log = ', loss {:.4f}'.format(loss_meter.avg)

    log = time_log + \
          g_log + l_log + id_log + \
          total_loss_log
    print(log)

    # Log to TensorBoard

    if cfg.log_to_file:
      if writer is None:
        writer = SummaryWriter(log_dir=osp.join(cfg.exp_dir, 'tensorboard'))
      writer.add_scalars(
        'loss',
        dict(global_loss=g_loss_meter.avg,
             local_loss=l_loss_meter.avg,
             id_loss=id_loss_meter.avg,
             loss=loss_meter.avg, ),
        ep)
      writer.add_scalars(
        'tri_precision',
        dict(global_precision=g_prec_meter.avg,
             local_precision=l_prec_meter.avg, ),
        ep)
      writer.add_scalars(
        'satisfy_margin',
        dict(global_satisfy_margin=g_m_meter.avg,
             local_satisfy_margin=l_m_meter.avg, ),
        ep)
      writer.add_scalars(
        'global_dist',
        dict(global_dist_ap=g_dist_ap_meter.avg,
             global_dist_an=g_dist_an_meter.avg, ),
        ep)
      writer.add_scalars(
        'local_dist',
        dict(local_dist_ap=l_dist_ap_meter.avg,
             local_dist_an=l_dist_an_meter.avg, ),
        ep)

    # save ckpt
    if cfg.log_to_file:
      save_ckpt(modules_optims, ep + 1, 0, cfg.ckpt_file)

  ########
  # Test #
  ########

  test(load_model_weight=False)
        def __init__(self):
            super(Identity, self).__init__()

        def forward(self, x):
            return x

    my_model.conv1 = torch.nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
    my_model.maxpool = Identity()

my_model = my_model.to('cuda')

minibatch_size = 128
num_epochs = 120
lr = 1

criterion = nn.CrossEntropyLoss()

normalize = torchvision.transforms.Normalize(mean=(0.4914, 0.4822, 0.4465),
                                             std=(0.2023, 0.1994, 0.2010))

transform_train = torchvision.transforms.Compose([
    torchvision.transforms.RandomCrop(32, padding=4),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(), normalize
])

transform_test = torchvision.transforms.Compose(
    [torchvision.transforms.ToTensor(), normalize])

trainset = torchvision.datasets.CIFAR10(root='./data',
                                        train=True,
Esempio n. 28
0
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc1(out)
        out = self.fc2(out)
        return out


model = ConvNet(num_classes).to(device)

#Loss and optimizer
criterion = nn.CrossEntropyLoss()  # cross-entropy loss
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  # optimization with Adam

#Training
print('training start ...')

# initialize lists for plotting graphs after training
train_loss_list, train_acc_list, val_loss_list, val_acc_list = [], [], [], []

for epoch in range(num_epochs):
    #initialize each epoch
    train_loss, train_acc, val_loss, val_acc = 0, 0, 0, 0

    # -----  train mode -----
    model.train()
    for i, (images, labels) in enumerate(train_dataloader):
Esempio n. 29
0
    argv = sys.argv

    if len(argv) < 3:
        print("Usage: motion.py [train/test] [dir of Clipsets]")
        sys.exit(1)

    COMMON_PREFIX = argv[2]
    TAG_PREFIX = os.path.join(COMMON_PREFIX, "ClipSets")
    FLOW_PREFIX = os.path.join(COMMON_PREFIX, "OpticalFlows")
    FLOW_TRAIN_PREFIX = os.path.join(FLOW_PREFIX, "train_small")

    if argv[1] == "train":
        net = MotionNet().to(device)
        m = MotionData(dirname="train")
        loader = DataLoader(m, batch_size=32, shuffle=True, num_workers=0)

        criterion = nn.CrossEntropyLoss(weight=torch.from_numpy(np.array([0.1, 1.84])).float()).to(device)
        optimizer = optim.Adam(net.parameters())

        if os.path.isfile(os.path.join(COMMON_PREFIX, "model.pkl")):
            net.load_state_dict(torch.load(os.path.join(COMMON_PREFIX, "model.pkl")))

        print("start training!")
        print("Dataset size: {0}".format(m.length))
        for i in range(5):
            train(2)
            torch.save(net.state_dict(), os.path.join(COMMON_PREFIX, "model.pkl"))
            print("model saved to {0}".format(COMMON_PREFIX + "model.pkl"))
    else:
        net = MotionNet().cpu()
        net.load_state_dict(torch.load(os.path.join(COMMON_PREFIX, "model.pkl")))
        net.eval()
Esempio n. 30
0
def generate_mask_pattern(model, dl, target_class):
    mask_tanh_t = Variable(torch.tensor(mask_tanh.copy()).to(device=device),
                           requires_grad=True)
    pattern_tanh_t = Variable(torch.tensor(
        pattern_tanh.copy()).to(device=device),
                              requires_grad=True)
    mask_upsapler = nn.Upsample(scale_factor=UPSAMPLE_SIZE, mode="nearest")

    # Define optimizer
    # if args.dataset == "mnist":
    #     criterion = nn.NLLLoss()
    # else:
    criterion = nn.CrossEntropyLoss()

    opt = Adam([mask_tanh_t, pattern_tanh_t], lr=LR, betas=BETAS)

    cost = INIT_COST

    # best optimization results
    mask_best = None
    mask_upsample_best = None
    pattern_best = None
    reg_best = float("inf")

    # logs and counters for adjusting balance cost
    logs = []
    cost_set_counter = 0
    cost_up_counter = 0
    cost_down_counter = 0
    cost_up_flag = False
    cost_down_flag = False

    # counter for early stop
    early_stop_counter = 0
    early_stop_reg_best = reg_best

    # loop start
    for step in range(STEPS):
        # record loss for all mini-batches
        loss_ce_list = []
        loss_reg_list = []
        loss_list = []
        loss_acc_list = []

        for img, _ in dl:
            # Forward
            label = torch.Tensor([target_class] *
                                 img.shape[0]).long().to(device=device)
            img = img.permute(0, 3, 1, 2).to(device=device)

            mask_t = torch.tanh(mask_tanh_t) / (2 - EPSILON) + 0.5
            mask_t = mask_t.repeat(1, 1, IMG_COLOR).unsqueeze(0)
            mask_t_t = mask_t.permute(0, 3, 1, 2)
            mask_t = mask_upsampler(mask_t_t)
            mask_t = mask_t[:, :, :IMG_ROWS, :IMG_COLS]
            rev_mask_t = 1 - mask_t

            pattern_t = (torch.tanh(pattern_tanh_t) /
                         (2 - EPSILON) + 0.5) * 255.0
            pattern_t = pattern_t.unsqueeze(0)
            pattern_t = pattern_t.permute(0, 3, 1, 2)

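            # blend: take pattern pixels where the mask is on, original pixels elsewhere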
            X_t = rev_mask_t * img + mask_t * pattern_t
            if NORMALIZE:
                X_t = X_t / 255.0

            if args.dataset == "mnist":
                _ = model(X_t.float())
                out = hook_fn_feat_layer.outputs
            else:
                out = model(X_t.float())
            loss_ce = criterion(out, label)

            if REGULARIZATION is None:
                loss_reg = torch.tensor(0.0, device=device)  # keep it a tensor so .item() works below
            elif REGULARIZATION == "l1":
                loss_reg = mask_t.abs().sum() / IMG_COLOR
            elif REGULARIZATION == "l2":
                loss_reg = torch.sqrt(torch.square(mask_t).sum()) / IMG_COLOR

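            # objective: cross-entropy toward target_class plus a cost-weighted penalty
            # on the mask size (the cost itself is adjusted dynamically below)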
            loss = loss_ce + cost * loss_reg
            loss_acc = (out.argmax(-1) == label).float().sum() / len(label)

            opt.zero_grad()  # the trigger variables' gradients must be cleared each step
            model.zero_grad()
            loss.backward()
            opt.step()

            loss_ce_list.append(loss_ce.item())
            loss_reg_list.append(loss_reg.item())
            loss_list.append(loss.item())
            loss_acc_list.append(loss_acc.item())

        avg_loss_ce = np.mean(loss_ce_list)
        avg_loss_reg = np.mean(loss_reg_list)
        avg_loss = np.mean(loss_list)
        avg_loss_acc = np.mean(loss_acc_list)

        # check to save best mask or not
        if avg_loss_acc >= ATTACK_SUCC_THRESHOLD and avg_loss_reg < reg_best:
            mask_best = mask_t_t[0, 0, ...].data.cpu().numpy()
            mask_upsample_best = mask_t[0, 0, ...].data.cpu().numpy()
            pattern_best = pattern_t.data.cpu().squeeze(0).permute(1, 2,
                                                                   0).numpy()
            reg_best = avg_loss_reg

        _log_txt = (
            "step: %3d, cost: %.2E, attack: %.3f, loss: %f, ce: %f, reg: %f, reg_best: %f"
            % (
                step,
                Decimal(cost),
                avg_loss_acc,
                avg_loss,
                avg_loss_ce,
                avg_loss_reg,
                reg_best,
            ))
        # verbose
        if VERBOSE != 0:
            if VERBOSE == 2 or step % max(STEPS // 10, 1) == 0:
                print(_log_txt)

        # save log
        logs.append(_log_txt)

        # check early stop
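        # stop once the cost has been moved both up and down and the best mask size
        # has not improved for EARLY_STOP_PATIENCE consecutive steps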
        if EARLY_STOP:
            # only terminate if a valid attack has been found
            if reg_best < float("inf"):
                if reg_best >= EARLY_STOP_THRESHOLD * early_stop_reg_best:
                    early_stop_counter += 1
                else:
                    early_stop_counter = 0
            early_stop_reg_best = min(reg_best, early_stop_reg_best)

            if (cost_down_flag and cost_up_flag
                    and early_stop_counter >= EARLY_STOP_PATIENCE):
                print("early stop")
                break

        # check cost modification
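        # dynamic cost balancing: once the attack succeeds consistently, raise the cost
        # to shrink the mask; when the attack starts failing, lower the cost again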
        if cost == 0 and avg_loss_acc >= ATTACK_SUCC_THRESHOLD:
            cost_set_counter += 1
            if cost_set_counter >= PATIENCE:
                cost = INIT_COST
                cost_up_counter = 0
                cost_down_counter = 0
                cost_up_flag = False
                cost_down_flag = False
                print("initialize cost to %.2E" % Decimal(self.cost))
        else:
            cost_set_counter = 0

        if avg_loss_acc >= ATTACK_SUCC_THRESHOLD:
            cost_up_counter += 1
            cost_down_counter = 0
        else:
            cost_up_counter = 0
            cost_down_counter += 1

        if cost_up_counter >= PATIENCE:
            cost_up_counter = 0
            if VERBOSE == 2:
                print("up cost from %.2E to %.2E" %
                      (Decimal(cost), Decimal(cost * COST_MULTIPLIER_UP)))
            cost *= COST_MULTIPLIER_UP
            cost_up_flag = True
        elif cost_down_counter >= PATIENCE:
            cost_down_counter = 0
            if VERBOSE == 2:
                print("down cost from %.2E to %.2E" %
                      (Decimal(cost), Decimal(cost / COST_MULTIPLIER_DOWN)))
            cost /= COST_MULTIPLIER_DOWN
            cost_down_flag = True

    #         if self.save_tmp:
    #             self.save_tmp_func(step)

    # save the final version
    if mask_best is None:
        mask_best = mask_t_t[0, 0, ...].data.cpu().numpy()
        mask_upsample_best = mask_t[0, 0, ...].data.cpu().numpy()
        pattern_best = pattern_t.data.cpu().squeeze(0).permute(1, 2, 0).numpy()

    return pattern_best, mask_best, mask_upsample_best, logs
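
# (a minimal usage sketch; `model`, `scan_loader`, and the globals referenced above --
#  mask_tanh, pattern_tanh, device, the IMG_*/cost constants and args -- are assumed
#  to be defined elsewhere in the script)
# pattern, mask, mask_up, logs = generate_mask_pattern(model, scan_loader, target_class=0)
# np.save("pattern_target0.npy", pattern)
# np.save("mask_upsample_target0.npy", mask_up)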