def create_opt(parameters: Iterator, opt: Optimizer, lr: Optional[float] = None, l2: Optional[float] = None, lr_patience: Optional[int] = None):
    if opt == Optimizer.AdaBound:
        optimizer = AdaBound(parameters, lr=lr if lr is not None else 0.001,
                             weight_decay=l2 if l2 is not None else 0.)
        lr_scheduler = optim.lr_scheduler.StepLR(optimizer, 150, gamma=0.1)
    elif opt == Optimizer.SGD:
        optimizer = optim.SGD(parameters, lr=lr if lr is not None else 0.1,
                              weight_decay=l2 if l2 is not None else 0.)
        lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5,
                                                            patience=lr_patience if lr_patience is not None else 5)
    elif opt == Optimizer.Adam:
        optimizer = optim.Adam(parameters, lr=lr if lr is not None else 0.001,
                               weight_decay=l2 if l2 is not None else 0.)
        lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1,
                                                            patience=lr_patience if lr_patience is not None else 3)
    else:
        raise ValueError('Unsupported optimizer: {}'.format(opt))
    return optimizer, lr_scheduler
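# Hedged usage sketch (added; not from the original source). It assumes the
# `Optimizer` enum above and an existing `model`; `train_one_epoch` and
# `evaluate` are hypothetical helpers. Note that the ReduceLROnPlateau
# schedulers created above expect a metric in `step()`, while StepLR does not.
optimizer, scheduler = create_opt(model.parameters(), Optimizer.Adam,
                                  lr=1e-3, l2=1e-4, lr_patience=3)
for epoch in range(10):
    train_one_epoch(model, optimizer)   # hypothetical training step
    val_metric = evaluate(model)        # hypothetical validation metric
    if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
        scheduler.step(val_metric)      # plateau schedulers need the metric
    else:
        scheduler.step()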
Example #2
    def _optimizer(self, parameters):
        """

        Args:
          parameters: 

        Returns:

        """
        return AdaBound(
            parameters,
            lr=self.learning_rate,
            betas=self.betas,
            final_lr=self.final_learning_rate,
            gamma=self.gamma,
            eps=self.epsilon,
            weight_decay=self.weight_decay,
            amsbound=self.amsbound,
        )
Example #3
def create_optimizer(args, model_params):
    if args.optim == 'sgd':
        return optim.SGD(model_params, args.lr, momentum=args.momentum,
                         weight_decay=args.weight_decay)
    elif args.optim == 'adagrad':
        return optim.Adagrad(model_params, args.lr, weight_decay=args.weight_decay)
    elif args.optim == 'adam':
        return optim.Adam(model_params, args.lr, betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay)
    elif args.optim == 'amsgrad':
        return optim.Adam(model_params, args.lr, betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay, amsgrad=True)
    elif args.optim == 'adabound':
        return NesterovAdaBound(model_params, args.lr, betas=(args.beta1, args.beta2),
                        final_lr=args.final_lr, gamma=args.gamma,
                        weight_decay=args.weight_decay)
    else:
        assert args.optim == 'amsbound'
        return AdaBound(model_params, args.lr, betas=(args.beta1, args.beta2),
                        final_lr=args.final_lr, gamma=args.gamma,
                        weight_decay=args.weight_decay, amsbound=True)
def get_optimizer(model, hp: RunConfiguration) -> torch.optim.Optimizer:

    if hp.use_random_classifier:
        return LazyOptimizer()

    if hp.optimizer_type == OptimizerType.Adam:
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=hp.learning_rate,
                                     betas=(hp.adam_beta1, hp.adam_beta2),
                                     eps=hp.adam_eps,
                                     weight_decay=hp.adam_weight_decay,
                                     amsgrad=hp.adam_amsgrad)

    elif hp.optimizer_type == OptimizerType.SGD:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=hp.learning_rate,
                                    momentum=hp.sgd_momentum,
                                    dampening=hp.sgd_dampening,
                                    nesterov=hp.sgd_nesterov)

    elif hp.optimizer_type == OptimizerType.RMS_PROP:
        optimizer = torch.optim.RMSprop(model.parameters(),
                                        lr=hp.learning_rate,
                                        alpha=hp.rmsprop_alpha,
                                        eps=hp.rmsprop_eps,
                                        weight_decay=hp.rmsprop_weight_decay,
                                        centered=hp.rmsprop_centered,
                                        momentum=hp.rmsprop_momentum)

    elif hp.optimizer_type == OptimizerType.AdaBound:
        from adabound import AdaBound
        optimizer = AdaBound(model.parameters(),
                             lr=hp.learning_rate,
                             final_lr=hp.adabound_finallr)

    # elif hp.learning_rate_type == LearningSchedulerType.Adadelta:
    # 	optimizer = torch.optim.Adadelta(model.parameters(),
    # 	lr=hp.learning_rate)

    return wrap_optimizer(hp, optimizer)
Example #5
def GetOptimizer(conf, parameter, **kwargs):
    # Required parameters:
    if 'optimizer' not in conf:
        raise NameError('No optimizer was specified (--optimizer)')
    name = conf['optimizer'].lower()
    if 'lr' not in conf:
        conf['lr'] = 1e-3
    lr = conf['lr']
    # Optional parameters:
    option = {}
    if 'weight_decay' in conf:
        option['weight_decay'] = conf['weight_decay']
    # Optimizer selection:
    if name == 'sgd':
        if 'momentum' in conf:
            option['momentum'] = conf['momentum']
        if 'nesterov' in conf:
            option['nesterov'] = conf['nesterov']
        optim = torch.optim.SGD(parameter, lr=lr, **option)
    elif name == 'adam':
        optim = torch.optim.Adam(parameter, lr=lr, **option)
    elif name == 'adadelta':
        optim = torch.optim.Adadelta(parameter, lr=lr, **option)
    elif name == 'adagrad':
        optim = torch.optim.Adagrad(parameter, lr=lr, **option)
    elif name == 'adamw':
        optim = torch.optim.AdamW(parameter, lr=lr, **option)
    elif name == 'adamax':
        optim = torch.optim.Adamax(parameter, lr=lr, **option)
    elif name == 'adabound' and AVAILABLE_OPTIM_ADABOUND:
        optim = AdaBound(parameter, lr=lr, **option)
    elif name == 'radam' and AVAILABLE_OPTIM_RADAM:
        optim = RAdam(parameter, lr=lr, **option)
    else:
        raise NameError(
            'No optimizer is defined with the given name (--optimizer={})'.format(name))
    return optim
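# Hedged usage sketch (added; not from the original source): GetOptimizer
# reads its settings from a plain dict, so a config like the one below selects
# AdaBound with weight decay. It assumes `model` exists and the adabound
# package is importable (AVAILABLE_OPTIM_ADABOUND is True).
conf = {'optimizer': 'adabound', 'lr': 1e-3, 'weight_decay': 1e-4}
optimizer = GetOptimizer(conf, model.parameters())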
def set_model(args, cfg, checkpoint):
    # model
    if checkpoint:
        model = Classifier(pretrained=False)
        model.load_state_dict(checkpoint['model'])
    else:
        model = Classifier(pretrained=True)
    if args.data_parallel:
        model = DataParallel(model)
    model = model.to(device=args.device)

    # optimizer
    if cfg['optimizer'] == 'sgd':
        optimizer = optim.ASGD(model.parameters(),
                               lr=cfg['learning_rate'],
                               weight_decay=cfg['weight_decay'])
    elif cfg['optimizer'] == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=cfg['learning_rate'],
                               weight_decay=cfg['weight_decay'])
    elif cfg['optimizer'] == 'adabound':
        optimizer = AdaBound(model.parameters(),
                             lr=cfg['learning_rate'],
                             final_lr=0.1,
                             weight_decay=cfg['weight_decay'])
    elif cfg['optimizer'] == 'amsbound':
        optimizer = AdaBound(model.parameters(),
                             lr=cfg['learning_rate'],
                             final_lr=0.1,
                             weight_decay=cfg['weight_decay'],
                             amsbound=True)

    # checkpoint
    if checkpoint and args.load_optimizer:
        optimizer.load_state_dict(checkpoint['optimizer'])

    return model, optimizer
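# Hedged usage sketch (added; not from the original source): set_model reads
# its optimizer settings from a config dict; `args` is assumed to provide the
# `data_parallel`, `device` and `load_optimizer` attributes used above.
cfg = {'optimizer': 'adabound', 'learning_rate': 1e-3, 'weight_decay': 1e-4}
model, optimizer = set_model(args, cfg, checkpoint=None)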
Example #7
def main_worker(gpu, ngpus_per_node, args):
    filename = 'model-{}-optimizer-{}-lr-{}-epochs-{}-decay-epoch-{}-eps{}-beta1{}-beta2{}-centralize-{}-reset{}-start-epoch-{}-l2-decay{}-l1-decay{}-batch-{}-warmup-{}-fixed-decay-{}'.format(
        args.arch, args.optimizer, args.lr, args.epochs, args.when, args.eps,
        args.beta1, args.beta2, args.centralize, args.reset, args.start_epoch,
        args.weight_decay, args.l1_decay, args.batch_size, args.warmup,
        args.fixed_decay)

    print(filename)

    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        if args.arch == 'shufflenet_v2_x0_5':
            model = shufflenet_v2_x0_5(pretrained=False)
        elif args.arch == 'se_resnet18':
            model = se_resnet18()
        else:
            model = models.__dict__[args.arch]()
    '''
    model.half()  # convert to half precision
    for layer in model.modules():
      if isinstance(layer, nn.BatchNorm2d):
        layer.float()
    '''
    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    if args.optimizer == 'sgd' and (not args.centralize):
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    elif args.optimizer == 'sgd' and args.centralize:
        optimizer = SGD_GC(model.parameters(),
                           args.lr,
                           momentum=args.momentum,
                           weight_decay=args.weight_decay)
    elif args.optimizer == 'adabound':
        optimizer = AdaBound(model.parameters(),
                             args.lr,
                             eps=args.eps,
                             betas=(args.beta1, args.beta2))
    elif args.optimizer == 'adabelief':
        optimizer = AdaBelief(model.parameters(),
                              args.lr,
                              eps=args.eps,
                              betas=(args.beta1, args.beta2),
                              weight_decouple=args.weight_decouple,
                              weight_decay=args.weight_decay,
                              fixed_decay=args.fixed_decay,
                              rectify=False)
    elif args.optimizer == 'adamw':
        optimizer = AdamW(model.parameters(),
                          args.lr,
                          eps=args.eps,
                          betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay)
    #elif args.optimizer == 'msvag':
    #    optimizer = MSVAG(model.parameters(), args.lr, eps=args.eps, betas=(args.beta1, args.beta2), weight_decay = args.weight_decay)
    else:
        raise ValueError('Optimizer {} not found'.format(args.optimizer))

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)

            if args.start_epoch is None:
                args.start_epoch = checkpoint['epoch'] + 1
                df = pd.read_csv(filename + '.csv')
                train1, train5, test1, test5 = df['train1'].tolist(
                ), df['train5'].tolist(), df['test1'].tolist(
                ), df['test5'].tolist()
            else:  # if a start epoch is specified while resuming, do not resume the previous accuracy curves
                train1, train5, test1, test5 = [], [], [], []
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])

            if not args.reset_resume_optim:
                optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        if args.start_epoch is None:
            args.start_epoch = 0
        train1, train5, test1, test5 = [], [], [], []

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    train_loader, val_loader = DataPrefetcher(train_loader), DataPrefetcher(
        val_loader)

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        _train1, _train5 = train(train_loader, model, criterion, optimizer,
                                 epoch, args)

        # evaluate on validation set
        acc1, _test5 = validate(val_loader, model, criterion, args)

        train1.append(_train1.data.cpu().numpy())
        train5.append(_train5.data.cpu().numpy())
        test1.append(acc1.data.cpu().numpy())
        test5.append(_test5.data.cpu().numpy())
        results = {}
        results['train1'] = train1
        results['train5'] = train5
        results['test1'] = test1
        results['test5'] = test5
        df = pd.DataFrame(data=results)
        df.to_csv(filename + '.csv')

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                filename=filename,
                epoch=epoch,
                decay_epoch=args.decay_epoch)
Example #8
    opt = Opt().parse()

    ########################################
    #                 Model                #
    ########################################
    torch.manual_seed(opt.manual_seed)
    model = get_model(opt)

    if opt.optimizer == 'Adam':
        optimizer = torch.optim.Adam(
            model.parameters(), lr=opt.lr,
            weight_decay=opt.weight_decay)
    elif opt.optimizer == 'AdaBound':
        optimizer = AdaBound(
            model.parameters(),lr=opt.lr,final_lr=0.1,
            weight_decay=opt.weight_decay)
    elif opt.optimizer == 'SGD':
        optimizer = torch.optim.SGD(
            model.parameters(), lr=opt.lr,
            momentum=opt.momentum, weight_decay=opt.weight_decay)
    else:
        raise NotImplementedError("Only Adam, AdaBound and SGD are supported")

    best_mAP = 0


    ########################################
    #              Transforms              #
    ########################################
    if not opt.no_train:
Example #9
def main(args):
    """

    :param args:
    :return:

    """
    print('Training pretrained {} with images on {}'.format(args.model_name, args.data_file))

    batch_size = args.batch_size
    n_epochs = args.epoch
    log_interval = args.log_interval
    lr = args.learning_rate
    data_file = args.data_file
    optimizer_name = args.optimizer
    momentum = args.momentum
    embedding_size = args.embedding_size

    model_name = args.model_name

    print('Train Parameters: \n'
          'Batch_size: {}; \n'
          'Epochs: {}; \n'
          'log_interval: {}; \n'
          'Learning Rate: {}; \n'
          'Data File: {}; \n'
          'Embedding Size: {}\n'
          'Model: {}'.format(batch_size, n_epochs, log_interval, lr, data_file, embedding_size, model_name))

    writer_train = SummaryWriter(comment='multilabel_training pretrain-{}-train_{}-{}'.format(model_name,
                                                                                              embedding_size,
                                                                                              data_file))
    writer_test = SummaryWriter(comment='multilabel_training pretrain-{}-test_{}-{}'.format(model_name,
                                                                                            embedding_size,
                                                                                            data_file))

    # Prepare the dataloader
    loc_path = os.path.join(DATA_FOLDER_PATH, data_file)
    x, y = csv_to_x_y(pd.read_csv(loc_path))

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.01, random_state=RANDOM_STATE)

    num_class = len(y_train[0])
    train_dataset = MeshImageDataset(x_train, y_train, IMAGE_FOLDER_PATH, normalize=True)
    val_dataset = MeshImageDataset(x_test, y_test, IMAGE_FOLDER_PATH, normalize=True)

    kwargs = {'num_workers': 1, 'pin_memory': True} if CUDA else {}

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                               **kwargs)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True,
                                             **kwargs)

    # Prepare the model
    if model_name == 'Resnet-18':
        embedding_net = Resent18EmbeddingNet(embedding_size=embedding_size, pretrained=True)
    elif model_name == 'Resnet-50':
        embedding_net = Resent50EmbeddingNet(embedding_size=embedding_size, pretrained=True)
    elif model_name == 'Dense-121':
        embedding_net = Densenet121EmbeddingNet(embedding_size=embedding_size, pretrained=True)

    model = MultiLabelClassifer(embedding_net, num_class, embedding_size=embedding_size)

    if CUDA:
        model.cuda()

    loss_fn = nn.MultiLabelSoftMarginLoss()

    if optimizer_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=lr)
    elif optimizer_name == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    elif optimizer_name == 'AdaBound':
        optimizer = AdaBound(model.parameters(), lr=lr, betas=(0.9, 0.999),
                             final_lr=0.1, gamma=0.001, weight_decay=5e-4)
    elif optimizer_name == 'AMSBound':
        optimizer = AdaBound(model.parameters(), lr=lr, betas=(0.9, 0.999),
                             final_lr=0.1, gamma=0.001, weight_decay=5e-4, amsbound=True)

    scheduler = lr_scheduler.StepLR(optimizer, 8, gamma=0.1, last_epoch=-1)

    # train_loader, test_loader, model, loss_fn, optimizer, scheduler, n_epochs, cuda,
    #                    log_interval, embedding_size, writer, start_epoch=0
    _, _, test_p, test_a, test_r, test_f1, test_a2, test_m = fit_multilabel(train_loader,
                                                                            val_loader, model,
                                                                            loss_fn,
                                                                            optimizer, scheduler,
                                                                            n_epochs, CUDA,
                                                                            log_interval,
                                                                            writer_train, writer_test)

    # print("Best precision = {} at epoch {};" \
    #       "Best accuracy = {} at epoch {};" \
    #       "Best recall = {} at epoch {};" \
    #       "Best f1 score = {} at epoch {}; ".format(max(test_p), test_p.index(max(test_p)),
    #                                                 max(test_a), test_p.index(max(test_a)),
    #                                                 max(test_r), test_p.index(max(test_r)),
    #                                                 max(test_f1), test_p.index(max(test_f1))))

    folder_path = os.path.join(MODEL_PATH, datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + 'b_' + str(batch_size) +
                               '_eb_' + str(embedding_size) + '_epoch_' + str(n_epochs) + '_' + optimizer_name +
                               '_multilabel_pretrained_{}_'.format(model_name) + data_file[:-4])
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
    torch.save(model.state_dict(), os.path.join(folder_path, 'trained_model'))
    writer_train.close()
    writer_test.close()
Example #10
if opt.optimizer == 'adabelief':  # opening reconstructed; it was truncated in this excerpt
    optimizerD = AdaBelief(netD.parameters(),
                           lr=opt.lr,
                           betas=(opt.beta1, opt.beta2),
                           eps=opt.eps)
    optimizerG = AdaBelief(netG.parameters(),
                           lr=opt.lr,
                           betas=(opt.beta1, opt.beta2),
                           eps=opt.eps)
elif opt.optimizer == 'sgd':
    optimizerD = torch.optim.SGD(netD.parameters(), lr=opt.lr)
    optimizerG = torch.optim.SGD(netG.parameters(), lr=opt.lr)
elif opt.optimizer == 'rmsprop':
    optimizerD = torch.optim.RMSprop(netD.parameters(), lr=opt.lr)
    optimizerG = torch.optim.RMSprop(netG.parameters(), lr=opt.lr)
elif opt.optimizer == 'adabound':
    optimizerD = AdaBound(netD.parameters(),
                          lr=opt.lr,
                          betas=(opt.beta1, opt.beta2),
                          eps=opt.eps,
                          final_lr=opt.final_lr)
    optimizerG = AdaBound(netG.parameters(),
                          lr=opt.lr,
                          betas=(opt.beta1, opt.beta2),
                          eps=opt.eps,
                          final_lr=opt.final_lr)
elif opt.optimizer == 'yogi':
    optimizerD = Yogi(netD.parameters(),
                      lr=opt.lr,
                      betas=(opt.beta1, opt.beta2),
                      eps=opt.eps)
    optimizerG = Yogi(netG.parameters(),
                      lr=opt.lr,
                      betas=(opt.beta1, opt.beta2),
                      eps=opt.eps)  # closing argument reconstructed to match optimizerD above
Example #11
class CQTModel(BaseModel):
    @staticmethod
    def modify_commandline_options(parser, is_train=True):
        """Add new model-specific options and rewrite default values for existing options.

        Parameters:
            parser -- the option parser
            is_train -- if it is training phase or test phase. You can use this flag to add training-specific or test-specific options.

        Returns:
            the modified parser.
        """
        opt, _ = parser.parse_known_args() 
        preprocess = 'mulaw,normalize,cqt'
        parser.set_defaults(preprocess=preprocess)
        parser.add_argument('--wavenet_layers', type=int, default=30, help='wavenet layers')
        parser.add_argument('--wavenet_blocks', type=int, default=15, help='wavenet layers')
        parser.add_argument('--width', type=int, default=128, help='width')
        return parser

    def __init__(self, opt):
        BaseModel.__init__(self, opt)  # call the initialization method of BaseModel
        self.loss_names = ['D_A', 'D_B']
        if opt.isTrain:
            self.output_names = [] # ['aug_A', 'aug_B', 'rec_A', 'rec_B']
        else:
            self.output_names = ['real_A', 'real_B', 'fake_B', 'fake_A']
            self.params_names = ['params_A', 'params_B'] * 2
        self.model_names = ['D_A', 'D_B'] 

        if 'stft' in self.preprocess:
            stride = 2 * ((opt.nfft // 8) - 1)
            window =  opt.nfft // opt.duration_ratio
        elif 'cqt' in self.preprocess:
            stride = opt.hop_length
            window = opt.hop_length
 
        self.netD_A = WaveNet(opt.mu+1, opt.wavenet_layers, opt.wavenet_blocks, 
                              opt.width, 256, 256,
                              opt.tensor_height, window, stride).to(self.devices[-1]) 
        self.netD_B = WaveNet(opt.mu+1, opt.wavenet_layers, opt.wavenet_blocks,
                              opt.width, 256, 256,
                              opt.tensor_height, window, stride).to(self.devices[-1])
        self.softmax = nn.LogSoftmax(dim=1) # (1, 256, audio_len) -> pick 256
        
        if self.isTrain:
            self.criterionDecode = nn.CrossEntropyLoss(reduction='mean')
            self.optimizer_D_A = AdaBound(self.netD_A.parameters(), lr=opt.lr, final_lr=0.1)
            self.optimizer_D_B = AdaBound(self.netD_B.parameters(), lr=opt.lr, final_lr=0.1)
            self.optimizers = [self.optimizer_D_A, self.optimizer_D_B] 
        else:
            self.preprocesses = []
            load_suffix = str(opt.load_iter) if opt.load_iter > 0 else opt.epoch
            self.load_networks(load_suffix)
            self.netD_A.eval()
            self.netD_B.eval()
             
            self.infer_A = NVWaveNet(**(self.netD_A.export_weights()))
            self.infer_B = NVWaveNet(**(self.netD_B.export_weights()))

    def set_input(self, input): 
        A, params_A = input[0]  
        B, params_B = input[1] 
         
        self.real_A = params_A['original'].to(self.devices[0])
        self.real_B = params_B['original'].to(self.devices[0])
        self.aug_A = A.to(self.devices[0])
        self.aug_B = B.to(self.devices[0])

        self.params_A = self.decollate_params(params_A)
        self.params_B = self.decollate_params(params_B)

    def get_indices(self, y):
        y = (y + 1.) * .5 * self.opt.mu
        return y.long() 

    def inv_indices(self, y):
        return y.float() / self.opt.mu * 2. - 1.
 
    def train(self): 
        self.optimizer_D_A.zero_grad() 
        real_A = self.get_indices(self.real_A).to(self.devices[-1])
        pred_D_A = self.netD_A((self.aug_A, real_A))
        self.loss_D_A = self.criterionDecode(pred_D_A, real_A)
        self.loss_D_A.backward()
        self.optimizer_D_A.step() 

        self.optimizer_D_B.zero_grad() 
        real_B = self.get_indices(self.real_B).to(self.devices[-1]) 
        pred_D_B = self.netD_B((self.aug_B, real_B))
        self.loss_D_B = self.criterionDecode(pred_D_B, real_B)
        self.loss_D_B.backward()
        self.optimizer_D_B.step() 
  
    def test(self):  
        with torch.no_grad():   
            self.fake_B = self.infer_A.infer(self.netD_A.get_cond_input(self.aug_A), Impl.AUTO)
            self.fake_A = self.infer_B.infer(self.netD_B.get_cond_input(self.aug_B), Impl.AUTO)
            self.fake_B = self.inv_indices(self.fake_B)
            self.fake_A = self.inv_indices(self.fake_A)
Example #12
    def __init__(self,
                 import_trained=(False, ''),
                 model_pretrained=(True, True),
                 save_model=True,
                 resnet_depth=50,
                 lr=1e-3,
                 momentum=0.09,
                 nesterov=False,
                 threshold=0.5,
                 epochs=50,
                 batch_size=64,
                 train_val_split=0.7,
                 data_interval='1min',
                 predict_period=1,
                 mins_interval=30,
                 start_date='2020-08-24',
                 end_date='2020-08-29'):
        '''
        import_trained = (whether to import a trained pth file; if so, the filename)
        model_pretrained = (whether to import a pretrained model, whether to train only the linear layers)
        save_model = whether to save the model when training finishes
        resnet_depth = the depth of the residual network
        lr = learning rate for the stochastic gradient descent optimizer
        momentum = momentum for SGD
        nesterov = whether to use Nesterov momentum for SGD
        threshold = investment threshold; advises to invest if the returned probability > threshold
        epochs = training hyperparameter: the number of times the entire dataset is exposed to the neural network
        batch_size = training hyperparameter: the number of items to show the network at once
        train_val_split = training hyperparameter: how to split the data
        data_interval = the time interval between datapoints
        predict_period = the number of time periods to predict forwards
        days = the number of days to use
        mins_interval = the number of minutes to show in the graph
        start_date = the first date to get data - data for each day starts at 9am and ends at 8pm
        end_date = the last date to get data - data for each day starts at 9am and ends at 8pm
        '''

        self.__import_trained = import_trained
        self.__model_pretrained = model_pretrained
        self.__saveModel = save_model
        self.__resnet_depth = resnet_depth
        self.__threshold = threshold
        self.__epochs = epochs
        self.__batch_size = batch_size
        data = dataset.stockGraphGenerator(split=train_val_split,
                                           interval=data_interval,
                                           predict_period=predict_period,
                                           mins_interval=mins_interval,
                                           start_date=start_date,
                                           end_date=end_date,
                                           stride=15)
        self.__train_set = torch.utils.data.DataLoader(
            data.train_data, batch_size=self.__batch_size, shuffle=False)
        self.__test_set = torch.utils.data.DataLoader(
            data.test_data, batch_size=self.__batch_size, shuffle=False)
        self.__model = self.__loadmodelInstance(
        ) if self.__import_trained[0] else self.__createmodelInstance()
        self.__criterion = nn.BCELoss()
        self.__optim = AdaBound(self.__model.parameters(),
                                amsbound=True,
                                lr=lr,
                                final_lr=0.1)
        self.__trainHist = [[], [], [], []]
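# Hedged usage sketch (added): the class name is not shown in this excerpt, so
# `StockResNetClassifier` below is a hypothetical stand-in used only to
# illustrate how the constructor arguments documented above are passed.
classifier = StockResNetClassifier(
    import_trained=(False, ''),
    model_pretrained=(True, True),
    resnet_depth=50,
    lr=1e-3,
    epochs=50,
    batch_size=64,
    start_date='2020-08-24',
    end_date='2020-08-29',
)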
Example #13
class OriginalModel(BaseModel):
    @staticmethod
    def modify_commandline_options(parser, is_train=True):
        """Add new model-specific options and rewrite default values for existing options.

        Parameters:
            parser -- the option parser
            is_train -- if it is training phase or test phase. You can use this flag to add training-specific or test-specific options.

        Returns:
            the modified parser.
        """
        preprocess = 'normalize,mulaw,cqt'
        parser.set_defaults(preprocess=preprocess, flatten=True)
        parser.add_argument('--wavenet_layers', type=int, default=40, help='wavenet layers')
        parser.add_argument('--wavenet_blocks', type=int, default=10, help='wavenet layers')
        parser.add_argument('--width', type=int, default=128, help='width')
        parser.add_argument('--dc_lambda', type=float, default=0.01, help='dc lambda') 
        parser.add_argument('--tanh', action='store_true', help='tanh')
        parser.add_argument('--sigmoid', action='store_true', help='sigmoid')
        return parser

    def __init__(self, opt):
        BaseModel.__init__(self, opt)  # call the initialization method of BaseModel
        self.loss_names = ['C_A_right', 'C_B_right', 'C_A_wrong', 'C_B_wrong', 'D_A', 'D_B']
        if opt.isTrain:
            self.output_names = [] # ['aug_A', 'aug_B', 'rec_A', 'rec_B']
        else:
            self.output_names = ['real_A', 'real_B', 'fake_B', 'fake_A']
        self.params_names = ['params_A', 'params_B'] * 2
        self.model_names = ['E', 'C', 'D_A', 'D_B']

        # use get generator
        self.netE = getGenerator(self.devices[0], opt)
        self.netC = getDiscriminator(opt, self.devices[0])

        self.netD_A = WaveNet(opt.mu+1, opt.wavenet_layers, opt.wavenet_blocks, 
                              opt.width, 256, 256,
                              opt.tensor_height, 1, 1).to(self.devices[-1]) # opt.pool_length, opt.pool_length
        self.netD_B = WaveNet(opt.mu+1, opt.wavenet_layers, opt.wavenet_blocks,
                              opt.width, 256, 256,
                              opt.tensor_height, 1, 1).to(self.devices[-1]) # opt.pool_length, opt.pool_length
        self.softmax = nn.LogSoftmax(dim=1) # (1, 256, audio_len) -> pick 256
        
        if self.isTrain:
            self.A_target = torch.zeros(opt.batch_size).to(self.devices[0])
            self.B_target = torch.ones(opt.batch_size).to(self.devices[0])
            self.criterionDC = nn.MSELoss(reduction='mean')
            self.criterionDecode = nn.CrossEntropyLoss(reduction='mean')
            self.optimizer_C = AdaBound(self.netC.parameters(), lr=opt.lr, final_lr=0.1)
            self.optimizer_D = AdaBound(itertools.chain(self.netE.parameters(), self.netD_A.parameters(), self.netD_B.parameters()), lr=opt.lr, final_lr=0.1)
            self.optimizers = [self.optimizer_C, self.optimizer_D] 
        else:
            self.preprocesses = []
            # TODO change structure of test.py and setup() instead
            load_suffix = str(opt.load_iter) if opt.load_iter > 0 else opt.epoch
            self.load_networks(load_suffix)
            self.netC.eval()
            self.netD_A.eval()
            self.netD_B.eval()
             
            self.infer_A = NVWaveNet(**(self.netD_A.export_weights()))
            self.infer_B = NVWaveNet(**(self.netD_B.export_weights()))

    def set_input(self, input): 
        A, params_A = input[0]  
        B, params_B = input[1] 
         
        self.real_A = params_A['original'].to(self.devices[0])
        self.real_B = params_B['original'].to(self.devices[0])
        self.aug_A = A.to(self.devices[0])
        self.aug_B = B.to(self.devices[0])

        self.params_A = self.decollate_params(params_A)
        self.params_B = self.decollate_params(params_B)

    def get_indices(self, y):
        y = (y + 1.) * .5 * self.opt.mu
        return y.long() 

    def inv_indices(self, y):
        return y.float() / self.opt.mu * 2. - 1.
 
    def train(self): 
        self.optimizer_C.zero_grad() 
        encoded_A = self.netE(self.aug_A) # Input range: (-1, 1) Output: R^64
        encoded_A = nn.functional.interpolate(encoded_A, size=self.opt.audio_length).to(self.devices[-1])
        pred_C_A = self.netC(encoded_A)
        self.loss_C_A_right = self.opt.dc_lambda * self.criterionDC(pred_C_A, self.A_target)
        self.loss_C_A_right.backward()

        encoded_B = self.netE(self.aug_B) 
        encoded_B = nn.functional.interpolate(encoded_B, size=self.opt.audio_length).to(self.devices[-1])
        pred_C_B = self.netC(encoded_B)
        self.loss_C_B_right = self.opt.dc_lambda * self.criterionDC(pred_C_B, self.B_target)
        self.loss_C_B_right.backward()
        self.optimizer_C.step()
  
        self.optimizer_D.zero_grad() 
        encoded_A = self.netE(self.aug_A) # Input range: (-1, 1) Output: R^64
        encoded_A = nn.functional.interpolate(encoded_A, size=self.opt.audio_length).to(self.devices[-1])
        pred_C_A = self.netC(encoded_A) 
        self.loss_C_A_wrong = self.criterionDC(pred_C_A, self.A_target)
        real_A = self.get_indices(self.real_A).to(self.devices[-1])
        pred_D_A = self.netD_A((encoded_A, real_A))
        self.loss_D_A = self.criterionDecode(pred_D_A, real_A)
        loss = self.loss_D_A - self.opt.dc_lambda * self.loss_C_A_wrong
        loss.backward()
        
        encoded_B = self.netE(self.aug_B)
        encoded_B = nn.functional.interpolate(encoded_B, size=self.opt.audio_length).to(self.devices[-1])
        pred_C_B = self.netC(encoded_B) 
        self.loss_C_B_wrong = self.criterionDC(pred_C_B, self.B_target)
        real_B = self.get_indices(self.real_B).to(self.devices[-1]) 
        pred_D_B = self.netD_B((encoded_B, real_B))
        self.loss_D_B = self.criterionDecode(pred_D_B, real_B)
        loss = self.loss_D_B - self.opt.dc_lambda * self.loss_C_B_wrong
        loss.backward()
        self.optimizer_D.step() 
  
    def test(self):  
        with torch.no_grad():   
            encoded_A = self.netE(self.aug_A)
            encoded_B = self.netE(self.aug_B)
            self.fake_B = self.infer_A.infer(self.netD_A.get_cond_input(encoded_B), Impl.AUTO)
            self.fake_A = self.infer_B.infer(self.netD_B.get_cond_input(encoded_A), Impl.AUTO)
            self.fake_B = self.inv_indices(self.fake_B)
            self.fake_A = self.inv_indices(self.fake_A)
batch_size = 64
nb_epoch = 30

img_rows, img_cols = 32, 32
img_channels = 3

model = densenet.DenseNet([1, 2, 3, 2],
                          include_top=True,
                          weights=None,
                          pooling='avg',
                          input_shape=(img_rows, img_cols, img_channels),
                          classes=10)
model.compile(
    loss='categorical_crossentropy',
    optimizer=AdaBound(),  #keras.optimizers.SGD(momentum=0.9),
    metrics=['acc'])
model.summary()

(trainX, trainY), (testX, testY) = keras.datasets.cifar10.load_data()

trainX = trainX.astype('float32')
testX = testX.astype('float32')

trainX = densenet.preprocess_input(trainX)
testX = densenet.preprocess_input(testX)

Y_train = keras.utils.to_categorical(trainY, 10)
Y_test = keras.utils.to_categorical(testY, 10)

history = model.fit(trainX,
Example #15
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info('Args: {}'.format(dumps(vars(args), indent=4, sort_keys=True)))
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info('Using random seed {}...'.format(args.seed))
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    char_vectors = util.torch_from_json(args.char_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors,
                  char_vectors=char_vectors,
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob)
    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info('Loading checkpoint from {}...'.format(args.load_path))
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    #optimizer = optim.Adamax(model.parameters(), args.lr,
    #                            weight_decay=args.l2_wd)
    #scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR
    optimizer = AdaBound(model.parameters())

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info('Starting epoch {}...'.format(epoch))
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids, cwf in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                cc_idxs = cc_idxs.to(device)
                qc_idxs = qc_idxs.to(device)
                batch_size = cw_idxs.size(0)
                cwf = cwf.to(device)
                optimizer.zero_grad()

                # Forward
                log_p1, log_p2 = model(cc_idxs, qc_idxs, cw_idxs, qw_idxs, cwf)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(),
                                         args.max_grad_norm)
                optimizer.step()
                #scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info('Evaluating at step {}...'.format(step))
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join('{}: {:05.2f}'.format(k, v)
                                            for k, v in results.items())
                    log.info('Dev {}'.format(results_str))

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar('dev/{}'.format(k), v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
Example #16
def train_model(cfg: DictConfig) -> None:
    output_dir = Path.cwd()
    logging.basicConfig(format='%(asctime)s\t%(levelname)s\t%(message)s',
                        datefmt='%Y/%m/%d %H:%M:%S',
                        filename=str(output_dir / 'log.txt'),
                        level=logging.DEBUG)
    # Suppress hydra from also emitting the log output to the console
    logger = logging.getLogger()
    assert isinstance(logger.handlers[0], logging.StreamHandler)
    logger.handlers[0].setLevel(logging.CRITICAL)

    if cfg.gpu >= 0:
        device = torch.device(f"cuda:{cfg.gpu}")
        # noinspection PyUnresolvedReferences
        torch.backends.cudnn.benchmark = True
    else:
        device = torch.device("cpu")
    model = load_model(model_name=cfg.model_name)
    model.to(device)
    if cfg.swa.enable:
        swa_model = AveragedModel(model=model, device=device)
    else:
        swa_model = None

    # optimizer = optim.SGD(
    #     model.parameters(), lr=cfg.optimizer.lr,
    #     momentum=cfg.optimizer.momentum,
    #     weight_decay=cfg.optimizer.weight_decay,
    #     nesterov=cfg.optimizer.nesterov
    # )
    optimizer = AdaBound(model.parameters(),
                         lr=cfg.optimizer.lr,
                         final_lr=cfg.optimizer.final_lr,
                         weight_decay=cfg.optimizer.weight_decay,
                         amsbound=False)
    scaler = torch.cuda.amp.GradScaler(enabled=cfg.use_amp)
    if cfg.scheduler.enable:
        scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer=optimizer,
            T_0=1,
            T_mult=1,
            eta_min=cfg.scheduler.eta_min)
        # scheduler = optim.lr_scheduler.CyclicLR(
        #     optimizer, base_lr=cfg.scheduler.base_lr,
        #     max_lr=cfg.scheduler.max_lr,
        #     step_size_up=cfg.scheduler.step_size,
        #     mode=cfg.scheduler.mode
        # )
    else:
        scheduler = None
    if cfg.input_dir is not None:
        input_dir = Path(cfg.input_dir)
        model_path = input_dir / 'model.pt'
        print('load model from {}'.format(model_path))
        model.load_state_dict(torch.load(model_path))

        state_path = input_dir / 'state.pt'
        print('load optimizer state from {}'.format(state_path))
        checkpoint = torch.load(state_path, map_location=device)
        epoch = checkpoint['epoch']
        t = checkpoint['t']
        optimizer.load_state_dict(checkpoint['optimizer'])
        if cfg.swa.enable and 'swa_model' in checkpoint:
            swa_model.load_state_dict(checkpoint['swa_model'])
        if cfg.scheduler.enable and 'scheduler' in checkpoint:
            scheduler.load_state_dict(checkpoint['scheduler'])
        if cfg.use_amp and 'scaler' in checkpoint:
            scaler.load_state_dict(checkpoint['scaler'])
    else:
        epoch = 0
        t = 0

    # The current working directory is changed (by hydra), so fix the data paths
    if isinstance(cfg.train_data, str):
        train_path_list = (hydra.utils.to_absolute_path(cfg.train_data), )
    else:
        train_path_list = [
            hydra.utils.to_absolute_path(path) for path in cfg.train_data
        ]
    logging.info('train data path: {}'.format(train_path_list))

    train_data = load_train_data(path_list=train_path_list)
    train_dataset = train_data
    train_data = train_dataset[0]
    test_data = load_test_data(
        path=hydra.utils.to_absolute_path(cfg.test_data))

    logging.info('train position num = {}'.format(len(train_data)))
    logging.info('test position num = {}'.format(len(test_data)))

    train_loader = DataLoader(train_data,
                              device=device,
                              batch_size=cfg.batch_size,
                              shuffle=True)
    validation_loader = DataLoader(test_data[:cfg.test_batch_size * 10],
                                   device=device,
                                   batch_size=cfg.test_batch_size)
    test_loader = DataLoader(test_data,
                             device=device,
                             batch_size=cfg.test_batch_size)

    train_writer = SummaryWriter(log_dir=str(output_dir / 'train'))
    test_writer = SummaryWriter(log_dir=str(output_dir / 'test'))

    train_metrics = Metrics()
    eval_interval = cfg.eval_interval
    total_epoch = cfg.epoch + epoch
    for e in range(cfg.epoch):
        train_metrics_epoch = Metrics()

        model.train()
        desc = 'train [{:03d}/{:03d}]'.format(epoch + 1, total_epoch)
        train_size = len(train_loader) * 4
        for x1, x2, t1, t2, z, value, mask in tqdm(train_loader, desc=desc):
            with torch.cuda.amp.autocast(enabled=cfg.use_amp):
                model.zero_grad()

                metric_value = compute_metric(model=model,
                                              x1=x1,
                                              x2=x2,
                                              t1=t1,
                                              t2=t2,
                                              z=z,
                                              value=value,
                                              mask=mask,
                                              val_lambda=cfg.val_lambda,
                                              beta=cfg.beta)

            scaler.scale(metric_value.loss).backward()
            if cfg.clip_grad_max_norm:
                scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               cfg.clip_grad_max_norm)
            scaler.step(optimizer)
            scaler.update()
            if cfg.swa.enable and t % cfg.swa.freq == 0:
                swa_model.update_parameters(model=model)

            t += 1
            if cfg.scheduler.enable:
                scheduler.step(t / train_size)

            train_metrics.update(metric_value=metric_value)
            train_metrics_epoch.update(metric_value=metric_value)

            # print train loss
            if t % eval_interval == 0:
                model.eval()

                validation_metrics = Metrics()
                with torch.no_grad():
                    # noinspection PyAssignmentToLoopOrWithParameter
                    for x1, x2, t1, t2, z, value, mask in validation_loader:
                        m = compute_metric(model=model,
                                           x1=x1,
                                           x2=x2,
                                           t1=t1,
                                           t2=t2,
                                           z=z,
                                           value=value,
                                           mask=mask,
                                           val_lambda=cfg.val_lambda)
                        validation_metrics.update(metric_value=m)

                last_lr = (scheduler.get_last_lr()[-1]
                           if cfg.scheduler.enable else cfg.optimizer.lr)
                logging.info(
                    'epoch = {}, iteration = {}, lr = {}, {}, {}'.format(
                        epoch + 1, t, last_lr,
                        make_metric_log('train', train_metrics),
                        make_metric_log('validation', validation_metrics)))
                write_summary(writer=train_writer,
                              metrics=train_metrics,
                              t=t,
                              prefix='iteration')
                write_summary(writer=test_writer,
                              metrics=validation_metrics,
                              t=t,
                              prefix='iteration')
                train_metrics = Metrics()

                train_writer.add_scalar('learning_rate',
                                        last_lr,
                                        global_step=t)

                model.train()
            elif t % cfg.train_log_interval == 0:
                last_lr = (scheduler.get_last_lr()[-1]
                           if cfg.scheduler.enable else cfg.optimizer.lr)
                logging.info('epoch = {}, iteration = {}, lr = {}, {}'.format(
                    epoch + 1, t, last_lr,
                    make_metric_log('train', train_metrics)))
                write_summary(writer=train_writer,
                              metrics=train_metrics,
                              t=t,
                              prefix='iteration')
                train_metrics = Metrics()

                train_writer.add_scalar('learning_rate',
                                        last_lr,
                                        global_step=t)

        if cfg.swa.enable:
            with torch.cuda.amp.autocast(enabled=cfg.use_amp):
                desc = 'update BN [{:03d}/{:03d}]'.format(
                    epoch + 1, total_epoch)
                np.random.shuffle(train_data)
                # Computing the BN statistics requires a fair amount of data;
                # using all of it gave better accuracy than reducing it to 1/16.
                # Limit the amount of data to what can be processed in roughly 10 minutes.
                # The DataLoader may not process the data correctly if it is not contiguous in memory.
                train_data = np.ascontiguousarray(train_data[::4])
                torch.optim.swa_utils.update_bn(loader=tqdm(
                    hcpe_loader(data=train_data,
                                device=device,
                                batch_size=cfg.batch_size),
                    desc=desc,
                    total=len(train_data) // cfg.batch_size),
                                                model=swa_model)

        # print train loss for each epoch
        test_metrics = Metrics()

        if cfg.swa.enable:
            test_model = swa_model
        else:
            test_model = model
        test_model.eval()
        with torch.no_grad():
            desc = 'test [{:03d}/{:03d}]'.format(epoch + 1, total_epoch)
            for x1, x2, t1, t2, z, value, mask in tqdm(test_loader, desc=desc):
                metric_value = compute_metric(model=test_model,
                                              x1=x1,
                                              x2=x2,
                                              t1=t1,
                                              t2=t2,
                                              z=z,
                                              value=value,
                                              mask=mask,
                                              val_lambda=cfg.val_lambda)

                test_metrics.update(metric_value=metric_value)

        logging.info('epoch = {}, iteration = {}, {}, {}'.format(
            epoch + 1, t, make_metric_log('train', train_metrics_epoch),
            make_metric_log('test', test_metrics)))
        write_summary(writer=train_writer,
                      metrics=train_metrics_epoch,
                      t=epoch + 1,
                      prefix='epoch')
        write_summary(writer=test_writer,
                      metrics=test_metrics,
                      t=epoch + 1,
                      prefix='epoch')

        epoch += 1

        if e != cfg.epoch - 1:
            # switch to the next chunk of training data
            train_data = train_dataset[e + 1]
            train_loader.data = train_data

    train_writer.close()
    test_writer.close()

    print('save the model')
    torch.save(model.state_dict(), output_dir / 'model.pt')

    print('save the optimizer')
    state = {'epoch': epoch, 't': t, 'optimizer': optimizer.state_dict()}
    if cfg.scheduler.enable:
        state['scheduler'] = scheduler.state_dict()
    if cfg.swa.enable:
        state['swa_model'] = swa_model.state_dict()
    if cfg.use_amp:
        state['scaler'] = scaler.state_dict()
    torch.save(state, output_dir / 'state.pt')
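
    # Hedged resume sketch (not part of the original script): how the files
    # saved above could be reloaded before continuing training, assuming the
    # same cfg, model, optimizer, scheduler, swa_model and scaler objects.
    resume = False  # illustration only; flip to True to restore the saved state
    if resume:
        model.load_state_dict(torch.load(output_dir / 'model.pt', map_location=device))
        state = torch.load(output_dir / 'state.pt', map_location=device)
        optimizer.load_state_dict(state['optimizer'])
        if cfg.scheduler.enable and 'scheduler' in state:
            scheduler.load_state_dict(state['scheduler'])
        if cfg.swa.enable and 'swa_model' in state:
            swa_model.load_state_dict(state['swa_model'])
        if cfg.use_amp and 'scaler' in state:
            scaler.load_state_dict(state['scaler'])
        epoch, t = state['epoch'], state['t']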
Exemple #17
                      hidden_dim=1024,
                      dropout=0.1,
                      emb_share=True).to(device)
criterion = nn.CrossEntropyLoss(ignore_index=vocab[PAD],
                                reduction="none").to(device)
# crit = LabelSmoothing(size=vocab_size, padding_idx=vocab[PAD], smoothing=0.1).to(device)
# def criterion(x,y):
#     x = F.log_softmax(x, dim=-1)
#     n_token = (y != vocab[PAD]).data.sum().item()
#     # n_token = y.shape[0]
#     return crit(x, y)/n_token
# optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-3)
lr = 1e-3
w_decay = 1e-6
optimizer = AdaBound(model.parameters(),
                     lr=lr,
                     final_lr=0.1,
                     weight_decay=w_decay)

# In[7]:

import wandb

wandb.init(entity="george0828zhang",
           project="contextual-matching-policy-gradient")
wandb.config.update({
    "batch_size": batch_size,
    "learning rate": lr,
    "weight decay": w_decay
})
wandb.watch([model])
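
# Hedged sketch (not from the original notebook): because the criterion above
# uses reduction="none", the per-token losses have to be masked and averaged
# by hand before backprop. `logits` (batch*seq_len, vocab) and `targets`
# (batch*seq_len,) are assumed shapes, not names from the original code.
def training_step(logits, targets):
    token_losses = criterion(logits, targets)         # PAD positions are zeroed via ignore_index
    n_tokens = (targets != vocab[PAD]).sum().clamp(min=1)
    loss = token_losses.sum() / n_tokens               # average over real tokens only
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()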
Exemple #18
class ECGTrainer(object):

    def __init__(self, block_config='small', num_threads=2):
        torch.set_num_threads(num_threads)
        self.n_epochs = 60
        self.batch_size = 128
        self.scheduler = None
        self.num_threads = num_threads
        self.cuda = torch.cuda.is_available()

        if block_config == 'small':
            self.block_config = (3, 6, 12, 8)
        else:
            self.block_config = (6, 12, 24, 16)

        self.__build_model()
        self.__build_criterion()
        self.__build_optimizer()
        self.__build_scheduler()
        return

    def __build_model(self):
        self.model = DenseNet(
            num_classes=55, block_config=self.block_config
        )
        if self.cuda:
            self.model.cuda()
        return

    def __build_criterion(self):
        self.criterion = ComboLoss(
            losses=['mlsml', 'f1', 'focal'], weights=[1, 1, 3]
        )
        return

    def __build_optimizer(self):
        opt_params = {'lr': 1e-3, 'weight_decay': 0.0,
                      'params': self.model.parameters()}
        self.optimizer = AdaBound(amsbound=True, **opt_params)
        return

    def __build_scheduler(self):
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, 'max', factor=0.333, patience=5,
            verbose=True, min_lr=1e-5)
        return

    def run(self, trainset, validset, model_dir):
        print('=' * 100 + '\n' + 'TRAINING MODEL\n' + '-' * 100 + '\n')
        model_path = os.path.join(model_dir, 'model.pth')
        thresh_path = os.path.join(model_dir, 'threshold.npy')

        dataloader = {
            'train': ECGLoader(trainset, self.batch_size, True, self.num_threads).build(),
            'valid': ECGLoader(validset, 64, False, self.num_threads).build()
        }

        best_metric, best_preds = None, None
        for epoch in range(self.n_epochs):
            e_message = '[EPOCH {:0=3d}/{:0=3d}]'.format(epoch + 1, self.n_epochs)

            for phase in ['train', 'valid']:
                ep_message = e_message + '[' + phase.upper() + ']'
                if phase == 'train':
                    self.model.train()
                else:
                    self.model.eval()

                losses, preds, labels = [], [], []
                batch_num = len(dataloader[phase])
                for ith_batch, data in enumerate(dataloader[phase]):
                    ecg, label = [d.cuda() for d in data] if self.cuda else data

                    pred = self.model(ecg)
                    loss = self.criterion(pred, label)
                    if phase == 'train':
                        self.optimizer.zero_grad()
                        loss.backward()
                        self.optimizer.step()

                    pred = torch.sigmoid(pred)
                    pred = pred.data.cpu().numpy()
                    label = label.data.cpu().numpy()

                    bin_pred = np.copy(pred)
                    bin_pred[bin_pred > 0.5] = 1
                    bin_pred[bin_pred <= 0.5] = 0
                    f1 = f1_score(label.flatten(), bin_pred.flatten())

                    losses.append(loss.item())
                    preds.append(pred)
                    labels.append(label)

                    sr_message = '[STEP {:0=3d}/{:0=3d}]-[Loss: {:.6f} F1: {:.6f}]'
                    sr_message = ep_message + sr_message
                    print(sr_message.format(ith_batch + 1, batch_num, loss, f1), end='\r')

                preds = np.concatenate(preds, axis=0)
                labels = np.concatenate(labels, axis=0)
                bin_preds = np.copy(preds)
                bin_preds[bin_preds > 0.5] = 1
                bin_preds[bin_preds <= 0.5] = 0

                avg_loss = np.mean(losses)
                avg_f1 = f1_score(labels.flatten(), bin_preds.flatten())
                er_message = '-----[Loss: {:.6f} F1: {:.6f}]'
                er_message = '\n\033[94m' + ep_message + er_message + '\033[0m'
                print(er_message.format(avg_loss, avg_f1))

                if phase == 'valid':
                    if self.scheduler is not None:
                        self.scheduler.step(avg_f1)
                    if best_metric is None or best_metric < avg_f1:
                        best_metric = avg_f1
                        best_preds = [labels, preds]
                        best_loss_metrics = [epoch + 1, avg_loss, avg_f1]
                        torch.save(self.model.state_dict(), model_path)
                        print('[Best validation metric, model: {}]'.format(model_path))
                    print()

        best_f1, best_th = best_f1_score(*best_preds)
        np.save(thresh_path, np.array(best_th))
        print('[Searched Best F1: {:.6f}]\n'.format(best_f1))
        res_message = '[VALIDATION PERFORMANCE: BEST F1]' + '\n' \
            + '[EPOCH:{} LOSS:{:.6f} F1:{:.6f} BEST F1:{:.6f}]\n'.format(
                best_loss_metrics[0], best_loss_metrics[1],
                best_loss_metrics[2], best_f1) \
            + '[BEST THRESHOLD:\n{}]\n'.format(best_th) \
            + '=' * 100 + '\n'
        print(res_message)
        return
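
# Hedged usage sketch (not from the original file): how ECGTrainer might be
# driven. `load_split` is a hypothetical helper returning the record lists
# ECGLoader expects, and './models/ecg' is a placeholder output directory.
if __name__ == '__main__':
    train_records, valid_records = load_split()
    trainer = ECGTrainer(block_config='small', num_threads=2)
    trainer.run(train_records, valid_records, model_dir='./models/ecg')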
Exemple #19
 def __build_optimizer(self):
     opt_params = {'lr': 1e-3, 'weight_decay': 0.0,
                   'params': self.model.parameters()}
     self.optimizer = AdaBound(amsbound=True, **opt_params)
     return
Exemple #20
def main(args):
    # load data
    datapath = "./data"
    validation_size = args.valid
    train_imgs, train_lbls, validation_imgs, validation_lbls = KMNISTDataLoader(
        validation_size).load(datapath)
    test_imgs = LoadTestData(datapath)

    # dir settings
    settings = f'{args.model}_o{args.optimizer}_b{args.batchsize}_e{args.epochs}_f{args.factor}_p{args.patience}_m{args.mixup}_v{args.valid}'
    if args.swa:
        settings = f'{settings}_SWA'
    dir_name = f'./out/{settings}'
    nowtime = datetime.now().strftime("%y%m%d_%H%M")
    if args.force:
        dir_name = f'{dir_name}_{nowtime}'
    if args.ensemble > 1:
        settings = f'{settings}_ensemble{args.ensemble}'
        dir_name_base = f'{dir_name}_ensemble{args.ensemble}'
        models = []
        results = np.zeros((test_imgs.shape[0], 10))

    # define model
    for i in range(args.ensemble):
        model = eval(f'{args.model}')
        loss = keras.losses.categorical_crossentropy
        if args.optimizer == 'adam':
            optimizer = keras.optimizers.Adam(lr=0.001,
                                              beta_1=0.9,
                                              beta_2=0.999)
        if args.optimizer == 'adabound':
            optimizer = AdaBound(lr=1e-03,
                                 final_lr=0.1,
                                 gamma=1e-03,
                                 weight_decay=5e-4,
                                 amsbound=False)
        model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
        # model.summary()
        if args.ensemble > 1:
            models.append(model)

    # data generator
    datagen = MyImageDataGenerator(
        rotation_range=15,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.2,
        zoom_range=0.08,
        mix_up_alpha=args.mixup,
        #random_crop=(28, 28),
        random_erasing=True,
    )

    # train each model
    for i in range(args.ensemble):
        # train settings
        batch_size = args.batchsize
        initial_epoch = args.initialepoch
        epochs = args.epochs
        steps_per_epoch = train_imgs.shape[0] // batch_size

        if epochs > initial_epoch:
            if args.ensemble > 1:
                dir_name = f'{dir_name_base}/{i}'
                model = models[i]
            # load best weight if only already trained
            if len(sorted(glob.glob(f'./{dir_name}/*.hdf5'))):
                best_weight_path = sorted(
                    glob.glob(f'./{dir_name}/*.hdf5'))[-1]
                model.load_weights(best_weight_path)
                initial_epoch = re.search(r'weights.[0-9]{4}',
                                          best_weight_path)
                initial_epoch = int(initial_epoch.group().replace(
                    'weights.', ''))
            else:
                os.makedirs(f'./{dir_name}', exist_ok=True)

            # each epoch settings
            if validation_size > 0:
                reduce_lr = keras.callbacks.ReduceLROnPlateau(
                    monitor='val_loss',
                    factor=args.factor,
                    patience=args.patience,
                    verbose=1,
                    cooldown=1,
                    min_lr=1e-5)
                cp = keras.callbacks.ModelCheckpoint(
                    filepath=f'./{dir_name}' +
                    '/weights.{epoch:04d}-{loss:.6f}-{acc:.6f}-{val_loss:.6f}-{val_acc:.6f}.hdf5',
                    monitor='val_loss',
                    verbose=0,
                    save_best_only=True,
                    mode='auto')
            else:
                reduce_lr = keras.callbacks.ReduceLROnPlateau(
                    monitor='loss',
                    factor=args.factor,
                    patience=args.patience,
                    verbose=1,
                    cooldown=1,
                    min_lr=1e-5)
                cp = keras.callbacks.ModelCheckpoint(
                    filepath=f'./{dir_name}' +
                    '/weights.{epoch:04d}-{loss:.6f}-{acc:.6f}.hdf5',
                    monitor='loss',
                    verbose=0,
                    save_best_only=True,
                    mode='auto')
            cbs = [reduce_lr, cp]
            if args.swa:
                swa = SWA(f'{dir_name}/swa.hdf5', epochs - 40)
                cbs.append(swa)

            # start training
            print(f'===============train start:{dir_name}===============')
            history = model.fit_generator(
                datagen.flow(train_imgs, train_lbls, batch_size=batch_size),
                steps_per_epoch=steps_per_epoch,
                initial_epoch=initial_epoch,
                epochs=epochs,
                validation_data=(validation_imgs, validation_lbls),
                callbacks=cbs,
                verbose=1,
            )
            # output history
            plot_history(history, dir_name=dir_name)

    # test each model
    for i in range(args.ensemble):
        if args.ensemble > 1:
            dir_name = f'{dir_name_base}/{i}'
            model = models[i]
        print(f'test start:{dir_name}')

        # load best weight
        if len(sorted(glob.glob(f'./{dir_name}/weights*.hdf5'))) > 1:
            for p in sorted(glob.glob(f'./{dir_name}/weights*.hdf5'))[:-1]:
                os.remove(p)
        best_weight_path = sorted(glob.glob(f'./{dir_name}/weights*.hdf5'))[-1]
        if args.swa:
            print('Load SWA weights.')
            best_weight_path = sorted(glob.glob(f'./{dir_name}/swa.hdf5'))[-1]

        model.load_weights(best_weight_path)

        # test with test time augmentation
        predicts = TTA(model, test_imgs, tta_steps=50)
        np.save(f'./{dir_name}/predicts_vec.npy', predicts)
        if args.ensemble > 1:
            results += predicts

    # get argmax index
    if args.ensemble > 1:
        predict_labels = np.argmax(results, axis=1)
        dir_name = dir_name_base
    else:
        predict_labels = np.argmax(predicts, axis=1)

    # create submit file
    submit = pd.DataFrame(data={"ImageId": [], "Label": []})
    submit.ImageId = list(range(1, predict_labels.shape[0] + 1))
    submit.Label = predict_labels
    submit.to_csv(f"./{dir_name}/submit{nowtime}_{settings}.csv", index=False)
Exemple #21
def train(region):
    np.random.seed(0)
    torch.manual_seed(0)

    input_len = 10
    encoder_units = 32
    decoder_units = 64
    encoder_rnn_layers = 3
    encoder_dropout = 0.2
    decoder_dropout = 0.2
    input_size = 2
    output_size = 1
    predict_len = 5
    batch_size = 16
    epochs = 500
    force_teacher = 0.8

    train_dataset, test_dataset, train_max, train_min = create_dataset(
        input_len, predict_len, region)
    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    test_loader = DataLoader(
        test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

    enc = Encoder(input_size, encoder_units, input_len,
                  encoder_rnn_layers, encoder_dropout)
    dec = Decoder(encoder_units*2, decoder_units, input_len,
                  input_len, decoder_dropout, output_size)

    optimizer = AdaBound(list(enc.parameters()) +
                         list(dec.parameters()), 0.01, final_lr=0.1)
    # optimizer = optim.Adam(list(enc.parameters()) + list(dec.parameters()), 0.01)
    criterion = nn.MSELoss()

    mb = master_bar(range(epochs))
    for ep in mb:
        train_loss = 0
        enc.train()
        dec.train()
        for encoder_input, decoder_input, target in progress_bar(train_loader, parent=mb):
            optimizer.zero_grad()
            enc_vec = enc(encoder_input)
            h = enc_vec[:, -1, :]
            _, c = dec.initHidden(batch_size)
            x = decoder_input[:, 0]
            pred = []
            for pi in range(predict_len):
                x, h, c = dec(x, h, c, enc_vec)
                rand = np.random.random()
                pred += [x]
                if rand < force_teacher:
                    x = decoder_input[:, pi]
            pred = torch.cat(pred, dim=1)
            # loss = quantile_loss(pred, target)
            loss = criterion(pred, target)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        test_loss = 0
        enc.eval()
        dec.eval()
        for encoder_input, decoder_input, target in progress_bar(test_loader, parent=mb):
            with torch.no_grad():
                enc_vec = enc(encoder_input)
                h = enc_vec[:, -1, :]
                _, c = dec.initHidden(batch_size)
                x = decoder_input[:, 0]
                pred = []
                for pi in range(predict_len):
                    x, h, c = dec(x, h, c, enc_vec)
                    pred += [x]
                pred = torch.cat(pred, dim=1)
            # loss = quantile_loss(pred, target)
            loss = criterion(pred, target)
            test_loss += loss.item()
        print(
            f"Epoch {ep} Train Loss {train_loss/len(train_loader)} Test Loss {test_loss/len(test_loader)}")

    if not os.path.exists("models"):
        os.mkdir("models")
    torch.save(enc.state_dict(), f"models/{region}_enc.pth")
    torch.save(dec.state_dict(), f"models/{region}_dec.pth")

    test_loader = DataLoader(test_dataset, batch_size=1,
                             shuffle=False, drop_last=False)

    rmse = 0
    p = 0
    predicted = []
    true_target = []
    enc.eval()
    dec.eval()
    for encoder_input, decoder_input, target in progress_bar(test_loader, parent=mb):
        with torch.no_grad():
            enc_vec = enc(encoder_input)
            x = decoder_input[:, 0]
            h, c = dec.initHidden(1)
            pred = []
            for pi in range(predict_len):
                x, h, c = dec(x, h, c, enc_vec)
                pred += [x]
            pred = torch.cat(pred, dim=1)
            predicted += [pred[0, p].item()]
            true_target += [target[0, p].item()]
    predicted = np.array(predicted).reshape(1, -1)
    predicted = predicted * (train_max - train_min) + train_min
    true_target = np.array(true_target).reshape(1, -1)
    true_target = true_target * (train_max - train_min) + train_min
    rmse, peasonr = calc_metric(predicted, true_target)
    print(f"{region} RMSE {rmse}")
    print(f"{region} r {peasonr[0]}")
    return f"{region} RMSE {rmse} r {peasonr[0]}"
Exemple #22
    metric_fc.to(device)

    params = [{
        'params': model.parameters()
    }, {
        'params': metric_fc.parameters()
    }]
    if Config.optimizer == 'sgd':
        optimizer = torch.optim.SGD(params,
                                    lr=opt.lr,
                                    weight_decay=opt.weight_decay,
                                    momentum=.9,
                                    nesterov=True)
    elif Config.optimizer == 'adabound':
        optimizer = AdaBound(params=params,
                             lr=opt.lr,
                             final_lr=opt.final_lr,
                             amsbound=opt.amsbound)
    elif Config.optimizer == 'adam':
        optimizer = torch.optim.Adam(params,
                                     lr=opt.lr,
                                     weight_decay=opt.weight_decay)
    else:
        raise ValueError('Invalid Optimizer Name: {}'.format(Config.optimizer))
    scheduler = StepLR(optimizer, step_size=opt.lr_step, gamma=0.1)

    callback_manager = CallbackManager([
        TensorboardLogger(log_dir=Config.checkpoints_path),
        LoggingCallback(),
        WeightCheckpointCallback(save_to=Config.checkpoints_path,
                                 metric_model=metric_fc)
    ])
 if args.optimizer == 'fromage':
     optimizer = Fromage(params, lr=args.lr)
 if args.optimizer == 'adamw':
     optimizer = AdamW(params, lr=args.lr, weight_decay=args.wdecay)
 if args.optimizer == 'radam':
     optimizer = RAdam(params, lr=args.lr, weight_decay=args.wdecay)
 if args.optimizer.lower() == 'adabelief':
     optimizer = AdaBelief(params,
                           lr=args.lr,
                           weight_decay=args.wdecay,
                           eps=args.eps,
                           betas=(args.beta1, args.beta2))
 if args.optimizer == 'adabound':
     optimizer = AdaBound(params,
                          lr=args.lr,
                          weight_decay=args.wdecay,
                          final_lr=30,
                          gamma=1e-3)
 if args.optimizer == 'amsbound':
     optimizer = AdaBound(params,
                          lr=args.lr,
                          weight_decay=args.wdecay,
                          final_lr=30,
                          gamma=1e-3,
                          amsbound=True)
 elif args.optimizer == 'yogi':
     optimizer = Yogi(params,
                      args.lr,
                      betas=(args.beta1, args.beta2),
                      weight_decay=args.wdecay)
 elif args.optimizer == 'msvag':
def train_model_v2_1(net,
                     trainloader,
                     validloader,
                     epochs,
                     lr,
                     grad_accum_steps=1,
                     warmup_epoch=1,
                     patience=5,
                     factor=0.5,
                     opt='AdaBound',
                     weight_decay=0.0,
                     loss_w=[0.5, 0.25, 0.25],
                     reference_labels=None,
                     cb_beta=0.99,
                     start_epoch=0,
                     opt_state_dict=None):
    """
    mixup, ReduceLROnPlateau, class balance
    """
    net = net.cuda()

    # loss
    loss_w = loss_w if loss_w is not None else [0.5, 0.25, 0.25]
    if reference_labels is None:
        if len(loss_w) == 3:
            criterion = multiloss_wrapper_v1_mixup(loss_funcs=[
                mixup.CrossEntropyLossForMixup(num_class=168),
                mixup.CrossEntropyLossForMixup(num_class=11),
                mixup.CrossEntropyLossForMixup(num_class=7)
            ],
                                                   weights=loss_w)
        elif len(loss_w) == 4:
            criterion = multiloss_wrapper_v1_mixup(loss_funcs=[
                mixup.CrossEntropyLossForMixup(num_class=168),
                mixup.CrossEntropyLossForMixup(num_class=11),
                mixup.CrossEntropyLossForMixup(num_class=7),
                mixup.CrossEntropyLossForMixup(num_class=1292)
            ],
                                                   weights=loss_w)

    else:
        if len(loss_w) == 3:
            criterion = multiloss_wrapper_v1_mixup(loss_funcs=[
                cbl.CB_CrossEntropyLoss(reference_labels[:, 0],
                                        num_class=168,
                                        beta=cb_beta,
                                        label_smooth=0.0),
                cbl.CB_CrossEntropyLoss(reference_labels[:, 1],
                                        num_class=11,
                                        beta=cb_beta,
                                        label_smooth=0.0),
                cbl.CB_CrossEntropyLoss(reference_labels[:, 2],
                                        num_class=7,
                                        beta=cb_beta,
                                        label_smooth=0.0)
            ],
                                                   weights=loss_w)
        elif len(loss_w) == 4:
            criterion = multiloss_wrapper_v1_mixup(loss_funcs=[
                cbl.CB_CrossEntropyLoss(reference_labels[:, 0],
                                        num_class=168,
                                        beta=cb_beta,
                                        label_smooth=0.0),
                cbl.CB_CrossEntropyLoss(reference_labels[:, 1],
                                        num_class=11,
                                        beta=cb_beta,
                                        label_smooth=0.0),
                cbl.CB_CrossEntropyLoss(reference_labels[:, 2],
                                        num_class=7,
                                        beta=cb_beta,
                                        label_smooth=0.0),
                cbl.CB_CrossEntropyLoss(reference_labels[:, 3],
                                        num_class=1292,
                                        beta=cb_beta,
                                        label_smooth=0.0)
            ],
                                                   weights=loss_w)

    test_criterion = multiloss_wrapper_v1(loss_funcs=[
        nn.CrossEntropyLoss(),
        nn.CrossEntropyLoss(),
        nn.CrossEntropyLoss()
    ],
                                          weights=loss_w)

    # opt
    if opt == 'SGD':
        optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9)
    elif opt == 'AdaBound':
        optimizer = AdaBound(net.parameters(),
                             lr=lr,
                             final_lr=0.1,
                             weight_decay=weight_decay)

    # scheduler
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     mode="min",
                                                     patience=patience,
                                                     factor=factor,
                                                     verbose=True)
    warmup_scheduler = WarmUpLR(optimizer, len(trainloader) * warmup_epoch)

    if opt_state_dict is not None:
        optimizer.load_state_dict(opt_state_dict)

    # train
    loglist = []
    val_loss = 100
    for epoch in range(start_epoch, epochs):
        if epoch > warmup_epoch - 1:
            scheduler.step(val_loss)

        print('epoch ', epoch)
        tr_log = _trainer_v1(net,
                             trainloader,
                             criterion,
                             optimizer,
                             epoch,
                             grad_accum_steps,
                             warmup_epoch,
                             warmup_scheduler,
                             use_mixup=True)
        vl_log = _tester_v1(net, validloader, test_criterion)
        loglist.append(list(tr_log) + list(vl_log))

        val_loss = vl_log[0]

        save_checkpoint(epoch, net, optimizer, 'checkpoint')
        save_log(loglist, 'training_log.csv')

    return net
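
# Hedged usage sketch (not from the original file): one way train_model_v2_1
# could be called. `my_net`, `trainloader`, `validloader` and `train_labels`
# are placeholders for the real three-head model, DataLoaders and label array.
trained = train_model_v2_1(net=my_net,
                           trainloader=trainloader,
                           validloader=validloader,
                           epochs=30,
                           lr=1e-3,
                           opt='AdaBound',
                           weight_decay=1e-4,
                           loss_w=[0.5, 0.25, 0.25],
                           reference_labels=train_labels)  # enables the class-balanced losses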
Exemple #25
#========================================================================
# NN Setting
from nn_keras import MS_NN
# NN Model Setting
if is_debug:
    N_EPOCHS = 2
else:
    N_EPOCHS = 10
# learning_rate = 1e-4
learning_rate = 1e-3
first_batch = 10  # 7: 128
from adabound import AdaBound

adabound = AdaBound(lr=learning_rate,
                    final_lr=0.1,
                    gamma=1e-03,
                    weight_decay=0.,
                    amsbound=False)

model = MS_NN(input_cols=len(use_cols))
metric = "accuracy"

opt = optimizers.Adam(lr=learning_rate)
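# note: the compile call below uses the `adabound` instance; this Adam optimizer is not referenced in this excerpt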
model.compile(loss="binary_crossentropy", optimizer=adabound, metrics=[metric])

callbacks = [
    EarlyStopping(monitor='val_loss', patience=2, verbose=0),
    ReduceLROnPlateau(monitor='val_loss',
                      factor=0.1,
                      patience=7,
                      verbose=1,
Exemple #26
def main():
    args = parse_args()
    update_config(cfg_hrnet, args)

    # create checkpoint dir
    if not isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # create model
    #print('networks.'+ cfg_hrnet.MODEL.NAME+'.get_pose_net')
    model = eval('models.' + cfg_hrnet.MODEL.NAME + '.get_pose_net')(
        cfg_hrnet, is_train=True)
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # show net
    args.channels = 3
    args.height = cfg.data_shape[0]
    args.width = cfg.data_shape[1]
    #net_vision(model, args)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.MSELoss(reduction='mean').cuda()

    #torch.optim.Adam
    optimizer = AdaBound(model.parameters(),
                         lr=cfg.lr,
                         weight_decay=cfg.weight_decay)

    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            pretrained_dict = checkpoint['state_dict']
            model.load_state_dict(pretrained_dict)
            args.start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            logger = Logger(join(args.checkpoint, 'log.txt'), resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        logger = Logger(join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'LR', 'Train Loss'])

    cudnn.benchmark = True
    torch.backends.cudnn.enabled = True
    print('    Total params: %.2fMB' %
          (sum(p.numel() for p in model.parameters()) / (1024 * 1024) * 4))

    train_loader = torch.utils.data.DataLoader(
        #MscocoMulti(cfg),
        KPloader(cfg),
        batch_size=cfg.batch_size * len(args.gpus))
    #, shuffle=True,
    #num_workers=args.workers, pin_memory=True)

    #for i, (img, targets, valid) in enumerate(train_loader):
    #    print(i, img, targets, valid)

    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch,
                                  cfg.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))

        # train for one epoch
        train_loss = train(train_loader, model, criterion, optimizer)
        print('train_loss: ', train_loss)

        # append logger file
        logger.append([epoch + 1, lr, train_loss])

        save_model(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            },
            checkpoint=args.checkpoint)

    logger.close()
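
# `adjust_learning_rate` is referenced above but not shown in this excerpt.
# A plausible step-decay version, written here purely as an assumption and
# not the original implementation, could look like this:
def adjust_learning_rate(optimizer, epoch, dec_epochs, gamma):
    # decay cfg.lr by `gamma` once for every milestone in `dec_epochs`
    # that the current epoch has already passed
    lr = cfg.lr * (gamma ** sum(1 for e in dec_epochs if epoch >= e))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr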
Exemple #27
    #     lr *= 5e-1

    if epoch >= 150:
        lr *= 0.1
    print('Learning rate: ', lr)
    return lr

if n == 18:
    model = ResNet18(input_shape=input_shape, depth=depth)
else:
    model = ResNet34(input_shape=input_shape, depth=depth)

model.compile(loss='categorical_crossentropy',
              optimizer=AdaBound(lr=lr_schedule(0),
                                 final_lr=adabound_final_lr,
                                 gamma=adabound_gamma,
                                 weight_decay=weight_decay,
                                 amsbound=amsbound),
              metrics=['accuracy'])
model.summary()
print(model_type)

# Prepare model model saving directory.
save_dir = os.path.join(os.getcwd(), 'weights')
model_name = 'cifar10_%s_model.h5' % model_type
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
filepath = os.path.join(save_dir, model_name)

# Prepare callbacks for model saving and for learning rate adjustment.
checkpoint = ModelCheckpoint(filepath=filepath,
Exemple #28
    def _init_model(self):

        self.train_queue, self.valid_queue = self._load_dataset_queue()

        def _init_scheduler():
            if 'cifar' in self.args.train_dataset:
                scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, float(self.args.epochs))
            else:
                scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, self.args.decay_period,
                                                            gamma=self.args.gamma)
            return scheduler

        genotype = eval('geno_types.%s' % self.args.arch)
        reduce_level = (0 if 'cifar10' in self.args.train_dataset else 0)
        model = EvalNetwork(self.args.init_channels, self.args.num_classes, 0,
                            self.args.layers, self.args.auxiliary, genotype, reduce_level)

        # Try move model to multi gpus
        if torch.cuda.device_count() > 1 and self.args.multi_gpus:
            self.logger.info('use: %d gpus', torch.cuda.device_count())
            model = nn.DataParallel(model)
        else:
            self.logger.info('gpu device = %d' % self.device_id)
            torch.cuda.set_device(self.device_id)
        self.model = model.to(self.device)

        self.logger.info('param size = %fM', dutils.calc_parameters_count(model))

        criterion = nn.CrossEntropyLoss()
        if self.args.num_classes >= 50:
            criterion = CrossEntropyLabelSmooth(self.args.num_classes, self.args.label_smooth)
        self.criterion = criterion.to(self.device)

        if self.args.opt == 'adam':
            self.optimizer = torch.optim.Adamax(
                model.parameters(),
                self.args.learning_rate,
                weight_decay=self.args.weight_decay
            )
        elif self.args.opt == 'adabound':
            self.optimizer = AdaBound(model.parameters(),
                                      self.args.learning_rate,
                                      weight_decay=self.args.weight_decay)
        else:
            self.optimizer = torch.optim.SGD(
                model.parameters(),
                self.args.learning_rate,
                momentum=self.args.momentum,
                weight_decay=self.args.weight_decay
            )

        self.best_acc_top1 = 0
        # optionally resume from a checkpoint
        if self.args.resume:
            if os.path.isfile(self.args.resume):
                print("=> loading checkpoint {}".format(self.args.resume))
                checkpoint = torch.load(self.args.resume)
                self.dur_time = checkpoint['dur_time']
                self.args.start_epoch = checkpoint['epoch']
                self.best_acc_top1 = checkpoint['best_acc_top1']
                self.args.drop_path_prob = checkpoint['drop_path_prob']
                self.model.load_state_dict(checkpoint['state_dict'])
                self.optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded checkpoint '{}' (epoch {})".format(self.args.resume, checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(self.args.resume))

        self.scheduler = _init_scheduler()
        # reload the scheduler if possible
        if self.args.resume and os.path.isfile(self.args.resume):
            checkpoint = torch.load(self.args.resume)
            self.scheduler.load_state_dict(checkpoint['scheduler'])
Exemple #29
def main():
    parser = argparse.ArgumentParser(description='Training CC')
    parser.add_argument('-lr', type=float, help='learning rate (3e-4)')
    parser.add_argument('-net', help='network (resnet50, resnet34,...)')
    parser.add_argument(
        '-num_trainimages',
        type=int,
        default=28000,
        help=
        'number of training images (number < 28000). For otf=1: number of images per epoch'
    )
    parser.add_argument('-dat_augment',
                        default=1,
                        type=int,
                        help='data augmentation during training? (0 or 1)')
    parser.add_argument('-otf',
                        default=1,
                        type=int,
                        help='on-the-fly data generation? (0 or 1)')
    parser.add_argument('-unique', default='pairs', help='(pairs, unique)')
    # training bagnets requires smaller batch_size, otherwise memory issues
    parser.add_argument(
        '-batch_size',
        default=64,
        type=int,
        help='batchsize: default 64, for bagnets smaller b/c of memory issues')
    parser.add_argument(
        '-optimizer',
        default='Adam',
        help=
        'The default optimizer is Adam. Optionally, you can choose adabound ("adabound"), which is used for BagNet training.'
    )
    parser.add_argument(
        '-contrast',
        default='contrastrandom',
        help=
        'The default is to train on random contrast images. You can choose "contrast0"'
    )
    parser.add_argument('-n_epochs',
                        default=10,
                        type=int,
                        help='number of epochs')
    parser.add_argument(
        '-regularization',
        default=0,
        type=int,
        help=
        'Flag to choose (1) or not choose (0, default) regularization techniques: scaling, rotation and dropout.'
    )
    parser.add_argument(
        '-load_checkpoint',
        default='',
        help=
        'Name of a checkpoint directory whose best-precision weights should be loaded; leave empty (default) to keep the ImageNet/random initialization.'
    )
    parser.add_argument('-load_checkpoint_epoch', default=0, type=int, help='')
    parser.add_argument(
        '-crop_margin',
        default=0,
        type=int,
        help='crop 16 px margin from each side (1), keep original image (0)')

    args = parser.parse_args()

    print('regularization', args.regularization)

    # set seed for reproducibility
    np.random.seed(0)
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    torch.backends.cudnn.deterministic = True

    epochs = args.n_epochs
    print('number of epochs:', epochs)
    # after this many epochs the learning rate decays by a factor of 0.1
    epoch_decay = epochs // 2
    now = datetime.datetime.now()  # add the date to the experiment name
    exp_name = args.net + '_lr' + str(args.lr) + '_numtrain' + str(
        args.num_trainimages) + '_augment' + str(args.dat_augment) + '_' + str(
            args.unique) + '_batchsize' + str(
                args.batch_size) + '_optimizer' + str(
                    args.optimizer) + '_' + str(args.contrast) + '_reg' + str(
                        args.regularization) + '_otf' + str(
                            args.otf) + '_cropmargin' + str(
                                args.crop_margin) + '_' + str(now.month) + str(
                                    now.day) + str(now.year)

    if args.load_checkpoint:
        exp_name = '_CONTINUED_FINETUNING_' + exp_name

    # load model
    print('load model')
    if args.net[:6] == 'bagnet':
        model = my_models.load_model(args.net, args.regularization)
    else:
        model = my_models.load_model(args.net)

    # load checkpoint if resuming fine-tuning from later epoch
    if args.load_checkpoint:
        model.load_state_dict(
            torch.load('cc_checkpoints/' + args.load_checkpoint +
                       '/best_prec.pt'))

    # load dataset
    print('load dataset')
    valloader = cc_utils.load_dataset_cc(
        set_num=1,
        contrast=args.contrast,
        batch_size=args.batch_size,
        split='val',
        regularization=args.
        regularization,  # whether to use super-augmentation
        crop_margin=args.crop_margin)  # crop 16px margin

    if args.otf:  # on-the-fly data generation; works only for set1, contrast0, unique, no data augmentation or regularisation
        dataset = cc_utils.Dataset_OTF(epoch_len=args.num_trainimages,
                                       crop_margin=args.crop_margin)
        trainloader = torch.utils.data.DataLoader(dataset,
                                                  batch_size=args.batch_size,
                                                  num_workers=8)
    else:
        trainloader = cc_utils.load_dataset_cc(
            set_num=1,
            contrast=args.contrast,
            batch_size=args.batch_size,
            split='trainmany',  # CAREFUL! This is the LARGE dataset
            regularization=args.
            regularization,  # whether to use super-augmentation
            num_trainimages=args.num_trainimages,
            dat_augment=args.dat_augment,
            unique=args.unique,  # number of images in the trainingset
            crop_margin=args.crop_margin)
    # loss criterion and optimizer
    criterion = nn.BCEWithLogitsLoss()

    if args.optimizer == 'Adam':
        optimizer = optim.Adam(
            filter(lambda p: p.requires_grad, model.parameters()),
            lr=args.lr)  # skip parameters that have requires_grad==False
    elif args.optimizer == 'adabound':
        optimizer = AdaBound(filter(lambda p: p.requires_grad,
                                    model.parameters()),
                             lr=args.lr,
                             final_lr=0.1)

    # create new checkpoints- and tensorboard-directories
    for version in range(100):
        checkpointdir = 'cc_checkpoints/' + \
            exp_name + '_v' + str(version) + '/'
        tensorboarddir = 'cc_tensorboard_logs/' + \
            exp_name + '_v' + str(version) + '/'
        # if checkpointdir already exists, skip it
        if not os.path.exists(checkpointdir):
            break
    print('tensorboarddir', tensorboarddir)
    os.makedirs(checkpointdir)
    os.makedirs(tensorboarddir)

    # create writer
    writer = SummaryWriter(tensorboarddir)
    print('writing to this tensorboarddir', tensorboarddir)

    # steps (x-axis) for plotting tensorboard
    step = 0
    best_prec = 0
    first_epoch = 0

    # all validation losses, used to detect a plateau and potentially decrease the lr
    val_loss = []
    # if fine-tuning is continued, load old loss values to guarantee lr
    # adjustment works properly
    if args.load_checkpoint:
        with open('cc_checkpoints/' + args.load_checkpoint +
                  '/epoch_loss_lr.csv',
                  newline='') as csvfile:
            training_log = csv.reader(csvfile,
                                      delimiter=',',
                                      lineterminator='\n')
            for row_counter, row in enumerate(training_log):
                if row_counter == 1:  # skip title row
                    for list_idx in range(int(row[0])):
                        val_loss.append('NaN')
                    val_loss.append(row[1])
                elif row_counter > 1:
                    val_loss.append(float(row[1]))
            # first epoch is the one after the last epoch in the csv file
            first_epoch = int(row[0]) + 1
        csvfile.close()

    n_epoch_plateau = 25  # number of epochs over which the presence of a plateau is evaluated
    counter_lr_adjust = 1
    epoch_of_last_lr_adjust = 0

    with open(checkpointdir + '/epoch_loss_lr.csv', 'w') as csvFile:
        csv_writer = csv.writer(csvFile, delimiter=',', lineterminator='\n')
        csv_writer.writerow(['epoch', 'prec', 'loss', 'lr'])
    csvFile.close()

    net_string = args.net[:6]

    for epoch in range(first_epoch, epochs):
        print('current epoch ', epoch)

        print('train model')
        _, step = utils.train(net_string, model, args.regularization,
                              trainloader, optimizer, criterion, writer, epoch,
                              checkpointdir, step)

        # validate after every epoch
        print('validate model after training')
        prec, loss = utils.validate(net_string, model, args.regularization,
                                    valloader, criterion, writer, epoch, step)
        val_loss.append(loss)

        # save to csv file
        with open(checkpointdir + '/epoch_loss_lr.csv', 'a') as csvFile:
            csv_writer = csv.writer(csvFile,
                                    delimiter=',',
                                    lineterminator='\n')
            for param_group in optimizer.param_groups:  # find current lr
                curr_lr = param_group['lr']
            csv_writer.writerow([epoch, prec, loss, curr_lr])
        csvFile.close()

        # after more than n_epoch_plateaus, check if there is a plateau
        if epoch >= n_epoch_plateau:
            # only adjust lr if no adjustment has ever happened or
            # if the last adjustment happened more than n_epoch_plateau epochs
            # ago
            if (epoch_of_last_lr_adjust == 0
                    or epoch - n_epoch_plateau >= epoch_of_last_lr_adjust):
                adjust_lr_counter = 0
                print('len(val_loss)', len(val_loss))
                for idx in range(epoch - n_epoch_plateau + 2, epoch + 1):
                    print('idx', idx)
                    if val_loss[idx] - val_loss[idx - 1] < 0.05:
                        adjust_lr_counter += 1
                    else:
                        break
                if adjust_lr_counter == n_epoch_plateau - 1:
                    print('adjust lr!!!')
                    utils.adjust_learning_rate_plateau(optimizer, epoch,
                                                       args.lr,
                                                       counter_lr_adjust)
                    counter_lr_adjust += 1
                    epoch_of_last_lr_adjust = epoch

        # remember best prec on valset and save checkpoint
        if prec > best_prec:
            best_prec = prec
            torch.save(model.state_dict(), checkpointdir + '/best_prec.pt')

        # save checkpoint for every epoch
        torch.save(
            model.state_dict(), checkpointdir + '/epoch' + str(epoch) +
            '_step' + str(step) + '.pt')

    # close writer
    writer.close()

    print('Wohoooo, completely done!')
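
# Hedged entry point (not shown in this excerpt): the script is presumably
# launched by simply calling main(), which parses its own arguments.
if __name__ == '__main__':
    main()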
Exemple #30
class TrainNetwork(object):
    """The main train network"""

    def __init__(self, args):
        super(TrainNetwork, self).__init__()
        self.args = args
        self.dur_time = 0
        self.logger = self._init_log()

        if not torch.cuda.is_available():
            self.logger.info('no gpu device available')
            sys.exit(1)

        self._init_hyperparam()
        self._init_random_and_device()
        self._init_model()

    def _init_hyperparam(self):
        if 'cifar100' == self.args.train_dataset:
            # cifar10:  6000 images per class, 10 classes, 50000 training images and 10000 test images
            # cifar100: 600 images per class, 100 classes, 500 training images and 100 testing images per class
            self.args.num_classes = 100
            self.args.layers = 20
            self.args.data = '/train_tiny_data/train_data/cifar100'
        elif 'imagenet' == self.args.train_dataset:
            self.args.data = '/train_data/imagenet'
            self.args.num_classes = 1000
            self.args.weight_decay = 3e-5
            self.args.report_freq = 100
            self.args.init_channels = 50
            self.args.drop_path_prob = 0
        elif 'tiny-imagenet' == self.args.train_dataset:
            self.args.data = '/train_tiny_data/train_data/tiny-imagenet'
            self.args.num_classes = 200
        elif 'food101' == self.args.train_dataset:
            self.args.data = '/train_tiny_data/train_data/food-101'
            self.args.num_classes = 101
            self.args.init_channels = 48

    def _init_log(self):
        self.args.save = '../logs/eval/' + self.args.arch + '/' + self.args.train_dataset + '/eval-{}-{}'.format(self.args.save, time.strftime('%Y%m%d-%H%M'))
        dutils.create_exp_dir(self.args.save, scripts_to_save=None)

        log_format = '%(asctime)s %(message)s'
        logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                            format=log_format, datefmt='%m/%d %I:%M:%S %p')
        fh = logging.FileHandler(os.path.join(self.args.save, 'log.txt'))
        fh.setFormatter(logging.Formatter(log_format))
        logger = logging.getLogger('Architecture Training')
        logger.addHandler(fh)
        return logger

    def _init_random_and_device(self):
        # Set random seed and cuda device
        np.random.seed(self.args.seed)
        cudnn.benchmark = True
        torch.manual_seed(self.args.seed)
        cudnn.enabled = True
        torch.cuda.manual_seed(self.args.seed)
        max_free_gpu_id, gpus_info = dutils.get_gpus_memory_info()
        self.device_id = max_free_gpu_id
        self.gpus_info = gpus_info
        self.device = torch.device('cuda:{}'.format(0 if self.args.multi_gpus else self.device_id))

    def _init_model(self):

        self.train_queue, self.valid_queue = self._load_dataset_queue()

        def _init_scheduler():
            if 'cifar' in self.args.train_dataset:
                scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, float(self.args.epochs))
            else:
                scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, self.args.decay_period,
                                                            gamma=self.args.gamma)
            return scheduler

        genotype = eval('geno_types.%s' % self.args.arch)
        reduce_level = (0 if 'cifar10' in self.args.train_dataset else 0)
        model = EvalNetwork(self.args.init_channels, self.args.num_classes, 0,
                            self.args.layers, self.args.auxiliary, genotype, reduce_level)

        # Try move model to multi gpus
        if torch.cuda.device_count() > 1 and self.args.multi_gpus:
            self.logger.info('use: %d gpus', torch.cuda.device_count())
            model = nn.DataParallel(model)
        else:
            self.logger.info('gpu device = %d' % self.device_id)
            torch.cuda.set_device(self.device_id)
        self.model = model.to(self.device)

        self.logger.info('param size = %fM', dutils.calc_parameters_count(model))

        criterion = nn.CrossEntropyLoss()
        if self.args.num_classes >= 50:
            criterion = CrossEntropyLabelSmooth(self.args.num_classes, self.args.label_smooth)
        self.criterion = criterion.to(self.device)

        if self.args.opt == 'adam':
            self.optimizer = torch.optim.Adamax(
                model.parameters(),
                self.args.learning_rate,
                weight_decay=self.args.weight_decay
            )
        elif self.args.opt == 'adabound':
            self.optimizer = AdaBound(model.parameters(),
                                      self.args.learning_rate,
                                      weight_decay=self.args.weight_decay)
        else:
            self.optimizer = torch.optim.SGD(
                model.parameters(),
                self.args.learning_rate,
                momentum=self.args.momentum,
                weight_decay=self.args.weight_decay
            )

        self.best_acc_top1 = 0
        # optionally resume from a checkpoint
        if self.args.resume:
            if os.path.isfile(self.args.resume):
                print("=> loading checkpoint {}".format(self.args.resume))
                checkpoint = torch.load(self.args.resume)
                self.dur_time = checkpoint['dur_time']
                self.args.start_epoch = checkpoint['epoch']
                self.best_acc_top1 = checkpoint['best_acc_top1']
                self.args.drop_path_prob = checkpoint['drop_path_prob']
                self.model.load_state_dict(checkpoint['state_dict'])
                self.optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded checkpoint '{}' (epoch {})".format(self.args.resume, checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(self.args.resume))

        self.scheduler = _init_scheduler()
        # reload the scheduler if possible
        if self.args.resume and os.path.isfile(self.args.resume):
            checkpoint = torch.load(self.args.resume)
            self.scheduler.load_state_dict(checkpoint['scheduler'])

    def _load_dataset_queue(self):
        if 'cifar' in self.args.train_dataset:
            train_transform, valid_transform = dutils.data_transforms_cifar(self.args)
            if 'cifar10' == self.args.train_dataset:
                train_data = dset.CIFAR10(root=self.args.data, train=True, download=True, transform=train_transform)
                valid_data = dset.CIFAR10(root=self.args.data, train=False, download=True, transform=valid_transform)
            else:
                train_data = dset.CIFAR100(root=self.args.data, train=True, download=True, transform=train_transform)
                valid_data = dset.CIFAR100(root=self.args.data, train=False, download=True, transform=valid_transform)

            train_queue = torch.utils.data.DataLoader(
                train_data, batch_size = self.args.batch_size, shuffle=True, pin_memory=True, num_workers=4
            )
            valid_queue = torch.utils.data.DataLoader(
                valid_data, batch_size = self.args.batch_size, shuffle=True, pin_memory=True, num_workers=4
            )
        elif 'tiny-imagenet' == self.args.train_dataset:
            train_transform, valid_transform = dutils.data_transforms_tiny_imagenet()
            train_data = dartsdset.TinyImageNet200(self.args.data, train=True, download=True, transform=train_transform)
            valid_data = dartsdset.TinyImageNet200(self.args.data, train=False, download=True, transform=valid_transform)
            train_queue = torch.utils.data.DataLoader(
                train_data, batch_size=self.args.batch_size, shuffle=True, pin_memory=True, num_workers=4
            )
            valid_queue = torch.utils.data.DataLoader(
                valid_data, batch_size=self.args.batch_size, shuffle=True, pin_memory=True, num_workers=4
            )
        elif 'imagenet' == self.args.train_dataset:
            traindir = os.path.join(self.args.data, 'train')
            validdir = os.path.join(self.args.data, 'val')
            train_transform, valid_transform = dutils.data_transforms_imagenet()
            train_data = dset.ImageFolder(
                traindir,train_transform)
            valid_data = dset.ImageFolder(
                validdir,valid_transform)

            train_queue = torch.utils.data.DataLoader(
                train_data, batch_size=self.args.batch_size, shuffle=True, pin_memory=True, num_workers=4)

            valid_queue = torch.utils.data.DataLoader(
                valid_data, batch_size=self.args.batch_size, shuffle=False, pin_memory=True, num_workers=4)
        elif 'food101' == self.args.train_dataset:
            traindir = os.path.join(self.args.data, 'train')
            validdir = os.path.join(self.args.data, 'val')
            train_transform, valid_transform = dutils.data_transforms_food101()
            train_data = dset.ImageFolder(
                traindir,train_transform)
            valid_data = dset.ImageFolder(
                validdir,valid_transform)

            train_queue = torch.utils.data.DataLoader(
                train_data, batch_size=self.args.batch_size, shuffle=True, pin_memory=True, num_workers=4)

            valid_queue = torch.utils.data.DataLoader(
                valid_data, batch_size=self.args.batch_size, shuffle=False, pin_memory=True, num_workers=4)

        return train_queue, valid_queue

    def run(self):
        self.logger.info('args = %s', self.args)
        run_start = time.time()
        for epoch in range(self.args.start_epoch, self.args.epochs):
            self.scheduler.step()
            self.logger.info('epoch % d / %d  lr %e', epoch, self.args.epochs, self.scheduler.get_lr()[0])

            if self.args.no_dropout:
                self.model._drop_path_prob = 0
            else:
                self.model._drop_path_prob = self.args.drop_path_prob * epoch / self.args.epochs
                self.logger.info('drop_path_prob %e', self.model._drop_path_prob)

            train_acc, train_obj = self.train()
            self.logger.info('train loss %e, train acc %f', train_obj, train_acc)

            valid_acc_top1, valid_acc_top5, valid_obj = self.infer()
            self.logger.info('valid loss %e, top1 valid acc %f top5 valid acc %f',
                        valid_obj, valid_acc_top1, valid_acc_top5)
            self.logger.info('best valid acc %f', self.best_acc_top1)

            is_best = False
            if valid_acc_top1 > self.best_acc_top1:
                self.best_acc_top1 = valid_acc_top1
                is_best = True

            dutils.save_checkpoint({
                'epoch': epoch+1,
                'dur_time': self.dur_time + time.time() - run_start,
                'state_dict': self.model.state_dict(),
                'drop_path_prob': self.args.drop_path_prob,
                'best_acc_top1': self.best_acc_top1,
                'optimizer': self.optimizer.state_dict(),
                'scheduler': self.scheduler.state_dict()
            }, is_best, self.args.save)
        self.logger.info('train epoches %d, best_acc_top1 %f, dur_time %s',
                         self.args.epochs, self.best_acc_top1, dutils.calc_time(self.dur_time + time.time() - run_start))

    def train(self):
        objs = dutils.AverageMeter()
        top1 = dutils.AverageMeter()
        top5 = dutils.AverageMeter()

        self.model.train()

        for step, (input, target) in enumerate(self.train_queue):

            input = input.cuda(self.device, non_blocking=True)
            target = target.cuda(self.device, non_blocking=True)

            self.optimizer.zero_grad()
            logits, logits_aux = self.model(input)
            loss = self.criterion(logits, target)
            if self.args.auxiliary:
                loss_aux = self.criterion(logits_aux, target)
                loss += self.args.auxiliary_weight*loss_aux
            loss.backward()
            nn.utils.clip_grad_norm_(self.model.parameters(), self.args.grad_clip)
            self.optimizer.step()

            prec1, prec5 = dutils.accuracy(logits, target, topk=(1,5))
            n = input.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            if step % self.args.report_freq == 0:
                self.logger.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)

        return top1.avg, objs.avg

    def infer(self):
        objs = dutils.AverageMeter()
        top1 = dutils.AverageMeter()
        top5 = dutils.AverageMeter()
        self.model.eval()
        with torch.no_grad():
            for step, (input, target) in enumerate(self.valid_queue):
                input = input.cuda(self.device, non_blocking=True)
                target = target.cuda(self.device, non_blocking=True)

                logits, _ = self.model(input)
                loss = self.criterion(logits, target)

                prec1, prec5 = dutils.accuracy(logits, target, topk=(1,5))
                n = input.size(0)
                objs.update(loss.item(), n)
                top1.update(prec1.item(), n)
                top5.update(prec5.item(), n)

                if step % self.args.report_freq == 0:
                    self.logger.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
            return top1.avg, top5.avg, objs.avg
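
# Hedged usage sketch (not from the original file): given an argparse
# namespace `args` like the one this class expects, training starts with:
if __name__ == '__main__':
    TrainNetwork(args).run()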