Example #1
def main_worker(local_rank, args):
    args.local_rank = local_rank
    # prepare dist environment
    dist.init_process_group(backend='nccl', rank=args.local_rank, world_size=args.world_size)
    torch.cuda.set_device(args.local_rank)
    network = ShuffleNetV1(input_size=cfg.image_size, n_class=cfg.num_classes, model_size='2.0x', group=3)
    network = network.cuda()
    criterion = CrossEntropyLabelSmooth(cfg.num_classes, 0.1)
    optimizer = optim.SGD(network.parameters(), lr=cfg.lr_init, momentum=cfg.SGD_momentum, weight_decay=cfg.SGD_weight_decay)
    dataloader_train = create_dataset_pytorch_imagenet_dist_train(
            data_path=args.data_path+'train', local_rank=local_rank, n_workers=cfg.n_workers)
    dataloader_test = create_dataset_pytorch_imagenet(data_path=args.data_path+'val', is_train=False, n_workers=cfg.n_workers)
    step_per_epoch = len(dataloader_train)
    total_iters = step_per_epoch * cfg.epoch_size
    scheduler = optim.lr_scheduler.LambdaLR(optimizer,
                    lambda step : (1.0 - step * 1.0 / total_iters) if step <= total_iters else 0, 
                    last_epoch=-1)
    summary_writer = None
    if local_rank == 0:
        summary_writer = SummaryWriter(log_dir='./summary')
    trainer = Trainer(network=network, criterion=criterion, optimizer=optimizer, scheduler=scheduler,
                      dataloader_train=dataloader_train, dataloader_test=dataloader_test,
                      summary_writer=summary_writer, epoch_size=cfg.epoch_size,
                      ckpt_path=args.ckpt_path, local_rank=local_rank)

    for epoch_id in range(cfg.epoch_size):
        trainer.step()

    if local_rank == 0:
        summary_writer.close()
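Every snippet on this page constructs the criterion as CrossEntropyLabelSmooth(num_classes, epsilon). For orientation, here is a minimal sketch of what such a criterion typically looks like; it is an illustrative assumption and may differ from the exact class used in these repositories.

import torch
import torch.nn as nn

class CrossEntropyLabelSmooth(nn.Module):
    """Cross entropy whose one-hot targets are smoothed toward a uniform distribution."""

    def __init__(self, num_classes, epsilon):
        super().__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, targets):
        log_probs = self.logsoftmax(inputs)
        # one-hot encode the integer class targets
        targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
        # blend the one-hot targets with a uniform distribution over all classes
        targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
        return (-targets * log_probs).mean(0).sum()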
Example #2
    def __init__(self, context: det.TrialContext) -> None:
        self.context = context
        self.data_config = context.get_data_config()
        self.criterion = CrossEntropyLabelSmooth(
            context.get_hparam("num_classes"),  # num classes
            context.get_hparam("label_smoothing_rate"),
        )
        self.last_epoch_idx = -1
Example #3
    def __init__(self, context: PyTorchTrialContext) -> None:
        self.context = context
        self.data_config = context.get_data_config()
        self.criterion = CrossEntropyLabelSmooth(
            context.get_hparam("num_classes"),  # num classes
            context.get_hparam("label_smoothing_rate"),
        )
        self.last_epoch_idx = -1

        self.model = self.context.wrap_model(self.build_model_from_config())

        self.optimizer = self.context.wrap_optimizer(
            torch.optim.SGD(
                self.model.parameters(),
                lr=self.context.get_hparam("learning_rate"),
                momentum=self.context.get_hparam("momentum"),
                weight_decay=self.context.get_hparam("weight_decay"),
            ))

        self.lr_scheduler = self.context.wrap_lr_scheduler(
            self.build_lr_scheduler_from_config(self.optimizer),
            step_mode=LRScheduler.StepMode.STEP_EVERY_EPOCH,
        )
Example #4
    def train(self):
        torch.multiprocessing.set_sharing_strategy('file_system')
        args = arg_parser()
        logger = log()
        model_root = './model_source'
        if not os.path.exists(model_root):
            os.mkdir(model_root)
        time_stamp_launch = time.strftime('%Y%m%d') + '-' + time.strftime(
            '%H%M')
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
        n_gpus = len(args.gpu.split(','))

        # set parameters
        path = args.data_root
        label_file = args.label_file
        batch_size = args.batchsize
        epochs = args.max_epoch
        best_acc = 0

        dataset_name = path.split('/')[-2]

        logger.info(
            path.split('/')[-2] + '_' + time_stamp_launch +
            ' model: resnet101  lr: %s' % args.lr)
        logger.info('dataset is: ' + dataset_name)

        net = resnet101(pretrained=True)
        input_dim = net.fc.in_features
        net.fc = weightNorm(nn.Linear(input_dim, 12), name="weight")
        net = net.cuda()

        param_group = []
        for k, v in net.named_parameters():
            if k[:2] == 'fc':
                param_group += [{'params': v, 'lr': args.lr}]
            else:
                param_group += [{'params': v, 'lr': args.lr * 0.1}]

        loss = CrossEntropyLabelSmooth(num_classes=12).cuda()

        optimizer = optim.SGD(param_group, momentum=0.9, weight_decay=5e-4)
        scheduler = MultiStepLR(optimizer,
                                milestones=args.MultiStepLR,
                                gamma=0.1)

        # training dataset
        transform_train = transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.RandomCrop((224, 224)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406),
                                 (0.229, 0.224, 0.225)),  # ImageNet mean/std
        ])
        # train_dataset = AsoctDataset(path, label_file, args.imgs_per_volume, train=True, transform=transform_train)
        train_dataset = visDataset(path,
                                   label_file,
                                   train=True,
                                   transform=transform_train)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=2 * n_gpus if n_gpus <= 2 else 2)

        transform_test = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406),
                                 (0.229, 0.224, 0.225)),  # ImageNet mean/std
        ])
        val_dataset = visDataset(path,
                                 label_file,
                                 train=False,
                                 transform=transform_test)
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 num_workers=2 *
                                                 n_gpus if n_gpus <= 2 else 2)

        for i in range(epochs):
            accnum = 0.0
            total = 0.0
            running_loss = []
            net.train()

            for j, (img_data, img_label, ind) in enumerate(train_loader):
                img_data = img_data.cuda()
                img_label = img_label.cuda()
                r_loss, correct_num, bs_num = self.train_process(
                    net, optimizer, img_data, img_label, loss)
                running_loss += [r_loss]
                total += bs_num
                accnum += correct_num

            scheduler.step()
            avg_loss = np.mean(running_loss)
            temp_acc = 100 * float(accnum) / float(total)
            logger.info("Epoch %d running_loss=%.3f" % (i + 1, avg_loss))
            logger.info(
                "Accuracy of the prediction on the train dataset : %f %%" %
                (temp_acc))

            # evaluate the model
            acc = val_source(net, val_loader)
            if acc >= best_acc:
                logger.info('saving the best model!')
                torch.save(
                    net, './model_source/' + time_stamp_launch + '-' +
                    dataset_name + '9_1_resnet50_best.pkl')
                best_acc = acc
            else:
                torch.save(
                    net, './model_source/' + time_stamp_launch + '-' +
                    dataset_name + '9_1_resnet50_last.pkl')

            logger.info('best acc is : %.04f, acc is : %.04f' %
                        (best_acc, acc))
            logger.info('================================================')

        logger.info("Finished  Training")
Example #5
def main():
    
    
    #LOAD CONFIGS################################################################
    args = get_args()
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_no
    
    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
        format=log_format, datefmt='%d %I:%M:%S')
    t = time.time()
    local_time = time.localtime(t)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    fh = logging.FileHandler(os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000, local_time.tm_mon, t)))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)
    
    use_gpu = False
    if torch.cuda.is_available():
        use_gpu = True
    
#     cudnn.enabled=True
    torch.cuda.manual_seed(args.rand_seed)
    random.seed(args.rand_seed) 
    #LOAD DATA###################################################################
    def convert_param(original_lists):
      ctype, value = original_lists[0], original_lists[1]
      is_list = isinstance(value, list)
      if not is_list: value = [value]
      outs = []
      for x in value:
        if ctype == 'int':
          x = int(x)
        elif ctype == 'str':
          x = str(x)
        elif ctype == 'bool':
          x = bool(int(x))
        elif ctype == 'float':
          x = float(x)
        elif ctype == 'none':
          if x.lower() != 'none':
            raise ValueError('For the none type, the value must be none instead of {:}'.format(x))
          x = None
        else:
          raise TypeError('Does not know this type : {:}'.format(ctype))
        outs.append(x)
      if not is_list: outs = outs[0]
      return outs

    if args.dataset == 'cifar100':
        mean = [x / 255 for x in [129.3, 124.1, 112.4]]
        std  = [x / 255 for x in [68.2, 65.4, 70.4]]
        lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4), transforms.ToTensor(), transforms.Normalize(mean, std)]
        transform_train = transforms.Compose(lists)
        transform_test  = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])

        with open('../data/cifar-split.txt', 'r') as f:
            data = json.load(f)
            content = { k: convert_param(v) for k,v in data.items()}
            Arguments = namedtuple('Configure', ' '.join(content.keys()))
            content   = Arguments(**content)

        cifar_split = content
        train_split, valid_split = cifar_split.train, cifar_split.valid
    
        print(len(train_split),len(valid_split))
    
        train_dataset = datasets.CIFAR100(root='../data', train=True, download=True, transform=transform_train)
    
    
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size, shuffle=False,sampler=torch.utils.data.sampler.SubsetRandomSampler(train_split),
            num_workers=4, pin_memory=use_gpu)

        train_dataprovider = DataIterator(train_loader)

        val_loader = torch.utils.data.DataLoader(
            datasets.CIFAR100(root='../data', train=True, download=True, transform=transform_test),
            batch_size=250, shuffle=False, sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_split),
            num_workers=4, pin_memory=use_gpu
        )

        val_dataprovider = DataIterator(val_loader)
        print('load data successfully')
        CLASS = 100
    elif args.dataset == 'cifar10':
        mean = [x / 255 for x in [125.3, 123.0, 113.9]]
        std  = [x / 255 for x in [63.0, 62.1, 66.7]]
        lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4), transforms.ToTensor(), transforms.Normalize(mean, std)]
        transform_train = transforms.Compose(lists)
        transform_test  = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])
        with open('../data/cifar-split.txt', 'r') as f:
            data = json.load(f)
            content = { k: convert_param(v) for k,v in data.items()}
            Arguments = namedtuple('Configure', ' '.join(content.keys()))
            content   = Arguments(**content)

        cifar_split = content
        train_split, valid_split = cifar_split.train, cifar_split.valid

        print(len(train_split),len(valid_split))

        train_dataset = datasets.CIFAR10(root='../data', train=True, download=True, transform=transform_train)


        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size, shuffle=False,sampler=torch.utils.data.sampler.SubsetRandomSampler(train_split),
            num_workers=4, pin_memory=use_gpu)

        train_dataprovider = DataIterator(train_loader)

        val_loader = torch.utils.data.DataLoader(
            datasets.CIFAR10(root='../data', train=True, download=True, transform=transform_test),
            batch_size=250, shuffle=False, sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_split),
            num_workers=4, pin_memory=use_gpu
        )

        val_dataprovider = DataIterator(val_loader)
        print('load data successfully')
        CLASS = 10
    elif args.dataset == 'image16':
        mean = [x / 255 for x in [122.68, 116.66, 104.01]]
        std  = [x / 255 for x in [63.22,  61.26 , 65.09]]
        transform_test  = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])
        with open('../data/ImageNet16-120-split.txt', 'r') as f:
            data = json.load(f)
            content = { k: convert_param(v) for k,v in data.items()}
            Arguments = namedtuple('Configure', ' '.join(content.keys()))
            content   = Arguments(**content)
        img_split = content
        train_split, valid_split = img_split.train, img_split.valid
        train_split = train_split[:len(train_split)//args.batch_size*args.batch_size]
        valid_split = valid_split[:len(valid_split)//250*250]
        print(len(train_split),len(valid_split))
        train_dataset = ImageNet16('../data', True , transform_test,120)
        test_dataset  = ImageNet16('../data', True, transform_test,120)
    
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size, shuffle=False,sampler=torch.utils.data.sampler.SubsetRandomSampler(train_split),
            num_workers=4, pin_memory=use_gpu)

        train_dataprovider = DataIterator(train_loader)

        val_loader = torch.utils.data.DataLoader(
                test_dataset,
            batch_size=250, shuffle=False, sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_split),
            num_workers=4, pin_memory=use_gpu
        )

        val_dataprovider = DataIterator(val_loader)
        
        print('load data successfully')
        CLASS = 120
        
    print(CLASS)
    print(args.init_channels,args.stacks//3)
    model = TinyNetwork(C=args.init_channels,N=args.stacks//3,max_nodes = 4, num_classes = CLASS, search_space = NAS_BENCH_201, affine = False, track_running_stats = False).cuda()
    
    
    optimizer = torch.optim.SGD(get_parameters(model),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    
    criterion_smooth = CrossEntropyLabelSmooth(CLASS, 0.1)
    
    if use_gpu:
        loss_function = criterion_smooth.cuda()
        device = torch.device("cuda")
    else:
        loss_function = criterion_smooth
        device = torch.device("cpu")
        
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,args.total_iters)
    model = model.to(device)
    
    all_iters = 0
    if args.auto_continue:
        lastest_model, iters = get_lastest_model()
        if lastest_model is not None:
            all_iters = iters
            checkpoint = torch.load(lastest_model, map_location=None if use_gpu else 'cpu')
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            print('load from checkpoint')
            for i in range(iters):
                scheduler.step()

    args.optimizer = optimizer
    args.loss_function = loss_function
    args.scheduler = scheduler
    args.train_dataprovider = train_dataprovider
    args.val_dataprovider = val_dataprovider

    args.evo_controller = evolutionary(args.max_population, args.select_number, args.mutation_len,
                                       args.mutation_number, args.p_opwise, args.evo_momentum)

    path = './record_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}'.format(
        args.dataset, args.stacks, args.init_channels, args.total_iters, args.warmup_iters,
        args.max_population, args.select_number, args.mutation_len, args.mutation_number,
        args.val_interval, args.p_opwise, args.evo_momentum, args.rand_seed)

    logging.info(path)

    model.current_N = 1
    
    while all_iters < args.total_iters:

        if all_iters in [15000, 30000, 45000, 60000]:
            # if all_iters in [50, 100, 150, 200]:
            #     print("----------")
            model.current_N += 1

        if all_iters > 1 and all_iters % args.val_interval == 0:
            results = []
            for structure_father in args.evo_controller.group:
                results.append([structure_father.structure, structure_father.loss, structure_father.count])
            if not os.path.exists(path):
                os.mkdir(path)

            with open(path + '/%06d-ep.txt' % all_iters, 'w') as tt:
                json.dump(results, tt)

            if all_iters >= args.warmup_iters:  # warmup
                args.evo_controller.select()

        all_iters = train(model, device, args, val_interval=args.val_interval, bn_process=False, all_iters=all_iters)
        validate(model, device, args, all_iters=all_iters)
        
    results = []
    for structure_father in args.evo_controller.group:
        results.append([structure_father.structure,structure_father.loss,structure_father.count])
    with open(path + '/%06d-ep.txt'%all_iters,'w') as tt:
        json.dump(results,tt)
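Several examples on this page wrap their DataLoader in a DataIterator so the training loop can keep calling next() across epoch boundaries. The class itself is not shown in the snippets; a minimal sketch under that assumption:

class DataIterator:
    """Cycle a DataLoader indefinitely and hand out batches through next()."""

    def __init__(self, dataloader):
        self.dataloader = dataloader
        self.iterator = iter(dataloader)

    def next(self):
        try:
            return next(self.iterator)
        except StopIteration:
            # the loader is exhausted: start a new pass over the dataset
            self.iterator = iter(self.dataloader)
            return next(self.iterator)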
Example #6
    parser.add_argument("--test-batch-size", type=int, default=512)
    parser.add_argument("--log-frequency", type=int, default=10)

    args = parser.parse_args()

    # fixing the random seed keeps the sampled images consistent across runs
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    assert torch.cuda.is_available()

    model = ShuffleNetV2OneShot()
    criterion = CrossEntropyLabelSmooth(1000, 0.1)
    get_and_apply_next_architecture(model)
    model.load_state_dict(load_and_parse_state_dict(filepath=args.checkpoint))
    model.cuda()

    train_loader = get_imagenet_iter_dali(
        "train",
        args.imagenet_dir,
        args.train_batch_size,
        args.workers,
        spos_preprocessing=args.spos_preprocessing,
        seed=args.seed,
        device_id=0)
    val_loader = get_imagenet_iter_dali(
        "val",
        args.imagenet_dir,
Example #7
                        type=str,
                        default="./checkpoint",
                        help='path where the checkpoint to be saved')
    parser.add_argument('--device_id',
                        type=int,
                        default=0,
                        help='device id of GPU. (Default: 0)')
    args = parser.parse_args()

    device = torch.device('cuda:' + str(args.device_id))
    network = ShuffleNetV1(input_size=cfg.image_size,
                           n_class=cfg.num_classes,
                           model_size='2.0x',
                           group=3)
    network.to(device)
    criterion = CrossEntropyLabelSmooth(cfg.num_classes, 0.1)
    optimizer = optim.SGD(network.parameters(),
                          lr=cfg.lr_init,
                          momentum=cfg.SGD_momentum,
                          weight_decay=cfg.SGD_weight_decay)
    dataloader_train = create_dataset_pytorch_cifar10(args.data_path)
    dataloader_test = create_dataset_pytorch_cifar10(args.data_path,
                                                     is_train=False)

    total_iters = len(dataloader_train) * cfg.epoch_size
    scheduler = optim.lr_scheduler.LambdaLR(optimizer,
                                            lambda step:
                                            (1.0 - step * 1.0 / total_iters)
                                            if step <= total_iters else 0,
                                            last_epoch=-1)
    summary_writer = SummaryWriter(log_dir='./summary')
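The LambdaLR schedule above (the same one used in Example #1) scales the base learning rate linearly from lr_init down to zero over total_iters optimizer steps. A tiny self-contained illustration with made-up numbers:

import torch

total_iters = 10
param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.SGD([param], lr=0.5)
scheduler = torch.optim.lr_scheduler.LambdaLR(
    optimizer,
    lambda step: (1.0 - step * 1.0 / total_iters) if step <= total_iters else 0,
    last_epoch=-1)

for _ in range(total_iters):
    optimizer.step()
    scheduler.step()
    print(optimizer.param_groups[0]['lr'])  # 0.45, 0.40, ..., 0.0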
Example #8
def prepare(args, RCV_CONFIG):
    args.momentum = RCV_CONFIG['momentum']
    args.bn_process = RCV_CONFIG['bn_process'] == 'True'
    args.learning_rate = RCV_CONFIG['learning_rate']
    args.weight_decay = RCV_CONFIG['weight_decay']
    args.label_smooth = RCV_CONFIG['label_smooth']
    args.lr_scheduler = RCV_CONFIG['lr_scheduler']
    args.randAugment = RCV_CONFIG['randAugment'] == 'True'

    # if RCV_CONFIG['model'] == 'vgg':
    #     net = VGG('VGG19')
    # if RCV_CONFIG['model'] == 'resnet18':
    #     net = ResNet18()
    # if RCV_CONFIG['model'] == 'googlenet':
    #     net = GoogLeNet()

    use_gpu = False
    if torch.cuda.is_available():
        use_gpu = True

    if args.cifar100:
        # train_dataprovider, val_dataprovider, train_step, valid_step = dataset_cifar.get_dataset("cifar100", batch_size=args.batch_size, RandA=args.randAugment)

        train_dataprovider, val_dataprovider, train_step, valid_step = dataset_cifar.get_dataset(
            "cifar10", batch_size=args.batch_size, RandA=args.randAugment)
        print('load data successfully')
    else:
        assert os.path.exists(args.train_dir)
        from dataset import DataIterator, SubsetSampler, OpencvResize, ToBGRTensor
        train_dataset = datasets.ImageFolder(
            args.train_dir,
            transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.ColorJitter(brightness=0.4,
                                       contrast=0.4,
                                       saturation=0.4),
                transforms.RandomHorizontalFlip(0.5),
                ToBGRTensor(),
            ]))
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=1,
                                                   pin_memory=use_gpu)
        train_dataprovider = DataIterator(train_loader)

        assert os.path.exists(args.val_dir)
        val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
            args.val_dir,
            transforms.Compose([
                OpencvResize(256),
                transforms.CenterCrop(224),
                ToBGRTensor(),
            ])),
                                                 batch_size=200,
                                                 shuffle=False,
                                                 num_workers=1,
                                                 pin_memory=use_gpu)
        val_dataprovider = DataIterator(val_loader)
        print('load data successfully')

    # Imagenet
    # from network import ShuffleNetV2_OneShot
    # model = ShuffleNetV2_OneShot(n_class=1000)

    # Special for cifar
    from network_origin import cifar_fast
    model = cifar_fast(input_size=32, n_class=100)

    # Optimizer
    optimizer = get_optim(args, model)

    # Label Smooth
    if args.label_smooth > 0:
        criterion = CrossEntropyLabelSmooth(100, args.label_smooth)
    else:
        # print('CrossEntropyLoss')
        criterion = nn.CrossEntropyLoss()

    if args.lr_scheduler == 'Lambda':
        scheduler = torch.optim.lr_scheduler.LambdaLR(
            optimizer,
            lambda step: (1.0 - step / (args.epochs * train_step))
            if step <= (args.epochs * train_step) else 0,
            last_epoch=-1)
    elif args.lr_scheduler == 'Cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=args.epochs, eta_min=1e-8, last_epoch=-1)

    if use_gpu:
        model = nn.DataParallel(model)
        cudnn.benchmark = True
        loss_function = criterion.cuda()
        device = torch.device("cuda")
    else:
        loss_function = criterion
        device = torch.device("cpu")
    model = model.to(device)

    args.optimizer = optimizer
    args.loss_function = loss_function
    args.scheduler = scheduler
    args.train_dataprovider = train_dataprovider
    args.val_dataprovider = val_dataprovider
    args.best_acc = 0.0
    args.all_iters = 1

    return model, device, train_step, valid_step
Example #9
def get_cand_err(model, cand, args):
    global train_dataprovider, val_dataprovider

    if train_dataprovider is None:
        use_gpu = False
        train_dataprovider = get_train_dataprovider(args.train_batch_size,
                                                    use_gpu=True,
                                                    num_workers=32)
        val_dataprovider = get_val_dataprovider(args.test_batch_size,
                                                use_gpu=True,
                                                num_workers=32)

    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    max_train_iters = args.max_train_iters
    max_test_iters = args.max_test_iters

    print('clear bn statistics....')
    # for m in model.modules():
    #     if isinstance(m, torch.nn.BatchNorm2d):
    #         m.running_mean = torch.zeros_like(m.running_mean)
    #         m.running_var = torch.ones_like(m.running_var)

    print('train bn with training set (BN sanitize) ....')
    # meta_model = ShuffleNetV2_OneShot()
    # meta_model = nn.DataParallel(meta_model)
    # meta_model = meta_model.to(device)
    # for p, q in zip(model.parameters(), meta_model.parameters()):
    #     if p is not None:
    #         q = p.clone()

    optimizer = torch.optim.SGD(get_parameters(model), lr=0.001)
    criterion_smooth = CrossEntropyLabelSmooth(1000, 0.1)
    loss_function = criterion_smooth.cuda()
    model.train()

    for step in tqdm.tqdm(range(max_train_iters)):
        # print('train step: {} total: {}'.format(step,max_train_iters))
        data, target = train_dataprovider.next()
        # print('get data',data.shape)

        target = target.type(torch.LongTensor)

        data, target = data.to(device), target.to(device)

        # print(type(data))
        # data = data.requires_grad_()
        # data = torch.tensor(data.data, requires_grad=True)
        # data = data.cuda()
        # # target.requires_grad=True
        output = model(data, cand)

        # if step<10:
        #     loss = loss_function(output, target)
        #     optimizer.zero_grad()
        #     loss.backward()
        #     optimizer.step()

        del data, target, output

    top1 = 0
    top5 = 0
    total = 0

    print('starting test....')
    model.eval()

    for step in tqdm.tqdm(range(max_test_iters)):
        # print('test step: {} total: {}'.format(step,max_test_iters))
        data, target = val_dataprovider.next()
        batchsize = data.shape[0]
        # print('get data',data.shape)
        target = target.type(torch.LongTensor)
        data, target = data.to(device), target.to(device)

        logits = model(data, cand)

        prec1, prec5 = accuracy(logits, target, topk=(1, 5))

        # print(prec1.item(),prec5.item())

        top1 += prec1.item() * batchsize
        top5 += prec5.item() * batchsize
        total += batchsize

        del data, target, logits, prec1, prec5

    top1, top5 = top1 / total, top5 / total

    # convert top-1/top-5 accuracy (in percent) into error rates
    top1, top5 = 1 - top1 / 100, 1 - top5 / 100

    print('top1: {:.2f} top5: {:.2f}'.format(top1 * 100, top5 * 100))

    return top1, top5
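The accuracy(logits, target, topk=(1, 5)) helper used above is not included in the snippet. A common sketch of such a helper (an assumption, not necessarily this repository's exact implementation):

import torch

def accuracy(output, target, topk=(1,)):
    """Return top-k accuracy in percent for each k in topk."""
    maxk = max(topk)
    batch_size = target.size(0)

    # indices of the maxk highest-scoring classes per sample
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res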
Example #10
def _main():
    parser = argparse.ArgumentParser("SPOS Evolutional Search")
    parser.add_argument("--port", type=int, default=8084)
    parser.add_argument("--imagenet-dir", type=str, default="./data/imagenet")
    parser.add_argument("--checkpoint",
                        type=str,
                        default="./data/checkpoint-150000.pth.tar")
    parser.add_argument(
        "--spos-preprocessing",
        action="store_true",
        default=False,
        help="When true, image values will range from 0 to 255 and use BGR "
        "(as in original repo).")
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--workers", type=int, default=6)
    parser.add_argument("--train-batch-size", type=int, default=128)
    parser.add_argument("--train-iters", type=int, default=200)
    parser.add_argument("--test-batch-size", type=int, default=512)
    parser.add_argument("--log-frequency", type=int, default=10)
    parser.add_argument("--label-smoothing", type=float, default=0.1)
    parser.add_argument("--evolution-sample-size", type=int, default=10)
    parser.add_argument("--evolution-population-size", type=int, default=50)
    parser.add_argument("--evolution-cycles", type=int, default=10)
    parser.add_argument(
        "--latency-filter",
        type=str,
        default=None,
        help="Apply latency filter by calling the name of the applied hardware."
    )
    parser.add_argument("--latency-threshold", type=float, default=100)

    args = parser.parse_args()

    # fixing the random seed keeps the sampled images consistent across runs
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    assert torch.cuda.is_available()

    base_model = ShuffleNetV2OneShot()
    criterion = CrossEntropyLabelSmooth(1000, args.label_smoothing)

    if args.latency_filter:
        latency_filter = LatencyFilter(threshold=args.latency_threshold,
                                       predictor=args.latency_filter)
    else:
        latency_filter = None

    evaluator = FunctionalEvaluator(evaluate_acc,
                                    criterion=criterion,
                                    args=args)
    evolution_strategy = strategy.RegularizedEvolution(
        model_filter=latency_filter,
        sample_size=args.evolution_sample_size,
        population_size=args.evolution_population_size,
        cycles=args.evolution_cycles)
    exp = RetiariiExperiment(base_model,
                             evaluator,
                             strategy=evolution_strategy)

    exp_config = RetiariiExeConfig('local')
    exp_config.trial_concurrency = 2
    exp_config.trial_gpu_number = 1
    exp_config.max_trial_number = args.evolution_cycles
    exp_config.training_service.use_active_gpu = False
    exp_config.execution_engine = 'base'
    exp_config.dummy_input = [1, 3, 224, 224]

    exp.run(exp_config, args.port)

    print('Exported models:')
    for i, model in enumerate(exp.export_top_models(formatter='dict')):
        print(model)
        with open(f'architecture_final_{i}.json', 'w') as f:
            json.dump(get_archchoice_by_model(model), f, indent=4)
Example #11
def pipeline(args, reporter):
    # Log for one Supernet
    folder = '{}/task_id_{}'.format(args.signal, args.task_id)
    path = os.path.join(arg.local, 'save', folder)
    if not os.path.isdir(path):
        os.makedirs(path)
    args.path = path

    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%d %I:%M:%S')
    t = time.time()
    local_time = time.localtime(t)

    if not os.path.exists('{}/log'.format(path)):
        os.mkdir('{}/log'.format(path))
    fh = logging.FileHandler(
        os.path.join('{}/log/{}-task_id{}-train-{}{:02}{}'.format(path, args['signal'], args['task_id'], local_time.tm_year % 2000, local_time.tm_mon, t)))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)
    logging.info('{}-task_id: {}'.format(args.signal, args.task_id))

    # resource
    use_gpu = False
    if torch.cuda.is_available():
        use_gpu = True

    # load dataset
    if args.num_classes==10:
        dataset_train, dataset_valid = dataset_cifar.get_dataset("cifar10", N=args.randaug_n, M=args.randaug_m, RandA=args.RandA)
    elif args.num_classes==100:
        dataset_train, dataset_valid = dataset_cifar.get_dataset("cifar100", N=args.randaug_n, M=args.randaug_m, RandA=args.RandA)

    split = 0.0
    split_idx = 0
    train_sampler = None
    if split > 0.0:
        sss = StratifiedShuffleSplit(n_splits=5, test_size=split, random_state=0)
        sss = sss.split(list(range(len(dataset_train))), dataset_train.targets)
        for _ in range(split_idx + 1):
            train_idx, valid_idx = next(sss)
        train_sampler = SubsetRandomSampler(train_idx)
        valid_sampler = SubsetSampler(valid_idx)
    else:
        valid_sampler = SubsetSampler([])

    train_loader = torch.utils.data.DataLoader(
        dataset_train, batch_size=args.batch_size, shuffle=True if train_sampler is None else False, num_workers=32,
        pin_memory=True,
        sampler=train_sampler, drop_last=True)

    # valid_loader = torch.utils.data.DataLoader(
    #     dataset_train, batch_size=args.batch_size, shuffle=False, num_workers=16, pin_memory=True,
    #     sampler=valid_sampler, drop_last=False)

    #
    valid_loader = torch.utils.data.DataLoader(
        dataset_valid, batch_size=args.batch_size, shuffle=False, num_workers=16, pin_memory=True,
        drop_last=False)

    train_dataprovider = DataIterator(train_loader)
    val_dataprovider = DataIterator(valid_loader)
    args.test_interval = len(valid_loader)
    args.val_interval = int(len(dataset_train) / args.batch_size) # step
    print('load data successfully')

    # network
    if args.block == 5:
        model = ShuffleNetV2_OneShot_cifar(block=args['block'], n_class=args.num_classes)
    elif args.block == 12:
        model = SuperNetwork(shadow_bn=True, layers=args['block'], classes=args.num_classes)
        print("param size = %fMB" % count_parameters_in_MB(model))
    elif args.block == 4:
        model = Network(num_classes=args.num_classes) # model = Network(net()).to(device).half()
    elif args.block == 3:
        model = Network_cifar(num_classes=args.num_classes)


    # lr and parameters
    # original optimizer lr & wd

    # test lr_range
    # args.learning_rate = args.learning_rate * (args['task_id']+ 1)

    # parameters divided into groups
    # test shuffle lr_group (4 stage * 5choice + 1base_lr == 21)
    # test mobile lr_group (12 stage * 12 choice + 1base_lr == 145)
    # test fast lr_group (3 stage * 1 choice + 1base_lr == 4)

    # lr_group = [i/100 for i in list(range(4,25,1))]
    # arch_search = list(np.random.randint(2) for i in range(5*2))
    # optimizer = torch.optim.SGD(get_dif_lr_parameters(model, lr_group, arch_search),

    if args.different_hpo:
        if args['block']==5:
            nums_lr_group = args['block'] * args['choice'] + 1
            lr_group = list(np.random.uniform(0.4, 0.8) for i in range(nums_lr_group))
            optimizer = torch.optim.SGD(shuffle_dif_lr_parameters(model, lr_group),
                                        momentum=args.momentum,
                                        weight_decay=args.weight_decay)
        elif args['block']==12:
            nums_lr_group=145
            lr_group = list(np.random.uniform(0.1, 0.3) for i in range(nums_lr_group))
            optimizer = torch.optim.SGD(mobile_dif_lr_parameters(model, lr_group),
                                        momentum=args.momentum,
                                        weight_decay=args.weight_decay)

        elif args['block']==4:
            nums_lr_group=4
            lr_l, lr_r = float(arg.lr_range.split(',')[0]), float(arg.lr_range.split(',')[1])
            lr_group = list(np.random.uniform(lr_l, lr_r) for i in range(nums_lr_group))
            optimizer = torch.optim.SGD(fast_dif_lr_parameters(model, lr_group),
                                        momentum=args.momentum,
                                        weight_decay=args.weight_decay)

        elif args['block'] == 3:
            nums_lr_group = 19 # 9 * 2 + 1
            lr_l, lr_r = float(arg.lr_range.split(',')[0]), float(arg.lr_range.split(',')[1])
            lr_group = list(np.random.uniform(lr_l, lr_r) for i in range(nums_lr_group))
            optimizer = torch.optim.SGD(fast_19_lr_parameters(model, lr_group),
                                        momentum=args.momentum,
                                        weight_decay=args.weight_decay)
            # log lr
            # for param_group in optimizer.param_groups:
            #     print(param_group['lr'])

            # save optim
            # torch.save(optimizer.state_dict(),'optimizer.pt')
            # optimizer.load_state_dict(torch.load('optimizer.pt'))

    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.learning_rate, # without hpo / glboal hpo
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

    # optimizer = torch.optim.SGD(get_parameters(model),
    #                             lr=args.learning_rate,
    #                             momentum=args.momentum,
    #                             weight_decay=args.weight_decay)

    # lookahead optimizer
    # base_opt = torch.optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999))
    # optimizer = Lookahead(base_opt, k=5, alpha=0.5)

    # blockly optimizer
    # base_opt_2 = torch.optim.Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.999))
    # base_opt_3 = torch.optim.Adam(model.parameters(), lr=1e-5, betas=(0.9, 0.999))
    # base_opt_group = [base_opt, base_opt_2, base_opt_3]
    # optimizer = BlocklyOptimizer(base_opt_group, k=5, alpha=0.5)

    # loss func, ls=0.1 (class count taken from args so CIFAR-10 and CIFAR-100 both work)
    criterion_smooth = CrossEntropyLabelSmooth(args.num_classes, args['label_smooth'])

    # lr_scheduler is related to total_iters
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lambda step: (1.0 - step / args.total_iters) if step <= args.total_iters else 0,
        last_epoch=-1)
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    #     optimizer, float(args.total_iters / args.val_interval), eta_min=1e-8, last_epoch=-1)

    if use_gpu:
        model = nn.DataParallel(model)
        loss_function = criterion_smooth.cuda()
        device = torch.device("cuda")
    else:
        loss_function = criterion_smooth
        device = torch.device("cpu")

    model = model.to(device)

    all_iters = 0
    if args.auto_continue: # load model
        lastest_model, iters = get_lastest_model(args.path)
        if lastest_model is not None:
            all_iters = iters
            checkpoint = torch.load(lastest_model, map_location=None if use_gpu else 'cpu')
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            print('load from checkpoint')
            for i in range(iters):
                scheduler.step() # lr Align

    args.optimizer = optimizer
    args.loss_function = loss_function
    args.scheduler = scheduler

    args.train_dataprovider = train_dataprovider
    args.val_dataprovider = val_dataprovider

    if args.eval:
        if args.eval_resume is not None:
            checkpoint = torch.load(args.eval_resume, map_location=None if use_gpu else 'cpu')
            # model.load_state_dict(checkpoint, strict=True)
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            validate(model, device, args, all_iters=all_iters)
        exit(0)

    # according to total_iters
    while all_iters < args.total_iters:
        all_iters, Top1_acc = \
            train(model, device, args, bn_process=True, all_iters=all_iters, reporter=reporter)
Example #12
def prepare():
    args = get_args()

    use_gpu = False
    if torch.cuda.is_available():
        use_gpu = True

    if args.cifar100:
        train_dataprovider, val_dataprovider, train_step, valid_step = dataset_cifar.get_dataset(
            "cifar100", batch_size=args.batch_size, RandA=args.randAugment)
        print('load data successfully')
    else:
        assert os.path.exists(args.train_dir)
        from dataset import DataIterator, SubsetSampler, OpencvResize, ToBGRTensor
        train_dataset = datasets.ImageFolder(
            args.train_dir,
            transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.ColorJitter(brightness=0.4,
                                       contrast=0.4,
                                       saturation=0.4),
                transforms.RandomHorizontalFlip(0.5),
                ToBGRTensor(),
            ]))
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=1,
                                                   pin_memory=use_gpu)
        train_dataprovider = DataIterator(train_loader)

        assert os.path.exists(args.val_dir)
        val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
            args.val_dir,
            transforms.Compose([
                OpencvResize(256),
                transforms.CenterCrop(224),
                ToBGRTensor(),
            ])),
                                                 batch_size=200,
                                                 shuffle=False,
                                                 num_workers=1,
                                                 pin_memory=use_gpu)
        val_dataprovider = DataIterator(val_loader)
        print('load data successfully')

    # Imagenet
    # from network import ShuffleNetV2_OneShot
    # model = ShuffleNetV2_OneShot(n_class=1000)

    # Special for cifar
    from network_origin import cifar_fast
    model = cifar_fast(input_size=32, n_class=100)

    # Optimizer
    optimizer = get_optim(args, model)

    # Label Smooth
    if args.criterion_smooth:
        criterion = CrossEntropyLabelSmooth(100, 0.1)
    else:
        criterion = nn.CrossEntropyLoss()

    if args.lr_scheduler == 'Lambda':
        scheduler = torch.optim.lr_scheduler.LambdaLR(
            optimizer,
            lambda step: (1.0 - step / (args.epochs * train_step))
            if step <= (args.epochs * train_step) else 0,
            last_epoch=-1)
    elif args.lr_scheduler == 'Cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=args.epochs, eta_min=1e-8, last_epoch=-1)

    if use_gpu:
        model = nn.DataParallel(model)
        cudnn.benchmark = True
        loss_function = criterion.cuda()
        device = torch.device("cuda")
    else:
        loss_function = criterion
        device = torch.device("cpu")
    model = model.to(device)

    args.optimizer = optimizer
    args.loss_function = loss_function
    args.scheduler = scheduler
    args.train_dataprovider = train_dataprovider
    args.val_dataprovider = val_dataprovider
    args.best_acc = 0.0
    args.all_iters = 1

    start_epoch = 1
    for epoch in range(start_epoch, start_epoch + args.epochs):
        loss_output, train_acc = train_nni(args, model, device, epoch,
                                           train_step)
        acc, best_acc = test_nni(args, model, device, epoch, valid_step)
        print(
            'Epoch {}, loss/train acc = {:.2f}/{:.2f}, val acc/best acc = {:.2f}/{:.2f},'
            .format(epoch, loss_output, train_acc, acc, best_acc))
Example #13
def main():
    args = get_args()
    args.world_size = args.gpus * args.nodes
    args.rank = args.gpus * args.nr + args.local_rank
    print("RANK: " + str(args.rank) + ", LOCAL RANK: " + str(args.local_rank))

    # Log
    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%d %I:%M:%S')
    t = time.time()
    local_time = time.localtime(t)
    if not os.path.exists('/home/admin/aihub/SinglePathOneShot/log'):
        os.mkdir('/home/admin/aihub/SinglePathOneShot/log')
    fh = logging.FileHandler(
        os.path.join(
            '/home/admin/aihub/SinglePathOneShot/log/train-{}{:02}{}'.format(
                local_time.tm_year % 2000, local_time.tm_mon, t)))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    use_gpu = False
    if torch.cuda.is_available():
        use_gpu = True

    assert os.path.exists(args.train_dir)
    train_dataset = datasets.ImageFolder(
        args.train_dir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.ColorJitter(brightness=0.4,
                                   contrast=0.4,
                                   saturation=0.4),
            transforms.RandomHorizontalFlip(0.5),
            ToBGRTensor(),
        ]))

    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=args.world_size, rank=args.rank)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               num_workers=32,
                                               pin_memory=True,
                                               sampler=train_sampler)
    train_dataprovider = DataIterator(train_loader)

    assert os.path.exists(args.val_dir)
    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        args.val_dir,
        transforms.Compose([
            OpencvResize(256),
            transforms.CenterCrop(224),
            ToBGRTensor(),
        ])),
                                             batch_size=200,
                                             shuffle=False,
                                             num_workers=32,
                                             pin_memory=use_gpu)
    val_dataprovider = DataIterator(val_loader)

    print('load data successfully')

    dist.init_process_group(backend='nccl',
                            init_method='env://',
                            world_size=args.world_size,
                            rank=args.rank)  # global rank, not the node-local rank
    #     dist.init_process_group(backend='nccl', init_method='tcp://'+args.ip+':'+str(args.port), world_size=args.world_size, rank=args.rank)
    #     dist.init_process_group(backend='nccl', init_method="file:///mnt/nas1/share_file", world_size=args.world_size, rank=args.rank)
    torch.cuda.set_device(args.local_rank)

    channels_scales = (1.0, ) * 20
    model = ShuffleNetV2_OneShot(architecture=list(args.arch),
                                 channels_scales=channels_scales)
    device = torch.device(args.local_rank)
    model = model.cuda(args.local_rank)

    optimizer = torch.optim.SGD(get_parameters(model),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    criterion_smooth = CrossEntropyLabelSmooth(1000, 0.1)

    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lambda step: (1.0 - step / args.total_iters)
        if step <= args.total_iters else 0,
        last_epoch=-1)

    model = torch.nn.parallel.DistributedDataParallel(
        model, device_ids=[args.local_rank],
        find_unused_parameters=False)  # output_device=args.local_rank
    loss_function = criterion_smooth.cuda()

    all_iters = 0

    args.optimizer = optimizer
    args.loss_function = loss_function
    args.scheduler = scheduler
    args.train_dataprovider = train_dataprovider
    args.val_dataprovider = val_dataprovider

    if args.eval:
        if args.eval_resume is not None:
            checkpoint = torch.load(args.eval_resume,
                                    map_location=None if use_gpu else 'cpu')
            model.load_state_dict(checkpoint, strict=True)
            validate(model, device, args, all_iters=all_iters)
        exit(0)

    validate(model, device, args, all_iters=all_iters)

    while all_iters < args.total_iters:
        all_iters = train(model,
                          device,
                          args,
                          val_interval=args.val_interval,
                          bn_process=False,
                          all_iters=all_iters)
        validate(model, device, args, all_iters=all_iters)
    all_iters = train(model,
                      device,
                      args,
                      val_interval=int(1280000 / args.val_batch_size),
                      bn_process=True,
                      all_iters=all_iters)
    validate(model, device, args, all_iters=all_iters)
Example #14
def main():
    args = get_args()

    # archLoader
    arch_loader = ArchLoader(args.path)

    # Log
    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%d %I:%M:%S')
    t = time.time()
    local_time = time.localtime(t)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    fh = logging.FileHandler(
        os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000,
                                                  local_time.tm_mon, t)))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    use_gpu = False
    if torch.cuda.is_available():
        use_gpu = True

    train_dataset, val_dataset = get_dataset('cifar100')

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=16,
                                               pin_memory=True)
    # train_dataprovider = DataIterator(train_loader)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=200,
                                             shuffle=False,
                                             num_workers=12,
                                             pin_memory=True)

    # val_dataprovider = DataIterator(val_loader)
    print('load data successfully')

    model = mutableResNet20()

    print('load model successfully')

    optimizer = torch.optim.SGD(get_parameters(model),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # NOTE: 1000 classes is carried over from the ImageNet setting; it should match
    # the class count of the dataset actually being trained (CIFAR-100 here)
    criterion_smooth = CrossEntropyLabelSmooth(1000, 0.1)

    if use_gpu:
        model = nn.DataParallel(model)
        loss_function = criterion_smooth.cuda()
        device = torch.device("cuda")
    else:
        loss_function = criterion_smooth
        device = torch.device("cpu")

    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lambda step: (1.0 - step / args.total_iters)
        if step <= args.total_iters else 0,
        last_epoch=-1)

    model = model.to(device)

    # dp_model = torch.nn.parallel.DistributedDataParallel(model)

    all_iters = 0
    if args.auto_continue:  # resume from the latest checkpoint automatically?
        lastest_model, iters = get_lastest_model()
        if lastest_model is not None:
            all_iters = iters
            checkpoint = torch.load(lastest_model,
                                    map_location=None if use_gpu else 'cpu')
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            print('load from checkpoint')
            for i in range(iters):
                scheduler.step()

    # assign training settings onto args
    args.optimizer = optimizer
    args.loss_function = loss_function
    args.scheduler = scheduler
    args.train_loader = train_loader
    args.val_loader = val_loader
    # args.train_dataprovider = train_dataprovider
    # args.val_dataprovider = val_dataprovider

    if args.eval:
        if args.eval_resume is not None:
            checkpoint = torch.load(args.eval_resume,
                                    map_location=None if use_gpu else 'cpu')
            model.load_state_dict(checkpoint, strict=True)
            validate(model,
                     device,
                     args,
                     all_iters=all_iters,
                     arch_loader=arch_loader)
        exit(0)

    while all_iters < args.total_iters:
        all_iters = train(model,
                          device,
                          args,
                          val_interval=args.val_interval,
                          bn_process=False,
                          all_iters=all_iters,
                          arch_loader=arch_loader,
                          arch_batch=args.arch_batch)
Example #15
def main():
    args = get_args()

    # archLoader
    arch_loader = ArchLoader(args.path)

    # Log
    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m-%d %I:%M:%S')
    t = time.time()
    local_time = time.localtime(t)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    fh = logging.FileHandler(
        os.path.join('log/train-{}-{:02}-{:02}-{:.3f}'.format(
            local_time.tm_year % 2000, local_time.tm_mon, local_time.tm_mday,
            t)))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    use_gpu = False
    if torch.cuda.is_available():
        use_gpu = True

    kwargs = {'num_workers': 4, 'pin_memory': True}

    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        root="./data",
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.Resize(32),
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)

    val_loader = torch.utils.data.DataLoader(datasets.MNIST(
        root="./data",
        train=False,
        transform=transforms.Compose([
            transforms.Resize(32),
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             **kwargs)

    model = mutableResNet20(num_classes=10)
    base_model = copy.deepcopy(model)

    logging.info('load model successfully')

    optimizer = torch.optim.SGD(get_parameters(model),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    criterion_smooth = CrossEntropyLabelSmooth(10, 0.1)  # match the model's 10 output classes

    if use_gpu:
        model = nn.DataParallel(model)
        loss_function = criterion_smooth.cuda()
        device = torch.device("cuda")
        base_model.cuda()
    else:
        loss_function = criterion_smooth
        device = torch.device("cpu")

    # scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
    #                                               lambda step: (1.0-step/args.total_iters) if step <= args.total_iters else 0, last_epoch=-1)
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=5)
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    #     optimizer, T_max=200)

    model = model.to(device)

    all_iters = 0

    if args.auto_continue:
        lastest_model, iters = get_lastest_model()
        if lastest_model is not None:
            all_iters = iters
            checkpoint = torch.load(lastest_model,
                                    map_location=None if use_gpu else 'cpu')
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            logging.info('load from checkpoint')
            for i in range(iters):
                scheduler.step()
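            # Note: replaying scheduler.step() `iters` times fast-forwards the
            # warm-restart schedule to the checkpointed iteration, so training
            # resumes with the learning rate it had when the checkpoint was saved.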

    # parameter setup
    args.optimizer = optimizer
    args.loss_function = loss_function
    args.scheduler = scheduler
    args.train_loader = train_loader
    args.val_loader = val_loader

    if args.eval:
        if args.eval_resume is not None:
            checkpoint = torch.load(args.eval_resume,
                                    map_location=None if use_gpu else 'cpu')
            model.load_state_dict(checkpoint, strict=True)
            validate(model,
                     device,
                     args,
                     all_iters=all_iters,
                     arch_loader=arch_loader)
        exit(0)

    # warmup weights
    if args.warmup is not None:
        logging.info("begin warmup weights")
        while all_iters < args.warmup:
            all_iters = train_supernet(model,
                                       device,
                                       args,
                                       bn_process=False,
                                       all_iters=all_iters)

        validate(model,
                 device,
                 args,
                 all_iters=all_iters,
                 arch_loader=arch_loader)

    while all_iters < args.total_iters:
        all_iters = train_subnet(model,
                                 base_model,
                                 device,
                                 args,
                                 bn_process=False,
                                 all_iters=all_iters,
                                 arch_loader=arch_loader)
        logging.info("validate iter {}".format(all_iters))

        if all_iters % 9 == 0:
            validate(model,
                     device,
                     args,
                     all_iters=all_iters,
                     arch_loader=arch_loader)

    validate(model, device, args, all_iters=all_iters, arch_loader=arch_loader)
Example #16
0
def main():
    args = get_args()

    # archLoader
    arch_loader = ArchLoader(args.path)

    # Log
    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%d %I:%M:%S')
    t = time.time()
    local_time = time.localtime(t)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    fh = logging.FileHandler(
        os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000,
                                                  local_time.tm_mon, t)))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    use_gpu = False
    if torch.cuda.is_available():
        use_gpu = True

    val_loader = torch.utils.data.DataLoader(datasets.MNIST(
        root="./data",
        train=False,
        transform=transforms.Compose([
            transforms.Resize(32),
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=4,
                                             pin_memory=True)

    print('load data successfully')

    model = mutableResNet20(10)

    criterion_smooth = CrossEntropyLabelSmooth(10, 0.1)  # 10 classes, matching mutableResNet20(10)

    if use_gpu:
        model = nn.DataParallel(model)
        loss_function = criterion_smooth.cuda()
        device = torch.device("cuda")
    else:
        loss_function = criterion_smooth
        device = torch.device("cpu")

    model = model.to(device)
    print("load model successfully")

    all_iters = 0
    print('trying to load the latest checkpoint')
    lastest_model, iters = get_lastest_model()
    if lastest_model is not None:
        all_iters = iters
        checkpoint = torch.load(lastest_model,
                                map_location=None if use_gpu else 'cpu')
        model.load_state_dict(checkpoint['state_dict'], strict=True)

    # parameter setup
    args.loss_function = loss_function
    args.val_dataloader = val_loader

    print("start to validate model")

    validate(model, device, args, all_iters=all_iters, arch_loader=arch_loader)
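A practical note on this evaluation-only variant: the model is wrapped in nn.DataParallel before load_state_dict, so the checkpoint keys must carry the matching "module." prefix. If the weights were saved from an unwrapped model, a small remap along these lines is needed (hypothetical helper, shown only as a sketch):

def add_module_prefix(state_dict):
    # Prepend the "module." prefix that nn.DataParallel expects on every key.
    return {k if k.startswith("module.") else "module." + k: v
            for k, v in state_dict.items()}

# model.load_state_dict(add_module_prefix(checkpoint['state_dict']), strict=True)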
Example #17
0
def main():
    args = get_args()

    # Log
    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
        format=log_format, datefmt='%d %I:%M:%S')
    t = time.time()
    local_time = time.localtime(t)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    fh = logging.FileHandler(os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000, local_time.tm_mon, t)))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    use_gpu = False
    if torch.cuda.is_available():
        use_gpu = True

    assert os.path.exists(args.train_dir)
    train_dataset = datasets.ImageFolder(
        args.train_dir,
        transforms.Compose([
            transforms.RandomResizedCrop(args.im_size),
            transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
            transforms.RandomHorizontalFlip(0.5),
            ToBGRTensor(),
        ])
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=8, pin_memory=use_gpu)
    train_dataprovider = DataIterator(train_loader)

    assert os.path.exists(args.val_dir)
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(args.val_dir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(args.im_size),
            ToBGRTensor(),
        ])),
        batch_size=200, shuffle=False,
        num_workers=8, pin_memory=use_gpu
    )
    val_dataprovider = DataIterator(val_loader)
    print('load data successfully')

    arch_path='arch.pkl'

    if os.path.exists(arch_path):
        with open(arch_path,'rb') as f:
            architecture=pickle.load(f)
    else:
        raise NotImplementedError
    channels_scales = (1.0,)*20
    model = ShuffleNetV2_OneShot(architecture=architecture, channels_scales=channels_scales, n_class=args.num_classes, input_size=args.im_size)

    print('flops:',get_flops(model))

    optimizer = torch.optim.SGD(get_parameters(model),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    criterion_smooth = CrossEntropyLabelSmooth(args.num_classes, 0.1)

    if use_gpu:
        # model = nn.DataParallel(model)
        loss_function = criterion_smooth.cuda()
        device = torch.device("cuda")
    else:
        loss_function = criterion_smooth
        device = torch.device("cpu")

    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                    lambda step : (1.0-step/args.total_iters) if step <= args.total_iters else 0, last_epoch=-1)
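    # The lambda above returns the LR multiplier: 1.0 at step 0, 0.5 at
    # total_iters / 2, and 0 once step reaches total_iters, i.e. the learning
    # rate decays linearly to zero over the whole run.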

    # model = model.to(device)
    model = model.cuda()

    all_iters = 0
    if args.auto_continue:
        lastest_model, iters = get_lastest_model()
        if lastest_model is not None:
            all_iters = iters
            checkpoint = torch.load(lastest_model, map_location=None if use_gpu else 'cpu')
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            print('load from checkpoint')
            for i in range(iters):
                scheduler.step()

    args.optimizer = optimizer
    args.loss_function = loss_function
    args.scheduler = scheduler
    args.train_dataprovider = train_dataprovider
    args.val_dataprovider = val_dataprovider

    if args.eval:
        if args.eval_resume is not None:
            checkpoint = torch.load(args.eval_resume, map_location=None if use_gpu else 'cpu')
            model.load_state_dict(checkpoint, strict=True)
            validate(model, device, args, all_iters=all_iters)
        exit(0)
    t = time.time()
    while all_iters < args.total_iters:
        all_iters = train(model, device, args, val_interval=args.val_interval, bn_process=False, all_iters=all_iters)
        validate(model, device, args, all_iters=all_iters)
    # all_iters = train(model, device, args, val_interval=int(1280000/args.batch_size), bn_process=True, all_iters=all_iters)
    validate(model, device, args, all_iters=all_iters)
    save_checkpoint({'state_dict': model.state_dict(),}, args.total_iters, tag='bnps-')
    print("Finished {} iters in {:.3f} seconds".format(all_iters, time.time()-t))
Example #18
0
def main():
    args = get_args()

    # Log
    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%d %I:%M:%S')
    t = time.time()
    local_time = time.localtime(t)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    fh = logging.FileHandler(
        os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000,
                                                  local_time.tm_mon, t)))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    use_gpu = False
    if torch.cuda.is_available():
        use_gpu = True

    assert os.path.exists(args.train_dir)
    train_dataset = datasets.ImageFolder(
        args.train_dir,
        transforms.Compose([
            transforms.RandomResizedCrop(96),
            transforms.ColorJitter(brightness=0.4,
                                   contrast=0.4,
                                   saturation=0.4),
            transforms.RandomHorizontalFlip(0.5),
            ToBGRTensor(),
        ]))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=use_gpu)
    train_dataprovider = DataIterator(train_loader)

    assert os.path.exists(args.val_dir)
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(
            args.val_dir,
            transforms.Compose([
                OpencvResize(96),
                # transforms.CenterCrop(96),
                ToBGRTensor(),
            ])),
        batch_size=200,
        shuffle=False,
        num_workers=4,
        pin_memory=use_gpu)
    val_dataprovider = DataIterator(val_loader)

    arch_path = 'cl400.p'

    if os.path.exists(arch_path):
        with open(arch_path, 'rb') as f:
            architectures = pickle.load(f)
    else:
        raise NotImplementedError
    channels_scales = (1.0, ) * 20
    cands = {}
    splits = [(i, 10 + i) for i in range(0, 400, 10)]
    architectures = np.array(architectures)
    architectures = architectures[
        splits[args.split_num][0]:splits[args.split_num][1]]
    print(len(architectures))
    logging.info("Training and Validating arch: " +
                 str(splits[args.split_num]))
    for architecture in architectures:
        architecture = tuple(architecture.tolist())
        model = ShuffleNetV2_OneShot(architecture=architecture,
                                     channels_scales=channels_scales,
                                     n_class=10,
                                     input_size=96)

        print('flops:', get_flops(model))

        optimizer = torch.optim.SGD(get_parameters(model),
                                    lr=args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        criterion_smooth = CrossEntropyLabelSmooth(10, 0.1)  # 10 classes, matching n_class=10 above

        if use_gpu:
            model = nn.DataParallel(model)
            loss_function = criterion_smooth.cuda()
            device = torch.device("cuda")
        else:
            loss_function = criterion_smooth
            device = torch.device("cpu")

        scheduler = torch.optim.lr_scheduler.LambdaLR(
            optimizer,
            lambda step: (1.0 - step / args.total_iters)
            if step <= args.total_iters else 0,
            last_epoch=-1)

        model = model.to(device)

        all_iters = 0
        if args.auto_continue:
            lastest_model, iters = get_lastest_model()
            if lastest_model is not None:
                all_iters = iters
                checkpoint = torch.load(
                    lastest_model, map_location=None if use_gpu else 'cpu')
                model.load_state_dict(checkpoint['state_dict'], strict=True)
                print('load from checkpoint')
                for i in range(iters):
                    scheduler.step()

        args.optimizer = optimizer
        args.loss_function = loss_function
        args.scheduler = scheduler
        args.train_dataprovider = train_dataprovider
        args.val_dataprovider = val_dataprovider
        # print("BEGIN VALDATE: ", args.eval, args.eval_resume)
        if args.eval:
            if args.eval_resume is not None:
                checkpoint = torch.load(
                    args.eval_resume, map_location=None if use_gpu else 'cpu')
                model.load_state_dict(checkpoint, strict=True)
                validate(model, device, args, all_iters=all_iters)
            exit(0)
        # t1,t5 = validate(model, device, args, all_iters=all_iters)
        # print("VALDATE: ", t1, "   ", t5)

        while all_iters < args.total_iters:
            all_iters = train(model,
                              device,
                              args,
                              val_interval=args.val_interval,
                              bn_process=False,
                              all_iters=all_iters)
            validate(model, device, args, all_iters=all_iters)
        all_iters = train(model,
                          device,
                          args,
                          val_interval=int(1280000 / args.batch_size),
                          bn_process=True,
                          all_iters=all_iters)
        top1, top5 = validate(model, device, args, all_iters=all_iters)
        save_checkpoint({
            'state_dict': model.state_dict(),
        },
                        args.total_iters,
                        tag='bnps-')
        cands[architecture] = [top1, top5]
        pickle.dump(
            cands,
            open("from_scratch_split_{}.pkl".format(args.split_num), 'wb'))
Example #19
0
            logger.warning(
                "You might want to use SPOS preprocessing if you are loading their checkpoints."
            )
        model.load_state_dict(load_and_parse_state_dict())
    model.cuda()
    if torch.cuda.device_count() > 1:  # exclude last gpu, saving for data preprocessing on gpu
        model = nn.DataParallel(model,
                                device_ids=list(range(0, torch.cuda.device_count() - 1)))
    mutator = SPOSSupernetTrainingMutator(model,
                                          flops_func=flops_func,
                                          flops_lb=290E6,
                                          flops_ub=360E6)
    criterion = CrossEntropyLabelSmooth(1000, args.label_smoothing)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lambda step: (1.0 - step / args.epochs) if step <= args.epochs else 0,
        last_epoch=-1)
    train_loader = get_imagenet_iter_dali(
        "train",
        args.imagenet_dir,
        args.batch_size,
        args.workers,
        spos_preprocessing=args.spos_preprocessing)
    valid_loader = get_imagenet_iter_dali(
Example #20
0
def main():
    args = get_args()
    num_gpus = torch.cuda.device_count()
    args.gpu = args.local_rank % num_gpus
    torch.cuda.set_device(args.gpu)

    torch.distributed.init_process_group(backend='nccl', init_method='env://')
    args.world_size = torch.distributed.get_world_size()
    args.batch_size = args.batch_size // args.world_size

    # archLoader
    arch_loader = ArchLoader(args.path)

    # Log
    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m-%d %I:%M:%S')
    t = time.time()
    local_time = time.localtime(t)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    fh = logging.FileHandler(
        os.path.join('log/train-{}-{:02}-{:02}-{:.3f}'.format(
            local_time.tm_year % 2000, local_time.tm_mon, local_time.tm_mday,
            t)))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    use_gpu = False
    if torch.cuda.is_available():
        use_gpu = True

    train_loader = get_train_loader(args.batch_size, args.local_rank,
                                    args.num_workers, args.total_iters)

    val_loader = get_val_loader(args.batch_size, args.num_workers)

    model = mutableResNet20()

    logging.info('load model successfully')

    optimizer = torch.optim.SGD(get_parameters(model),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    criterion_smooth = CrossEntropyLabelSmooth(1000, 0.1)

    if use_gpu:
        # model = nn.DataParallel(model)
        model = model.cuda(args.gpu)
        model = nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)
        loss_function = criterion_smooth.cuda()
    else:
        loss_function = criterion_smooth

    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=5)

    all_iters = 0

    if args.auto_continue:  # auto-resume from the latest checkpoint?
        lastest_model, iters = get_lastest_model()
        if lastest_model is not None:
            all_iters = iters
            checkpoint = torch.load(lastest_model,
                                    map_location=None if use_gpu else 'cpu')
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            logging.info('load from checkpoint')
            for i in range(iters):
                scheduler.step()

    # parameter setup
    args.optimizer = optimizer
    args.loss_function = loss_function
    args.scheduler = scheduler
    args.train_loader = train_loader
    args.val_loader = val_loader

    if args.eval:
        if args.eval_resume is not None:
            checkpoint = torch.load(args.eval_resume,
                                    map_location=None if use_gpu else 'cpu')
            model.load_state_dict(checkpoint, strict=True)
            validate(model, args, all_iters=all_iters, arch_loader=arch_loader)
        exit(0)

    # warmup weights
    if args.warmup > 0:
        logging.info("begin warmup weights")
        while all_iters < args.warmup:
            all_iters = train_supernet(model,
                                       args,
                                       bn_process=False,
                                       all_iters=all_iters)

        validate(model, args, all_iters=all_iters, arch_loader=arch_loader)

    while all_iters < args.total_iters:
        logging.info("=" * 50)
        all_iters = train_subnet(model,
                                 args,
                                 bn_process=False,
                                 all_iters=all_iters,
                                 arch_loader=arch_loader)

        if all_iters % 200 == 0 and args.local_rank == 0:
            logging.info("validate iter {}".format(all_iters))

            validate(model, args, all_iters=all_iters, arch_loader=arch_loader)
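This distributed variant expects one process per GPU. It is typically started with a launcher such as torchrun or python -m torch.distributed.launch --nproc_per_node=<num_gpus> <script>, which supplies the --local_rank argument (or the LOCAL_RANK environment variable) and the env:// rendezvous information that init_process_group reads. Note that the global batch size is divided by world_size near the top of main(), so each rank processes batch_size // world_size samples per step.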
Example #21
0
def main():
    args = get_args()

    # Log
    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%d %I:%M:%S')
    t = time.time()
    local_time = time.localtime(t)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    fh = logging.FileHandler(
        os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000,
                                                  local_time.tm_mon, t)))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    use_gpu = False
    if torch.cuda.is_available():
        use_gpu = True

    if not args.cifar10:

        assert os.path.exists(args.train_dir)
        train_dataset = datasets.ImageFolder(
            args.train_dir,
            transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.ColorJitter(brightness=0.4,
                                       contrast=0.4,
                                       saturation=0.4),
                transforms.RandomHorizontalFlip(0.5),
                ToBGRTensor(),
            ]))
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=1,
                                                   pin_memory=use_gpu)
        train_dataprovider = DataIterator(train_loader)

        assert os.path.exists(args.val_dir)
        val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
            args.val_dir,
            transforms.Compose([
                OpencvResize(256),
                transforms.CenterCrop(224),
                ToBGRTensor(),
            ])),
                                                 batch_size=200,
                                                 shuffle=False,
                                                 num_workers=1,
                                                 pin_memory=use_gpu)
        val_dataprovider = DataIterator(val_loader)
        print('load imagenet data successfully')

    else:
        train_transform, valid_transform = data_transforms(args)

        trainset = torchvision.datasets.CIFAR10(root=os.path.join(
            args.data_dir, 'cifar'),
                                                train=True,
                                                download=True,
                                                transform=train_transform)
        train_loader = torch.utils.data.DataLoader(trainset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   pin_memory=True,
                                                   num_workers=8)
        train_dataprovider = DataIterator(train_loader)
        valset = torchvision.datasets.CIFAR10(root=os.path.join(
            args.data_dir, 'cifar'),
                                              train=False,
                                              download=True,
                                              transform=valid_transform)
        val_loader = torch.utils.data.DataLoader(valset,
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 pin_memory=True,
                                                 num_workers=8)
        val_dataprovider = DataIterator(val_loader)

        print('load cifar10 data successfully')

    model = ShuffleNetV2_OneShot()

    optimizer = torch.optim.SGD(get_parameters(model),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    criterion_smooth = CrossEntropyLabelSmooth(1000, 0.1)

    if use_gpu:
        model = nn.DataParallel(model)
        loss_function = criterion_smooth.cuda()
        device = torch.device("cuda")
    else:
        loss_function = criterion_smooth
        device = torch.device("cpu")

    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lambda step: (1.0 - step / args.total_iters)
        if step <= args.total_iters else 0,
        last_epoch=-1)

    model = model.to(device)

    all_iters = 0
    if args.auto_continue:
        lastest_model, iters = get_lastest_model()
        if lastest_model is not None:
            all_iters = iters
            checkpoint = torch.load(lastest_model,
                                    map_location=None if use_gpu else 'cpu')
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            print('load from checkpoint')
            for i in range(iters):
                scheduler.step()

    args.optimizer = optimizer
    args.loss_function = loss_function
    args.scheduler = scheduler
    args.train_dataprovider = train_dataprovider
    args.val_dataprovider = val_dataprovider

    if args.eval:
        if args.eval_resume is not None:
            checkpoint = torch.load(args.eval_resume,
                                    map_location=None if use_gpu else 'cpu')
            model.load_state_dict(checkpoint, strict=True)
            validate(model, device, args, all_iters=all_iters)
        exit(0)

    while all_iters < args.total_iters:
        all_iters = train(model,
                          device,
                          args,
                          val_interval=args.val_interval,
                          bn_process=False,
                          all_iters=all_iters)
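Several of these training loops expose a bn_process flag (left False here, and enabled in the final training call of Example #18). In SPOS-style pipelines this normally means recalibrating BatchNorm running statistics for the chosen sub-network before evaluation. A minimal sketch of such a recalibration pass, assuming only the running mean/var need refreshing, is:

import torch
import torch.nn as nn

def recalibrate_bn(model, dataloader, num_batches=100, device="cuda"):
    # Hypothetical helper: reset BN running stats, then re-estimate them in train mode.
    for m in model.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.reset_running_stats()
            m.momentum = None  # None => cumulative moving average over the calibration batches
    model.train()
    with torch.no_grad():
        for i, (images, _) in enumerate(dataloader):
            if i >= num_batches:
                break
            model(images.to(device))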
Example #22
0
def train(model, optimizer, scheduler, train_loader, dev, epoch):

    model.train()
    total_loss = 0
    num_batches = 0
    total_correct = 0
    count = 0
    criterion = nn.CrossEntropyLoss()
    if opt.labelsmooth:
        criterion = CrossEntropyLabelSmooth()
    warm_up = min(1.0, 0.1 + 0.9 * epoch / opt.warm_epoch)

    warm_iteration = round(dataset_sizes['train'] /
                           opt.batch_size) * opt.warm_epoch  # first 5 epoch
    total_iteration = round(
        dataset_sizes['train'] / opt.batch_size) * opt.num_epochs
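    # Note: warm_up starts at min(1, 0.1 + 0.9 * epoch / warm_epoch) and is then
    # nudged toward 1.0 batch by batch inside the loop below, so the loss (and
    # therefore the effective learning rate) is ramped up linearly over the
    # first opt.warm_epoch epochs.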

    with tqdm.tqdm(train_loader, ascii=True) as tq:
        for data, label in tq:
            num_examples = label.shape[0]
            data, label = data.to(dev), label.to(dev).squeeze().long()
            optimizer.zero_grad()
            xyz = data[:, :, 0:3].contiguous()
            rgb = data[:, :, 3:].contiguous()
            logits = model(xyz.detach(), rgb.detach(), istrain=True)
            #loss = compute_loss(logits, label)
            if opt.npart > 1:
                loss = criterion(logits[0], label)
                for i in range(1, opt.npart):
                    loss += criterion(logits[i], label)
            else:
                loss = criterion(logits, label)
            if epoch < opt.warm_epoch:
                warm_up = min(1.0, warm_up + 0.9 / warm_iteration)
                loss *= warm_up

            loss.backward()
            optimizer.step()

            if opt.npart > 1:
                logit_sum = logits[0].detach()
                for i in range(1, opt.npart):
                    logit_sum += logits[i].detach()
                _, preds = logit_sum.max(1)
            else:
                _, preds = logits.max(1)

            num_batches += 1
            count += num_examples
            correct = (preds == label).sum().item()
            total_loss += loss.item()
            total_correct += correct

            tq.set_postfix({
                #'Loss': '%.5f' % loss,
                'AvgLoss': '%.4f' % (total_loss / num_batches),
                #'Acc': '%.5f' % (correct / num_examples),
                'AvgAcc': '%.4f' % (total_correct / count)
            })
        y_loss['train'].append(total_loss / num_batches)
        y_err['train'].append(1.0 - total_correct / count)

    scheduler.step()