Example #1
def run_model(config,
              seed=0,
              data_dir='./data',
              genotype_class='PCDARTS',
              num_epochs=20,
              batch_size=get('batch_size'),
              init_channels=get('init_channels'),
              train_criterion=torch.nn.CrossEntropyLoss,
              data_augmentations=None,
              save_model_str=None, **kwargs):
    """
    Training loop for configurableNet.
    :param model_config: network config (dict)
    :param data_dir: dataset path (str)
    :param num_epochs: (int)
    :param batch_size: (int)
    :param learning_rate: model optimizer learning rate (float)
    :param train_criterion: Which loss to use during training (torch.nn._Loss)
    :param model_optimizer: Which model optimizer to use during trainnig (torch.optim.Optimizer)
    :param data_augmentations: List of data augmentations to apply such as rescaling.
        (list[transformations], transforms.Composition[list[transformations]], None)
        If none only ToTensor is used
    :return:
    """


    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    gpu = 'cuda:0'
    np.random.seed(seed)
    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)
    logging.info('gpu device = %s' % gpu)
    logging.info("config = %s", config)

    if data_augmentations is None:
        # You can add any preprocessing/data augmentation you want here
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    train_dataset = K49(data_dir, True, data_augmentations)
    test_dataset = K49(data_dir, False, data_augmentations)
    # train_dataset = KMNIST(data_dir, True, data_augmentations)
    # test_dataset = KMNIST(data_dir, False, data_augmentations)
    # Make data batch iterable
    # Could modify the sampler to not uniformly random sample
    
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=batch_size,
                             shuffle=False)

    genotype = getattr(genotypes, genotype_class)
    model = Network(init_channels, train_dataset.n_classes, config['n_conv_layers'], genotype)
    model = model.cuda()
    
    total_model_params = sum(p.numel() for p in model.parameters())

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = train_criterion()
    criterion = criterion.cuda()
    
    if config['optimizer'] == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), 
                                    lr=config['initial_lr'], 
                                    momentum=config['sgd_momentum'], 
                                    weight_decay=config['weight_decay'], 
                                    nesterov=config['nesterov'])
    else:
        optimizer = get('opti_dict')[config['optimizer']](model.parameters(),
                                                          lr=config['initial_lr'],
                                                          weight_decay=config['weight_decay'])
    
    if config['lr_scheduler'] == 'Cosine':
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, num_epochs)
    elif config['lr_scheduler'] == 'Exponential':
        lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)

    logging.info('Generated Network:')
    summary(model, (train_dataset.channels,
                    train_dataset.img_rows,
                    train_dataset.img_cols),
            device='cuda' if torch.cuda.is_available() else 'cpu')
    for epoch in range(num_epochs):
        logging.info('epoch %d lr %e', epoch, lr_scheduler.get_lr()[0])
        model.drop_path_prob = config['drop_path_prob'] * epoch / num_epochs

        train_acc, train_obj = train(train_loader, model, criterion, optimizer, grad_clip=config['grad_clip_value'])
        logging.info('train_acc %f', train_acc)
        # step the LR schedule after this epoch's optimizer updates
        lr_scheduler.step()

        test_acc, test_obj = infer(test_loader, model, criterion)
        logging.info('test_acc %f', test_acc)


    if save_model_str:
        # Save the model checkpoint; restore via
        # "model.load_state_dict(torch.load(save_model_str))"
        if os.path.exists(save_model_str):
            save_model_str += '_' + time.ctime().replace(' ', '_')
        torch.save(model.state_dict(), save_model_str)
    
    return test_acc
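
For reference, a minimal usage sketch follows. The dictionary keys mirror the config lookups inside run_model; the concrete values and the save path are only illustrative assumptions, not taken from the project.

example_config = {
    'n_conv_layers': 8,          # passed to Network as the layer count
    'optimizer': 'sgd',          # 'sgd' or any key of get('opti_dict')
    'initial_lr': 0.025,
    'sgd_momentum': 0.9,
    'weight_decay': 3e-4,
    'nesterov': True,
    'lr_scheduler': 'Cosine',    # 'Cosine' or 'Exponential'
    'drop_path_prob': 0.2,
    'grad_clip_value': 5,
}

test_accuracy = run_model(example_config,
                          seed=0,
                          data_dir='./data',
                          num_epochs=20,
                          save_model_str='./models/pcdarts_k49')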
Example #2
    def compute(self, x, budget, config, **kwargs):
        """
        Get model with hyperparameters from config generated by get_configspace()
        """
        config = get_config_dictionary(x, config)
        print("config", config)
        if len(config.keys()) < len(x):
            # fewer keys than inputs: treat the configuration as invalid and return a large loss
            return 100
        if not torch.cuda.is_available():
            logging.info('no gpu device available')
            sys.exit(1)

        gpu = 'cuda:0'
        np.random.seed(self.seed)
        torch.cuda.set_device(gpu)
        cudnn.benchmark = True
        torch.manual_seed(self.seed)
        cudnn.enabled = True
        torch.cuda.manual_seed(self.seed)
        logging.info('gpu device = %s' % gpu)
        logging.info("config = %s", config)

        genotype = getattr(genotypes, 'PCDARTS')
        model = Network(self.init_channels, self.n_classes, config['n_conv_layers'], genotype)
        model = model.cuda()

        logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

        criterion = nn.CrossEntropyLoss()
        criterion = criterion.cuda()
        
        if config['optimizer'] == 'sgd':
            optimizer = torch.optim.SGD(model.parameters(), 
                                        lr=config['initial_lr'], 
                                        momentum=0.9, 
                                        weight_decay=config['weight_decay'], 
                                        nesterov=True)
        else:
            optimizer = settings.opti_dict[config['optimizer']](model.parameters(),
                                                                lr=config['initial_lr'])
        
        if config['lr_scheduler'] == 'Cosine':
            lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, int(budget))
        elif config['lr_scheduler'] == 'Exponential':
            lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)

        
        # split the training set: the first fraction for training, the remainder for validation
        indices = list(range(int(self.split * len(self.train_dataset))))
        valid_indices = list(range(int(self.split * len(self.train_dataset)), len(self.train_dataset)))
        print("Training size=", len(indices))
        training_sampler = SubsetRandomSampler(indices)
        valid_sampler = SubsetRandomSampler(valid_indices)
        train_queue = torch.utils.data.DataLoader(dataset=self.train_dataset,
                                                batch_size=self.batch_size,
                                                sampler=training_sampler) 

        valid_queue = torch.utils.data.DataLoader(dataset=self.train_dataset, 
                                                batch_size=self.batch_size, 
                                                sampler=valid_sampler)


        for epoch in range(int(budget)):
            logging.info('epoch %d lr %e', epoch, lr_scheduler.get_lr()[0])
            model.drop_path_prob = config['drop_path_prob'] * epoch / int(budget)

            train_acc, train_obj = train(train_queue, model, criterion, optimizer, grad_clip=config['grad_clip_value'])
            logging.info('train_acc %f', train_acc)
            # step the LR schedule after this epoch's optimizer updates
            lr_scheduler.step()

            valid_acc, valid_obj = infer(valid_queue, model, criterion)
            logging.info('valid_acc %f', valid_acc)

        return valid_obj  # Hyperband/BOHB minimizes the returned value, so return the validation loss
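
The docstring above refers to a get_configspace() companion method that is not shown. Below is a hedged sketch of what it might look like, assuming the ConfigSpace package commonly used with BOHB; the hyperparameter names mirror the config lookups in compute(), but the ranges are illustrative guesses.

import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH

def get_configspace(seed=0):
    # search space over the hyperparameters that compute() reads from config
    cs = CS.ConfigurationSpace(seed=seed)
    cs.add_hyperparameters([
        CSH.UniformIntegerHyperparameter('n_conv_layers', lower=1, upper=4),
        CSH.CategoricalHyperparameter('optimizer', ['sgd', 'adam']),
        CSH.UniformFloatHyperparameter('initial_lr', lower=1e-4, upper=1e-1, log=True),
        CSH.UniformFloatHyperparameter('weight_decay', lower=1e-5, upper=1e-3, log=True),
        CSH.CategoricalHyperparameter('lr_scheduler', ['Cosine', 'Exponential']),
        CSH.UniformFloatHyperparameter('drop_path_prob', lower=0.0, upper=0.5),
        CSH.UniformFloatHyperparameter('grad_clip_value', lower=1.0, upper=10.0),
    ])
    return cs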
Example #3
def create_run_ensemble(model_state_list,
                        n_layers,
                        grad_clip_value=5,
                        seed=0,
                        num_epochs=20,
                        learning_rate=0.001,
                        data_dir='./data',
                        init_channels=get('init_channels'),
                        batch_size=get('batch_size'),
                        genotype_class='PCDARTS',
                        data_augmentations=None,
                        save_model_str=None):
    
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    gpu = 'cuda:0'
    np.random.seed(seed)
    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)
    logging.info('gpu device = %s' % gpu)
    logging.info("config = %s", config)

    if data_augmentations is None:
        # You can add any preprocessing/data augmentation you want here
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    train_dataset = K49(data_dir, True, data_augmentations)
    test_dataset = K49(data_dir, False, data_augmentations)
    # train_dataset = KMNIST(data_dir, True, data_augmentations)
    # test_dataset = KMNIST(data_dir, False, data_augmentations)
    # Make data batch iterable
    # Could modify the sampler to not uniformly random sample
    
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=batch_size,
                             shuffle=False)

    genotype = getattr(genotypes, genotype_class)
    # collect, per pre-trained model, the extracted features for the ensemble
    dataset = {'trn_features': [], 'test_features': []}
    dims = []
    for i, model_state in enumerate(model_state_list):
        model = Network(init_channels, train_dataset.n_classes, n_layers, genotype)
        model.load_state_dict(torch.load(model_state))
        model.cuda()
        for p in model.parameters():
            p.requires_grad = False
        trn_labels = []
        trn_features = []
        if i == 0:
            for d,la in train_loader:
                o = model(Variable(d.cuda()))
                o = o.view(o.size(0),-1)
                trn_labels.extend(la)
                trn_features.extend(o.cpu().data)
            test_labels = []
            test_features = []
            for d,la in test_loader:
                o = model(Variable(d.cuda()))
                o = o.view(o.size(0),-1)
                test_labels.extend(la)
                test_features.extend(o.cpu().data)
            dataset['trn_labels'] = trn_labels
            dataset['test_labels'] = test_labels

        else:
            for d,la in train_loader:
                o = model(Variable(d.cuda()))
                o = o.view(o.size(0),-1)
                trn_features.extend(o.cpu().data)
            test_labels = []
            test_features = []
            for d,la in test_loader:
                o = model(Variable(d.cuda()))
                o = o.view(o.size(0),-1)
                test_features.extend(o.cpu().data)            
        dataset['trn_features'].append(trn_features)
        dims.append(dataset['trn_features'][i][0].size(0))
        dataset['test_features'].append(test_features)
    

    trn_feat_dset = FeaturesDataset(dataset['trn_features'][0], dataset['trn_features'][1],
                                    dataset['trn_features'][2], dataset['trn_labels'])
    test_feat_dset = FeaturesDataset(dataset['test_features'][0], dataset['test_features'][1],
                                     dataset['test_features'][2], dataset['test_labels'])
    trn_feat_loader = DataLoader(trn_feat_dset, batch_size=64, shuffle=True)
    test_feat_loader = DataLoader(test_feat_dset, batch_size=64)
    model = EnsembleModel(dims, out_size=train_dataset.n_classes)
    model = model.cuda()
    criterion = torch.nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=learning_rate,
                                momentum=0.9)
    
    for epoch in range(num_epochs):
        epoch_loss, epoch_accuracy = fit(epoch, model, trn_feat_loader, criterion, training=True)
        val_epoch_loss, val_epoch_accuracy = fit(epoch, model, test_feat_loader, criterion, training=False)


    if save_model_str:
        # Save the model checkpoint; restore via
        # "model.load_state_dict(torch.load(<checkpoint path>))"
        if not os.path.exists(save_model_str):
            os.mkdir(save_model_str)

        torch.save(model.state_dict(), os.path.join(save_model_str, time.ctime().replace(' ', '_')))
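
FeaturesDataset is referenced above but not shown. Here is a minimal sketch of a compatible implementation, assuming it simply zips the three per-model feature lists with the shared labels; the actual class in the project may differ.

import torch
from torch.utils.data import Dataset

class FeaturesDataset(Dataset):
    def __init__(self, feats1, feats2, feats3, labels):
        # one list of pre-extracted feature tensors per backbone, plus shared labels
        self.feats1 = feats1
        self.feats2 = feats2
        self.feats3 = feats3
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.feats1[idx], self.feats2[idx], self.feats3[idx], self.labels[idx]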
Example #4
def create_run_ensemble(model_description,
                        ensemble_config,
                        seed=get('seed'),
                        num_epochs=20,
                        data_dir='./data',
                        init_channels=get('init_channels'),
                        batch_size=get('batch_size'),
                        genotype_class='PCDARTS',
                        data_augmentations=None,
                        save_model_str=None):

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    gpu = 'cuda:0'
    np.random.seed(seed)
    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)
    logging.info('gpu device = %s' % gpu)

    if data_augmentations is None:
        # You can add any preprocessing/data augmentation you want here
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    train_dataset = K49(data_dir, True, data_augmentations)
    test_dataset = K49(data_dir, False, data_augmentations)
    # train_dataset = KMNIST(data_dir, True, data_augmentations)
    # test_dataset = KMNIST(data_dir, False, data_augmentations)
    # Make data batch iterable
    # Could modify the sampler to not uniformly random sample

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=batch_size,
                             shuffle=False)

    genotype = getattr(genotypes, genotype_class)
    trained_models = []
    for model_state in model_description:
        model = Network(
            init_channels, train_dataset.n_classes,
            model_description[model_state]['config']['n_conv_layers'],
            genotype)
        model.load_state_dict(
            torch.load(model_description[model_state]['model_path']))
        model.cuda()
        model.drop_path_prob = model_description[model_state]['config'][
            'drop_path_prob']
        trained_models.append(model)

    ensemble_model = EnsembleModel(trained_models,
                                   dense_units=ensemble_config['dense_units'],
                                   out_size=train_dataset.n_classes)
    ensemble_model = ensemble_model.cuda()

    summary(ensemble_model, input_size=(1, 28, 28))
    criterion = torch.nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    # optimize the ensemble itself (the frozen backbones contribute no gradients)
    if ensemble_config['optimizer'] == 'sgd':
        optimizer = torch.optim.SGD(
            ensemble_model.parameters(),
            lr=ensemble_config['initial_lr'],
            momentum=ensemble_config['sgd_momentum'],
            weight_decay=ensemble_config['weight_decay'],
            nesterov=ensemble_config['nesterov'])
    else:
        optimizer = get('opti_dict')[ensemble_config['optimizer']](
            ensemble_model.parameters(),
            lr=ensemble_config['initial_lr'],
            weight_decay=ensemble_config['weight_decay'])

    if ensemble_config['lr_scheduler'] == 'Cosine':
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, num_epochs)
    elif ensemble_config['lr_scheduler'] == 'Exponential':
        lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer,
                                                              gamma=0.1)

    print('Started Training')
    for epoch in range(num_epochs):
        logging.info('epoch %d lr %e', epoch, lr_scheduler.get_lr()[0])
        model.drop_path_prob = ensemble_config[
            'drop_path_prob'] * epoch / num_epochs
        for p in ensemble_model.model_1.parameters():
            p.requires_grad = False
        for p in ensemble_model.model_2.parameters():
            p.requires_grad = False
        for p in ensemble_model.model_3.parameters():
            p.requires_grad = False
        for p in ensemble_model.out_classifier.parameters():
            p.requires_grad = True
        train_acc, train_obj, models_avg = ensemble_train(
            train_loader,
            ensemble_model,
            criterion,
            optimizer,
            grad_clip=ensemble_config['grad_clip_value'])
        logging.info('train_acc %f', train_acc)
        logging.info('models_avg {}'.format(models_avg))
        lr_scheduler.step()

        test_acc, test_obj, models_avg = ensemble_infer(
            test_loader, ensemble_model, criterion)
        logging.info('test_acc %f', test_acc)
        logging.info('models_avg {}'.format(models_avg))

    if save_model_str:
        # Save the model checkpoint; restore via
        # "ensemble_model.load_state_dict(torch.load(<checkpoint path>))"
        if not os.path.exists(save_model_str):
            os.mkdir(save_model_str)

        torch.save(ensemble_model.state_dict(),
                   os.path.join(save_model_str,
                                'ENSEMBLE_' + time.ctime().replace(' ', '_')))
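
EnsembleModel is defined elsewhere in the project. Below is a rough, hypothetical sketch of a compatible implementation, inferred only from how it is used above (attributes model_1, model_2, model_3 and out_classifier; constructor arguments trained_models, dense_units, out_size); the real class may differ.

import torch
import torch.nn as nn

class EnsembleModel(nn.Module):
    def __init__(self, trained_models, dense_units, out_size):
        super().__init__()
        # the three frozen PC-DARTS backbones trained in earlier runs
        self.model_1, self.model_2, self.model_3 = trained_models
        # trainable head that re-weights the concatenated per-model outputs
        self.out_classifier = nn.Sequential(
            nn.Linear(3 * out_size, dense_units),
            nn.ReLU(inplace=True),
            nn.Linear(dense_units, out_size),
        )

    def forward(self, x):
        # frozen backbones produce per-model predictions; only the head is trained
        o1 = self.model_1(x)
        o2 = self.model_2(x)
        o3 = self.model_3(x)
        return self.out_classifier(torch.cat([o1, o2, o3], dim=1))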