def __init__(self, model_description, run_dir, init_channels=get('init_channels'),
             batch_size=get('batch_size'), split=0.8, dataset=K49, **kwargs):
    """Load a set of pre-trained PC-DARTS networks described by *model_description*.

    Args:
        model_description: dict keyed by model id; each value holds
            'model_path' (a saved state dict) and 'config' containing
            'n_conv_layers' and 'drop_path_prob'.
        run_dir: output directory used by the surrounding experiment.
        init_channels: initial channel count passed to each Network.
        batch_size: mini-batch size stored for later use.
        split: train/validation split fraction (stored, not applied here).
        dataset: dataset class taking (root, train_flag, transform).
        **kwargs: forwarded to the superclass; an optional 'seed' entry
            is also stored on the instance (defaults to 0).

    Exits the process when no CUDA device is available, matching the
    sibling ensemble builders in this module.
    """
    super().__init__(**kwargs)
    self.init_channels = init_channels
    self.run_dir = run_dir
    data_augmentations = transforms.ToTensor()
    self.train_dataset = dataset('./data', True, data_augmentations)
    self.test_dataset = dataset('./data', False, data_augmentations)
    self.n_classes = self.train_dataset.n_classes
    self.split = split
    self.batch_size = batch_size
    # dict.get covers both the present and absent cases in one call.
    self.seed = kwargs.get('seed', 0)
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    # getattr instead of eval: same attribute lookup, no string execution.
    genotype = getattr(genotypes, 'PCDARTS')
    trained_models = []
    for model_state in model_description.keys():
        model = Network(
            init_channels,
            self.train_dataset.n_classes,
            model_description[model_state]['config']['n_conv_layers'],
            genotype)
        model.load_state_dict(
            torch.load(model_description[model_state]['model_path']))
        model.cuda()
        model.drop_path_prob = model_description[model_state]['config'][
            'drop_path_prob']
        trained_models.append(model)
    self.trained_models = trained_models
def create_run_ensemble(model_state_list, n_layers, grad_clip_value=5, seed=0,
                        num_epochs=20, learning_rate=0.001,
                        init_channels=get('init_channels'),
                        batch_size=get('batch_size'),
                        genotype_class='PCDARTS',
                        data_dir='./data', data_augmentations=None,
                        save_model_str=None):
    """Build and train a stacking ensemble from pre-trained networks.

    Each state dict in *model_state_list* is loaded into a frozen Network
    used as a fixed feature extractor; the extracted features feed a small
    trainable EnsembleModel head optimized with SGD.

    Args:
        model_state_list: iterable of paths to saved Network state dicts
            (the FeaturesDataset construction below expects exactly three).
        n_layers: number of conv layers for every reconstructed Network.
        grad_clip_value: kept for interface compatibility (currently unused).
        seed: RNG seed for numpy and torch.
        num_epochs: training epochs for the ensemble head.
        learning_rate: SGD learning rate for the head.
        init_channels: initial channel count for each Network.
        batch_size: loader batch size during feature extraction.
        genotype_class: genotype name looked up in `genotypes`.
        data_dir: root of the K49 data (new parameter; the old code read an
            undefined name here and raised NameError).
        data_augmentations: None, a list of transforms, or a Compose (new
            parameter; previously an undefined name).
        save_model_str: directory to save the trained head into, or None
            (new parameter; previously an undefined name).
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    gpu = 'cuda:0'
    np.random.seed(seed)
    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)
    logging.info('gpu device = %s' % gpu)

    if data_augmentations is None:
        # You can add any preprocessing/data augmentation you want here
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        # Bug fix: the old check was isinstance(type(...), list), which is
        # always False, so list inputs fell through to NotImplementedError.
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    train_dataset = K49(data_dir, True, data_augmentations)
    test_dataset = K49(data_dir, False, data_augmentations)
    # Could modify the sampler to not uniformly random sample
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=batch_size,
                             shuffle=False)

    # getattr instead of eval: same attribute lookup, no string execution.
    genotype = getattr(genotypes, genotype_class)

    dataset = {'trn_features': [], 'test_features': []}
    dims = []
    for i, model_state in enumerate(model_state_list):
        model = Network(init_channels, train_dataset.n_classes, n_layers,
                        genotype)
        model.load_state_dict(torch.load(model_state))
        model.cuda()
        for p in model.parameters():
            p.requires_grad = False
        trn_features, trn_labels = _extract_features(model, train_loader)
        test_features, test_labels = _extract_features(model, test_loader)
        if i == 0:
            # Labels are identical for every extractor; store them once.
            dataset['trn_labels'] = trn_labels
            dataset['test_labels'] = test_labels
        # Bug fix: the old code dropped the first model's features entirely
        # and .extend()-ed dict entries that were never initialised.
        dataset['trn_features'].append(trn_features)
        dataset['test_features'].append(test_features)
        # Bug fix: dims.extend(<int>) raised TypeError; append the width.
        dims.append(trn_features[0].size(0))

    trn_feat_dset = FeaturesDataset(dataset['trn_features'][0],
                                    dataset['trn_features'][1],
                                    dataset['trn_features'][2],
                                    dataset['trn_labels'])
    test_feat_dset = FeaturesDataset(dataset['test_features'][0],
                                     dataset['test_features'][1],
                                     dataset['test_features'][2],
                                     dataset['test_labels'])
    trn_feat_loader = DataLoader(trn_feat_dset, batch_size=64, shuffle=True)
    # Bug fix: the old code referenced an undefined 'val_feat_dset'.
    test_feat_loader = DataLoader(test_feat_dset, batch_size=64)

    model = EnsembleModel(dims, out_size=train_dataset.n_classes)
    # Bug fix: CrossEntropyLoss lives in torch.nn (and must be instantiated);
    # SGD lives in torch.optim — the old paths torch.nn.optim.* do not exist.
    criterion = torch.nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate,
                                momentum=0.9)
    for epoch in range(num_epochs):
        # 'critierion' typo fixed in the training call below.
        epoch_loss, epoch_accuracy = fit(epoch, model, trn_feat_loader,
                                         criterion, training=True)
        val_epoch_loss, val_epoch_accuracy = fit(epoch, model,
                                                 test_feat_loader,
                                                 criterion, training=False)

    if save_model_str:
        # Save the model checkpoint, can be restored via
        # "model = torch.load(save_model_str)"
        if not os.path.exists(save_model_str):
            os.mkdir(save_model_str)
        torch.save(model.state_dict(),
                   os.path.join(save_model_str, time.ctime()))


def _extract_features(model, loader):
    """Run every batch of *loader* through *model*; return (features, labels).

    Features are flattened per-sample tensors moved to CPU; labels are the
    raw targets in matching order.
    """
    features, labels = [], []
    # Extractors are frozen, so skip autograd bookkeeping entirely.
    with torch.no_grad():
        for data, target in loader:
            out = model(data.cuda())
            out = out.view(out.size(0), -1)
            features.extend(out.cpu().data)
            labels.extend(target)
    return features, labels
def create_majority_ensemble(model_description, ensemble_config,
                             seed=get('seed'), num_epochs=20,
                             data_dir='./data',
                             init_channels=get('init_channels'),
                             batch_size=get('batch_size'),
                             genotype_class='PCDARTS',
                             data_augmentations=None, save_model_str=None):
    """Evaluate a majority-voting ensemble of pre-trained networks on K49.

    Args:
        model_description: dict keyed by model id; each value holds
            'model_path' (a saved state dict) and 'config' containing
            'n_conv_layers' and 'drop_path_prob'.
        ensemble_config: kept for interface compatibility (unused here).
        seed: RNG seed for numpy and torch.
        num_epochs: number of evaluation passes (see NOTE in the loop).
        data_dir: root directory of the K49 data.
        init_channels: initial channel count for each base Network.
        batch_size: mini-batch size for the data loaders.
        genotype_class: genotype name looked up in `genotypes`.
        data_augmentations: None, a list of transforms, or a Compose.
        save_model_str: directory to store the checkpoint in, or None.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    gpu = 'cuda:0'
    np.random.seed(seed)
    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)
    logging.info('gpu device = %s' % gpu)

    if data_augmentations is None:
        # You can add any preprocessing/data augmentation you want here
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        # Bug fix: the old check was isinstance(type(...), list), which is
        # always False, so list inputs fell through to NotImplementedError.
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    train_dataset = K49(data_dir, True, data_augmentations)
    test_dataset = K49(data_dir, False, data_augmentations)
    # NOTE(review): the train loader is built but never consumed — majority
    # voting has nothing to train; kept for parity with the sibling builders.
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=batch_size,
                             shuffle=False)

    criterion = torch.nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    # getattr instead of eval: same attribute lookup, no string execution.
    genotype = getattr(genotypes, genotype_class)

    trained_models = []
    for model_state in model_description.keys():
        model = Network(
            init_channels,
            train_dataset.n_classes,
            model_description[model_state]['config']['n_conv_layers'],
            genotype)
        model.load_state_dict(
            torch.load(model_description[model_state]['model_path']))
        model.cuda()
        model.drop_path_prob = model_description[model_state]['config'][
            'drop_path_prob']
        trained_models.append(model)

    ensemble_model = MajorityEnsembleModel(trained_models)
    print('Started Training')
    # NOTE(review): nothing changes between iterations, so each epoch
    # evaluates the identical ensemble; loop kept to preserve the logs.
    for epoch in range(num_epochs):
        test_acc, models_avg = majority_predict(test_loader, ensemble_model,
                                                criterion, 50)
        logging.info('test_acc %f', test_acc)
        logging.info('models_avg {}'.format(models_avg))

    if save_model_str:
        # Save the model checkpoint, can be restored via
        # "model = torch.load(save_model_str)"
        if not os.path.exists(save_model_str):
            os.mkdir(save_model_str)
        # Bug fix: a bare os.path.join(save_model_str, 'ENSEMBLE') call
        # discarded its result; the checkpoint is saved under the same
        # timestamped name as before.
        torch.save(ensemble_model.state_dict(),
                   os.path.join(save_model_str, time.ctime()))
def create_run_ensemble(model_description, ensemble_config, seed=get('seed'),
                        num_epochs=20, data_dir='./data',
                        init_channels=get('init_channels'),
                        batch_size=get('batch_size'),
                        genotype_class='PCDARTS',
                        data_augmentations=None, save_model_str=None):
    """Train a learned ensemble head over frozen pre-trained networks.

    NOTE(review): this redefines `create_run_ensemble` and shadows the
    earlier function of the same name in this module at import time —
    confirm which definition is intended and rename the other.

    Args:
        model_description: dict keyed by model id; each value holds
            'model_path' (a saved state dict) and 'config' containing
            'n_conv_layers' and 'drop_path_prob'.
        ensemble_config: hyperparameters — 'dense_units', 'optimizer',
            'initial_lr', 'sgd_momentum', 'weight_decay', 'nesterov',
            'lr_scheduler', 'drop_path_prob', 'grad_clip_value'.
        seed: RNG seed for numpy and torch.
        num_epochs: training epochs for the ensemble head.
        data_dir: root directory of the K49 data.
        init_channels: initial channel count for each base Network.
        batch_size: mini-batch size for both loaders.
        genotype_class: genotype name looked up in `genotypes`.
        data_augmentations: None, a list of transforms, or a Compose.
        save_model_str: directory to store the checkpoint in, or None.

    Raises:
        ValueError: if ensemble_config['lr_scheduler'] is unrecognised.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    gpu = 'cuda:0'
    np.random.seed(seed)
    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)
    logging.info('gpu device = %s' % gpu)

    if data_augmentations is None:
        # You can add any preprocessing/data augmentation you want here
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        # Bug fix: the old check was isinstance(type(...), list), which is
        # always False, so list inputs fell through to NotImplementedError.
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    train_dataset = K49(data_dir, True, data_augmentations)
    test_dataset = K49(data_dir, False, data_augmentations)
    # Could modify the sampler to not uniformly random sample
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=batch_size,
                             shuffle=False)

    # getattr instead of eval: same attribute lookup, no string execution.
    genotype = getattr(genotypes, genotype_class)
    trained_models = []
    for model_state in model_description.keys():
        model = Network(
            init_channels,
            train_dataset.n_classes,
            model_description[model_state]['config']['n_conv_layers'],
            genotype)
        model.load_state_dict(
            torch.load(model_description[model_state]['model_path']))
        model.cuda()
        model.drop_path_prob = model_description[model_state]['config'][
            'drop_path_prob']
        trained_models.append(model)

    ensemble_model = EnsembleModel(trained_models,
                                   dense_units=ensemble_config['dense_units'],
                                   out_size=train_dataset.n_classes)
    ensemble_model = ensemble_model.cuda()
    summary(ensemble_model, input_size=(1, 28, 28))

    criterion = torch.nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    # Freeze the base networks once, up front; only the classifier head is
    # trained (the old code re-froze them every epoch to the same effect).
    base_models = (ensemble_model.model_1, ensemble_model.model_2,
                   ensemble_model.model_3)
    for base in base_models:
        for p in base.parameters():
            p.requires_grad = False
    for p in ensemble_model.out_classifier.parameters():
        p.requires_grad = True

    # Bug fix: the optimizer previously received `model.parameters()` — the
    # last *base* network, whose weights are frozen — so the head was never
    # optimized. Optimize the ensemble's own parameters instead.
    if ensemble_config['optimizer'] == 'sgd':
        optimizer = torch.optim.SGD(
            ensemble_model.parameters(),
            lr=ensemble_config['initial_lr'],
            momentum=ensemble_config['sgd_momentum'],
            weight_decay=ensemble_config['weight_decay'],
            nesterov=ensemble_config['nesterov'])
    else:
        optimizer = get('opti_dict')[ensemble_config['optimizer']](
            ensemble_model.parameters(),
            lr=ensemble_config['initial_lr'],
            weight_decay=ensemble_config['weight_decay'])

    if ensemble_config['lr_scheduler'] == 'Cosine':
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, num_epochs)
    elif ensemble_config['lr_scheduler'] == 'Exponential':
        lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer,
                                                              gamma=0.1)
    else:
        # Bug fix: an unknown scheduler previously left lr_scheduler unbound
        # and crashed later with a confusing NameError.
        raise ValueError(
            'unknown lr_scheduler: %s' % ensemble_config['lr_scheduler'])

    print('Started Training')
    for epoch in range(num_epochs):
        # get_last_lr() replaces the deprecated get_lr().
        logging.info('epoch %d lr %e', epoch, lr_scheduler.get_last_lr()[0])
        # Bug fix: the drop-path schedule was applied to a leftover local
        # ('model', the last network loaded above), leaving the ensemble's
        # base networks untouched; apply it to each of them instead.
        drop_prob = ensemble_config['drop_path_prob'] * epoch / num_epochs
        for base in base_models:
            base.drop_path_prob = drop_prob

        train_acc, train_obj, models_avg = ensemble_train(
            train_loader, ensemble_model, criterion, optimizer,
            grad_clip=ensemble_config['grad_clip_value'])
        logging.info('train_acc %f', train_acc)
        logging.info('models_avg {}'.format(models_avg))
        lr_scheduler.step()

        test_acc, test_obj, models_avg = ensemble_infer(
            test_loader, ensemble_model, criterion)
        logging.info('test_acc %f', test_acc)
        logging.info('models_avg {}'.format(models_avg))

    if save_model_str:
        # Save the model checkpoint, can be restored via
        # "model = torch.load(save_model_str)"
        if not os.path.exists(save_model_str):
            os.mkdir(save_model_str)
        torch.save(ensemble_model.state_dict(),
                   os.path.join(save_model_str, time.ctime()))