def __init__(self, data_dir='../data', save_model_str='../model/', **kwargs):
    super().__init__(**kwargs)
    data_augmentations = transforms.ToTensor()
    self.save_model_str = save_model_str
    # Load the data here
    self.train_dataset = K49(data_dir, True, data_augmentations)
    self.test_dataset = K49(data_dir, False, data_augmentations)
def test(model_res, batch_size=96):
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    with open(model_res, 'rb') as f:
        model = pickle.load(f)
    data_augmentations = transforms.ToTensor()
    test_dataset = K49('../data', False, data_augmentations)
    # test_dataset = KMNIST('../data', False, data_augmentations)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=batch_size,
                             shuffle=False,
                             num_workers=2)
    criterion = torch.nn.CrossEntropyLoss().to(device)
    score, loss = model.eval_fn(test_loader, device, criterion=criterion)
    plt.show()
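# A minimal, hypothetical invocation of test() above (not part of the original code).
# It assumes a trained model was pickled by the training loop to exp_dir + '/model';
# the path used here is only a placeholder.
# if __name__ == '__main__':
#     test(model_res='../experiment/model', batch_size=96)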
def __init__(self, dataset, **kwargs):
    super().__init__(**kwargs)
    # Device configuration
    self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    batch_size = 96

    # Load the data here
    data_dir = '../data'
    data_augmentations = None
    if data_augmentations is None:
        # You can add any preprocessing/data augmentation you want here
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    if dataset == 'K49':
        train_dataset = K49(data_dir, True, data_augmentations)
    else:
        train_dataset = KMNIST(data_dir, True, data_augmentations)

    self.input_shape = (train_dataset.channels,
                        train_dataset.img_rows,
                        train_dataset.img_cols)
    self.num_classes = train_dataset.n_classes

    # 80/20 train/validation split via subset samplers
    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(0.8 * num_train))
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices[:split])
    validation_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train])
    self.train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                                    batch_size=batch_size,
                                                    sampler=train_sampler,
                                                    num_workers=2)
    self.validation_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                                         batch_size=batch_size,
                                                         sampler=validation_sampler,
                                                         num_workers=2)
def __init__(self, data_dir='../data', save_model_str='../model/', **kwargs):
    super().__init__(**kwargs)
    data_augmentations = transforms.ToTensor()
    self.save_model_str = save_model_str
    # Load the data here
    self.train_dataset = K49(data_dir, True, data_augmentations)
    self.test_dataset = K49(data_dir, False, data_augmentations)

    # DARTS search hyperparameters
    self.data = '../data'
    self.batch_size = 64
    self.learning_rate = 0.025
    self.learning_rate_min = 0.001
    self.momentum = 0.9
    self.weight_decay = 3e-4
    self.report_freq = 2
    self.gpu = 0
    self.epochs = 50
    self.init_channels = 16
    self.layers = 8
    self.model_path = 'saved_models'
    self.cutout = False
    self.cutout_length = 16
    self.drop_path_prob = 0.3
    self.save = 'EXP'
    self.seed = 2
    self.grad_clip = 5
    self.train_portion = 0.5
    self.unrolled = False  # use one-step unrolled validation loss
    self.arch_learning_rate = 3e-4  # learning rate for arch encoding
    self.arch_weight_decay = 1e-3  # weight decay for arch encoding
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
    model = model.cuda()
    utils.load(model, args.model_path)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    data_dir = '../data/kmnist/'
    data_augmentations = transforms.ToTensor()
    # Load the data here
    test_dataset = K49(data_dir, False, data_augmentations)
    test_queue = torch.utils.data.DataLoader(test_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=2)

    model.drop_path_prob = args.drop_path_prob
    test_acc, test_obj = infer(test_queue, model, criterion)
    logging.info('test_acc %f', test_acc)
def train_test(dataset, model_config, data_dir, num_epochs=10, batch_size=50,
               learning_rate=0.001, train_criterion=torch.nn.CrossEntropyLoss,
               model_optimizer=torch.optim.Adam, opti_aux_param=False,
               data_augmentations=None, save_model_str=None):
    '''
    This function is exactly like the train() above. The reason to have a second copy is as follows:
        1) Has no test=True/False parameter since it is assumed that test=True (Train not split into Validation)
        2) Evaluates the Test set at each epoch (hence slower)
        3) Tracks and returns two additional values - train & test loss over epochs
        4) Has no code for cheaper evaluations on subsetted data
    Primarily used to generate results for a given configuration or effectively train & test a model
    :return: Model, Model statistics, confusion matrix, training loss over epochs, test loss over epochs
    '''
    if train_criterion == torch.nn.MSELoss:
        train_criterion = train_criterion(reduction='mean')  # not instantiated until now
    else:
        train_criterion = train_criterion()

    # Device configuration (fixed to cpu as we don't provide GPUs for the project)
    device = torch.device('cpu')  # 'cuda:0' if torch.cuda.is_available() else 'cpu'

    # https://discuss.pytorch.org/t/data-augmentation-in-pytorch/7925/9
    if data_augmentations is not None:
        data_augmentations = transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomApply([transforms.RandomRotation(15),
                                    transforms.Resize((28, 28))],
                                   # transforms.RandomAffine(degrees=15, translate=(0, 0.2),
                                   #                         scale=(0.8, 1.2), shear=10)]
                                   p=model_config['aug_prob']),
            transforms.ToTensor()
        ])
    if data_augmentations is None:
        # We only use ToTensor here as that is all that is needed to make it work
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    if dataset == 'KMNIST':
        train_dataset = KMNIST(data_dir, True, data_augmentations)
        test_dataset = KMNIST(data_dir, False, data_augmentations)
    elif dataset == 'K49':
        train_dataset = K49(data_dir, True, data_augmentations)
        test_dataset = K49(data_dir, False, data_augmentations)
    else:
        raise NotImplementedError

    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    model = ConfigurableNet(model_config,
                            num_classes=train_dataset.n_classes,
                            height=train_dataset.img_rows,
                            width=train_dataset.img_cols,
                            channels=train_dataset.channels).to(device)
    total_model_params = np.sum(p.numel() for p in model.parameters())

    equal_freq = [1 / train_dataset.n_classes for _ in range(train_dataset.n_classes)]
    logging.debug('Train Dataset balanced: {}'.format(np.allclose(train_dataset.class_frequency, equal_freq)))
    logging.debug(' Test Dataset balanced: {}'.format(np.allclose(test_dataset.class_frequency, equal_freq)))

    logging.info('Generated Network:')
    summary(model, (train_dataset.channels, train_dataset.img_rows, train_dataset.img_cols), device='cpu')

    # Train the model
    if model_optimizer == torch.optim.Adam:
        optimizer = model_optimizer(model.parameters(), lr=learning_rate, amsgrad=opti_aux_param)
    elif model_optimizer == torch.optim.SGD:
        optimizer = model_optimizer(model.parameters(), lr=learning_rate, momentum=opti_aux_param)
    else:
        optimizer = model_optimizer(model.parameters(), lr=learning_rate)

    total_step = len(train_loader)
    train_time = time.time()
    epoch_times = []
    track_train_loss = []
    track_test_loss = []
    for epoch in range(num_epochs):
        logging.info('#' * 120)
        epoch_loss = []
        epoch_start_time = time.time()
        for i_batch, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            # Forward -> Backward <- passes
            outputs = model(images)
            if type(train_criterion) == torch.nn.MSELoss:
                one_hot = torch.zeros((len(labels), 10))
                for i, l in enumerate(one_hot):
                    one_hot[i][labels[i]] = 1
                labels = one_hot
            loss = train_criterion(outputs, labels)
            epoch_loss.append(loss.data.numpy())
            optimizer.zero_grad()  # zero out gradients for new minibatch
            loss.backward()
            optimizer.step()

            if (i_batch + 1) % 100 == 0:
                logging.info('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch + 1, num_epochs, i_batch + 1, total_step, loss.item()))
        epoch_times.append(time.time() - epoch_start_time)
        test_score, test_loss, cm = eval(model, test_loader, device, train_criterion)
        track_test_loss.append(test_loss)
        track_train_loss.append(np.mean(epoch_loss))
    train_time = time.time() - train_time

    # Test the model
    logging.info('~+~' * 40)
    model.eval()
    test_time = time.time()
    train_score, train_loss, _ = eval(model, train_loader, device, train_criterion, train=True)
    # if test:
    #     test_score, test_loss = eval(model, test_loader, device, train_criterion)
    # else:
    #     test_score, test_loss = eval(model, validation_loader, device, train_criterion)
    logging.info("Evaluation done")
    test_time = time.time() - test_time

    if save_model_str:
        logging.info("Saving model...")
        # Save the model checkpoint; it can be restored via "model = torch.load(save_model_str)"
        if os.path.exists(save_model_str):
            save_model_str += '_'.join(time.ctime())
        torch.save(model.state_dict(), save_model_str)

    logging.info("Returning from train_test()")
    return train_score, train_loss, test_score, test_loss, train_time, test_time, total_model_params, model, \
        track_train_loss, track_test_loss, cm
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    """
    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)
    """

    data_dir = '../data/kmnist/'
    data_augmentations = transforms.ToTensor()
    # Load the data here
    train_dataset = K49(data_dir, True, data_augmentations)
    # test_dataset = K49(data_dir, False, data_augmentations)

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))
    train_queue = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))

    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
def f_nn(params):
    '''
    Hyperopt objective: builds a model, trains it with the sampled hyperparameters
    and returns the final test loss.

    If you are making your own model from scratch, you could do something like this,
    but be sure of the shapes that you get in (number of in-channels) and also the
    shape you output:

        if params['choice']['layers'] == 'two':
            self.fc1 = nn.Conv2d(channels, reduction, kernel_size=1, padding=0)

    # calling the model function here with the above parameters
    '''
    model = torchModel()
    model.to(device)
    print('Params testing: ', params)

    batch_size = int(params['batch_size'])
    data_augmentations = transforms.ToTensor()
    data_dir = '../data'
    train_dataset = K49(data_dir, True, data_augmentations)
    test_dataset = K49(data_dir, False, data_augmentations)
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
    Train_dataset_loader = train_loader
    Test_dataset_loader = test_loader

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=params['learning_rate'])
    print('chosen learning rate', params['learning_rate'])
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

    epochs = params['epochs']
    steps = 0
    train_losses, test_losses = [], []
    for e in range(epochs):
        correct = 0
        average_precision = []
        running_loss = 0
        model.train()
        exp_lr_scheduler.step()
        for images, labels in Train_dataset_loader:
            images, labels = Variable(images), Variable(labels)
            images = images.to(device)
            optimizer.zero_grad()
            log_ps = model(images)
            loss = criterion(log_ps, labels.to(device))
            loss.backward()
            optimizer.step()
            running_loss += loss.item()  # accumulate the loss batch-wise
        else:
            test_loss = 0
            accuracy = 0
            total = 0
            # Turn off gradients for validation, saves memory and computation
            with torch.no_grad():
                model.eval()
                for images, labels in Test_dataset_loader:
                    images, labels = Variable(images), Variable(labels)
                    images, labels = images.to(device), labels.to(device)
                    ps = model(images)
                    test_loss += criterion(ps, labels)
                    _, predicted = torch.max(ps.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
            train_losses.append(running_loss / len(Train_dataset_loader))
            test_losses.append(test_loss / len(Test_dataset_loader))
            if e == epochs - 1:
                print("Epoch: {}/{}.. ".format(e + 1, epochs),
                      "Training Loss: {:.3f}.. ".format(train_losses[-1]),
                      "Test Loss: {:.3f}.. ".format(test_losses[-1]),
                      "Test Accuracy: {:.3f}".format(correct / total))
    print('Accuracy of the network on the test images: %d %%' % (100 * correct / total))

    import matplotlib.pyplot as plt
    plt.plot(train_losses, label='Training loss')
    plt.plot(test_losses, label='Validation loss')
    plt.legend(frameon=False)

    loss = test_loss / len(Test_dataset_loader)
    return loss.detach().item()
def main(config, genotype, data_dir, num_epochs=10, batch_size=50,
         data_augmentations=None, save_model_str=None, exp_dir=None):
    """
    Training loop for the DARTS-derived network.
    :param config: network config (dict)
    :param genotype: cell genotype used to build the model
    :param data_dir: dataset path (str)
    :param num_epochs: (int)
    :param batch_size: (int)
    :param data_augmentations: List of data augmentations to apply such as rescaling.
        (list[transformations], transforms.Compose[list[transformations]], None)
        If None, only ToTensor is used
    :param save_model_str: path to save the model checkpoint to (str)
    :param exp_dir: experiment directory for dumping the model and history (str)
    :return:
    """
    # Device configuration
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    if data_augmentations is None:
        # You can add any preprocessing/data augmentation you want here
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    train_dataset = K49(data_dir, True, data_augmentations)
    # train_dataset = KMNIST(data_dir, True, data_augmentations)

    # 90/10 train/validation split via subset samplers
    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(0.9 * num_train))
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices[:split])
    validation_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train])

    # Make data batch iterable
    # Could modify the sampler to not uniformly random sample
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              sampler=train_sampler,
                              num_workers=2)
    validation_loader = DataLoader(dataset=train_dataset,
                                   batch_size=batch_size,
                                   sampler=validation_sampler,
                                   num_workers=2)

    ############################################################################
    model_config = {
        'n_cells': config['n_cells'],
        'init_channels': config['init_channels'],
        'drop_path_prob': config['drop_path_prob'],
        'n_hidden_layers': 1,   # config['n_hidden_layers']
        'n_hidden_units': 256,  # config['n_hidden_units']
        'dropout_ratio': 0.2    # config['dropout_ratio']
    }
    model = dartsModel(genotype,
                       model_config,
                       input_shape=(train_dataset.channels,
                                    train_dataset.img_rows,
                                    train_dataset.img_cols),
                       num_classes=train_dataset.n_classes).to(device)
    '''
    model = torchModel(model_config,
                       input_shape=(train_dataset.channels,
                                    train_dataset.img_rows,
                                    train_dataset.img_cols),
                       num_classes=train_dataset.n_classes).to(device)
    '''
    ############################################################################
    total_model_params = sum(p.numel() for p in model.parameters())

    # instantiate optimizer
    optimizer = None
    weight_decay = 0.0
    if config['weight_decay_bool']:
        weight_decay = config['weight_decay']
    lr = config['lr']
    if config['optimizer'] == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    if config['optimizer'] == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=config['sgd_momentum'],
                                    weight_decay=weight_decay)

    # instantiate training criterion
    _, occurences = np.unique(train_dataset.labels, return_counts=True)
    class_weights = torch.FloatTensor(1 / occurences).to(device)
    train_criterion = torch.nn.CrossEntropyLoss().to(device)

    logging.info('Generated Network:')
    summary(model,
            (train_dataset.channels, train_dataset.img_rows, train_dataset.img_cols),
            device='cuda' if torch.cuda.is_available() else 'cpu')

    history = {
        'training': {'loss': list(), 'acc': list()},
        'validation': {'loss': list(), 'acc': list()}
    }

    # Train the model
    for epoch in range(num_epochs):
        logging.info('#' * 50)
        logging.info('Epoch [{}/{}]'.format(epoch + 1, num_epochs))
        model.drop_path_prob = model_config['drop_path_prob'] * epoch / num_epochs

        score, loss = model.train_fn(optimizer, train_criterion, train_loader, device)
        logging.info('Training finished | loss: %f | acc: %f \n' % (loss, score))
        history['training']['loss'].append(loss)
        history['training']['acc'].append(score)

        score, loss = model.eval_fn(validation_loader, device, criterion=train_criterion)
        logging.info('Validation finished | loss: %f | acc: %f \n' % (loss, score))
        history['validation']['loss'].append(loss)
        history['validation']['acc'].append(score)

    if save_model_str:
        # Save the model checkpoint; it can be restored via "model = torch.load(save_model_str)"
        if os.path.exists(save_model_str):
            save_model_str += '_'.join(time.ctime())
        torch.save(model.state_dict(), save_model_str)

    history_res = exp_dir + '/history'
    model_res = exp_dir + '/model'
    with open(model_res, 'wb') as f:
        pickle.dump(model, f)
    with open(history_res, 'wb') as f:
        pickle.dump(history, f)
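# Hypothetical glue code (not in the original): load the genotype that the DARTS
# search dumps to exp_dir + '/arch' and train the derived network with main().
# The config keys mirror the ones read inside main(); the values are placeholders.
# with open(exp_dir + '/arch', 'rb') as f:
#     genotype = pickle.load(f)
# config = {'n_cells': 8, 'init_channels': 16, 'drop_path_prob': 0.2,
#           'weight_decay_bool': True, 'weight_decay': 3e-4,
#           'lr': 0.025, 'optimizer': 'sgd', 'sgd_momentum': 0.9}
# main(config, genotype, '../data', num_epochs=10, batch_size=50, exp_dir=exp_dir)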
def create_run_ensemble(model_state_list, n_layers, grad_clip_value=5, seed=0,
                        num_epochs=20, learning_rate=0.001,
                        init_channels=get('init_channels'),
                        batch_size=get('batch_size'),
                        genotype_class='PCDARTS',
                        data_dir='../data',
                        data_augmentations=None,
                        save_model_str=None):
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    gpu = 'cuda:0'
    np.random.seed(seed)
    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)
    logging.info('gpu device = %s' % gpu)

    if data_augmentations is None:
        # You can add any preprocessing/data augmentation you want here
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    train_dataset = K49(data_dir, True, data_augmentations)
    test_dataset = K49(data_dir, False, data_augmentations)
    # train_dataset = KMNIST(data_dir, True, data_augmentations)
    # test_dataset = KMNIST(data_dir, False, data_augmentations)

    # Make data batch iterable
    # Could modify the sampler to not uniformly random sample
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    genotype = eval("genotypes.%s" % genotype_class)

    # Extract frozen features (and, once, the labels) from every pre-trained model
    dataset = {'trn_features': [], 'test_features': []}
    dims = []
    for i, model_state in enumerate(model_state_list):
        model = Network(init_channels, train_dataset.n_classes, n_layers, genotype)
        model.load_state_dict(torch.load(model_state))
        model.cuda()
        for p in model.parameters():
            p.requires_grad = False

        trn_labels = []
        trn_features = []
        test_labels = []
        test_features = []
        if i == 0:
            for d, la in train_loader:
                o = model(Variable(d.cuda()))
                o = o.view(o.size(0), -1)
                trn_labels.extend(la)
                trn_features.extend(o.cpu().data)
            for d, la in test_loader:
                o = model(Variable(d.cuda()))
                o = o.view(o.size(0), -1)
                test_labels.extend(la)
                test_features.extend(o.cpu().data)
            dataset['trn_labels'] = trn_labels
            dataset['test_labels'] = test_labels
        else:
            for d, la in train_loader:
                o = model(Variable(d.cuda()))
                o = o.view(o.size(0), -1)
                trn_features.extend(o.cpu().data)
            for d, la in test_loader:
                o = model(Variable(d.cuda()))
                o = o.view(o.size(0), -1)
                test_features.extend(o.cpu().data)
        dataset['trn_features'].append(trn_features)
        dims.append(dataset['trn_features'][i][0].size(0))
        dataset['test_features'].append(test_features)

    trn_feat_dset = FeaturesDataset(dataset['trn_features'][0],
                                    dataset['trn_features'][1],
                                    dataset['trn_features'][2],
                                    dataset['trn_labels'])
    test_feat_dset = FeaturesDataset(dataset['test_features'][0],
                                     dataset['test_features'][1],
                                     dataset['test_features'][2],
                                     dataset['test_labels'])
    trn_feat_loader = DataLoader(trn_feat_dset, batch_size=64, shuffle=True)
    test_feat_loader = DataLoader(test_feat_dset, batch_size=64)

    model = EnsembleModel(dims, out_size=train_dataset.n_classes)
    criterion = torch.nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

    for epoch in range(num_epochs):
        epoch_loss, epoch_accuracy = fit(epoch, model, trn_feat_loader, criterion, training=True)
        val_epoch_loss, val_epoch_accuracy = fit(epoch, model, test_feat_loader, criterion, training=False)

    if save_model_str:
        # Save the model checkpoint, can be restored via "model = torch.load(save_model_str)"
        if not os.path.exists(save_model_str):
            os.mkdir(save_model_str)
        torch.save(model.state_dict(), os.path.join(save_model_str, time.ctime()))
def train(dataset, model_config, data_dir, num_epochs=10, batch_size=50,
          learning_rate=0.001, train_criterion=torch.nn.CrossEntropyLoss,
          model_optimizer=torch.optim.Adam, data_augmentations=None,
          save_model_str=None):
    """
    Training loop for configurableNet.
    :param dataset: which dataset to load (str)
    :param model_config: configurableNet config (dict)
    :param data_dir: dataset path (str)
    :param num_epochs: (int)
    :param batch_size: (int)
    :param learning_rate: model optimizer learning rate (float)
    :param train_criterion: Which loss to use during training (torch.nn._Loss)
    :param model_optimizer: Which model optimizer to use during training (torch.optim.Optimizer)
    :param data_augmentations: List of data augmentations to apply such as rescaling.
        (list[transformations], transforms.Compose[list[transformations]], None)
        If None, only ToTensor is used
    :return:
    """
    train_criterion = train_criterion()  # not instantiated until now

    # Device configuration (fixed to cpu as we don't provide GPUs for the project)
    device = torch.device('cpu')  # 'cuda:0' if torch.cuda.is_available() else 'cpu'

    if data_augmentations is None:
        # We only use ToTensor here as that is all that is needed to make it work
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    if dataset == 'KMNIST':
        train_dataset = KMNIST(data_dir, True, data_augmentations)
        test_dataset = KMNIST(data_dir, False, data_augmentations)
    elif dataset == 'K49':
        train_dataset = K49(data_dir, True, data_augmentations)
        test_dataset = K49(data_dir, False, data_augmentations)
    else:
        raise NotImplementedError

    # Make data batch iterable
    # Could modify the sampler to not uniformly random sample
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    model = ConfigurableNet(model_config,
                            num_classes=train_dataset.n_classes,
                            height=train_dataset.img_rows,
                            width=train_dataset.img_cols,
                            channels=train_dataset.channels).to(device)
    total_model_params = np.sum(p.numel() for p in model.parameters())

    equal_freq = [1 / train_dataset.n_classes for _ in range(train_dataset.n_classes)]
    logging.debug('Train Dataset balanced: {}'.format(np.allclose(train_dataset.class_frequency, equal_freq)))
    logging.debug(' Test Dataset balanced: {}'.format(np.allclose(test_dataset.class_frequency, equal_freq)))

    logging.info('Generated Network:')
    summary(model, (train_dataset.channels, train_dataset.img_rows, train_dataset.img_cols), device='cpu')

    # Train the model
    optimizer = model_optimizer(model.parameters(), lr=learning_rate)
    total_step = len(train_loader)
    train_time = time.time()
    epoch_times = []
    for epoch in range(num_epochs):
        logging.info('#' * 120)
        epoch_start_time = time.time()
        for i_batch, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            # Forward -> Backward <- passes
            outputs = model(images)
            loss = train_criterion(outputs, labels)
            optimizer.zero_grad()  # zero out gradients for new minibatch
            loss.backward()
            optimizer.step()

            if (i_batch + 1) % 100 == 0:
                logging.info('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch + 1, num_epochs, i_batch + 1, total_step, loss.item()))
        epoch_times.append(time.time() - epoch_start_time)
    train_time = time.time() - train_time

    # Test the model
    logging.info('~+~' * 40)
    model.eval()
    test_time = time.time()
    train_score = eval(model, train_loader, device, train=True)
    test_score = eval(model, test_loader, device)
    test_time = time.time() - test_time

    if save_model_str:
        # Save the model checkpoint; it can be restored via "model = torch.load(save_model_str)"
        if os.path.exists(save_model_str):
            save_model_str += '_'.join(time.ctime())
        torch.save(model.state_dict(), save_model_str)

    return train_score, test_score, train_time, test_time, total_model_params, model
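# Example usage of train() above (a sketch, not from the original code).
# The model_config keys are assumptions; the real keys depend on ConfigurableNet.
# example_config = {'n_layers': 2, 'n_filters': 32}
# train_score, test_score, train_time, test_time, n_params, model = train(
#     'KMNIST', example_config, '../data', num_epochs=10, batch_size=50,
#     learning_rate=0.001, save_model_str='../model/')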
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(args.seed)
    torch.cuda.set_device('cuda:0')
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = torch.nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, K49_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    data_dir = './data/'
    data_augmentations = transforms.ToTensor()
    # Load the data here
    train_dataset = K49(data_dir, True, data_augmentations)
    test_dataset = K49(data_dir, False, data_augmentations)

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))
    train_queue = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        lr = scheduler.get_lr()[0]
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

    return train_acc, valid_acc
def main(model_config, data_dir, num_epochs=10, batch_size=50, learning_rate=0.001,
         train_criterion=torch.nn.CrossEntropyLoss, model_optimizer=torch.optim.Adam,
         data_augmentations=None, save_model_str=None):
    """
    Training loop for configurableNet.
    :param model_config: network config (dict)
    :param data_dir: dataset path (str)
    :param num_epochs: (int)
    :param batch_size: (int)
    :param learning_rate: model optimizer learning rate (float)
    :param train_criterion: Which loss to use during training (torch.nn._Loss)
    :param model_optimizer: Which model optimizer to use during training (torch.optim.Optimizer)
    :param data_augmentations: List of data augmentations to apply such as rescaling.
        (list[transformations], transforms.Compose[list[transformations]], None)
        If None, only ToTensor is used
    :return:
    """
    # Device configuration
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    if data_augmentations is None:
        # You can add any preprocessing/data augmentation you want here
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    train_dataset = K49(data_dir, True, data_augmentations)
    test_dataset = K49(data_dir, False, data_augmentations)

    # Make data batch iterable
    # Could modify the sampler to not uniformly random sample
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    model = torchModel(model_config,
                       input_shape=(train_dataset.channels,
                                    train_dataset.img_rows,
                                    train_dataset.img_cols),
                       num_classes=train_dataset.n_classes).to(device)
    total_model_params = np.sum(p.numel() for p in model.parameters())

    # instantiate optimizer
    optimizer = model_optimizer(model.parameters(), lr=learning_rate)
    # instantiate training criterion
    train_criterion = train_criterion().to(device)

    logging.info('Generated Network:')
    summary(model,
            (train_dataset.channels, train_dataset.img_rows, train_dataset.img_cols),
            device='cuda' if torch.cuda.is_available() else 'cpu')

    # Train the model
    for epoch in range(num_epochs):
        logging.info('#' * 50)
        logging.info('Epoch [{}/{}]'.format(epoch + 1, num_epochs))
        train_score, train_loss = model.train_fn(optimizer, train_criterion, train_loader, device)
        logging.info('Train accuracy %f', train_score)
        test_score = model.eval_fn(test_loader, device)
        logging.info('Test accuracy %f', test_score)

    if save_model_str:
        # Save the model checkpoint; it can be restored via "model = torch.load(save_model_str)"
        if os.path.exists(save_model_str):
            save_model_str += '_'.join(time.ctime())
        torch.save(model.state_dict(), save_model_str)
def main(exp_dir=None):
    ### LOGGING ###
    if exp_dir is None:
        exp_dir = 'experiment-{}'.format(datetime.now().strftime("%Y%m%d-%H%M%S%f"))
    utils.create_exp_dir(exp_dir)
    # args.d_save = exp_dir + '/darts_search'
    # utils.create_exp_dir(args.d_save, scripts_to_save=glob.glob('*.py'))
    # log_format = '%(asctime)s %(message)s'
    # logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format, datefmt='%m/%d %I:%M:%S %p')
    # fh = logging.FileHandler(os.path.join(args.d_save, 'log.txt'))
    # fh.setFormatter(logging.Formatter(log_format))
    # logging.getLogger().addHandler(fh)
    ###############

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    # np.random.seed(args.d_seed)
    torch.cuda.set_device(args.d_gpu)
    cudnn.benchmark = True
    # torch.manual_seed(args.d_seed)
    cudnn.enabled = True
    # torch.cuda.manual_seed(args.d_seed)
    logging.info('gpu device = %d' % args.d_gpu)
    logging.info("args = %s", args)

    ########
    data_dir = '../data'
    data_augmentations = None
    if data_augmentations is None:
        # You can add any preprocessing/data augmentation you want here
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    if args.d_data == 'K49':
        train_data = K49(data_dir, True, data_augmentations)
        CIFAR_CLASSES = 49
    else:
        train_data = KMNIST(data_dir, True, data_augmentations)
    #########

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.d_init_channels, CIFAR_CLASSES, args.d_layers, criterion)
    model = model.cuda()
    logging.info("Param size = %fMB", utils.count_parameters_in_MB(model))
    logging.info('Total # of params: %d', sum(p.numel() for p in model.parameters()))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.d_learning_rate,
                                momentum=args.d_momentum,
                                weight_decay=args.d_weight_decay)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.d_train_portion * num_train))
    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.d_batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.d_batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.d_epochs), eta_min=args.d_learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.d_epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        if epoch == args.d_epochs - 1:
            architecture_res = exp_dir + '/arch'
            with open(architecture_res, 'wb') as f:
                pickle.dump(genotype, f)

        logging.info(F.softmax(model.alphas_normal, dim=-1))
        logging.info(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)
def train(dataset, model_config, data_dir, num_epochs=10, batch_size=50,
          learning_rate=0.001, train_criterion=torch.nn.CrossEntropyLoss,
          model_optimizer=torch.optim.Adam, opti_aux_param=False,
          data_augmentations=None, save_model_str=None, test=False):
    '''
    Training loop for configurableNet.
    :param dataset: which dataset to load (str)
    :param model_config: configurableNet config (dict)
    :param data_dir: folder dump from where KMNIST/K49 can be loaded
    :param num_epochs: (int)
    :param batch_size: (int)
    :param learning_rate: model optimizer learning rate (float)
    :param train_criterion: Which loss to use during training (torch.nn._Loss)
    :param model_optimizer: Which model optimizer to use during training (torch.optim.Optimizer)
    :param opti_aux_param: auxiliary optimizer parameter (amsgrad for Adam, momentum for SGD)
    :param data_augmentations: List of data augmentations to apply such as rescaling.
        (list[transformations], transforms.Compose[list[transformations]], None)
        If None, only ToTensor is used
    :param save_model_str: Directory to save the model
    :param test: True/False on whether the test set is evaluated or a validation set obtained from the training set
    :return: Model, Model statistics, confusion matrix
    '''
    if train_criterion == torch.nn.MSELoss:
        train_criterion = train_criterion(reduction='mean')  # not instantiated until now
    else:
        train_criterion = train_criterion()

    # Device configuration (fixed to cpu as we don't provide GPUs for the project)
    device = torch.device('cpu')  # 'cuda:0' if torch.cuda.is_available() else 'cpu'

    # Adding Rotation and Shear as transforms for Data Augmentation
    # https://discuss.pytorch.org/t/data-augmentation-in-pytorch/7925/9
    # if data_augmentations is not None:
    #     data_augmentations = transforms.Compose([
    #         transforms.ToPILImage(),
    #         transforms.RandomApply([transforms.RandomRotation(15),
    #                                 # transforms.Resize((28, 28)),
    #                                 transforms.RandomAffine(degrees=15, shear=10)],
    #                                p=model_config['aug_prob']),
    #         transforms.ToTensor()
    #     ])
    if data_augmentations is None:
        # We only use ToTensor here as that is all that is needed to make it work
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    if dataset == 'KMNIST':
        train_dataset = KMNIST(data_dir, True, data_augmentations)
        test_dataset = KMNIST(data_dir, False, data_augmentations)
    elif dataset == 'K49':
        train_dataset = K49(data_dir, True, data_augmentations)
        test_dataset = K49(data_dir, False, data_augmentations)
    else:
        raise NotImplementedError

    # fidelity_limit = 9  # Budget/Epochs under which the data will be sampled
    #
    # Cheap evaluations for low budget (optimistic compromise):
    # samples f_min points from each class, where f_min = # of data points
    # available for the least frequent class.
    #
    # if num_epochs < fidelity_limit:
    #     # Sampling from all classes equally
    #     label_dict = {}
    #     for i in range(len(train_dataset)):
    #         c = train_dataset[i][-1]
    #         if c not in label_dict.keys():
    #             label_dict[c] = [i]
    #         else:
    #             label_dict[c].append(i)
    #     num_classes = len(label_dict.keys())
    #     # Frequency of most under-represented class
    #     f_min = len(train_dataset)
    #     for keys in label_dict.keys():
    #         if len(label_dict[keys]) < f_min:
    #             f_min = len(label_dict[keys])
    #     selected_data = np.array([])
    #     for label in label_dict.keys():
    #         # Samples 2*f_min points from each class (with replacement for classes with fewer data points)
    #         selected_data = np.append(selected_data, np.random.choice(label_dict[label], 2 * f_min))

    # Decides if evaluation is on the Test set or on a Validation set obtained from Train
    if test is False:
        # if num_epochs < fidelity_limit:
        #     dataset_size = len(selected_data)
        #     indices = list(selected_data.astype(int))
        # else:
        dataset_size = len(train_dataset)
        indices = list(range(dataset_size))
        validation_split = 0.3  # Splitting the Training Set into Train-Validation by a 70%-30% split
        split = int(np.floor(validation_split * dataset_size))
        np.random.shuffle(indices)
        train_indices, val_indices = indices[split:], indices[:split]

        # Creating PT data samplers and loaders:
        train_sampler = SubsetRandomSampler(train_indices)
        valid_sampler = SubsetRandomSampler(val_indices)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler)
        validation_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=valid_sampler)
    else:
        # Make data batch iterable
        # Could modify the sampler to not uniformly random sample
        train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
        test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    model = ConfigurableNet(model_config,
                            num_classes=train_dataset.n_classes,
                            height=train_dataset.img_rows,
                            width=train_dataset.img_cols,
                            channels=train_dataset.channels).to(device)
    total_model_params = np.sum(p.numel() for p in model.parameters())

    equal_freq = [1 / train_dataset.n_classes for _ in range(train_dataset.n_classes)]
    logging.debug('Train Dataset balanced: {}'.format(np.allclose(train_dataset.class_frequency, equal_freq)))
    logging.debug(' Test Dataset balanced: {}'.format(np.allclose(test_dataset.class_frequency, equal_freq)))

    logging.info('Generated Network:')
    summary(model, (train_dataset.channels, train_dataset.img_rows, train_dataset.img_cols), device='cpu')

    # Train the model
    if model_optimizer == torch.optim.Adam:
        optimizer = model_optimizer(model.parameters(), lr=learning_rate, amsgrad=opti_aux_param)
    elif model_optimizer == torch.optim.SGD:
        optimizer = model_optimizer(model.parameters(), lr=learning_rate, momentum=opti_aux_param)
    else:
        optimizer = model_optimizer(model.parameters(), lr=learning_rate)

    total_step = len(train_loader)
    train_time = time.time()
    epoch_times = []
    for epoch in range(num_epochs):
        logging.info('#' * 120)
        epoch_start_time = time.time()
        for i_batch, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            # Forward -> Backward <- passes
            outputs = model(images)
            if type(train_criterion) == torch.nn.MSELoss:
                one_hot = torch.zeros((len(labels), 10))
                for i, l in enumerate(one_hot):
                    one_hot[i][labels[i]] = 1
                labels = one_hot
            loss = train_criterion(outputs, labels)
            optimizer.zero_grad()  # zero out gradients for new minibatch
            loss.backward()
            optimizer.step()

            if (i_batch + 1) % 100 == 0:
                logging.info('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch + 1, num_epochs, i_batch + 1, total_step, loss.item()))
        epoch_times.append(time.time() - epoch_start_time)
    train_time = time.time() - train_time

    # Test the model
    logging.info('~+~' * 40)
    model.eval()
    test_time = time.time()
    train_score, train_loss, _ = eval(model, train_loader, device, train_criterion, train=True)
    # Decides if evaluation is on the Test set or on the Validation set obtained from Train
    if test:
        test_score, test_loss, cm = eval(model, test_loader, device, train_criterion)
    else:
        test_score, test_loss, cm = eval(model, validation_loader, device, train_criterion)
    logging.info("Evaluation done")
    test_time = time.time() - test_time

    if save_model_str:
        logging.info("Saving model...")
        # Save the model checkpoint; it can be restored via "model = torch.load(save_model_str)"
        if os.path.exists(save_model_str):
            save_model_str += '_'.join(time.ctime())
        torch.save(model.state_dict(), save_model_str)

    logging.info("Returning from train()")
    return train_score, train_loss, test_score, test_loss, train_time, test_time, total_model_params, model, cm
def train(dataset,
          # model_config,
          old_model,
          data_dir,
          num_epochs=10,
          batch_size=50,
          learning_rate=0.001,
          train_criterion=torch.nn.CrossEntropyLoss,
          model_optimizer=torch.optim.Adam,
          opti_aux_param=False,
          data_augmentations=None,
          save_model_str=None,
          test=False):
    """
    Training loop for configurableNet.
    Enables Transfer Learning by readjusting the output layer to the # of classes in the 'dataset' passed.
    :param dataset: which dataset to load (str)
    :param old_model: trained parent model whose weights are transferred
    :param num_epochs: (int)
    :param batch_size: (int)
    :param learning_rate: model optimizer learning rate (float)
    :param train_criterion: Which loss to use during training (torch.nn._Loss)
    :param model_optimizer: Which model optimizer to use during training (torch.optim.Optimizer)
    :param data_augmentations: List of data augmentations to apply such as rescaling.
        (list[transformations], transforms.Compose[list[transformations]], None)
        If None, only ToTensor is used
    :return:
    """
    if train_criterion == torch.nn.MSELoss:
        train_criterion = train_criterion(reduction='mean')  # not instantiated until now
    else:
        train_criterion = train_criterion()

    # Device configuration (fixed to cpu as we don't provide GPUs for the project)
    device = torch.device('cpu')  # 'cuda:0' if torch.cuda.is_available() else 'cpu'

    # Adding Rotation and Shear as transforms for Data Augmentation
    # https://discuss.pytorch.org/t/data-augmentation-in-pytorch/7925/9
    # if data_augmentations is not None:
    #     print('-+-' * 40)
    #     print("Data Aug happening!")
    #     print('-+-' * 40)
    #     data_augmentations = transforms.Compose([
    #         transforms.ToPILImage(),
    #         transforms.RandomApply([transforms.RandomRotation(15),
    #                                 transforms.Resize((28, 28))],
    #                                # transforms.RandomAffine(degrees=15, translate=(0, 0.2),
    #                                #                         scale=(0.8, 1.2), shear=10)]
    #                                p=0.3),
    #         transforms.ToTensor()
    #     ])
    if data_augmentations is None:
        # We only use ToTensor here as that is all that is needed to make it work
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    if dataset == 'KMNIST':
        train_dataset = KMNIST(data_dir, True, data_augmentations)
        test_dataset = KMNIST(data_dir, False, data_augmentations)
    elif dataset == 'K49':
        train_dataset = K49(data_dir, True, data_augmentations)
        test_dataset = K49(data_dir, False, data_augmentations)
    else:
        raise NotImplementedError

    # Though transfer_learning.py always passes test=True, this condition remains (for parity's sake)
    if test is False:
        dataset_size = len(train_dataset)
        indices = list(range(dataset_size))
        validation_split = 0.3
        split = int(np.floor(validation_split * dataset_size))
        np.random.shuffle(indices)
        train_indices, val_indices = indices[split:], indices[:split]

        # Creating PT data samplers and loaders:
        train_sampler = SubsetRandomSampler(train_indices)
        valid_sampler = SubsetRandomSampler(val_indices)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler)
        validation_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=valid_sampler)
    else:
        # Make data batch iterable
        # Could modify the sampler to not uniformly random sample
        train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
        test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    # Copying incumbent's training parameters
    model_config = old_model.config

    # Rebuilding parent model and assigning learnt weights
    # old_model = old_model.state_dict()
    keys = old_model.state_dict().keys()
    k = []
    for key in keys:
        k.append(key)
        n_classes = len(old_model.state_dict()[key])
    channels = old_model.state_dict()[k[0]].shape[0]

    model = ConfigurableNet(model_config,
                            num_classes=train_dataset.n_classes,
                            height=train_dataset.img_rows,
                            width=train_dataset.img_cols,
                            channels=train_dataset.channels).to(device)

    # Old model weights assigned wherever applicable - new connections at the output layer get random weights
    params1 = old_model.named_parameters()
    params2 = model.named_parameters()
    dict_params2 = dict(params2)
    output_keys = []
    for i, key in enumerate(keys):
        if i >= len(keys) - 2:
            output_keys.append(key)
    for name1, param1 in params1:
        if name1 not in output_keys:
            dict_params2[name1].data.copy_(param1.data)
    model.load_state_dict(collections.OrderedDict(dict_params2))

    total_model_params = np.sum(p.numel() for p in model.parameters())
    equal_freq = [1 / train_dataset.n_classes for _ in range(train_dataset.n_classes)]
    logging.debug('Train Dataset balanced: {}'.format(np.allclose(train_dataset.class_frequency, equal_freq)))
    logging.debug(' Test Dataset balanced: {}'.format(np.allclose(test_dataset.class_frequency, equal_freq)))

    logging.info('Generated Network:')
    summary(model, (train_dataset.channels, train_dataset.img_rows, train_dataset.img_cols), device='cpu')

    # Train the model
    if model_optimizer == torch.optim.Adam:
        optimizer = model_optimizer(model.parameters(), lr=learning_rate, amsgrad=opti_aux_param)
    elif model_optimizer == torch.optim.SGD:
        optimizer = model_optimizer(model.parameters(), lr=learning_rate, momentum=opti_aux_param)
    else:
        optimizer = model_optimizer(model.parameters(), lr=learning_rate)

    total_step = len(train_loader)
    train_time = time.time()
    epoch_times = []
    for epoch in range(num_epochs):
        logging.info('#' * 120)
        epoch_start_time = time.time()
        for i_batch, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            # Forward -> Backward <- passes
            outputs = model(images)
            if type(train_criterion) == torch.nn.MSELoss:
                one_hot = torch.zeros((len(labels), 10))
                for i, l in enumerate(one_hot):
                    one_hot[i][labels[i]] = 1
                labels = one_hot
            loss = train_criterion(outputs, labels)
            optimizer.zero_grad()  # zero out gradients for new minibatch
            loss.backward()
            optimizer.step()

            if (i_batch + 1) % 100 == 0:
                logging.info('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch + 1, num_epochs, i_batch + 1, total_step, loss.item()))
        epoch_times.append(time.time() - epoch_start_time)
    train_time = time.time() - train_time

    # Test the model
    logging.info('~+~' * 40)
    model.eval()
    test_time = time.time()
    train_score, train_loss, _ = eval(model, train_loader, device, train_criterion, train=True)
    if test:
        test_score, test_loss, cm = eval(model, test_loader, device, train_criterion)
    else:
        test_score, test_loss, cm = eval(model, validation_loader, device, train_criterion)
    logging.info("Evaluation done")
    test_time = time.time() - test_time

    if save_model_str:
        logging.info("Saving model...")
        # Save the model checkpoint; it can be restored via "model = torch.load(save_model_str)"
        if os.path.exists(save_model_str):
            save_model_str += '_'.join(time.ctime())
        torch.save(model.state_dict(), save_model_str)

    logging.info("Returning from train()")
    return train_score, train_loss, test_score, test_loss, train_time, test_time, total_model_params, model, cm
def create_run_ensemble(model_description, ensemble_config, seed=get('seed'),
                        num_epochs=20, data_dir='./data',
                        init_channels=get('init_channels'),
                        batch_size=get('batch_size'),
                        genotype_class='PCDARTS',
                        data_augmentations=None,
                        save_model_str=None):
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    gpu = 'cuda:0'
    np.random.seed(seed)
    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)
    logging.info('gpu device = %s' % gpu)

    if data_augmentations is None:
        # You can add any preprocessing/data augmentation you want here
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    train_dataset = K49(data_dir, True, data_augmentations)
    test_dataset = K49(data_dir, False, data_augmentations)
    # train_dataset = KMNIST(data_dir, True, data_augmentations)
    # test_dataset = KMNIST(data_dir, False, data_augmentations)

    # Make data batch iterable
    # Could modify the sampler to not uniformly random sample
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    genotype = eval("genotypes.%s" % genotype_class)

    # Rebuild each trained model from its description and collect it for the ensemble
    trained_models = []
    for i, model_state in enumerate(model_description.keys()):
        model = Network(init_channels,
                        train_dataset.n_classes,
                        model_description[model_state]['config']['n_conv_layers'],
                        genotype)
        model.load_state_dict(torch.load(model_description[model_state]['model_path']))
        model.cuda()
        model.drop_path_prob = model_description[model_state]['config']['drop_path_prob']
        trained_models.append(model)

    ensemble_model = EnsembleModel(trained_models,
                                   dense_units=ensemble_config['dense_units'],
                                   out_size=train_dataset.n_classes)
    ensemble_model = ensemble_model.cuda()
    summary(ensemble_model, input_size=(1, 28, 28))

    criterion = torch.nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    if ensemble_config['optimizer'] == 'sgd':
        optimizer = torch.optim.SGD(ensemble_model.parameters(),
                                    lr=ensemble_config['initial_lr'],
                                    momentum=ensemble_config['sgd_momentum'],
                                    weight_decay=ensemble_config['weight_decay'],
                                    nesterov=ensemble_config['nesterov'])
    else:
        optimizer = get('opti_dict')[ensemble_config['optimizer']](
            ensemble_model.parameters(),
            lr=ensemble_config['initial_lr'],
            weight_decay=ensemble_config['weight_decay'])

    if ensemble_config['lr_scheduler'] == 'Cosine':
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, num_epochs)
    elif ensemble_config['lr_scheduler'] == 'Exponential':
        lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)

    print('Started Training')
    for epoch in range(num_epochs):
        logging.info('epoch %d lr %e', epoch, lr_scheduler.get_lr()[0])
        model.drop_path_prob = ensemble_config['drop_path_prob'] * epoch / num_epochs

        # Only the ensemble's output classifier is trained; the member networks stay frozen
        for p in ensemble_model.model_1.parameters():
            p.requires_grad = False
        for p in ensemble_model.model_2.parameters():
            p.requires_grad = False
        for p in ensemble_model.model_3.parameters():
            p.requires_grad = False
        for p in ensemble_model.out_classifier.parameters():
            p.requires_grad = True

        train_acc, train_obj, models_avg = ensemble_train(
            train_loader, ensemble_model, criterion, optimizer,
            grad_clip=ensemble_config['grad_clip_value'])
        logging.info('train_acc %f', train_acc)
        logging.info('models_avg {}'.format(models_avg))
        lr_scheduler.step()

        test_acc, test_obj, models_avg = ensemble_infer(test_loader, ensemble_model, criterion)
        logging.info('test_acc %f', test_acc)
        logging.info('models_avg {}'.format(models_avg))

    if save_model_str:
        # Save the model checkpoint, can be restored via "model = torch.load(save_model_str)"
        if not os.path.exists(save_model_str):
            os.mkdir(save_model_str)
        torch.save(ensemble_model.state_dict(), os.path.join(save_model_str, time.ctime()))
def run_model(config, seed=0, data_dir='./data', genotype_class='PCDARTS',
              num_epochs=20, batch_size=get('batch_size'),
              init_channels=get('init_channels'),
              train_criterion=torch.nn.CrossEntropyLoss,
              data_augmentations=None, save_model_str=None, **kwargs):
    """
    Training loop for a sampled PC-DARTS network.
    :param config: network and optimizer config (dict)
    :param data_dir: dataset path (str)
    :param num_epochs: (int)
    :param batch_size: (int)
    :param init_channels: initial channel count of the network (int)
    :param train_criterion: Which loss to use during training (torch.nn._Loss)
    :param data_augmentations: List of data augmentations to apply such as rescaling.
        (list[transformations], transforms.Compose[list[transformations]], None)
        If None, only ToTensor is used
    :return: test accuracy
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    gpu = 'cuda:0'
    np.random.seed(seed)
    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)
    logging.info('gpu device = %s' % gpu)
    logging.info("config = %s", config)

    if data_augmentations is None:
        # You can add any preprocessing/data augmentation you want here
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    train_dataset = K49(data_dir, True, data_augmentations)
    test_dataset = K49(data_dir, False, data_augmentations)
    # train_dataset = KMNIST(data_dir, True, data_augmentations)
    # test_dataset = KMNIST(data_dir, False, data_augmentations)

    # Make data batch iterable
    # Could modify the sampler to not uniformly random sample
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    genotype = eval("genotypes.%s" % genotype_class)
    model = Network(init_channels, train_dataset.n_classes, config['n_conv_layers'], genotype)
    model = model.cuda()
    total_model_params = np.sum(p.numel() for p in model.parameters())
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = train_criterion()
    criterion = criterion.cuda()

    if config['optimizer'] == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=config['initial_lr'],
                                    momentum=config['sgd_momentum'],
                                    weight_decay=config['weight_decay'],
                                    nesterov=config['nesterov'])
    else:
        optimizer = get('opti_dict')[config['optimizer']](model.parameters(),
                                                          lr=config['initial_lr'],
                                                          weight_decay=config['weight_decay'])

    if config['lr_scheduler'] == 'Cosine':
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, num_epochs)
    elif config['lr_scheduler'] == 'Exponential':
        lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)

    logging.info('Generated Network:')
    summary(model,
            (train_dataset.channels, train_dataset.img_rows, train_dataset.img_cols),
            device='cuda' if torch.cuda.is_available() else 'cpu')

    for epoch in range(num_epochs):
        lr_scheduler.step()
        logging.info('epoch %d lr %e', epoch, lr_scheduler.get_lr()[0])
        model.drop_path_prob = config['drop_path_prob'] * epoch / num_epochs

        train_acc, train_obj = train(train_loader, model, criterion, optimizer,
                                     grad_clip=config['grad_clip_value'])
        logging.info('train_acc %f', train_acc)

        test_acc, test_obj = infer(test_loader, model, criterion)
        logging.info('test_acc %f', test_acc)

    if save_model_str:
        # Save the model checkpoint, can be restored via "model = torch.load(save_model_str)"
        if os.path.exists(save_model_str):
            save_model_str += '_'.join(time.ctime())
        torch.save(model.state_dict(), save_model_str)

    return test_acc
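# A hypothetical call to run_model() above (not part of the original code).
# The config keys mirror the ones accessed inside the function; the values are placeholders.
# example_config = {
#     'n_conv_layers': 2, 'optimizer': 'sgd', 'initial_lr': 0.025,
#     'sgd_momentum': 0.9, 'weight_decay': 3e-4, 'nesterov': True,
#     'lr_scheduler': 'Cosine', 'drop_path_prob': 0.2, 'grad_clip_value': 5,
# }
# test_acc = run_model(example_config, seed=0, data_dir='./data', num_epochs=20)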
def create_majority_ensemble(model_description, ensemble_config, seed=get('seed'),
                             num_epochs=20, data_dir='./data',
                             init_channels=get('init_channels'),
                             batch_size=get('batch_size'),
                             genotype_class='PCDARTS',
                             data_augmentations=None,
                             save_model_str=None):
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    gpu = 'cuda:0'
    np.random.seed(seed)
    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)
    logging.info('gpu device = %s' % gpu)

    if data_augmentations is None:
        # You can add any preprocessing/data augmentation you want here
        data_augmentations = transforms.ToTensor()
    elif isinstance(data_augmentations, list):
        data_augmentations = transforms.Compose(data_augmentations)
    elif not isinstance(data_augmentations, transforms.Compose):
        raise NotImplementedError

    train_dataset = K49(data_dir, True, data_augmentations)
    test_dataset = K49(data_dir, False, data_augmentations)
    # train_dataset = KMNIST(data_dir, True, data_augmentations)
    # test_dataset = KMNIST(data_dir, False, data_augmentations)

    # Make data batch iterable
    # Could modify the sampler to not uniformly random sample
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    criterion = torch.nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    genotype = eval("genotypes.%s" % genotype_class)

    # Rebuild each trained model from its description; majority voting needs no further training
    trained_models = []
    for i, model_state in enumerate(model_description.keys()):
        model = Network(init_channels,
                        train_dataset.n_classes,
                        model_description[model_state]['config']['n_conv_layers'],
                        genotype)
        model.load_state_dict(torch.load(model_description[model_state]['model_path']))
        model.cuda()
        model.drop_path_prob = model_description[model_state]['config']['drop_path_prob']
        trained_models.append(model)

    ensemble_model = MajorityEnsembleModel(trained_models)

    print('Started Training')
    for epoch in range(num_epochs):
        test_acc, models_avg = majority_predict(test_loader, ensemble_model, criterion, 50)
        logging.info('test_acc %f', test_acc)
        logging.info('models_avg {}'.format(models_avg))

    if save_model_str:
        # Save the model checkpoint, can be restored via "model = torch.load(save_model_str)"
        if not os.path.exists(save_model_str):
            os.mkdir(save_model_str)
        torch.save(ensemble_model.state_dict(), os.path.join(save_model_str, time.ctime()))
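# Shape of the model_description argument expected by both ensemble builders above,
# inferred from the keys they access; paths and values here are illustrative only.
# model_description = {
#     'model_1': {'model_path': './model/model_1.pt',
#                 'config': {'n_conv_layers': 2, 'drop_path_prob': 0.2}},
#     'model_2': {'model_path': './model/model_2.pt',
#                 'config': {'n_conv_layers': 3, 'drop_path_prob': 0.3}},
#     'model_3': {'model_path': './model/model_3.pt',
#                 'config': {'n_conv_layers': 2, 'drop_path_prob': 0.2}},
# }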