def main():
    """Train a binarized MLP on MNIST with the BayesBiNN, STE or Adam optimizer.

    Parses command-line options, builds MNIST train/val/test loaders (with an
    optional random train/validation split), constructs the requested model and
    optimizer, runs train_model() and saves the training history to disk.

    Raises:
        ValueError: on an invalid --lr-decay, unknown --model, unknown --optim
            or unknown --criterion.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')

    # Model parameters
    parser.add_argument('--model', type=str, default='MLPBinaryConnect',
                        help='Model name: MLPBinaryNet, MLPBinaryConnect_STE')
    parser.add_argument('--bnmomentum', type=float, default=0.15,
                        help='BN layer momentum value')

    # Optimization parameters
    parser.add_argument('--optim', type=str, default='BayesBiNN',
                        help='Optimizer: BayesBiNN, STE, or Adam')
    parser.add_argument('--val-split', type=float, default=0.1,
                        help='Random validation set ratio')
    parser.add_argument('--criterion', type=str, default='cross-entropy',
                        help='loss funcion: square-hinge or cross-entropy')
    parser.add_argument('--batch-size', type=int, default=100, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument(
        '--train-samples', type=int, default=1, metavar='N',
        help='number of Monte Carlo samples used in BayesBiNN (default: 1)')
    parser.add_argument(
        '--test-samples', type=int, default=0, metavar='N',
        help='number of Monte Carlo samples used in evaluation for BayesBiNN (default: 1), if 0, point estimate using mean'
             'is applied, which is similar to the Bop optimizer')
    parser.add_argument('--epochs', type=int, default=500, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=1e-4, metavar='LR',
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--lr-end', type=float, default=1e-16, metavar='LR-end',
                        help='learning rate (default: 0.01)')
    parser.add_argument(
        '--lr-decay', type=float, default=0.9, metavar='LR-decay',
        help='learning rated decay factor for each epoch (default: 0.9)')
    parser.add_argument('--decay-steps', type=int, default=1, metavar='N',
                        help='LR rate decay steps (default: 1)')
    parser.add_argument('--momentum', type=float, default=0.0, metavar='M',
                        help='BayesBiNN momentum (default: 0.0)')
    parser.add_argument('--data-augmentation', action='store_true', default=False,
                        help='Enable data augmentation')

    # Logging parameters
    parser.add_argument(
        '--log-interval', type=int, default=500, metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    parser.add_argument('--experiment-id', type=int, default=0,
                        help='Experiment ID for log files (int)')

    # Computation parameters
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--lrschedular', type=str, default='Cosine',
                        help='Mstep,Expo,Cosine')
    parser.add_argument('--drop-prob', type=float, default=0.2,
                        help='dropout rate')
    parser.add_argument('--trainset_scale', type=int, default=1,
                        help='scale of training set')
    parser.add_argument(
        '--lamda', type=float, default=10, metavar='lamda-init',
        help='initial mean value of the natural parameter lamda(default: 10)')
    parser.add_argument(
        '--lamda-std', type=float, default=0, metavar='lamda-init',
        help='linitial std value of the natural parameter lamda(default: 0)')
    parser.add_argument('--temperature', type=float, default=1e-10,
                        metavar='temperature',
                        help='temperature for BayesBiNN (default: 1e-8)')
    parser.add_argument('--kl-reweight', type=float, default=1.0,
                        metavar='min temperature',
                        help='initial temperature for BayesBiNN (default: 1)')
    parser.add_argument(
        '--bn-affine', type=float, default=0, metavar='bn-affine',
        help='whether there is bn learnable parameters, 1: learnable, 0: no (default: 0)')

    args = parser.parse_args()

    if args.model == 'MLPBinaryConnect_STE':
        args.optim = 'STE'  # in this case, only STE optimizer is used

    # BUG FIX: the old message complained about the end learning rate, but the
    # condition tests the per-epoch decay factor.
    if args.lr_decay > 1:
        raise ValueError(
            'The learning rate decay factor should not be larger than 1!!')

    args.use_cuda = not args.no_cuda and torch.cuda.is_available()

    # Seed both torch and numpy so each experiment id is reproducible.
    torch.manual_seed(args.seed + args.experiment_id)
    np.random.seed(args.seed + args.experiment_id)

    now = time.strftime("%Y_%m_%d_%H_%M_%S",
                        time.localtime(time.time()))  # to avoid overwrite
    args.out_dir = os.path.join(
        './outputs',
        'mnist_{}_{}_lr{}_{}_id{}'.format(args.model, args.optim, args.lr, now,
                                          args.experiment_id))
    os.makedirs(args.out_dir, exist_ok=True)

    # Persist the full run configuration next to the outputs.
    config_save_path = os.path.join(
        args.out_dir, 'configs', 'config_{}.json'.format(args.experiment_id))
    os.makedirs(os.path.dirname(config_save_path), exist_ok=True)
    with open(config_save_path, 'w') as f:
        json.dump(args.__dict__, f, indent=2)

    args.device = torch.device("cuda" if args.use_cuda else "cpu")
    print('Running on', args.device)
    print('===========================')
    for key, val in vars(args).items():
        print('{}: {}'.format(key, val))
    print('===========================\n')

    # Data augmentation for MNIST.
    # BUG FIX: RandomCrop used size 32, but MNIST images are 28x28 and the
    # model consumes in_features = 28*28; crop back to 28 after padding.
    if args.data_augmentation:
        transform_train = transforms.Compose([
            transforms.RandomCrop(28, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, )),
        ])
    else:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
    transform_test = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])

    # Defining the datasets
    kwargs = {
        'num_workers': 2,
        'pin_memory': True,
        'drop_last': True
    } if args.use_cuda else {}
    train_dataset = datasets.MNIST('./data', train=True, download=True,
                                   transform=transform_train)
    if args.val_split > 0 and args.val_split < 1:
        # The validation set reuses the training images but with the
        # deterministic test transform.
        val_dataset = datasets.MNIST('./data', train=True, download=True,
                                     transform=transform_test)
        num_train = len(train_dataset)
        indices = list(range(num_train))
        split = int(np.floor(args.val_split * num_train))
        np.random.shuffle(indices)
        train_idx, val_idx = indices[split:], indices[:split]
        train_sampler = SubsetRandomSampler(train_idx)
        val_sampler = SubsetRandomSampler(val_idx)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   sampler=train_sampler,
                                                   **kwargs)
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=args.batch_size,
                                                 sampler=val_sampler,
                                                 **kwargs)
        print('{} train and {} validation datapoints.'.format(
            len(train_loader.sampler), len(val_loader.sampler)))
    else:
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   **kwargs)
        val_loader = None
        print('{} train and {} validation datapoints.'.format(
            len(train_loader.sampler), 0))

    test_dataset = datasets.MNIST('./data', train=False,
                                  transform=transform_test)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.test_batch_size,
                                              shuffle=True,
                                              **kwargs)
    print('{} test datapoints.\n'.format(len(test_loader.sampler)))

    # Defining the model
    in_features, out_features = 28 * 28, 10
    num_units = 2048
    if args.model == 'MLPBinaryConnect':
        model = MLPBinaryConnect(in_features, out_features, num_units,
                                 eps=1e-4, drop_prob=args.drop_prob,
                                 momentum=args.bnmomentum,
                                 batch_affine=(args.bn_affine == 1))
    elif args.model == 'MLPBinaryConnect_STE':
        model = MLPBinaryConnect_STE(in_features, out_features, num_units,
                                     eps=1e-4, drop_prob=args.drop_prob,
                                     momentum=args.bnmomentum,
                                     batch_affine=(args.bn_affine == 1))
        args.optim = 'STE'
    else:
        # BUG FIX: the old message listed model names that do not exist here.
        raise ValueError(
            'Please select a network out of {MLPBinaryConnect, MLPBinaryConnect_STE}')
    print(model)
    model = model.to(args.device)

    # Defining the optimizer
    if args.optim == 'Adam' or args.optim == 'STE':
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    elif args.optim == 'BayesBiNN':
        effective_trainsize = len(train_loader.sampler) * args.trainset_scale
        optimizer = BayesBiNN(model,
                              lamda_init=args.lamda,
                              lamda_std=args.lamda_std,
                              temperature=args.temperature,
                              train_set_size=effective_trainsize,
                              lr=args.lr,
                              betas=args.momentum,
                              num_samples=args.train_samples,
                              reweight=args.kl_reweight)
    else:
        # BUG FIX: an unknown --optim previously fell through and raised a
        # NameError on the first use of `optimizer`.
        raise ValueError(
            'Please select an optimizer in {BayesBiNN, STE, Adam}')

    # Defining the criterion
    if args.criterion == 'square-hinge':
        criterion = SquaredHingeLoss(
        )  # use the squared hinge loss for MNIST dataset
    elif args.criterion == 'cross-entropy':
        criterion = nn.CrossEntropyLoss(
        )  # this loss depends on the model output, remember to change the model output
    else:
        raise ValueError(
            'Please select loss criterion in {square-hinge, cross-entropy}')

    # Training the model
    start = time.time()
    results = train_model(args, model,
                          [train_loader, val_loader, test_loader], criterion,
                          optimizer)
    model, train_loss, train_acc, val_loss, val_acc, test_loss, test_acc = results
    save_train_history(args, train_loss, train_acc, val_loss, val_acc,
                       test_loss, test_acc)
    # plot_result(args, train_loss, train_acc, test_loss, test_acc)

    time_total = timeSince(start)
    print('Task completed in {:.0f}m {:.0f}s'.format(time_total // 60,
                                                     time_total % 60))
def main():
    """Train a binary-connect VGG on CIFAR-10 with the STE or Adam optimizer.

    Parses command-line options, builds CIFAR-10 train/val/test loaders (with
    an optional random train/validation split), constructs the requested model
    (wrapped in DataParallel across all visible GPUs) and optimizer, runs
    train_model() and saves the training history to disk.

    Raises:
        ValueError: on an invalid --lr-decay, unknown --model, unknown --optim
            or unknown --criterion.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch Cifar10 Example')

    # Model parameters
    parser.add_argument(
        '--model', type=str, default='VGGBinaryConnect',
        help='Model name: VGGBinaryConnect, VGGBinaryConnect_M1')
    parser.add_argument('--bnmomentum', type=float, default=0.2,
                        help='BN layer momentum value')

    # Optimization parameters
    parser.add_argument('--optim', type=str, default='STE',
                        help='Optimizer: STE, or Adam(default : STE)')
    parser.add_argument('--val-split', type=float, default=0.1,
                        help='Random validation set ratio(default : 0.1)')
    parser.add_argument(
        '--criterion', type=str, default='cross-entropy',
        help='loss funcion: square-hinge or cross-entropy(default : cross-entropy)')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=1e-4, metavar='LR',
                        help='learning rate (default: 1e-4)')
    parser.add_argument('--lr-end', type=float, default=1e-16, metavar='LR-end',
                        help='learning rate (default: 1e-16)')
    parser.add_argument(
        '--lr-decay', type=float, default=0.9, metavar='LR-decay',
        help='learning rated decay factor for each epoch (default: 0.9)')
    parser.add_argument('--decay-steps', type=int, default=1, metavar='N',
                        help='LR rate decay steps (default: 1)')
    parser.add_argument('--momentum', type=float, default=0.0, metavar='M',
                        help='SGD momentum (default: 0.0)')
    parser.add_argument('--data-augmentation', action='store_true', default=False,
                        help='Enable data augmentation')

    # Logging parameters
    parser.add_argument(
        '--log-interval', type=int, default=500, metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    parser.add_argument('--experiment-id', type=int, default=0,
                        help='Experiment ID for log files (int)')

    # Computation parameters
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--lrschedular', type=str, default='Cosine',
                        help='Mstep,Expo,Cosine')
    parser.add_argument('--drop-prob', type=float, default=0.2,
                        help='dropout rate')
    parser.add_argument('--trainset_scale', type=int, default=1,
                        help='scale of training set')
    parser.add_argument(
        '--lamda', type=float, default=10, metavar='lamda-init',
        help='initial mean value of the natural parameter lamda(default: 10)')
    parser.add_argument(
        '--lamda-std', type=float, default=0, metavar='lamda-init',
        help='linitial std value of the natural parameter lamda(default: 0)')
    parser.add_argument(
        '--bn-affine', type=float, default=0, metavar='bn-affine',
        help='whether there is bn learnable parameters, 1: learnable, 0: no (default: 0)')

    args = parser.parse_args()

    # BUG FIX: the old message complained about the end learning rate, but the
    # condition tests the per-epoch decay factor.
    if args.lr_decay > 1:
        raise ValueError(
            'The learning rate decay factor should not be larger than 1!!')

    args.use_cuda = not args.no_cuda and torch.cuda.is_available()

    # Expose every visible GPU for DataParallel training.
    ngpus_per_node = torch.cuda.device_count()
    gpu_num = list(range(ngpus_per_node))
    # BUG FIX: the old call passed a %-style format string straight to
    # print(), which printed the literal tuple instead of the count.
    print("Number of GPUs: {}".format(ngpus_per_node))
    gpu_devices = ','.join(str(gpu_id) for gpu_id in gpu_num)
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_devices
    if ngpus_per_node > 0:
        print("Use GPU: {} for training".format(gpu_devices))

    # Seed both torch and numpy so each experiment id is reproducible.
    torch.manual_seed(args.seed + args.experiment_id)
    np.random.seed(args.seed + args.experiment_id)

    now = time.strftime("%Y_%m_%d_%H_%M_%S",
                        time.localtime(time.time()))  # to avoid overwrite
    # BUG FIX: the output directory was prefixed 'mnist_' in this CIFAR-10
    # script (copy-paste from the MNIST runner).
    args.out_dir = os.path.join(
        './outputs',
        'cifar10_{}_{}_lr{}_{}_id{}'.format(args.model, args.optim, args.lr,
                                            now, args.experiment_id))
    os.makedirs(args.out_dir, exist_ok=True)

    # Persist the full run configuration next to the outputs.
    config_save_path = os.path.join(
        args.out_dir, 'configs', 'config_{}.json'.format(args.experiment_id))
    os.makedirs(os.path.dirname(config_save_path), exist_ok=True)
    with open(config_save_path, 'w') as f:
        json.dump(args.__dict__, f, indent=2)

    args.device = torch.device("cuda" if args.use_cuda else "cpu")
    print('Running on', args.device)
    print('===========================')
    for key, val in vars(args).items():
        print('{}: {}'.format(key, val))
    print('===========================\n')

    # Data augmentation for CIFAR-10 (32x32 RGB images).
    if args.data_augmentation:
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),
        ])
    else:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),
        ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    # Defining the datasets
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.use_cuda else {}
    train_dataset = datasets.CIFAR10('./data', train=True, download=True,
                                     transform=transform_train)
    if args.val_split > 0 and args.val_split < 1:
        # The validation set reuses the training images but with the
        # deterministic test transform.
        val_dataset = datasets.CIFAR10('./data', train=True, download=True,
                                       transform=transform_test)
        num_train = len(train_dataset)
        indices = list(range(num_train))
        split = int(np.floor(args.val_split * num_train))
        np.random.shuffle(indices)
        train_idx, val_idx = indices[split:], indices[:split]
        train_sampler = SubsetRandomSampler(train_idx)
        val_sampler = SubsetRandomSampler(val_idx)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   sampler=train_sampler,
                                                   **kwargs)
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=args.batch_size,
                                                 sampler=val_sampler,
                                                 **kwargs)
        print('{} train and {} validation datapoints.'.format(
            len(train_loader.sampler), len(val_loader.sampler)))
    else:
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   **kwargs)
        val_loader = None
        print('{} train and {} validation datapoints.'.format(
            len(train_loader.sampler), 0))

    test_dataset = datasets.CIFAR10('./data', train=False,
                                    transform=transform_test)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.test_batch_size,
                                              shuffle=True,
                                              **kwargs)
    print('{} test datapoints.\n'.format(len(test_loader.sampler)))

    # Defining the model
    in_channels, out_features = 3, 10
    if args.model == 'VGGBinaryConnect':
        model = VGGBinaryConnect(in_channels, out_features,
                                 eps=1e-5, momentum=args.bnmomentum,
                                 batch_affine=(args.bn_affine == 1))
    elif args.model == 'VGGBinaryConnect_M1':
        model = VGGBinaryConnect_M1(in_channels, out_features,
                                    eps=1e-5, momentum=args.bnmomentum,
                                    batch_affine=(args.bn_affine == 1))
    else:
        raise ValueError('Undefined Network')
    print(model)

    num_parameters = sum([l.nelement() for l in model.parameters()])
    print("Number of Network parameters: {}".format(num_parameters))

    model = torch.nn.DataParallel(model, device_ids=gpu_num)
    model = model.to(args.device)
    cudnn.benchmark = True

    # Defining the optimizer
    if args.optim == 'Adam' or args.optim == 'STE':
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    else:
        # BUG FIX: an unknown --optim previously fell through and raised a
        # NameError on the first use of `optimizer`.
        raise ValueError('Please select an optimizer in {STE, Adam}')

    # Defining the criterion
    if args.criterion == 'square-hinge':
        criterion = SquaredHingeLoss(
        )  # use the squared hinge loss for MNIST dataset
    elif args.criterion == 'cross-entropy':
        criterion = nn.CrossEntropyLoss(
        )  # this loss depends on the model output, remember to change the model output
    else:
        raise ValueError(
            'Please select loss criterion in {square-hinge, cross-entropy}')

    # Training the model
    start = time.time()
    results = train_model(args, model,
                          [train_loader, val_loader, test_loader], criterion,
                          optimizer)
    model, train_loss, train_acc, val_loss, val_acc, test_loss, test_acc = results
    save_train_history(args, train_loss, train_acc, val_loss, val_acc,
                       test_loss, test_acc)
    # plot_result(args, train_loss, train_acc, test_loss, test_acc)

    time_total = timeSince(start)
    print('Task completed in {:.0f}m {:.0f}s'.format(time_total // 60,
                                                     time_total % 60))
# Pass validation chunks' lengths, for this fold custom_metrics = CustomMetrics(window_size=WINDOW_SIZE, val_lens=data['val_len']) hist = model.fit( X_train, Y_train, validation_data=(X_val, Y_val), batch_size=batch_size, epochs=n_epochs, verbose=1, callbacks=[custom_metrics, model_checkpoint, early_stop], shuffle=True) # Plot and save training history # plot_loss_history(hist) save_train_history(hist, model_type) ####################################################################################################### # Save results from this fold curr_fold_hist = dict() curr_fold_hist.update(hist.history) curr_fold_hist.update(custom_metrics.metrics) folds_hist.append(curr_fold_hist) # break # Save training history from all folds save_train_history( folds_hist, f'ALL_{n_folds}fold_{arch_to_str(GRU_ARCH)}u_{dataset_type}')
print( f'INFO:\n\t{model_type}\n\t batch_size={batch_size}\n\t n_epochs={n_epochs}\n\t metric_to_monitor={metric_to_monitor}\n\t train_class_weights={train_class_weights}\n\t val_class_weights={val_class_weights}\n\t #params={model.count_params()}' ) hist = model.fit( X_train, Y_train, validation_data=(X_val, Y_val, val_sample_weights), batch_size=batch_size, epochs=n_epochs, verbose=1, sample_weight=train_sample_weights, callbacks=[ custom_metrics, model_checkpoint, # early_stop ], shuffle=True) ####################################################################################################### # Save training history # print(hist.history) hist.history['model_type'] = model_type hist.history['batch_size'] = batch_size hist.history['n_epochs'] = n_epochs hist.history['metric_to_monitor'] = metric_to_monitor hist.history['train_class_weights'] = train_class_weights hist.history['val_class_weights'] = val_class_weights hist.history['n_model_params'] = model.count_params() save_train_history(hist, model_checkpoint_path_prefix, f'{model_type}')