def train(self, dataset_map_fn, batch_size, epochs, model, device, optimizer):
    for epoch in range(1, epochs + 1):
        if self.verbose:
            print(f"Epoch {epoch}\n-------------------------------")
        # rebuild the dataloaders each epoch
        train_dataloader, test_dataloader = get_dataloaders(
            dataset_map_fn=dataset_map_fn, batch_size=batch_size)
        model.train()
        self.train_epoch(model, train_dataloader, optimizer, device)
        model.eval()
        test_eval_dict = self.evaluate_epoch(model, test_dataloader, device)
        test_accuracy = test_eval_dict['accuracy'] * 100
        test_loss = test_eval_dict['loss']
        if self.verbose:
            print(f"Test Error: \n Accuracy: {test_accuracy:>0.1f}%, "
                  f"Avg loss: {test_loss:>8f}\n")
        # checkpoint every 10 epochs
        if epoch % 10 == 0:
            tc.save(model.state_dict(), "model.pth")
            tc.save(optimizer.state_dict(), "optimizer.pth")
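# Hedged usage sketch for the method above (all names here are illustrative
# assumptions: `tc` is taken to be `import torch as tc`, and the enclosing
# class, called `Trainer` below, supplies `verbose`, `train_epoch`, and
# `evaluate_epoch`):
#
#     trainer = Trainer(verbose=True)
#     trainer.train(dataset_map_fn=preprocess_fn, batch_size=64, epochs=30,
#                   model=model, device="cuda", optimizer=optimizer)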
def start_evaluating(writer, seed, dataset_name, test_experiments_and_kwargs,
                     model_logdir, checkpoint, normalize_inputs, batch_size,
                     device_id):
    torch.manual_seed(seed)
    np.random.seed(seed)
    train_loader, _, test_loader, _, _ = get_dataloaders(
        dataset_name=dataset_name, batch_size=batch_size, train_size='max',
        val_size=0, device_id=device_id, normalize_inputs=normalize_inputs,
        num_workers=0)
    model = load_model_from_checkpoint(writer, model_logdir, checkpoint)
    device = torch.device(device_id if torch.cuda.is_available() else "cpu")
    model.to(device)
    test_experiments = []
    for te, kwargs in test_experiments_and_kwargs:
        test_experiments.append(experiments.__dict__[te](
            writer=writer, model=model, train_loader=train_loader,
            val_loader=test_loader, **kwargs))
    results = {}
    for ex in test_experiments:
        results.update(ex.run(0, 0))
    pprint(results)
    return results
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('data_dir', help='Path to image files.', type=str)
    parser.add_argument('--save_dir', dest="save_dir", type=str, action="store",
                        default="./", help="Directory to save checkpoints")
    parser.add_argument('--arch', dest="arch", type=str, action="store",
                        default="densenet121",
                        help="Architecture type (default: densenet121)")
    parser.add_argument('--learning_rate', dest="learning_rate", type=float,
                        action="store", default=0.003)
    parser.add_argument('--epochs', dest="epochs", type=int, action="store",
                        default=5)
    parser.add_argument('--hidden_units', dest="hidden_units", type=int,
                        nargs='+', action="store", default=[512])
    parser.add_argument('--gpu', action='store_true')
    num_outputs = 102
    args = parser.parse_args()

    device = utils.get_device(args.gpu)
    dataloaders, class_to_idx = utils.get_dataloaders(args.data_dir)
    model, optimizer, hidden_layers = utils.get_model_and_optimizer(
        args.arch, args.learning_rate, num_outputs, device, args.hidden_units)
    if not model:
        return
    model.class_to_idx = class_to_idx

    with active_session():
        utils.train_model(model, optimizer, dataloaders, device,
                          epochs=args.epochs, print_every=20)

    utils.save_model(model, args.learning_rate, args.epochs, optimizer,
                     num_outputs, args.hidden_units, args.save_dir)
def main(dataset_name):
    print('Training resnet model for', dataset_name)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # load the dataset
    print('loading', dataset_name, 'dataset')
    dataloaders_dict = get_dataloaders(DATASET_DIR[dataset_name],
                                       TRAIN_CLASSES[dataset_name])

    # load the resnet18 model
    print('loading the resnet model')
    model = models.resnet18()
    num_feat = model.fc.in_features
    print(len(TRAIN_CLASSES[dataset_name]))
    model.fc = nn.Linear(num_feat, len(TRAIN_CLASSES[dataset_name]))

    loss = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-04)

    print('training the model')
    model, val_loss = train(model, dataloaders_dict, loss, optimizer, device,
                            no_of_epochs=20)

    model_path = 'save/dogs.pt'  # note: hard-coded; does not use dataset_name
    create_dirs(model_path)
    print('saving the model')
    save_model(model, model_path)
    print('done!')
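# Hedged sketch of the two helpers called above; `create_dirs` and `save_model`
# are not defined in this snippet, so these minimal versions are assumptions
# (the state_dict format matches how the checkpoint is reloaded elsewhere with
# model.load_state_dict(torch.load('save/dogs.pt'))):
import os

import torch


def create_dirs(path):
    # make sure the parent directory of the checkpoint exists
    os.makedirs(os.path.dirname(path), exist_ok=True)


def save_model(model, path):
    # persist weights only; restore with model.load_state_dict(torch.load(path))
    torch.save(model.state_dict(), path)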
def __init__(self, **kwargs):
    super(OODDetection, self).__init__(kwargs['writer'])
    self.model = kwargs['model']
    self.train_loader = kwargs['train_loader']
    self.val_loader = kwargs['val_loader']
    self.run_interval = kwargs.get('run_interval', None)
    # positional arguments follow the keyword usage of get_dataloaders
    # elsewhere: (dataset_name, batch_size, train_size, val_size, device_id)
    OOD_loader, _, _, _, _ = get_dataloaders('SVHN',
                                             self.val_loader.batch_size,
                                             len(self.val_loader.dataset),
                                             0,
                                             str(self.model.device),
                                             normalize_inputs=True)
    self.OOD_loader = OOD_loader
def main(model_name):
    print(f'Fine-tuning {model_name} model for cats')
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # load the pretrained resnet18 model
    print('loading the pretrained model')
    model = models.resnet18(pretrained=True)
    num_feat = model.fc.in_features
    if model_name == 'dogs':
        model.fc = nn.Linear(num_feat, len(TRAIN_CLASSES["dogs"]))
        model.load_state_dict(torch.load('save/dogs.pt'))

    # freeze the backbone; the new fc head assigned below is created with
    # requires_grad=True, so only the head is fine-tuned
    for param in model.parameters():
        param.requires_grad = False
    model.fc = nn.Linear(num_feat, len(TRAIN_CLASSES['cats']))

    # load the dataset
    print('loading cats dataset')
    dataloaders_dict = get_dataloaders(DATASET_DIR, TRAIN_CLASSES['cats'])

    loss = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-04)

    print('fine-tuning the model')
    model, val_loss = train(model, dataloaders_dict, loss, optimizer, device,
                            no_of_epochs=20)

    if model_name == 'dogs':
        model_path = 'save/dogs_to_cats.pt'
    else:
        model_path = 'save/imagenet_to_cats.pt'
    save_model(model, model_path)
    print('done!')
def main():
    args = parser.parse_args()
    assert args.n_views == 2, "Only two view training is supported. Please use --n-views 2."

    # check if gpu training is available
    if not args.disable_cuda and torch.cuda.is_available():
        args.device = torch.device('cuda')
        cudnn.deterministic = True
        cudnn.benchmark = True
    else:
        args.device = torch.device('cpu')
        args.gpu_index = -1

    set_random_seed(args.seed)
    train_loader, valid_loader = get_dataloaders(args)

    if args.mode == 'simclr':
        model = ResNetSimCLR(base_model=args.arch, out_dim=args.out_dim)
        trainer_class = SimCLRTrainer
    elif args.mode == 'supervised':
        model = ResNetSimCLR(base_model=args.arch,
                             out_dim=len(train_loader.dataset.classes))
        trainer_class = SupervisedTrainer
    else:
        raise InvalidTrainingMode()

    if args.optimizer_mode == 'simclr':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                     weight_decay=args.weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=len(train_loader), eta_min=0, last_epoch=-1)
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                    momentum=0.9,
                                    weight_decay=args.weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                               T_max=200)

    # It's a no-op if the 'gpu_index' argument is a negative integer or None.
    with torch.cuda.device(args.gpu_index):
        trainer = trainer_class(model=model, optimizer=optimizer,
                                scheduler=scheduler, args=args)
        trainer.train(train_loader, valid_loader)
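# For reference, CosineAnnealingLR above anneals the learning rate as
# (per the PyTorch documentation):
#
#     lr_t = eta_min + (lr_0 - eta_min) * (1 + cos(pi * t / T_max)) / 2
#
# so the 'simclr' branch completes one cosine decay over len(train_loader)
# scheduler steps, while the SGD branch stretches it over T_max=200 steps.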
def __init__(self, config: dict):
    """
    Initialize the trainer.

    Parameters
    ----------
    config : dict
        Configuration dictionary. Keys used here include:
        "exp_name", "debug", "seed", "batch_size", "epochs",
        "encoder", "hidden_dim", "num_layers", "device",
        "learning_rate", and optionally "load_checkpoint".
    """
    self.config = config

    self.exp_dir = RESULTS / config['exp_name']
    self.exp_dir.mkdir(parents=True, exist_ok=True)
    self.checkpoint_dir = CHECKPOINTS / config['exp_name']
    self.checkpoint_dir.mkdir(parents=True, exist_ok=True)
    self.log_dir = self.exp_dir / LOG_DIR
    self.log_dir.mkdir(parents=True, exist_ok=True)
    self.writer = SummaryWriter(log_dir=self.log_dir)

    log_name = config["exp_name"] + ".log"
    log_level = logging.DEBUG if config["debug"] else logging.INFO
    self.logger = logging.getLogger(__name__)
    logfile_handler = logging.FileHandler(filename=self.exp_dir / log_name)
    logfile_handler.setLevel(level=log_level)
    logfile_format = logging.Formatter(
        '%(asctime)s - %(levelname)10s - %(funcName)15s : %(message)s')
    logfile_handler.setFormatter(logfile_format)
    self.logger.addHandler(logfile_handler)
    self.logger.setLevel(level=log_level)

    self.logger.info("-" * 50)
    self.logger.info(f"EXPERIMENT: {config['exp_name']}")
    self.logger.info("-" * 50)

    self.logger.info(f"Setting seed: {config['seed']}")
    np.random.seed(config['seed'])
    torch.manual_seed(config['seed'])
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    self.logger.info("Loading data ...")
    self.train_dl, self.valid_dl, self.test_dl, self.vocab = get_dataloaders(
        config['batch_size'], DATA_DIR)
    vocab_emb = self.vocab.vectors

    # Init trackers
    self.current_iter = 0
    self.current_epoch = 0
    self.best_accuracy = 0.

    if config['encoder'] == 'EmbeddingEncoder':
        encoded_dim = vocab_emb.shape[-1]
        encoder = EmbeddingEncoder(embeddings=vocab_emb)
    elif config['encoder'] == 'UniLSTM':
        encoded_dim = config['hidden_dim']
        encoder = UniLSTM(embeddings=vocab_emb,
                          batch_size=config['batch_size'],
                          hidden_size=config['hidden_dim'],
                          device=config['device'],
                          num_layers=config['num_layers'])
    elif config['encoder'] == 'BiLSTM':
        encoded_dim = 2 * config['hidden_dim']
        encoder = BiLSTM(embeddings=vocab_emb,
                         batch_size=config['batch_size'],
                         hidden_size=config['hidden_dim'],
                         device=config['device'],
                         num_layers=config['num_layers'])
    elif config['encoder'] == 'BiLSTMPool':
        encoded_dim = 2 * config['hidden_dim']
        encoder = BiLSTMPool(embeddings=vocab_emb,
                             batch_size=config['batch_size'],
                             hidden_size=config['hidden_dim'],
                             device=config['device'],
                             num_layers=config['num_layers'])
    else:
        self.logger.error("Encoder not available")
        sys.exit(1)

    self.model = Classifier(encoder, encoded_dim)
    self.logger.info(f"Using device: {config['device']}")
    self.model.to(config['device'])
    self.opt = optim.Adam(self.model.parameters(), lr=config['learning_rate'])
    self.criterion = nn.CrossEntropyLoss()

    if 'load_checkpoint' in config:
        self.load_checkpoint(config['load_checkpoint'])
def train_model(writer, seed, dataset_name, model_class_name, model_kwargs,
                normalize_inputs, batch_size, train_size, val_size, epochs,
                total_batches, optimizer_class_name, optimizer_kwargs,
                lr_scheduler_class_name, lr_scheduler_kwargs,
                model_logdir=None, checkpoint=None,
                train_experiments_and_kwargs=(), device_id='cpu'):
    torch.manual_seed(seed)
    np.random.seed(seed)
    device = torch.device(device_id if torch.cuda.is_available() else "cpu")
    model_class = models.__dict__[model_class_name]
    train_loader, val_loader, _, in_shape, n_classes = get_dataloaders(
        dataset_name=dataset_name, batch_size=batch_size,
        train_size=train_size, val_size=val_size, device_id=device_id,
        normalize_inputs=normalize_inputs)

    if model_logdir or checkpoint:
        model = load_model_from_checkpoint(writer, model_logdir, checkpoint)
    else:
        model_kwargs['n_classes'] = n_classes
        model_kwargs['net_kwargs']['in_shape'] = in_shape
        model = model_class(writer, **model_kwargs)
        save_model_kwargs(writer, model_class_name, model_kwargs)

    optimizer = model.get_optimizer(optimizer_class_name, optimizer_kwargs)
    scheduler = torch.optim.lr_scheduler.__dict__[lr_scheduler_class_name](
        optimizer, **lr_scheduler_kwargs)

    train_experiments = []
    for ex in train_experiments_and_kwargs:
        train_experiments.append(experiments.__dict__[ex[0]](
            writer=writer, model=model, train_loader=train_loader,
            val_loader=val_loader, **ex[1]))

    model.initialize(train_loader)
    model.to(device)
    if epochs is None:
        epochs = ceil(total_batches / len(train_loader))
    for epoch in range(1, epochs + 1):
        train_epoch(writer, model, train_loader, optimizer, scheduler, epoch,
                    total_batches, train_experiments)
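# Worked example of the `epochs` fallback above: with total_batches=10000 and
# a train_loader of 391 batches (e.g. CIFAR-10 at batch size 128),
# epochs = ceil(10000 / 391) = 26.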
args = parse_args()

SEED = args.seed
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Preprocessing
pp_start_time = time.time()
trainloader, testloader = get_dataloaders(args)
pp_end_time = time.time()
pp_mins, pp_secs = epoch_time(pp_end_time - pp_start_time)
print(f'Preprocessing time: {pp_mins}m {pp_secs}s')

with wandb.init(project='RegulQuant', entity='womeiyouleezi', config=args):
    if args.run_name:
        wandb.run.name = args.run_name
    if not args.save_file:
        file_name = wandb.run.name
    else:
        file_name = args.save_file

    # make model
    net = get_model(args).to(device)
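# Hedged sketch of the `epoch_time` helper used above (not defined in this
# excerpt; this minimal version is an assumption):
#
#     def epoch_time(elapsed_seconds):
#         mins = int(elapsed_seconds // 60)
#         secs = int(elapsed_seconds % 60)
#         return mins, secs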
parser.add_argument('--verbose', default=1, type=int)
args = parser.parse_args()
args.cuda = torch.cuda.device_count() != 0

torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)
np.random.seed(args.seed)

enc = Encoder()
dec = Decoder()
generator = AEGenerator(enc, dec)
discriminator = Discriminator()
dagan = DAGAN(generator, discriminator)

d_learning_rate = 2e-4
g_learning_rate = 2e-4
optim_betas = (0.9, 0.999)
opt_g = torch.optim.Adam(generator.parameters(), lr=g_learning_rate,
                         betas=optim_betas)
opt_d = torch.optim.Adam(discriminator.parameters(), lr=d_learning_rate,
                         betas=optim_betas)

trainloader, testloader = utils.get_dataloaders(data=args.data,
                                                train_bs=args.batch_size,
                                                ohe_labels=True)
train(trainloader, testloader, dagan, opt_g, opt_d, args)
def main():
    available_modelnames = [
        m for m in dir(models)
        if m[0] != '_' and type(getattr(models, m)).__name__ != 'module'
    ]

    parser = argparse.ArgumentParser(description='PyTorch RICAP Training')
    # hardware
    parser.add_argument('--num_workers', type=int, default=4,
                        help='number of workers loading data')
    # dataset
    parser.add_argument('--dataset', type=str, default='cifar10',
                        choices=['cifar10', 'cifar100', 'ImageNet'],
                        help='dataset for training')
    parser.add_argument('--dataroot', type=str, default='data/',
                        help='path to dataset')
    # model
    parser.add_argument('--model', '-m', type=str, required=True,
                        choices=available_modelnames, help='model name')
    parser.add_argument('--depth', '-d', type=int, required=True,
                        help='number of layers')
    parser.add_argument('--params', '-p', type=str, default=None,
                        help='model parameters such as widen factor for Wide ResNet')
    parser.add_argument('--postfix', type=str, default='',
                        help='postfix for saved model name')
    # hyperparameters
    parser.add_argument('--epoch', '-e', type=int, default=200,
                        help='number of epochs (default: 200 for Wide ResNet)')
    parser.add_argument('--batch', type=int, default=128, help='batch size')
    parser.add_argument('--lr', type=float, default=0.1,
                        help='initial learning rate')
    parser.add_argument('--droplr', type=float, default=0.2,
                        help='adaptive learning rate ratio (default: 0.2 for Wide ResNet)')
    parser.add_argument('--adlr', type=str, default=None,
                        help="epochs at which the learning rate is adapted (x droplr); "
                             "e.g., '60,120,160' for Wide ResNet")
    parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
    parser.add_argument('--wd', type=float, default=0.0005,
                        help='weight decay (default: 0.0005 for Wide ResNet)')
    # data augmentation
    parser.add_argument('--crop', type=int, default=None, help='crop size')
    parser.add_argument('--beta_of_ricap', type=float, default=0.0,
                        help='beta of RICAP augmentation')
    # save and resume
    parser.add_argument('--resume', '-r', type=int, default=0,
                        help='epoch at which to resume from checkpoint; -1 for latest')
    parser.add_argument('--savefreq', type=int, default=5,
                        help='frequency to save the model and to mark it the latest')
    parser.add_argument('--nocuda', action='store_true', default=False,
                        help='disable cuda devices')
    args = parser.parse_args()

    print('==> Preparing dataset loaders..')
    dataloaders = utils.get_dataloaders(datasetname=args.dataset,
                                        dataroot=args.dataroot,
                                        batchsize=args.batch,
                                        num_workers=args.num_workers,
                                        cropsize=args.crop)

    # prepare the save file name prefix
    # save targets: model information (.dat), results (.log),
    # model parameters (.pth), optimizer parameters (.opt)
    savefilename_prefix = 'checkpoint/{model}-{depth}{params}_{dataset}{postfix}'.format(
        model=args.model,
        depth=args.depth,
        params='-{}'.format(args.params) if args.params is not None else '',
        dataset=args.dataset,
        postfix='_{}'.format(args.postfix) if args.postfix != '' else '',
    )

    # define the learning rate strategy
    if args.adlr is None:
        args.adlr = np.array([60, 120, 160])
    else:
        assert re.match('[0-9 ,]+', args.adlr), \
            'Error: invalid adaptive learning rate: {}'.format(args.adlr)
        args.adlr = np.array(sorted(eval('[{}]'.format(args.adlr))))
    lr_current = args.lr

    # prepare cnn model and optimizer
    print('==> Building model..')
    if not os.path.isdir('checkpoint'):
        os.mkdir('checkpoint')
    network = getattr(models, args.model)(args.dataset, args.depth, args.params)
    optimizer = optim.SGD(network.parameters(), lr=lr_current,
                          momentum=args.momentum, weight_decay=args.wd,
                          nesterov=True)

    # write model information to the save file (.dat)
    with open('{}.dat'.format(savefilename_prefix), 'w') as of:
        print('==> Command', file=of)
        import sys
        print(' '.join(sys.argv), file=of)
        print('\n', file=of)
        print('==> Parameters', file=of)
        arg_str = '\n'.join([
            '--{} {}'.format(k, str(getattr(args, k))) for k in dir(args)
            if '_' not in k
        ])
        print(arg_str, file=of)
        print('\n', file=of)
        print('==> Network', file=of)
        num_params = 0
        for param in network.parameters():
            num_params += param.numel()
        print('Number of parameters: %d' % num_params, file=of)
        print(network, file=of)

    # prepare trainer
    datasetname = args.dataset
    if datasetname == "cifar10":
        num_class = 10
    elif datasetname == "cifar100":
        num_class = 100
    elif datasetname == "ImageNet":
        num_class = 1000
    use_cuda = torch.cuda.is_available() and not args.nocuda
    trainer = trainers.make_trainer(network, dataloaders, optimizer,
                                    use_cuda=use_cuda,
                                    beta_of_ricap=args.beta_of_ricap)

    # initialize logs and epoch number
    if args.resume == 0:
        logs = []
        epoch_start = 0
    else:
        # if resuming, load model and optimizer parameters and
        # start from the pre-saved checkpoint
        print('==> Resuming from checkpoint..')
        if args.resume < 0:
            args.resume = 'latest'
        checkpoint = '{}_{}'.format(savefilename_prefix, args.resume)
        map_location = lambda storage, location: storage.cuda() if use_cuda else storage
        network.load_state_dict(
            torch.load(checkpoint + '.pth', map_location=map_location))
        optimizer.load_state_dict(
            torch.load(checkpoint + '.opt', map_location=map_location))
        logs = list(np.loadtxt(checkpoint + '.log', ndmin=2))
        epoch_start = len(logs)

    # update the learning rate based on the defined strategy
    def update_learning_rate(epoch, ite):
        lr_adapted = args.lr * args.droplr ** np.sum(args.adlr < epoch)
        if lr_current != lr_adapted:
            print('Learning rate is adapted: {} -> {}'.format(lr_current,
                                                              lr_adapted))
            utils.adjust_learning_rate(optimizer, lr_adapted)
        return lr_adapted

    # save network and optimizer parameters to the save files (.pth, .opt)
    def savemodel(savefilename):
        torch.save(network.state_dict(), savefilename + '.pth')
        torch.save(optimizer.state_dict(), savefilename + '.opt')
        np.savetxt(savefilename + '.log', logs)

    # train and test loop
    epoch_end = args.epoch
    for epoch in range(epoch_start + 1, epoch_end + 1):
        lr_current = update_learning_rate(epoch,
                                          len(dataloaders[0]) * (epoch - 1))
        print('Epoch: {} / Iterations: {}'.format(
            epoch, len(dataloaders[0]) * (epoch - 1)))
        ret_train = trainer.epoch(train=True, lr=lr_current)
        ret_test = trainer.epoch(train=False, lr=lr_current)
        logs.append([epoch] + ret_train + ret_test +
                    [lr_current, len(dataloaders[0]) * epoch])

        # save model and optimizer parameters
        if epoch % args.savefreq == 0 or epoch == epoch_end:
            print('Saving model as the latest..')
            savefilename = '{}_{}'.format(savefilename_prefix, 'latest')
            savemodel(savefilename)
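# Worked example of the decay rule in update_learning_rate above, using the
# defaults (lr=0.1, droplr=0.2, adlr=[60, 120, 160]):
#     epochs   1-60 : 0.1 * 0.2**0 = 0.1
#     epochs  61-120: 0.1 * 0.2**1 = 0.02
#     epochs 121-160: 0.1 * 0.2**2 = 0.004
#     epochs 161-200: 0.1 * 0.2**3 = 0.0008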
def train_model(args):
    dataloaders = get_dataloaders(args)
    dataset_sizes = {
        'train': len(dataloaders['train'].dataset),
        'val': len(dataloaders['val'].dataset),
        'test': len(dataloaders['test'].dataset)
    }
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # set up
    model = load_model(args, device)
    loss_fn = get_loss_fn(args.dataset, args.loss_type)
    if args.train_decoders:
        parameters = list(model.photo_decoder.parameters()) + \
            list(model.sketch_decoder.parameters())
    elif args.model in ['EmbedGAN']:
        parameters = list(model.G.parameters()) + list(model.D.parameters())
    else:
        parameters = model.parameters()

    if args.optim == 'sgd':
        optimizer = optim.SGD(parameters, lr=args.lr, weight_decay=args.wd,
                              momentum=.9, nesterov=True)
    elif args.optim == 'adam':
        optimizer = optim.Adam(parameters, lr=args.lr, weight_decay=args.wd)
    scheduler = optim.lr_scheduler.StepLR(
        optimizer, step_size=len(dataloaders['train']) // 10, gamma=.9)

    writer = SummaryWriter(args.log_dir + "/{}".format(args.name))
    save_dir = Path(args.save_dir) / '{}'.format(args.name)
    if not save_dir.exists():
        os.mkdir(save_dir)

    best_model = None
    best_loss = float('inf')
    batch_num = 0
    for epoch in range(args.num_epochs):
        print('Epoch {}/{}'.format(epoch, args.num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            epoch_metrics = defaultdict(float)
            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                # zero the parameter gradients
                optimizer.zero_grad()
                N = len(inputs)
                # converts a list of tuples of image paths of length N into a
                # flattened tensor of size N * args.loss_type
                inputs = load_sketchy_images(inputs, args.loss_type, device,
                                             args.img_size)
                labels = labels.to(device)

                with torch.set_grad_enabled(phase == 'train'):
                    if args.loss_type in ["vae", "vae+embed",
                                          "vae+embed+classify"]:
                        batch_metrics = vae_forward(
                            inputs, labels, model, loss_fn, writer, device,
                            batch_num, args.alpha, N, args.name,
                            modality=args.modality,
                            compare_embed=args.loss_type in
                            ["vae+embed", "vae+embed+classify"],
                            classify=args.loss_type in
                            ['vae+embed+classify', 'single_vae'])
                    elif args.loss_type in ["ae", "ae+embed",
                                            "ae+embed+classify"]:
                        batch_metrics = ae_forward(
                            inputs, labels, model, loss_fn, writer, device,
                            batch_num, args.alpha, N, args.name,
                            modality=args.modality,
                            compare_embed=args.loss_type in
                            ["ae+embed", "ae+embed+classify"],
                            classify=args.loss_type in
                            ['ae+embed+classify', 'single_ae'])
                    elif args.loss_type in ['gan']:
                        batch_metrics = gan_forward(inputs, labels, model,
                                                    loss_fn, writer, device,
                                                    batch_num, N)
                    else:
                        batch_metrics = classify_contrast_forward(
                            inputs, labels, model, loss_fn, writer, device,
                            batch_num, args.alpha, args.loss_type, N)

                    for criteria_name in batch_metrics:
                        epoch_metrics[criteria_name] += \
                            batch_metrics[criteria_name] / dataset_sizes[phase]
                    loss = batch_metrics['loss']
                    del batch_metrics

                    if phase == "train":
                        batch_num += 1
                        loss.backward()
                        optimizer.step()

            epoch_loss = epoch_metrics['loss'].item()
            log_metrics(epoch_metrics, writer, phase, epoch)

            # deep copy the model
            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
                now = datetime.datetime.now()
                torch.save(
                    model.state_dict(),
                    save_dir /
                    f"{now.month}{now.day}{now.hour}{now.minute}_{best_loss}")
                best_model = copy.deepcopy(model.state_dict())

    writer.close()
    now = datetime.datetime.now()
    torch.save(
        model.state_dict(),
        save_dir / f"end_{now.month}{now.day}{now.hour}{now.minute}_{best_loss}")

    # load best model weights
    model.load_state_dict(best_model)
    torch.save(model.state_dict(), save_dir / "best")
compression_factors = [1, 0.5, 0.25, 0.1, 0.05, 0.01]
sensing_schemes = [RandomProjection, RSTD]
scheme_names = ["Gaussian Sensing", "Random Subsampling"]
S = 200  # 200 achieves ~91.8% accuracy at 100% MR
test_accuracy = np.zeros((len(sensing_schemes), len(compression_factors)))

# Loop over sensing schemes and compression factors
for i, ss in enumerate(sensing_schemes):
    for j, cf in enumerate(compression_factors):
        # Define the data transformation for this network
        sensing_transform = ss(cf, IM_DIM)
        trans = transforms.Compose([transforms.ToTensor(), sensing_transform])

        # Build the dataloaders for regular / proxy images
        # (uncomment the line below for results on sparse recovered images)
        trainloader, valloader, testloader = get_dataloaders(
            batch_size, val_split, trans, n_workers)
        # trainloader, valloader, testloader = get_sparse_recovered_dataloaders(
        #     sensing_transform, S, batch_size, val_split, n_workers)

        # Construct the model
        net = MNISTClassifier(resnet20(), lr, lr_milestones)

        if torch.cuda.is_available():
            trainer = pl.Trainer(gpus=2, accelerator='ddp',
                                 max_epochs=num_epochs,
                                 progress_bar_refresh_rate=bar_refresh_rate)
        else:
            trainer = pl.Trainer(gpus=0, max_epochs=num_epochs,
                                 progress_bar_refresh_rate=bar_refresh_rate)
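        # The excerpt ends after constructing the Trainer; a plausible
        # continuation (an assumption, not taken from the source) would train
        # and evaluate each configuration and fill the accuracy grid:
        #
        #     trainer.fit(net, trainloader, valloader)
        #     (result,) = trainer.test(net, test_dataloaders=testloader)
        #     test_accuracy[i, j] = result["test_acc"]  # metric key depends on
        #     # what MNISTClassifier logs in its test_step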
import torch

from globals import *
from init import init
from net import SentimentAnalyzer
from train_eval import train_loop, evaluate
from utils import get_dataloaders

if __name__ == "__main__":
    # Load the datasets and the vocabulary used
    init(config)

    # Get dataloaders for the train/validation/test sets
    train_loader, valid_loader, test_loader = get_dataloaders(
        config['train'], config['val'], config['test'])

    if net_config.mode == "train":
        # Create a fresh instance of the RNN
        net = SentimentAnalyzer(config['vocab'], net_config.hidden_dim,
                                net_config.layers, net_config.dropout,
                                net_config.bidirectional).to(device)
        # Train the network
        train_loop(net, train_loader, valid_loader, test_loader)
    else:
        # Create a fresh instance of the RNN to hold the
        # loaded pretrained weights
        net = SentimentAnalyzer(config['vocab'], net_config.hidden_dim,
                                net_config.layers, net_config.dropout,
                                net_config.bidirectional).to(device)
        # Load the pretrained model parameters
        net.load_state_dict(torch.load(net_config.pretrained_loc))
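        # The `evaluate` import is otherwise unused in this excerpt; this
        # branch presumably continues along these lines (an assumption, not
        # taken from the source):
        #
        #     net.eval()
        #     evaluate(net, test_loader)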
def create_cams(args):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = load_model(args, device)
    features, weight_softmax = set_up_model(model)

    with open("/home/robincheong/data/sketchy/idx_to_class_dict.pkl",
              "rb") as f:
        classes = pickle.load(f)

    loader = get_dataloaders(args)[args.phase]
    num_cams = 0
    for inputs, labels in loader:
        print("Getting logits")
        labels = labels.numpy()
        file_paths = [example.split('++') for example in inputs]
        N = len(inputs)
        inputs = load_sketchy_images(inputs, args.loss_type, device,
                                     args.img_size)
        sketches, photos = torch.split(inputs, N)
        sketch_probs, sketch_idx = get_probs_and_idx(sketches, model, device,
                                                     is_sketch=True)
        photo_probs, photo_idx = get_probs_and_idx(photos, model, device,
                                                   is_sketch=False)
        print(sketch_probs.shape)

        print("Generating CAMs")
        for i in range(N):
            if num_cams > args.num_cams:
                break
            num_cams += 1
            print_top_5(sketch_probs[i], sketch_idx[i], classes, labels[i],
                        "sketch")
            print_top_5(photo_probs[i], photo_idx[i], classes, labels[i],
                        "photo")
            CAMs = {
                "sketch": return_CAM(
                    features[0][i], weight_softmax,
                    [sketch_idx[i][np.where(sketch_idx[i] == labels[i])]]),
                "photo": return_CAM(
                    features[1][i], weight_softmax,
                    [photo_idx[i][np.where(photo_idx[i] == labels[i])]])
            }

            # render the CAM and output
            for modality, path in [("sketch", file_paths[i][0]),
                                   ("photo", file_paths[i][1])]:
                print('Rendering {} CAMs for the correct class: {}'.format(
                    modality, classes[labels[i]]))
                img = cv2.imread(str(path))
                height, width, _ = img.shape
                heatmap = cv2.applyColorMap(
                    cv2.resize(CAMs[modality][0], (width, height)),
                    cv2.COLORMAP_JET)
                result = heatmap * 0.3 + img * 0.5
                cam_fname = 'cams/{}_{}{}.jpg'.format(modality,
                                                      classes[labels[i]],
                                                      args.suffix)
                cv2.imwrite(cam_fname, result)
        # only process the first batch
        break
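# Hedged sketch of `return_CAM`, which is not defined in this snippet. The
# standard class-activation-map construction projects the final conv feature
# maps through the classifier weights of the target class and rescales the
# result to an 8-bit heatmap; the (nc, h, w) input shape is an assumption
# based on the per-example call `return_CAM(features[0][i], ...)` above.
import cv2
import numpy as np


def return_CAM(feature_conv, weight_softmax, class_idx, size=(256, 256)):
    nc, h, w = feature_conv.shape
    cams = []
    for idx in class_idx:
        # weighted sum of the feature maps for class `idx`
        cam = weight_softmax[idx].dot(feature_conv.reshape(nc, h * w))
        cam = cam.reshape(h, w)
        cam = cam - cam.min()
        cam = np.uint8(255 * cam / cam.max())
        cams.append(cv2.resize(cam, size))
    return cams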
                    help='dropout ratio for AlexNet.')
args = parser.parse_args()
print("Script arguments:\n", args)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
best_acc = 0
start_epoch = 0

working_dir = os.path.join(args.output_dir, args.train_id)
os.makedirs(working_dir, exist_ok=True)
writer = SummaryWriter(working_dir)

# Setup data.
print('==> Preparing data..')
trainloader, testloader = utils.get_dataloaders(dataset=args.dataset,
                                                batch_size=args.batch_size,
                                                data_root=args.data_root)

# Setup model
# ----------------------------------------
print('==> Building model..')
if args.dataset == "imagenet":
    # note: this local dict shadows any imported `models` module in this scope
    models = {
        'presnet18': PreActResNet18,
        'glouncv-alexnet': alexnet,
        'glouncv-presnet34': preresnet34,
        'glouncv-mobilenetv2_w1': mobilenetv2_w1
    }
    net = models.get(args.arch, None)()
elif args.dataset == "cifar100":
def main(cfg: DictConfig) -> None:
    print("Params: \n")
    print(OmegaConf.to_yaml(cfg))
    time.sleep(10)

    best_acc = 0
    start_epoch = 0

    working_dir = os.path.join(get_original_cwd(), cfg.output_dir,
                               cfg.train_id)
    os.makedirs(working_dir, exist_ok=True)
    writer = SummaryWriter(working_dir)

    # Setup data.
    # --------------------
    print('=> Preparing data..')
    trainloader, testloader = utils.get_dataloaders(
        dataset=cfg.dataset.name,
        batch_size=cfg.dataset.batch_size,
        data_root=cfg.dataset.data_root)

    net = setup_network(cfg.dataset.name, cfg.dataset.arch)
    net = tweak_network(net, bit=cfg.quantizer.bit,
                        train_conf=cfg.train_conf,
                        quant_mode=cfg.quant_mode,
                        arch=cfg.dataset.arch,
                        cfg=cfg)
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True
    print(net)
    print("Number of learnable parameters: ",
          sum(p.numel() for p in net.parameters() if p.requires_grad) / 1e6,
          "M")
    time.sleep(5)

    load_checkpoint(net, init_from=cfg.dataset.init_from)
    params = create_train_params(model=net,
                                 main_wd=cfg.quantizer.wd,
                                 delta_wd=0,
                                 skip_keys=['.delta', '.alpha'],
                                 verbose=cfg.verbose)
    criterion = nn.CrossEntropyLoss()

    # Setup optimizer
    # ----------------------------
    if cfg.quantizer.optimizer == 'sgd':
        print("=> Use SGD optimizer")
        optimizer = optim.SGD(params, lr=cfg.quantizer.lr, momentum=0.9,
                              weight_decay=cfg.quantizer.wd)
    elif cfg.quantizer.optimizer == 'adam':
        print("=> Use Adam optimizer")
        optimizer = optim.Adam(params, lr=cfg.quantizer.lr,
                               weight_decay=cfg.quantizer.wd)
    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=cfg.dataset.epochs)

    if cfg.evaluate:
        print("==> Start evaluating ...")
        test(net, testloader, criterion, -1)
        exit()

    # -----------------------------------------------
    # Reset to 'warmup_lr' if we are using the warmup strategy.
    if cfg.quantizer.enable_warmup:
        assert cfg.quantizer.bit == 1
        for param_group in optimizer.param_groups:
            param_group['lr'] = cfg.quantizer.warmup_lr

    # Initialization
    # ------------------------------------------------
    if cfg.quantizer.bit != 32 and "quan" in cfg.train_conf:
        simple_initialization(net, trainloader,
                              num_batches=cfg.dataset.num_calibration_batches,
                              train_conf=cfg.train_conf)

    # Training
    # -----------------------------------------------
    save_checkpoint_epochs = list(range(10))
    for epoch in range(start_epoch, cfg.dataset.epochs):
        train_loss, train_acc1 = train(net, optimizer, trainloader, criterion,
                                       epoch, cfg=cfg)
        test_loss, test_acc1, curr_acc = test(net, testloader, criterion,
                                              epoch)

        # Save checkpoint.
        if curr_acc > best_acc:
            best_acc = curr_acc
            utils.save_checkpoint(net, lr_scheduler, optimizer, curr_acc,
                                  epoch,
                                  filename=os.path.join(working_dir,
                                                        'ckpt_best.pth'))
            print('Saving..')
            print('Best accuracy: ', best_acc)

        if lr_scheduler is not None:
            lr_scheduler.step()

        write_metrics(writer, epoch, net, optimizer, train_loss, train_acc1,
                      test_loss, test_acc1, prefix="Standard_Training")

    print('Best accuracy: ', best_acc)
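# Hedged sketch of `create_train_params`, which is not defined in this snippet.
# The assumption, based on its arguments above, is that it builds two optimizer
# parameter groups so that quantizer scale parameters whose names match
# `skip_keys` (e.g. '.delta', '.alpha') get their own weight decay:
def create_train_params(model, main_wd, delta_wd, skip_keys, verbose=False):
    main_params, skip_params = [], []
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        if any(key in name for key in skip_keys):
            skip_params.append(param)
            if verbose:
                print(f"{name}: weight_decay={delta_wd}")
        else:
            main_params.append(param)
    return [{'params': main_params, 'weight_decay': main_wd},
            {'params': skip_params, 'weight_decay': delta_wd}]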
def main(): print("\n_________________________________________________\n") print(now(), "train_model.py main() running.") parser = argparse.ArgumentParser(description="Deep Thinking") parser.add_argument("--checkpoint", default="check_default", type=str, help="where to save the network") parser.add_argument("--dataset", default="CIFAR10", type=str, help="dataset") parser.add_argument("--depth", default=1, type=int, help="depth of the network") parser.add_argument("--epochs", default=200, type=int, help="number of epochs for training") parser.add_argument("--lr", default=0.1, type=float, help="learning rate") parser.add_argument("--lr_factor", default=0.1, type=float, help="learning rate decay factor") parser.add_argument("--lr_schedule", nargs="+", default=[100, 150], type=int, help="how often to decrease lr") parser.add_argument("--mode", default="default", type=str, help="which testing mode?") parser.add_argument("--model", default="resnet18", type=str, help="model for training") parser.add_argument("--model_path", default=None, type=str, help="where is the model saved?") parser.add_argument("--no_save_log", action="store_true", help="do not save log file") parser.add_argument("--optimizer", default="SGD", type=str, help="optimizer") parser.add_argument("--output", default="output_default", type=str, help="output subdirectory") parser.add_argument("--problem", default="classification", type=str, help="problem type (classification or segmentation)") parser.add_argument("--save_json", action="store_true", help="save json") parser.add_argument("--save_period", default=None, type=int, help="how often to save") parser.add_argument("--test_batch_size", default=50, type=int, help="batch size for testing") parser.add_argument("--test_dataset", type=str, default=None, help="name of the testing dataset") parser.add_argument("--test_iterations", default=None, type=int, help="how many, if testing with a different " "number iterations than training") parser.add_argument("--train_batch_size", default=128, type=int, help="batch size for training") parser.add_argument("--train_log", default="train_log.txt", type=str, help="name of the log file") parser.add_argument("--val_period", default=20, type=int, help="how often to validate") parser.add_argument("--width", default=4, type=int, help="width of the network") args = parser.parse_args() if args.save_period is None: args.save_period = args.epochs print(args) # summary writer train_log = args.train_log try: array_task_id = train_log[:-4].split("_")[-1] except: array_task_id = 1 writer = SummaryWriter(log_dir=f"{args.output}/runs/{train_log[:-4]}") if not args.no_save_log: to_log_file(args, args.output, train_log) # set device device = "cuda" if torch.cuda.is_available() else "cpu" #################################################### # Dataset and Network and Optimizer trainloader, testloader = get_dataloaders( args.dataset, args.train_batch_size, test_batch_size=args.test_batch_size) # load model from path if a path is provided if args.model_path is not None: print(f"Loading model from checkpoint {args.model_path}...") net, start_epoch, optimizer_state_dict = load_model_from_checkpoint( args.model, args.model_path, args.dataset, args.width, args.depth) start_epoch += 1 else: net = get_model(args.model, args.dataset, args.width, args.depth) start_epoch = 0 optimizer_state_dict = None net = net.to(device) pytorch_total_params = sum(p.numel() for p in net.parameters()) optimizer = get_optimizer(args.optimizer, args.model, net, args.lr, args.dataset) 
    print(net)
    print(f"This {args.model} has {pytorch_total_params / 1e6:0.3f} "
          f"million parameters.")
    print(f"Training will start at epoch {start_epoch}.")

    if optimizer_state_dict is not None:
        print(f"Loading optimizer from checkpoint {args.model_path}...")
        optimizer.load_state_dict(optimizer_state_dict)
        warmup_scheduler = warmup.ExponentialWarmup(optimizer,
                                                    warmup_period=0)
    else:
        warmup_scheduler = warmup.ExponentialWarmup(optimizer,
                                                    warmup_period=5)

    lr_scheduler = MultiStepLR(optimizer, milestones=args.lr_schedule,
                               gamma=args.lr_factor, last_epoch=-1)
    optimizer_obj = OptimizerWithSched(optimizer, lr_scheduler,
                                       warmup_scheduler)
    np.set_printoptions(precision=2)
    torch.backends.cudnn.benchmark = True
    test_setup = TestingSetup(args.problem.lower(), args.mode.lower())
    ####################################################

    ####################################################
    #                      Train
    print(f"==> Starting training for {args.epochs - start_epoch} epochs...")
    for epoch in range(start_epoch, args.epochs):
        loss, acc = train(net, trainloader, args.problem.lower(),
                          optimizer_obj, device)
        print(f"{now()} Training loss at epoch {epoch}: {loss}")
        print(f"{now()} Training accuracy at epoch {epoch}: {acc}")

        # if the loss is nan, stop training
        if np.isnan(float(loss)):
            print("Loss is nan, exiting...")
            sys.exit()

        # tensorboard loss writing
        writer.add_scalar("Loss/loss", loss, epoch)
        writer.add_scalar("Accuracy/acc", acc, epoch)
        for i in range(len(optimizer.param_groups)):
            writer.add_scalar(f"Learning_rate/group{i}",
                              optimizer.param_groups[i]["lr"], epoch)

        if (epoch + 1) % args.val_period == 0:
            train_acc = test(net, trainloader, test_setup, device)
            test_acc = test(net, testloader, test_setup, device)
            print(f"{now()} Training accuracy: {train_acc}")
            print(f"{now()} Testing accuracy: {test_acc}")
            stats = [train_acc, test_acc]
            stat_names = ["train_acc", "test_acc"]
            for stat_idx, stat in enumerate(stats):
                stat_name = os.path.join("val", stat_names[stat_idx])
                writer.add_scalar(stat_name, stat, epoch)

        if (epoch + 1) % args.save_period == 0 or (epoch + 1) == args.epochs:
            state = {
                "net": net.state_dict(),
                "epoch": epoch,
                "optimizer": optimizer.state_dict()
            }
            out_str = os.path.join(
                args.checkpoint,
                f"{args.model}_{args.dataset}_{args.optimizer}"
                f"_depth={args.depth}"
                f"_width={args.width}"
                f"_lr={args.lr}"
                f"_batchsize={args.train_batch_size}"
                f"_epoch={args.epochs - 1}"
                f"_{array_task_id}.pth")
            print("saving model to: ", args.checkpoint, " out_str: ", out_str)
            if not os.path.isdir(args.checkpoint):
                os.makedirs(args.checkpoint)
            torch.save(state, out_str)

    writer.flush()
    writer.close()
    ####################################################

    ####################################################
    #                       Test
    print("==> Starting testing...")
    if args.test_iterations is not None:
        assert isinstance(net.iters, int), \
            "Cannot test feed-forward model with iterations."
        net.iters = args.test_iterations

    train_acc = test(net, trainloader, test_setup, device)
    test_acc = test(net, testloader, test_setup, device)
    print(f"{now()} Training accuracy: {train_acc}")
    print(f"{now()} Testing accuracy: {test_acc}")

    model_name_str = f"{args.model}_depth={args.depth}_width={args.width}"
    stats = OrderedDict([("model", model_name_str),
                         ("num_params", pytorch_total_params),
                         ("learning rate", args.lr),
                         ("lr_factor", args.lr_factor),
                         ("lr", args.lr),
                         ("epochs", args.epochs),
                         ("train_batch_size", args.train_batch_size),
                         ("optimizer", args.optimizer),
                         ("dataset", args.dataset),
                         ("train_acc", train_acc),
                         ("test_acc", test_acc),
                         ("test_iter", args.test_iterations)])
    if args.save_json:
        to_json(stats, args.output)