def load_model(model, optim, path):
    checkpoint = torch.load(path)
    model.load_state_dict(checkpoint['model_state_dict'])
    optim.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    loss = checkpoint['loss']
    return model, optim, epoch, loss
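# A minimal save-side counterpart (a sketch, not from the original source;
# it writes exactly the keys that load_model above expects to read):
def save_model(model, optim, epoch, loss, path):
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optim.state_dict(),
        'epoch': epoch,
        'loss': loss,
    }, path)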
def load_model(net, optim, save_path, filename, device):
    """Load a model and its optimizer.

    Args:
        net (nn.Module): architecture of the saved model
        optim (torch.optim.Optimizer): optimizer to load
        save_path (str): path where the file is stored
        filename (str): filename to open
        device (torch.device): device to load the model and optimizer onto

    Returns:
        net (nn.Module): loaded model
        optim (torch.optim.Optimizer): optimizer of the loaded model
        best_acc (float): performance of the saved model
        epoch (int): number of epochs the model was trained for
    """
    state = torch.load(os.path.join(save_path, filename), map_location=device)
    net.load_state_dict(state['net'])
    best_acc = state['acc']
    epoch = state['epoch']
    # Older checkpoints may not contain an optimizer state
    optim_state = state.get('optim')
    if optim_state and optim:
        optim.load_state_dict(optim_state)
    return net, optim, best_acc, epoch
def load_checkpoint(self, checkpoint_path, optim=None, only_model=False):
    """Load a checkpoint into the model and, optionally, the optimizer.

    Also restores the loss/history values stored in the checkpoint into
    ``self.states``.
    """
    if not os.path.isfile(checkpoint_path):
        raise FileNotFoundError('Checkpoint file not found: %s' % checkpoint_path)
    print('[PROGRESS] Loading checkpoint: {}'.format(checkpoint_path), end="", flush=True)
    # Load the checkpoint on the CPU first to avoid GPU memory spikes
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    # Load the model state dictionary from the checkpoint
    self.model.load_state_dict(checkpoint['state_dict'])
    print('\r[INFO] Checkpoint has been loaded: {}'.format(checkpoint_path))
    if not only_model:
        # Load the optimizer parameters from the checkpoint
        optim.load_state_dict(checkpoint['optimizer'])
        # Copy the remaining checkpoint entries (loss and history values/lists)
        # into the states dictionary
        self.states.update({key: value for key, value in checkpoint.items()
                            if key not in ['optimizer', 'state_dict']})
        print('[INFO] History lists have been loaded')
        print('[INFO] Resuming from epoch {}'.format(checkpoint['epoch'] + 1))
    # The checkpoint contents now live on the instance, so delete the
    # temporary copy to free space on the GPU
    del checkpoint
    torch.cuda.empty_cache()
    return optim
def load(name):
    state_dicts = torch.load(name)
    model.load_state_dict(state_dicts['net'])
    try:
        optim.load_state_dict(state_dicts['opt'])
    except ValueError:
        print('Cannot load optimizer state (parameter groups do not match)')
def load_model(model_path, model, optim):
    ckpt = torch.load(model_path)
    model.load_state_dict(ckpt['state_dict'])
    optim.load_state_dict(ckpt['optimizer'])
    epoch = ckpt['epoch']
    return model, epoch, optim
def load_model(net, optim, path):
    print("==> restoring checkpoint")
    ckpt = torch.load(path)
    epoch = ckpt['epoch']
    net.load_state_dict(ckpt['state_dict'])
    optim.load_state_dict(ckpt['optimizer'])
    print("==> loaded checkpoint '{}' (epoch {})".format(path, epoch))
    return net, optim, epoch
def load(name):
    state_dicts = torch.load(name)
    # Drop temporary variables that were saved along with the network state
    network_state_dict = {k: v for k, v in state_dicts['net'].items()
                          if 'tmp_var' not in k}
    combined_model.load_state_dict(network_state_dict)
    try:
        optim.load_state_dict(state_dicts['opt'])
        feature_optim.load_state_dict(state_dicts['opt_f'])
    except (KeyError, ValueError):
        print('Cannot load optimizer state (missing or mismatched entries)')
def load(name, load_opt=False, model=model):
    print("loading fn_model from", name)
    state_dicts = torch.load(name, map_location='cpu')
    model.load_state_dict(state_dicts['net'])
    model.to(c.device)
    if load_opt:
        try:
            optim.load_state_dict(state_dicts['opt'])
        except ValueError:
            print('Cannot load optimizer state (parameter groups do not match)')
def loadModel(conf, device):
    if conf.modelSave == "best":
        fileToLoad = conf.modelFile
    else:
        fileToLoad = conf.modelFileLoad
    print("Loading {}".format(fileToLoad), flush=True)
    model, optim = makeModel(conf, device)
    checkpoint = torch.load(fileToLoad)
    model.load_state_dict(checkpoint['model_state_dict'])
    optim.load_state_dict(checkpoint['optim_state_dict'])
    return model, optim
def load_checkpoint(checkpoint_path: str, model: nn.Module,
                    optim: optimizer.Optimizer) -> Tuple[int, int, float]:
    """Loads a training checkpoint.

    :param checkpoint_path: path to the checkpoint
    :param model: model whose state should be updated
    :param optim: optimizer whose state should be updated
    :return: tuple of starting epoch id, starting step id, best checkpoint score
    """
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint[_MODEL_STATE_DICT])
    optim.load_state_dict(checkpoint[_OPTIMIZER_STATE_DICT])
    start_epoch_id = checkpoint[_EPOCH] + 1
    step = checkpoint[_STEP] + 1
    best_score = checkpoint[_BEST_SCORE]
    return start_epoch_id, step, best_score
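# The key constants used above are module-level names in the original source
# and are not shown here; plausible definitions (assumed, not confirmed by the
# source) would be:
_MODEL_STATE_DICT = 'model_state_dict'
_OPTIMIZER_STATE_DICT = 'optimizer_state_dict'
_EPOCH = 'epoch'
_STEP = 'step'
_BEST_SCORE = 'best_score'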
def build_optim(_model, train_args, checkpoint=None):
    saved_optimizer_state_dict = None
    if checkpoint:
        optim = checkpoint['optim']
        saved_optimizer_state_dict = optim.state_dict()
    else:
        optim = AdamW(_model.parameters(), lr=train_args.lr, eps=1e-8)
    if train_args.train_from is not None and saved_optimizer_state_dict is not None:
        optim.load_state_dict(saved_optimizer_state_dict)
        if train_args.device != 'cpu':
            # Optimizer state tensors are restored on the CPU; move them to
            # the training device
            for state in optim.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda(device=train_args.device)
    return optim
def load_checkpoint(fname, model, optim):
    # Defaults for a fresh run, or when the checkpoint is missing some keys
    min_loss, prev_epochs = float('inf'), 0
    if os.path.isfile(fname):
        print("\nCheckpoint file found. Resuming from checkpoint.\n")
        checkpoint = torch.load(fname)
        if 'model_state_dict' in checkpoint:
            model.load_state_dict(checkpoint['model_state_dict'])
            print("Model parameters loaded from checkpoint.")
        if 'optimizer_state_dict' in checkpoint:
            optim.load_state_dict(checkpoint['optimizer_state_dict'])
            print("Optimizer parameters loaded from checkpoint.")
        if 'loss' in checkpoint:
            min_loss = checkpoint['loss']
            print("Previous validation loss loaded.")
        if 'epoch' in checkpoint:
            prev_epochs = checkpoint['epoch']
            print("Continuing training from epoch: {}".format(prev_epochs))
    return model, optim, min_loss, prev_epochs
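# Hypothetical usage of load_checkpoint above when resuming a training loop.
# The names model, optim, num_epochs and train_one_epoch are assumptions for
# illustration, not part of the original source:
model, optim, min_loss, prev_epochs = load_checkpoint('checkpoint.pth', model, optim)
for epoch in range(prev_epochs, num_epochs):
    loss = train_one_epoch(model, optim)
    if loss < min_loss:
        min_loss = loss
        torch.save({'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optim.state_dict(),
                    'loss': min_loss, 'epoch': epoch}, 'checkpoint.pth')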
def load_model(output, epoch, model, model_name, optim=None, scheduler=None,
               csv_path=None):
    checkpoint_name = "{}.pth".format(model_name)
    try:
        print("checkpoint: ", os.path.join(output, checkpoint_name))
        checkpoint = torch.load(os.path.join(output, checkpoint_name))
        try:
            # Full training checkpoint: model, optimizer, scheduler and stats
            model.load_state_dict(checkpoint['state_dict'], strict=False)
            if optim is not None:
                optim.load_state_dict(checkpoint['opt_dict'])
            if scheduler is not None:
                scheduler.load_state_dict(checkpoint['scheduler_dict'])
            epoch_resume = checkpoint["epoch"] + 1
            bestLoss = checkpoint["best_loss"]
        except KeyError:
            # Plain state_dict without training metadata
            model.load_state_dict(checkpoint)
            epoch_resume = 0
        if csv_path is not None:
            stats = dict(
                epoch_resume="Resuming from epoch {}\n".format(epoch_resume))
            write_csv_stats(csv_path, stats)
        return epoch_resume
    except FileNotFoundError:
        print("No checkpoint found\n")
    except RuntimeError:
        # Model was saved inside nn.DataParallel; build a new OrderedDict
        # that does not contain the `module.` prefix
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in checkpoint.items():
            name = k[7:]  # remove `module.`
            new_state_dict[name] = v
        model.load_state_dict(new_state_dict)
    return 0
def load_model(model_path):
    # Recover the backbone architecture from the checkpoint filename
    if "vgg16" in model_path:
        model = models.vgg16(pretrained=True)
    elif "vgg19" in model_path:
        model = models.vgg19(pretrained=True)
    elif "densenet121" in model_path:
        model = models.densenet121(pretrained=True)
    elif "densenet161" in model_path:
        model = models.densenet161(pretrained=True)
    else:
        raise ValueError('Cannot infer architecture from path: %s' % model_path)
    state = torch.load(model_path)
    model.classifier = state['classifier']
    optim = state['optimizer']
    model.load_state_dict(state['state_dict'])
    optim.load_state_dict(state['optimizer_state_dict'])
    model.class_to_idx = state['class_to_idx']
    return model, optim
def train_lstm(x, y, training, lr=0.003):
    Epochs = 120
    policy_model = PreAccLstm(D=args.hidden, layers=args.layers)
    print(policy_model)
    checkpoint = load_checkpoint(args)
    policy_model = torch.nn.DataParallel(policy_model).cuda()
    loss_func = nn.L1Loss()
    optim = torch.optim.Adam(policy_model.parameters(), lr=lr)
    if checkpoint is not None:
        policy_model.load_state_dict(checkpoint['state_dict'])
        optim.load_state_dict(checkpoint['optimizer'])

    torch_trainset = Data.TensorDataset(x, y)
    train_loader = Data.DataLoader(dataset=torch_trainset, batch_size=4,
                                   shuffle=True, num_workers=4)
    train = [train_loader]
    for tr in training:
        # Sample 95% of each additional training set
        idx = np.random.choice(tr[0].size(0), int(tr[0].size(0) * 0.95))
        torch_trainset = Data.TensorDataset(tr[0][idx], tr[1][idx])
        train_loader = Data.DataLoader(dataset=torch_trainset, batch_size=2,
                                       shuffle=True, num_workers=2)
        train.append(train_loader)

    for epoch in range(Epochs):
        # Train for one epoch
        loss = LSTM_train(policy_model, train, loss_func, optim, epoch)
    return policy_model, loss
def main():
    args = get_args()
    device, dtype = args.device, args.dtype

    train_loader, val_loader = get_loaders(args.dataroot, args.batch_size,
                                           args.batch_size, args.input_size,
                                           args.workers, args.world_size,
                                           args.local_rank)
    model = MnasNet(n_class=args.num_classes, width_mult=args.scaling,
                    drop_prob=0.0, num_steps=len(train_loader) * args.epochs)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    flops = flops_benchmark.count_flops(MnasNet, 1, device, dtype,
                                        args.input_size, 3,
                                        width_mult=args.scaling)
    if not args.child:
        print(model)
        print('number of parameters: {}'.format(num_parameters))
        print('FLOPs: {}'.format(flops))

    # define loss function (criterion) and optimizer
    criterion = CrossEntropyLoss()
    mixup = Mixup(args.num_classes, args.mixup, args.smooth_eps)

    model, criterion = model.to(device=device, dtype=dtype), criterion.to(
        device=device, dtype=dtype)
    if args.dtype == torch.float16:
        for module in model.modules():  # keep batchnorm in FP32 for fp16 training
            if is_bn(module):
                module.to(dtype=torch.float32)

    if args.distributed:
        args.device_ids = [args.local_rank]
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_init,
                                world_size=args.world_size,
                                rank=args.local_rank)
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.local_rank], output_device=args.local_rank)
        print('Node #{}'.format(args.local_rank))
    else:
        model = torch.nn.parallel.DataParallel(model,
                                               device_ids=[args.local_rank],
                                               output_device=args.local_rank)

    optimizer_class = torch.optim.SGD
    optimizer_params = {"lr": args.learning_rate, "momentum": args.momentum,
                        "weight_decay": args.decay, "nesterov": True}
    if args.find_clr:
        optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.decay, nesterov=True)
        find_bounds_clr(model, train_loader, optimizer, criterion, device,
                        dtype, min_lr=args.min_lr, max_lr=args.max_lr,
                        step_size=args.epochs_per_step * len(train_loader),
                        mode=args.mode, save_path=args.save_path)
        return

    if args.sched == 'clr':
        scheduler_class = CyclicLR
        scheduler_params = {"base_lr": args.min_lr, "max_lr": args.max_lr,
                            "step_size": args.epochs_per_step * len(train_loader),
                            "mode": args.mode}
    elif args.sched == 'multistep':
        scheduler_class = MultiStepLR
        scheduler_params = {"milestones": args.schedule, "gamma": args.gamma}
    elif args.sched == 'cosine':
        scheduler_class = CosineLR
        scheduler_params = {"max_epochs": args.epochs,
                            "warmup_epochs": args.warmup,
                            "iter_in_epoch": len(train_loader)}
    elif args.sched == 'gamma':
        scheduler_class = StepLR
        scheduler_params = {"step_size": 30, "gamma": args.gamma}
    else:
        raise ValueError('Wrong scheduler!')

    optim = OptimizerWrapper(model, optimizer_class=optimizer_class,
                             optimizer_params=optimizer_params,
                             scheduler_class=scheduler_class,
                             scheduler_params=scheduler_params,
                             use_shadow_weights=args.dtype == torch.float16)
    best_test = 0

    # optionally resume from a checkpoint
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optim.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            checkpoint_path = os.path.join(
                args.resume, 'checkpoint{}.pth.tar'.format(args.local_rank))
            csv_path = os.path.join(args.resume,
                                    'results{}.csv'.format(args.local_rank))
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optim.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_path, checkpoint['epoch']))
            data = []
            with open(csv_path) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    data.append(row)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.evaluate:
        loss, top1, top5 = test(model, val_loader, criterion, device, dtype,
                                args.child)  # TODO
        return

    csv_logger = CsvLogger(filepath=args.save_path, data=data,
                           local_rank=args.local_rank)
    csv_logger.save_params(sys.argv, args)

    claimed_acc1 = None
    claimed_acc5 = None
    if args.input_size in claimed_acc_top1:
        if args.scaling in claimed_acc_top1[args.input_size]:
            claimed_acc1 = claimed_acc_top1[args.input_size][args.scaling]
            if not args.child:
                csv_logger.write_text('Claimed accuracy is {:.2f}% top-1'.format(
                    claimed_acc1 * 100.))
    train_network(args.start_epoch, args.epochs, optim, model, train_loader,
                  val_loader, criterion, mixup, device, dtype, args.batch_size,
                  args.log_interval, csv_logger, args.save_path, claimed_acc1,
                  claimed_acc5, best_test, args.local_rank, args.child)
def continuous_optim(tensor_list, train_data, loss_fun, epochs=10,
                     val_data=None, other_args=dict()):
    """
    Train a tensor network using gradient descent on an input dataset

    Args:
        tensor_list: List of tensors encoding the network being trained
        train_data:  The data used to train the network
        loss_fun:    Scalar-valued loss function of the type
                     tens_list, data -> scalar_loss
                     (This depends on the task being learned)
        epochs:      Number of epochs to train for. When val_data is given,
                     setting epochs=None implements early stopping
        val_data:    The data used for validation
        other_args:  Dictionary of other arguments for the optimization, with
                     some options below (feel free to add more)

            optim:     Choice of Pytorch optimizer (default='SGD')
            lr:        Learning rate for optimizer (default=1e-3)
            bsize:     Minibatch size for training (default=100)
            reps:      Number of times to repeat training data per epoch (default=1)
            print:     Whether to print info (default=True)
            dyn_print: Use dynamic printing (default=False)
            hist:      Whether to return losses from train and val sets (default=False)
            momentum:  Momentum value for continuous optimization (default=0)
            cvg_threshold: Threshold for testing convergence of the optimization
                       (optimization is stopped if
                       |(prev_loss - cur_loss)/prev_loss| < cvg_threshold).
                       If None, convergence is not checked. If epochs is set as
                       well, optimization is stopped either when the convergence
                       criterion is met or when epochs is reached (default=None)
            lr_scheduler: A function taking an optimizer as input and returning
                       a learning rate scheduler for this optimizer (default=None)
            save_optimizer_state: If True, other_args should have an empty dict
                       for the key optimizer_state. After the function returns,
                       this dict will contain
                       {optimizer_state: optimizer state_dict,
                       lr_scheduler_state: scheduler state_dict (if any)}
                       (default=False)
            load_optimizer_state: A dictionary used to initialize the optimizer
                       (and scheduler if any) from a previously saved optimizer
                       state (default=None)
            grad_masking_function: A function applied to the list of tensor
                       parameters between the backward pass and the optimizer
                       step (can be used to e.g. zero out parts of the gradient)
                       (default=None)
            stop_condition: A function taking the training and validation loss
                       as input after each epoch and returning True if
                       optimization should be stopped (default=None)

    Returns:
        better_list: List of tensors with the same shape as tensor_list, but
                     having been optimized using the appropriate optimizer.
                     When validation data is given, the model with the lowest
                     validation loss is output, otherwise the model with the
                     lowest training loss
        first_loss:  Initial loss of the model on the validation set, before
                     any training. If no validation set is provided, the first
                     training loss is instead returned
        best_loss:   The value of the validation/training loss for the model
                     output as better_list
        best_epoch:  Epoch at which best_model was found
        loss_record: If hist=True in other_args, the history of all validation
                     and training losses is returned as a tuple of Pytorch
                     vectors (train_loss, val_loss), with each vector having
                     length equal to the number of epochs of training. When no
                     validation loss is provided, the second item (val_loss)
                     is an empty tensor.
    """
    # Check input and initialize local record variables
    early_stop = epochs is None
    has_val = val_data is not None
    optim = other_args.get('optim', 'SGD')
    lr = other_args.get('lr', 1e-3)
    bsize = other_args.get('bsize', 100)
    reps = other_args.get('reps', 1)
    prnt = other_args.get('print', True)
    hist = other_args.get('hist', False)
    dyn_print = other_args.get('dyn_print', False)
    lr_scheduler = other_args.get('lr_scheduler', None)
    cvg_threshold = other_args.get('cvg_threshold', None)
    save_optimizer_state = other_args.get('save_optimizer_state', None)
    load_optimizer_state = other_args.get('load_optimizer_state', None)
    grad_masking_function = other_args.get('grad_masking_function', None)
    momentum = other_args.get('momentum', 0)
    stop_condition = other_args.get('stop_condition', None)

    if save_optimizer_state and 'optimizer_state' not in other_args:
        raise ValueError("an empty dictionary should be passed as the "
                         "optimizer_state argument to store the optimizer state.")
    if early_stop and not has_val:
        raise ValueError("Early stopping (epochs=None) requires val_data "
                         "to be input")

    loss_rec, first_loss, best_loss, best_network, best_epoch = \
        [], None, np.inf, tensor_list, 0
    if hist:
        loss_record = ([], [])  # (train_record, val_record)

    # Function to maybe print, conditioned on `prnt`
    m_print = lambda s: print(s, end='\r' if dyn_print else '\n') if prnt else None

    # Function to record loss information and return whether to stop
    def record_loss(new_loss, new_network, epoch_num):
        # Load record variables from the outer scope
        nonlocal loss_rec, first_loss, best_loss, best_network, best_epoch

        # Check for first and best loss
        if best_loss is None or new_loss < best_loss:
            best_loss, best_network, best_epoch = new_loss, new_network, epoch_num
        if first_loss is None:
            first_loss = new_loss

        # Update the loss record and check for early stopping. If you want to
        # change the early stopping criterion, this is the place to do it.
        window = 2  # Number of epochs kept for checking early stopping
        warmup = 1  # Number of epochs before early stopping is checked
        if len(loss_rec) < window:
            stop, loss_rec = False, loss_rec + [new_loss]
        else:
            stop = (new_loss > max(loss_rec)) and (epoch_num >= warmup)
            loss_rec = loss_rec[1:] + [new_loss]
        return stop

    # Another loss logging function, but for recording *all* loss history
    @torch.no_grad()
    def loss_history(new_loss, is_val):
        if not hist:
            return
        nonlocal loss_record
        loss_record[int(is_val)].append(new_loss)

    # Function to run the TN on validation data
    @torch.no_grad()
    def run_val(t_list):
        val_loss = []
        # Note that `batchify` uses different logic for different types of
        # input, so update batchify when you work on tensor completion
        for batch in batchify(val_data):
            val_loss.append(loss_fun(t_list, batch))
        if has_val:
            val_loss = torch.mean(torch.tensor(val_loss))
        return val_loss

    # Copy tensor_list so the original is unchanged
    tensor_list = copy_network(tensor_list)

    # Record the initial validation loss (if we have a validation dataset)
    if has_val:
        record_loss(run_val(tensor_list), tensor_list, 0)

    # Initialize the optimizer, using only the keyword args it supports
    optim = getattr(torch.optim, optim)
    opt_args = signature(optim).parameters.keys()
    kwargs = {'lr': lr, 'momentum': momentum}  # <- Add new options here
    kwargs = {k: v for (k, v) in kwargs.items() if k in opt_args}
    optim = optim(tensor_list, **kwargs)
    if lr_scheduler:
        # Instantiate the learning rate scheduler
        scheduler = lr_scheduler(optim)
    if load_optimizer_state:
        optim.load_state_dict(
            other_args["load_optimizer_state"]["optimizer_state"])
        if lr_scheduler:
            scheduler.load_state_dict(
                other_args["load_optimizer_state"]["lr_scheduler_state"])

    # Loop over validation and training for the given number of epochs
    ep = 1
    prev_loss = np.inf
    while epochs is None or ep <= epochs:
        # Train the network on all the training data
        prev_tensor_list = copy_network(tensor_list)
        train_loss, num_train = 0., 0
        for batch in batchify(train_data, batch_size=bsize, reps=reps):
            loss = loss_fun(tensor_list, batch)
            optim.zero_grad()
            loss.backward()
            if grad_masking_function:
                grad_masking_function(tensor_list)
            optim.step()
            with torch.no_grad():
                num_train += 1
                train_loss += loss
        train_loss /= num_train
        if lr_scheduler:
            scheduler.step(train_loss)
        loss_history(train_loss, is_val=False)

        val_loss = run_val(tensor_list) if has_val else None
        val_loss_str = f"Val. loss: {val_loss.data:.10f}" if has_val else ""
        m_print(f"EPOCH {ep} {'('+str(reps)+' reps)' if reps > 1 else ''}\t\t"
                f"{val_loss_str}\t\t Train loss: {train_loss.data:.10f}\t\t "
                f"Convergence: {np.abs(train_loss-prev_loss)/prev_loss:.10f}")

        # Get the validation loss if we have it, otherwise record training loss
        if has_val:
            # Record validation loss and check the early stopping condition
            loss_history(val_loss, is_val=True)
            if record_loss(val_loss,
                           copy_network(tensor_list) if has_val else prev_tensor_list,
                           ep) and early_stop:
                print("\nEarly stopping condition reached")
                break
        else:
            record_loss(train_loss,
                        copy_network(tensor_list) if has_val else prev_tensor_list,
                        ep)
        if cvg_threshold and np.abs(train_loss - prev_loss) / prev_loss < cvg_threshold:
            print("\nConvergence criterion reached")
            break
        if stop_condition and stop_condition(train_loss=train_loss,
                                             val_loss=val_loss):
            print("\nStopping condition reached")
            break
        prev_loss = train_loss
        ep += 1
    m_print("")

    # Save the optimizer state if needed
    if save_optimizer_state:
        other_args["optimizer_state"]["optimizer_state"] = optim.state_dict()
        if lr_scheduler:
            other_args["optimizer_state"]["lr_scheduler_state"] = \
                scheduler.state_dict()

    if hist:
        loss_record = tuple(torch.tensor(fr) for fr in loss_record)
        return best_network, first_loss, best_loss, best_epoch, loss_record
    else:
        return best_network, first_loss, best_loss
if args.task == 's2m':
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay, nesterov=True)
else:
    optimizer = torch.optim.Adam(model.parameters(), args.lr)

# Initialize best_prec1 before the resume block so a restored value is not
# overwritten afterwards
best_prec1 = 0
if args.resume:
    print("=> loading checkpoint '{}'".format(args.resume))
    checkpoint = torch.load(args.resume)
    args.start_epoch = checkpoint['epoch']
    best_prec1 = checkpoint['best_prec1']
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    print("=> loaded checkpoint '{}' (epoch {})".format(
        args.resume, checkpoint['epoch']))

criterion_bce = nn.BCELoss()
criterion_cel = nn.CrossEntropyLoss()
best_pred_y = []
best_gt_y = []
global_step = 0
total_steps = args.grl_rampup_epochs * len(source_loader)


def train(epoch):
    model.train()
def main(run_id, pretrained, data_files, model_params, training_params, device):
    best_acc1 = 0
    batch_size = training_params['batch_size']
    test_batch_size = training_params['test_batch_size']
    epochs = training_params['epochs']
    start_epoch = training_params['start_epoch']
    n_warmup_steps = training_params['n_warmup_steps']
    log_interval = training_params['log_interval']

    # the model is trained for binary classification (for the data loader)
    binary_class = model_params['NUM_SPOOF_CLASS'] == 2

    kwargs = {'num_workers': 2, 'pin_memory': True} \
        if device == torch.device('cuda') else {}

    # create model
    model = Detector(**model_params).to(device)
    num_model_params = sum(p.numel() for p in model.parameters()
                           if p.requires_grad)
    print('===> Model total parameter: {}'.format(num_model_params))

    # Wrap the model for multi-GPUs, if necessary
    if device == torch.device('cuda') and torch.cuda.device_count() > 1:
        print('multi-gpu')
        model = nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    optim = optimizer.ScheduledOptim(
        torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                         betas=(0.9, 0.98), eps=1e-09, weight_decay=1e-4,
                         lr=3e-4, amsgrad=True),
        training_params['n_warmup_steps'])

    # optionally resume from a checkpoint
    if pretrained:
        if os.path.isfile(pretrained):
            print("===> loading checkpoint '{}'".format(pretrained))
            checkpoint = torch.load(pretrained)
            start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optim.load_state_dict(checkpoint['optimizer'])
            print("===> loaded checkpoint '{}' (epoch {})".format(
                pretrained, checkpoint['epoch']))
        else:
            print("===> no checkpoint found at '{}'".format(pretrained))

    # Data loading code
    train_data = SpoofDatsetSystemID(data_files['train_scp'],
                                     data_files['train_utt2index'],
                                     binary_class)
    val_data = SpoofDatsetSystemID(data_files['dev_scp'],
                                   data_files['dev_utt2index'], binary_class)
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                               shuffle=True, **kwargs)
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=test_batch_size,
                                             shuffle=True, **kwargs)

    best_epoch = 0
    early_stopping, max_patience = 0, 100  # for early stopping
    os.makedirs("model_snapshots/" + run_id, exist_ok=True)
    for epoch in range(start_epoch, start_epoch + epochs):
        trainer.train(train_loader, model, optim, epoch, device, log_interval)
        acc1 = validate.validate(val_loader, data_files['dev_utt2systemID'],
                                 model, device, log_interval)
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        # adjust learning rate + early stopping
        if is_best:
            early_stopping = 0
            best_epoch = epoch + 1
        else:
            early_stopping += 1
            if epoch - best_epoch > 2:
                optim.increase_delta()
                best_epoch = epoch + 1
        if early_stopping == max_patience:
            break

        # save model
        optimizer.save_checkpoint(
            {'epoch': epoch,
             'state_dict': model.state_dict(),
             'best_acc1': best_acc1,
             'optimizer': optim.state_dict()},
            is_best, "model_snapshots/" + str(run_id),
            str(epoch) + ('_%.3f' % acc1) + ".pth.tar")
def main(config):
    device = torch.device("cuda")

    generator_config = config['generator']
    # Model experiment epoch bookkeeping
    initial_epoch = generator_config['initial_epoch']  # 0 by default, otherwise 'N' when loading
    num_epochs = generator_config['num_epochs']        # Total number of epochs
    plt_sep = generator_config['plot_separate']        # Plot train, valid and test separately: 0 or 1
    lamb = generator_config['set_lamda']               # Weight of the regularization term
    loss_up = generator_config['loss_up']              # Scaling factor applied to the classification loss
    bsz = generator_config['batch_size']               # Batch size for the training/testing loaders

    # Get the model architecture
    model_params = config['model_params']
    fin = model_params['fin']        # Input node features
    fou1 = model_params['fou1']      # Output node features for the first GC block
    clus = model_params['clus']      # Number of clusters learned for the first GC block
    fou2 = model_params['fou2']      # Output node features for the second GC block
    hlin = model_params['hlin']      # Output of the first linear layer
    outp = model_params['outp']      # Number of output classes
    psudim = model_params['psudim']  # Dimension of the pseudo-coordinates

    optm_config = config['optimizer']
    b1 = optm_config['B1']  # B1 for the Adam optimizer, e.g. 0.9
    b2 = optm_config['B2']  # B2 for the Adam optimizer, e.g. 0.999
    lr = optm_config['LR']  # Learning rate, e.g. 0.001

    directory_config = config['directories']
    out_dir = directory_config['out_dir']         # Path to save the outputs of the experiments
    config_name = directory_config['ConfigName']  # Name that uniquely identifies this experiment
    log_path = join(out_dir, config_name, 'log')  # Path to save the training log files
    main_path = directory_config['datafile']      # Full path of the dataset (folder contains train, valid and test)
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    if not os.path.exists(join(log_path, 'weights')):  # Path to save the training weights
        os.makedirs(join(log_path, 'weights'))

    # Initialize the model, optimizer and data loaders
    model = GCNet(fin=fin, fou1=fou1, clus=clus, fou2=fou2, hlin=hlin,
                  outp=outp, psudim=psudim)
    model = model.to(device)
    compute_loss = torch.nn.BCEWithLogitsLoss()  # Loss function: binary cross-entropy with logits
    optimizer = optm.Adam(model.parameters(), lr=lr, betas=(b1, b2))

    train_set = GeometricDataset('train', main_path)
    train_loader = DataLoader(train_set, batch_size=bsz, num_workers=4, shuffle=True)
    valid_set = GeometricDataset('valid', main_path)
    valid_loader = DataLoader(valid_set, batch_size=bsz, num_workers=4, shuffle=False)
    test_set = GeometricDataset('test', main_path)
    test_loader = DataLoader(test_set, batch_size=bsz, num_workers=4, shuffle=False)

    if initial_epoch > 0:
        print("===> Loading pre-trained weight {}".format(initial_epoch - 1))
        weight_path = 'weights/model-{:04d}.pt'.format(initial_epoch - 1)
        checkpoint = torch.load(join(log_path, weight_path))
        model.load_state_dict(checkpoint['model_state_dict'])
        # The optimizer instance (not the torch.optim module) holds the state
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    def save_checkpoint(epc):
        w_path = 'weights/model-{:04d}.pt'.format(epc)
        torch.save({'epoch': epc,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()},
                   join(log_path, w_path))

    # set up the callbacks used to plot curves
    my_metric = ['Accuracy']
    my_loss = ['Loss']
    logger = Logger(mylog_path=log_path, mylog_name="training.log",
                    myloss_names=my_loss, mymetric_names=my_metric)
    ls_plt = LossPlotter(mylog_path=log_path, mylog_name="training.log",
                         myloss_names=my_loss, mymetric_names=my_metric,
                         cmb_plot=plt_sep)

    def train(loader):
        lss_all = acc_all = 0
        model.zero_grad()
        model.train()
        for data in tqdm(loader):
            data.to(device)
            optimizer.zero_grad()
            out, reg = model(data)
            loss = (loss_up * compute_loss(
                out, func.one_hot(torch.LongTensor([data.sx.item()]),
                                  num_classes=2).float().cuda())) + (lamb * reg)
            acc = bin_accuracy(torch.max(out, 1)[1], data.sx)
            loss.backward()
            optimizer.step()
            lss_all += loss.item()
            acc_all += acc
        return np.array([lss_all / len(loader), acc_all / len(loader)])

    def test(loader):
        lss_all = acc_all = 0
        model.eval()
        with torch.no_grad():
            for data in tqdm(loader):
                data.to(device)
                out, reg = model(data)
                loss = (loss_up * compute_loss(
                    out, func.one_hot(torch.LongTensor([data.sx.item()]),
                                      num_classes=2).float().cuda())) + (lamb * reg)
                acc = bin_accuracy(torch.max(out, 1)[1], data.sx)
                lss_all += loss.item()
                acc_all += acc
        return np.array([lss_all / len(loader), acc_all / len(loader)])

    print("===> Starting Model Training at Epoch: {}".format(initial_epoch))
    for epoch in range(initial_epoch, num_epochs):
        start = time.time()
        print("\n\n")
        print("Epoch: {}".format(epoch))
        train_metric = train(train_loader)
        print("===> Training Epoch {}: Loss = {:.4f}, Accuracy = {:.4f}".format(
            epoch, train_metric[0], train_metric[1]))
        val_metric = test(valid_loader)
        print("===> Validation Epoch {}: Loss = {:.4f}, Accuracy = {:.4f}".format(
            epoch, val_metric[0], val_metric[1]))
        test_metric = test(test_loader)
        print("===> Testing Epoch {}: Loss = {:.4f}, Accuracy = {:.4f}".format(
            epoch, test_metric[0], test_metric[1]))
        logger.to_csv(np.concatenate((train_metric, val_metric, test_metric)), epoch)
        ls_plt.plotter()
        save_checkpoint(epoch)
        end = time.time()
        print("===> Epoch {} completed in {:.4f} seconds".format(epoch, end - start))
    print("===> Done training for a total of {} epochs".format(num_epochs))
def main(args):
    fine_tune = not args.no_finetune
    pre_train = True
    lr = args.lr
    input_size = args.input_size
    order = 2
    embedding = args.embedding_dim
    model_names_list = args.model_names_list
    args.exp_dir = os.path.join(args.dataset, args.exp_dir)

    keep_aspect = args.dataset not in ['cars', 'aircrafts']
    if args.dataset in ['aircrafts']:
        crop_from_size = [(x * 256) // 224 for x in input_size]
    else:
        crop_from_size = input_size
    if 'inat' in args.dataset:
        split = {'train': 'train', 'val': 'val'}
    else:
        split = {'train': 'train_val', 'val': 'test'}
    if len(input_size) > 1:
        assert order == len(input_size)
    if not keep_aspect:
        input_size = [(x, x) for x in input_size]
        crop_from_size = [(x, x) for x in crop_from_size]

    exp_root = '../exp'
    checkpoint_folder = os.path.join(exp_root, args.exp_dir, 'checkpoints')
    if not os.path.isdir(checkpoint_folder):
        os.makedirs(checkpoint_folder)
    init_checkpoint_folder = os.path.join(exp_root, args.exp_dir,
                                          'init_checkpoints')
    if not os.path.isdir(init_checkpoint_folder):
        os.makedirs(init_checkpoint_folder)

    # log the setup for the experiments
    args_dict = vars(args)
    with open(os.path.join(exp_root, args.exp_dir, 'args.txt'), 'a') as f:
        f.write(json.dumps(args_dict, sort_keys=True, indent=4))

    # make sure the dataset is ready
    if 'inat' in args.dataset:
        setup_dataset('inat')
    else:
        setup_dataset(args.dataset)

    # ================== Create data loaders =================================
    data_transforms = {
        'train': [transforms.Compose([
            transforms.Resize(x[0]),
            transforms.RandomCrop(x[1]),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])])
            for x in zip(crop_from_size, input_size)],
        'val': [transforms.Compose([
            transforms.Resize(x[0]),
            transforms.CenterCrop(x[1]),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])])
            for x in zip(crop_from_size, input_size)],
    }

    if args.dataset == 'cub':
        from CUBDataset import CUBDataset as dataset
    elif args.dataset == 'cars':
        from CarsDataset import CarsDataset as dataset
    elif args.dataset == 'aircrafts':
        from AircraftsDataset import AircraftsDataset as dataset
    elif 'inat' in args.dataset:
        from iNatDataset import iNatDataset as dataset
        if args.dataset == 'inat':
            subset = None
        else:
            subset = args.dataset[len('inat_'):]
            subset = subset[0].upper() + subset[1:]
    else:
        raise ValueError('Unknown dataset: %s' % args.dataset)

    if 'inat' in args.dataset:
        dset = {x: dataset(dset_root['inat'], split[x], subset,
                           transform=data_transforms[x])
                for x in ['train', 'val']}
        dset_test = dataset(dset_root['inat'], 'test', subset,
                            transform=data_transforms['val'])
    else:
        dset = {x: dataset(dset_root[args.dataset], split[x],
                           transform=data_transforms[x])
                for x in ['train', 'val']}
        dset_test = dataset(dset_root[args.dataset], 'test',
                            transform=data_transforms['val'])

    dset_loader = {x: torch.utils.data.DataLoader(
                        dset[x], batch_size=args.batch_size, shuffle=True,
                        num_workers=8, drop_last=drop_last)
                   for x, drop_last in zip(['train', 'val'], [True, False])}

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # ======================= Initialize the model ===========================
    # The argument embedding is used only when tensor_sketch is True.
    # The argument order is used only when the model parameters are shared
    # between feature extractors.
    model = create_bcnn_model(model_names_list, len(dset['train'].classes),
                              args.pooling_method, fine_tune, pre_train,
                              embedding, order,
                              m_sqrt_iter=args.matrix_sqrt_iter,
                              fc_bottleneck=args.fc_bottleneck,
                              proj_dim=args.proj_dim,
                              update_sketch=args.update_sketch,
                              gamma=args.gamma)
    model = model.to(device)
    model = torch.nn.DataParallel(model)

    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss()

    # ====================== Initialize optimizer ============================
    init_model_checkpoint = os.path.join(init_checkpoint_folder,
                                         'checkpoint.pth.tar')
    start_itr = 0
    optim_fc = initialize_optimizer(model, args.init_lr, optimizer='sgd',
                                    wd=args.init_wd, finetune_model=False,
                                    proj_lr=args.proj_lr, proj_wd=args.proj_wd)
    logger_name = 'train_init_logger'
    logger = initializeLogging(
        os.path.join(exp_root, args.exp_dir, 'train_init_history.txt'),
        logger_name)

    model_train_fc = False
    fc_model_path = os.path.join(exp_root, args.exp_dir, 'fc_params.pth.tar')
    if not args.train_from_beginning:
        if os.path.isfile(fc_model_path):
            # load the fc parameters if they are already trained
            print("=> loading fc parameters '{}'".format(fc_model_path))
            checkpoint = torch.load(fc_model_path)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded fc initialization parameters")
        elif os.path.isfile(init_model_checkpoint):
            # load the checkpoint if it exists and resume training
            print("=> loading checkpoint '{}'".format(init_model_checkpoint))
            checkpoint = torch.load(init_model_checkpoint)
            start_itr = checkpoint['itr']
            model.load_state_dict(checkpoint['state_dict'])
            optim_fc.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint for the fc initialization")
            model_train_fc = True
        else:
            model_train_fc = True
    else:
        # Training everything from the beginning
        model_train_fc = True
        start_itr = 0

    if model_train_fc:
        # do the training
        if not fine_tune:
            model.eval()
        model = train_model(model, dset_loader, criterion, optim_fc,
                            batch_size_update=256, epoch=args.init_epoch,
                            logger_name=logger_name, start_itr=start_itr,
                            checkpoint_folder=init_checkpoint_folder,
                            fine_tune=fine_tune)
        shutil.copyfile(
            os.path.join(init_checkpoint_folder, 'model_best.pth.tar'),
            fc_model_path)

    if fine_tune:
        optim = initialize_optimizer(model, args.lr, optimizer=args.optimizer,
                                     wd=args.wd, finetune_model=fine_tune,
                                     beta1=args.beta1, beta2=args.beta2)
        scheduler = torch.optim.lr_scheduler.LambdaLR(
            optim, lr_lambda=lambda epoch: 0.1 ** (epoch // 25))

        logger_name = 'train_logger'
        logger = initializeLogging(
            os.path.join(exp_root, args.exp_dir, 'train_history.txt'),
            logger_name)

        start_itr = 0
        # load from a checkpoint if one exists
        if not args.train_from_beginning:
            checkpoint_filename = os.path.join(checkpoint_folder,
                                               'checkpoint.pth.tar')
            if os.path.isfile(checkpoint_filename):
                print("=> loading checkpoint '{}'".format(checkpoint_filename))
                checkpoint = torch.load(checkpoint_filename)
                start_itr = checkpoint['itr']
                model.load_state_dict(checkpoint['state_dict'])
                optim.load_state_dict(checkpoint['optimizer'])
                scheduler.load_state_dict(checkpoint['scheduler'])
                print("=> loaded checkpoint '{}' (iteration {})".format(
                    checkpoint_filename, checkpoint['itr']))

        # Train the model
        model = train_model(model, dset_loader, criterion, optim,
                            batch_size_update=args.batch_size_update_model,
                            epoch=args.epoch, logger_name=logger_name,
                            checkpoint_folder=checkpoint_folder,
                            start_itr=start_itr, scheduler=scheduler)
checkpoint = torch.load("fin.pth") print(model.state_dict().keys()) states_to_load = {} for name, param in checkpoint["state_dict"].items(): if name.startswith("conv"): states_to_load[name] = param for c in states_to_load: print(c) print("Number of parameter variables to load:", len(states_to_load)) model_state = model.state_dict() print("Number of parameter variables in the model:", len(model_state)) model_state.update(states_to_load) model.load_state_dict(model_state) optim.load_state_dict(checkpoint["optimizer"]) train_part34(model, optim) #check_accuracy_part34(loader_val, model) input("End Part load") ######################################## model = MyModel() optim = optim.Adam(model.parameters(), lr=0.001) for (x, y) in loader_train: #print(x.shape) model(x) break losses = train_part34(model, optim, epochs=2)
output_shape = v_y.shape[2]
lstm = mdnLSTM(input_size=input_size, hidden_size=hidden_size,
               number_mixtures=number_mixtures).to(DEVICE)
optim = torch.optim.Adam(lstm.parameters(), lr=args.learning_rate)
model_save_name = 'model'
# Empty histories for a fresh run (assumed here; the original may define
# these elsewhere before this snippet)
train_cnts, train_losses, test_cnts, test_losses = [], [], [], []
if args.load:
    if not os.path.exists(args.model_loadname):
        print("load model: %s does not exist" % args.model_loadname)
        sys.exit()
    else:
        print("loading %s" % args.model_loadname)
        lstm_dict = torch.load(args.model_loadname)
        lstm.load_state_dict(lstm_dict['state_dict'])
        optim.load_state_dict(lstm_dict['optimizer'])
        train_cnts = lstm_dict['train_cnts']
        train_losses = lstm_dict['train_losses']
        test_cnts = lstm_dict['test_cnts']
        test_losses = lstm_dict['test_losses']
loop(data_loader, save_every=save_every, num_epochs=args.num_epochs,
     train_losses=train_losses, test_losses=test_losses,
     train_cnts=train_cnts, test_cnts=test_cnts, dummy=args.dummy)
embed()
import torch
import torch.nn as nn
import torch.optim as optim
import os

os.environ['CUDA_VISIBLE_DEVICES'] = "3"


class fnn(nn.Module):
    def __init__(self):
        super(fnn, self).__init__()
        self.linear = nn.Sequential(nn.Linear(5, 10), nn.Linear(10, 5))

    def forward(self, x):
        return self.linear(x)


model = fnn()
model = model.cuda()
optim = optim.Adam(model.parameters())

# Round-trip the optimizer state through a file
torch.save(optim.state_dict(), 'qq')
optim.load_state_dict(torch.load('qq'))
for state in optim.state:
    print(state)
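# Note: an optimizer's per-parameter state is created lazily, so right after
# construction state_dict()['state'] is empty and the loop above prints
# nothing. A sketch (with hypothetical random data) that populates the state
# before saving:
out = model(torch.randn(2, 5).cuda())
out.sum().backward()
optim.step()             # Adam now has exp_avg / exp_avg_sq buffers
torch.save(optim.state_dict(), 'qq')
optim.load_state_dict(torch.load('qq'))
print(len(optim.state))  # one entry per parameter that has been updated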
test_loader = DataLoader(VQADataset(img_feats, test_qa_map, args.use_q == 1),
                         batch_size=args.batch_size, shuffle=False,
                         collate_fn=pad_collate_fn)
model = LSTMTextModel(visual_dim=args.feat_dim, lang_dim=args.wv_dim,
                      hidden_dim=args.hidden_dim, out_dim=1,
                      mlp_dims=[1024, 512, 512], embed_weights=embeds,
                      finetune_embeds=args.finetune_embeds,
                      n_layers=args.n_layers, bidirectional=args.bidir,
                      img2seq=args.img2seq, dropout=args.dropout)

if args.loss == 'BCE':
    loss_fn = torch.nn.BCEWithLogitsLoss()
elif args.loss == 'rank':
    loss_fn = torch.nn.MarginRankingLoss(margin=args.margin)

# only pass in parameters that require grad
optim = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                         lr=args.lr, weight_decay=args.wd)

if args.use_pretrain and args.pretrained_path:
    print('Using pretrained model', args.pretrained_path)
    pretrained = torch.load(args.pretrained_path)
    model.load_state_dict(pretrained['model'])
    optim.load_state_dict(pretrained['optim'])
    # reset the optimizer's learning rate to the new value
    for param_group in optim.param_groups:
        before = param_group['lr']
        param_group['lr'] = args.lr
        print('optim lr: before={} / after={}'.format(before, args.lr))

if USE_GPU:
    print("Use GPU")
    model = model.cuda()
    loss_fn = loss_fn.cuda()
else:
    print("Use CPU")

if args.mode == 'train':
    best_acc = 0
    stats = {'train_loss': [], 'train_acc': [], 'val_acc': []}
def main():
    # ============================ 1. data loaders ===========================
    train_transforms = transforms.Compose([
        transforms.Scale(256),       # rescale, keeping the original aspect ratio
        transforms.CenterCrop(256),  # keep only the center of the rescaled image
        transforms.RandomCrop(224),  # random crop within the center crop
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    val_transforms = transforms.Compose([
        transforms.Scale(256),       # rescale, keeping the original aspect ratio
        transforms.CenterCrop(224),  # keep only the center of the rescaled image
        transforms.ToTensor(),
    ])
    test_transforms = transforms.Compose([
        transforms.Scale(256),
        transforms.CenterCrop(256),
        transforms.ToTensor(),
    ])

    traindata = Data(opts.train_list_str, None, None, train_transforms, None,
                     None, opts.data_path, opts.num_classes)
    valdata = Data(None, None, opts.val_list_str, None, val_transforms, None,
                   opts.data_path, opts.num_classes)
    testdata = Data(None, opts.test_list_str, None, None, None,
                    test_transforms, opts.data_path, opts.num_classes)
    train_loader = DataLoader(traindata, batch_size=opts.batch_size,
                              shuffle=True, num_workers=opts.workers,
                              pin_memory=True)
    print('Training loader prepared')
    val_loader = DataLoader(valdata, batch_size=opts.batch_size,
                            shuffle=False, num_workers=opts.workers,
                            pin_memory=True)
    print('Validation loader prepared')

    # ============================ 2. model ==================================
    model = imvstxt()
    model.visionMLP = torch.nn.DataParallel(model.visionMLP, device_ids=[0, 1])
    if opts.cuda:
        model.cuda()

    # ====================== 3. train && optimizer ===========================
    # define loss function (criterion) and optimizer
    # cosine similarity between embeddings -> input1, input2, target
    if opts.cuda:
        cosine_crit = nn.CosineEmbeddingLoss(0.1).cuda()
    else:
        cosine_crit = nn.CosineEmbeddingLoss(0.1)
    if opts.semantic_reg:
        weights_class = torch.Tensor(opts.numClasses).fill_(1)
        weights_class[0] = 0  # the background class is set to 0, i.e. ignored
        # CrossEntropyLoss combines LogSoftMax and NLLLoss in one single class
        class_crit = nn.CrossEntropyLoss(weight=weights_class).cuda()
        # we will use two different criteria
        criterion = [cosine_crit, class_crit]
    else:
        criterion = cosine_crit

    # create different parameter groups
    vision_params = list(map(id, model.visionMLP.parameters()))
    base_params = filter(lambda p: id(p) not in vision_params,
                         model.parameters())
    optim = torch.optim.Adam([
        {'params': base_params},
        {'params': model.visionMLP.parameters(), 'lr': opts.lr * opts.freeVision}
    ], lr=opts.lr * opts.freeText)

    # resume from a checkpoint if one exists
    if opts.resume:
        if os.path.isfile(opts.resume):
            print("=> loading checkpoint '{}'".format(opts.resume))
            checkpoint = torch.load(opts.resume)
            opts.start_epoch = checkpoint['epoch']
            best_val = checkpoint['best_val']
            model.load_state_dict(checkpoint['state_dict'])
            optim.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                opts.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(opts.resume))
            best_val = float('inf')
    else:
        best_val = float('inf')

    # train
    trainer = trainer.Trainer(cuda=opts.cuda, model=model, optimizer=optim,
                              criterion=criterion, train_loader=train_loader,
                              val_loader=val_loader, max_iter=opts.max_iter)
    trainer.train(best_val)
def train(self, model, dset_type, train_loader, val_loader, resume=False,
          num_epochs=10, log_nth=0):
    """
    Train a given model with the provided data.

    Inputs:
    - model: model object initialized from a torch.nn.Module
    - dset_type: data set type, string: SUN or NYU
    - train_loader: train data in torch.utils.data.DataLoader
    - val_loader: val data in torch.utils.data.DataLoader
    - resume: bool parameter, indicating training mode
    - num_epochs: total number of training epochs
    - log_nth: log training accuracy and loss every nth iteration
    """
    optim = self.optim(model.parameters(), **self.optim_args)
    criterion = self.loss_func
    self._reset_histories()
    iter_per_epoch = len(train_loader)
    val_iter_per_epoch = len(val_loader)
    print(val_iter_per_epoch)

    if resume:
        print("[PROGRESS] Selected Training Mode: RESUME")
        if dset_type == 'NYU':
            if not self.is_HHA:
                model_path = '../models/nyu/checkpoint25.pth.tar'
            else:
                model_path = '../models/nyu_hha/checkpoint25.pth.tar'
        elif dset_type == 'SUN':
            if not self.is_HHA:
                model_path = '../models/sun/checkpoint25.pth.tar'
            else:
                model_path = '../models/sun_hha/checkpoint25.pth.tar'
        if os.path.isfile(model_path):
            print("[PROGRESS] Loading checkpoint: '{}'".format(model_path))
            checkpoint = torch.load(model_path)
            self.best_model = model
            self.start_epoch = checkpoint['epoch']
            self.best_val_acc = checkpoint['best_val_acc']
            model.load_state_dict(checkpoint['state_dict'])
            self.best_model.load_state_dict(checkpoint['best_state_dict'])
            self.train_loss_history = checkpoint['train_loss_hist']
            self.train_acc_history = checkpoint['train_acc_hist']
            self.val_acc_history = checkpoint['val_acc_hist']
            optim.load_state_dict(checkpoint['optimizer'])
            print("[PROGRESS] Checkpoint loaded")
            print("[PROGRESS] Resuming from epoch {}".format(checkpoint['epoch']))
            print("[PROGRESS] TRAINING CONTINUES")
        else:
            print("[ERROR] No checkpoint found at '{}'".format(model_path))
    else:
        print("[PROGRESS] Selected Training Mode: NEW")
        print("[PROGRESS] TRAINING STARTS")

    end_epoch = self.start_epoch + num_epochs
    for epoch in range(self.start_epoch, end_epoch):  # loop over the dataset multiple times
        timestep1 = time()
        self.update_learning_rate(optim, epoch)
        running_loss = 0.0
        model.train()
        for i, data in enumerate(train_loader, 0):
            timestep2 = time()
            rgb_inputs = Variable(data[0].cuda(self.gpu_device))
            d_inputs = Variable(data[1].cuda(self.gpu_device))
            labels = Variable(data[2].cuda(self.gpu_device))
            batch_size = len(rgb_inputs)
            first_it = (i == 0) and (epoch == 0)
            epoch_end = ((i + 1) % iter_per_epoch) == 0

            # zero the parameter gradients
            optim.zero_grad()

            # forward + backward + optimize
            outputs = model(rgb_inputs, d_inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optim.step()

            self.running_loss += loss.item()
            running_loss += loss.item()

            # print statistics every log_nth mini-batches
            if (i + 1) % log_nth == 0 or (i + 1) == iter_per_epoch:
                timestep3 = time()
                running_loss = running_loss / log_nth
                print("\r[EPOCH: %d/%d Iter: %d/%d ] Loss: %.3f Best Acc: %.3f "
                      "LR: %.2e Time: %.2f seconds"
                      % (epoch + 1, end_epoch, i + 1, iter_per_epoch,
                         running_loss, self.best_val_acc,
                         optim.param_groups[0]['lr'], (timestep3 - timestep2)))

            # log and save the accuracies at the end of each epoch
            if epoch_end:
                train_scores = []
                val_scores = []
                self.running_loss /= (i + 1)
                self.train_loss_history.append(self.running_loss)
                _, train_preds = torch.max(outputs, 1)
                labels_mask = labels > 0
                labels = labels - 1
                train_scores.append(np.mean(
                    (train_preds == labels)[labels_mask].data.cpu().numpy()))

                model.eval()
                for batch in val_loader:
                    val_rgb_inputs = Variable(batch[0].cuda(self.gpu_device))
                    val_d_inputs = Variable(batch[1].cuda(self.gpu_device))
                    val_labels = Variable(batch[2].cuda(self.gpu_device))
                    val_outputs = model(val_rgb_inputs, val_d_inputs)
                    _, val_preds = torch.max(val_outputs, 1)
                    val_labels_mask = val_labels > 0
                    val_labels = val_labels - 1
                    val_scores.append(np.mean(
                        (val_preds == val_labels)[val_labels_mask].data.cpu().numpy()))

                train_acc = np.mean(train_scores)
                val_acc = np.mean(val_scores)
                self.train_acc_history.append(train_acc)
                self.val_acc_history.append(val_acc)
                print("[EPOCH: %d/%d] TRAIN Acc/Loss: %.3f/%.3f "
                      "VALIDATION Acc: %.3f"
                      % (epoch + 1, end_epoch, train_acc, self.running_loss,
                         val_acc))
                self.running_loss = 0.0

                # Save the checkpoint and update the best model
                is_best = val_acc > self.best_val_acc
                if is_best:
                    self.best_model = model
                if is_best or (epoch + 1) % 10 == 0:
                    self.best_val_acc = max(val_acc, self.best_val_acc)
                    self.save_checkpoint({
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'best_state_dict': self.best_model.state_dict(),
                        'best_val_acc': self.best_val_acc,
                        'train_loss_hist': self.train_loss_history,
                        'train_acc_hist': self.train_acc_history,
                        'val_acc_hist': self.val_acc_history,
                        'optimizer': optim.state_dict()
                    }, is_best, dset_type)
        timestep4 = time()

    # Calculate IoU and mean accuracies on the validation set
    num_classes = val_outputs.size(1)
    print(num_classes)
    val_confusion = np.zeros((num_classes, 3))  # columns: [TP, FN, FP]
    IoU = 0
    mean_acc = 0
    for batch in val_loader:
        val_rgb_inputs = Variable(batch[0].cuda(self.gpu_device))
        val_d_inputs = Variable(batch[1].cuda(self.gpu_device))
        val_labels = Variable(batch[2].cuda(self.gpu_device))
        val_outputs = self.best_model(val_rgb_inputs, val_d_inputs)
        _, val_preds = torch.max(val_outputs, 1)
        val_labels = val_labels - 1
        for i in range(num_classes):
            val_labels_mask = val_labels == i
            val_preds_mask = val_preds == i
            TP = np.sum((val_preds == val_labels)[val_labels_mask].data.cpu().numpy())
            val_confusion[i, 0] += TP
            # ground-truth count minus TP gives the false negatives
            val_confusion[i, 1] += np.sum(
                (val_labels == val_labels)[val_labels_mask].data.cpu().numpy()) - TP
            # prediction count minus TP gives the false positives
            val_confusion[i, 2] += np.sum(
                (val_preds == val_preds)[val_preds_mask].data.cpu().numpy()) - TP

    for i in range(num_classes):
        TP, FN, FP = val_confusion[i]
        print(TP + FN, FP)
        IoU += TP / (TP + FP + FN)
        mean_acc += TP / (TP + FN)
    IoU /= num_classes
    mean_acc /= num_classes

    print("[FINAL] TRAINING COMPLETED")
    print("        Best VALIDATION Accuracy: %.3f IoU: %.3f Mean Accuracy: %.3f"
          % (self.best_val_acc, IoU, mean_acc))
    print("        Original FuseNet Accuracy: 0.66 IoU: 0.327 Mean Accuracy: 0.434")
def main(args):
    log_dir = args.exp_dir + '/log'
    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)
    writer = SummaryWriter(log_dir)

    batch_size = 32
    maxIter = 10000
    split = 'val'
    input_size = 224

    if not os.path.isdir(args.exp_dir):
        os.makedirs(args.exp_dir)
    if not os.path.isdir(os.path.join(args.exp_dir, args.task)):
        os.makedirs(os.path.join(args.exp_dir, args.task))
    checkpoint_folder = os.path.join(args.exp_dir, args.task, 'checkpoints')
    if not os.path.isdir(checkpoint_folder):
        os.makedirs(checkpoint_folder)

    logger_name = 'train_logger'
    logger = initializeLogging(os.path.join(args.exp_dir, args.task,
                                            'train_history.txt'), logger_name)

    # ================== Create data loaders =================================
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
            transforms.Resize(input_size),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])
    }

    if args.task == 'cub':
        from CUBDataset import CUBDataset
        image_datasets = {split: CUBDataset(dset_root['cub'], split,
                                            create_val=True,
                                            transform=data_transforms[split])
                          for split in ['train', 'val']}
    elif args.task == 'cars':
        from CarsDataset import CarsDataset
        image_datasets = {split: CarsDataset(dset_root['cars'], split,
                                             create_val=True,
                                             transform=data_transforms[split])
                          for split in ['train', 'val']}
    elif args.task == 'aircrafts':
        from AircraftsDataset import AircraftsDataset
        image_datasets = {split: AircraftsDataset(dset_root['aircrafts'], split,
                                                  transform=data_transforms[split])
                          for split in ['train', 'val']}
    elif args.task[:len('inat_')] == 'inat_':
        from iNatDataset import iNatDataset
        task = args.task
        subtask = task[len('inat_'):]
        subtask = subtask[0].upper() + subtask[1:]
        image_datasets = {split: iNatDataset(dset_root['inat'], split, subtask,
                                             transform=data_transforms[split])
                          for split in ['train', 'val']}
    else:
        raise ValueError('Unknown dataset: %s' % args.task)

    num_classes = image_datasets['train'].get_num_classes()
    dataloaders_dict = {x: torch.utils.data.DataLoader(
                            image_datasets[x], batch_size=args.batch_size,
                            shuffle=True, num_workers=4)
                        for x in ['train', 'val']}
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # ======================= Initialize the model ===========================
    model_ft, input_size = initialize_model(args.model, num_classes,
                                            feature_extract=False,
                                            use_pretrained=True)
    if args.stn:
        model_ft = STNet(model_ft)
    model_ft = model_ft.to(device)

    # ====================== Initialize optimizer ============================
    optim = initialize_optimizer(model_ft, feature_extract=False, stn=args.stn)

    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss()

    start_epoch = 0
    # load from a checkpoint if one exists
    if not args.train_from_beginning:
        checkpoint_filename = os.path.join(checkpoint_folder,
                                           'checkpoint.pth.tar')
        if os.path.isfile(checkpoint_filename):
            print("=> loading checkpoint '{}'".format(checkpoint_filename))
            checkpoint = torch.load(checkpoint_filename)
            start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
            model_ft.load_state_dict(checkpoint['state_dict'])
            optim.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_filename, checkpoint['epoch']))

    # parallelize the model if using multiple gpus
    if torch.cuda.device_count() > 1:
        model_ft = torch.nn.DataParallel(model_ft)

    # Train the model
    model_ft = train_model(model_ft, dataloaders_dict, criterion, optim,
                           num_epochs=args.num_epochs,
                           is_inception=(args.model == "inception"),
                           logger_name=logger_name,
                           checkpoint_folder=checkpoint_folder,
                           start_epoch=start_epoch, writer=writer)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--exp_name', default='resnet50_vggface')
    parser.add_argument('-c', '--config', type=int, default=1,
                        choices=configurations.keys())
    parser.add_argument('-d', '--dataset_path',
                        default='/srv/data1/arunirc/datasets/vggface2')
    parser.add_argument('-m', '--model_path', default=None,
                        help='Initialize from pre-trained model')
    parser.add_argument('--resume', help='Checkpoint path')
    parser.add_argument('--bottleneck', action='store_true', default=False,
                        help='Add a 512-dim bottleneck layer with L2 normalization')
    args = parser.parse_args()
    # gpu = args.gpu

    cfg = configurations[args.config]
    out = get_log_dir(args.exp_name, args.config, cfg, verbose=False)
    resume = args.resume
    # os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)

    cuda = torch.cuda.is_available()
    torch.manual_seed(1337)
    if cuda:
        torch.cuda.manual_seed(1337)
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True  # enable if all images are the same size

    # -----------------------------------------------------------------------------
    # 1. Dataset
    # -----------------------------------------------------------------------------
    # Images should be arranged like this:
    #   data_root/
    #       class_1/....jpg..
    #       class_2/....jpg..
    #       ......./....jpg..
    data_root = args.dataset_path
    kwargs = {'num_workers': 4, 'pin_memory': True} if cuda else {}

    RGB_MEAN = [0.485, 0.456, 0.406]
    RGB_STD = [0.229, 0.224, 0.225]

    # Data transforms
    # http://pytorch.org/docs/master/torchvision/transforms.html
    train_transform = transforms.Compose([
        transforms.Resize(256),  # smaller side resized ("Scale" is the deprecated name)
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=RGB_MEAN, std=RGB_STD),
    ])
    val_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=RGB_MEAN, std=RGB_STD),
    ])

    # Data loaders - using PyTorch built-in objects
    #   loader = DataLoaderClass(DatasetClass)
    #   * `DataLoaderClass` is the PyTorch-provided torch.utils.data.DataLoader
    #   * `DatasetClass` loads samples from a dataset; it can be a standard class
    #     provided by PyTorch (datasets.ImageFolder) or a custom-made class.
    #   - More info: http://pytorch.org/docs/master/torchvision/datasets.html#imagefolder
    traindir = osp.join(data_root, 'train')
    dataset_train = datasets.ImageFolder(traindir, train_transform)

    # For an unbalanced dataset we create a weighted sampler
    # * Balanced class sampling:
    #   https://discuss.pytorch.org/t/balanced-sampling-between-classes-with-torchvision-dataloader/2703/3
    weights = utils.make_weights_for_balanced_classes(
        dataset_train.imgs, len(dataset_train.classes))
    weights = torch.DoubleTensor(weights)
    sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(weights))

    train_loader = torch.utils.data.DataLoader(
        dataset_train, batch_size=cfg['batch_size'], sampler=sampler, **kwargs)

    valdir = osp.join(data_root, 'val-crop')
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, val_transform),
        batch_size=cfg['batch_size'], shuffle=False, **kwargs)

    # print('dataset classes: ' + str(train_loader.dataset.classes))
    num_class = len(train_loader.dataset.classes)
    print('Number of classes: %d' % num_class)

    # -----------------------------------------------------------------------------
    # 2. Model
    # -----------------------------------------------------------------------------
    model = torchvision.models.resnet50(pretrained=False)

    if isinstance(model.fc, torch.nn.Linear):
        # Replace the final fc layer if its size does not match num_class
        if model.fc.weight.size(0) != num_class:
            print(model.fc)
            model.fc = torch.nn.Linear(2048, num_class)
            print(model.fc)

    if args.model_path:
        # An existing model is to be loaded from a file
        checkpoint = torch.load(args.model_path)
        if checkpoint['arch'] == 'DataParallel':
            # The model was trained and saved using DataParallel
            model = torch.nn.DataParallel(
                model, device_ids=[0, 1, 2, 3, 4, 5, 6, 7])
            model.load_state_dict(checkpoint['model_state_dict'])
            # Get the network module from inside its DataParallel wrapper
            model = model.module
        else:
            model.load_state_dict(checkpoint['model_state_dict'])

    # Optionally add a "bottleneck + L2-norm" layer after the GAP layer
    # TODO -- loading a bottleneck model might be a problem .... do some unit-tests
    if args.bottleneck:
        layers = []
        layers.append(torch.nn.Linear(2048, 512))
        layers.append(nn.BatchNorm1d(512))  # 1-d batch norm: the input here is (N, 512), not a feature map
        layers.append(torch.nn.ReLU(inplace=True))
        layers.append(models.NormFeat())  # L2-normalization layer
        layers.append(torch.nn.Linear(512, num_class))
        model.fc = torch.nn.Sequential(*layers)

    # TODO - config options for DataParallel and device_ids
    model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3, 4, 5, 6, 7])
    if cuda:
        model.cuda()

    start_epoch = 0
    start_iteration = 0

    # Loss - cross entropy between predicted scores (unnormalized) and class labels (integers)
    criterion = nn.CrossEntropyLoss()
    if cuda:
        criterion = criterion.cuda()

    if resume:
        # Resume training from the last saved checkpoint
        checkpoint = torch.load(resume)
        model.load_state_dict(checkpoint['model_state_dict'])
        start_epoch = checkpoint['epoch']
        start_iteration = checkpoint['iteration']

    # -----------------------------------------------------------------------------
    # 3. Optimizer
    # -----------------------------------------------------------------------------
    # Parameters with p.requires_grad=False are not updated during training;
    # this can be specified when defining the nn.Modules during model creation.
    params = [p for p in model.parameters() if p.requires_grad]

    if 'optim' in cfg:
        if cfg['optim'].lower() == 'sgd':
            optim = torch.optim.SGD(params, lr=cfg['lr'],
                                    momentum=cfg['momentum'],
                                    weight_decay=cfg['weight_decay'])
        elif cfg['optim'].lower() == 'adam':
            optim = torch.optim.Adam(params, lr=cfg['lr'],
                                     weight_decay=cfg['weight_decay'])
        else:
            raise NotImplementedError('Optimizers: SGD or Adam')
    else:
        optim = torch.optim.SGD(params, lr=cfg['lr'],
                                momentum=cfg['momentum'],
                                weight_decay=cfg['weight_decay'])

    if resume:
        optim.load_state_dict(checkpoint['optim_state_dict'])

    # -----------------------------------------------------------------------------
    # [optional] Sanity-check: forward pass with a single batch
    # -----------------------------------------------------------------------------
    DEBUG = False
    if DEBUG:
        # model = model.cpu()
        dataiter = iter(val_loader)
        img, label = next(dataiter)
        print('Labels: ' + str(label.size()))  # batchSize (integer class labels)
        print('Input: ' + str(img.size()))     # batchSize x 3 x 224 x 224

        im = img.squeeze().numpy()
        im = im[0, :, :, :]           # get the first image in the batch
        im = im.transpose((1, 2, 0))  # permute to 224x224x3
        im = im * [0.229, 0.224, 0.225]  # unnormalize
        im = im + [0.485, 0.456, 0.406]
        im[im < 0] = 0

        f = plt.figure()
        plt.imshow(im)
        plt.savefig('sanity-check-im.jpg')  # save the transformed image in the current folder

        inputs = Variable(img)
        if cuda:
            inputs = inputs.cuda()
        model.eval()
        outputs = model(inputs)
        print('Network output: ' + str(outputs.size()))
        model.train()

    # -----------------------------------------------------------------------------
    # 4. Training
    # -----------------------------------------------------------------------------
    trainer = train.Trainer(
        cuda=cuda,
        model=model,
        criterion=criterion,
        optimizer=optim,
        init_lr=cfg['lr'],
        lr_decay_epoch=cfg['lr_decay_epoch'],
        train_loader=train_loader,
        val_loader=val_loader,
        out=out,
        max_iter=cfg['max_iteration'],
        interval_validate=cfg.get('interval_validate', len(train_loader)),
    )
    trainer.epoch = start_epoch
    trainer.iteration = start_iteration
    trainer.train()
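# A minimal sketch of the requires_grad mechanism used above: freezing early
# layers so that only the remaining parameters reach the optimizer. Which
# layers to freeze, and the SGD hyperparameters, are assumptions for
# illustration, not part of the script above.
import torch
import torchvision

model = torchvision.models.resnet50(pretrained=False)
for p in model.conv1.parameters():
    p.requires_grad = False  # frozen: excluded from the params list below
for p in model.layer1.parameters():
    p.requires_grad = False

# Only trainable parameters are handed to the optimizer, exactly as the
# script above does with its requires_grad filter.
params = [p for p in model.parameters() if p.requires_grad]
optim = torch.optim.SGD(params, lr=0.01, momentum=0.9)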
input_size = 50
lr = 1e-4
rnn = RNN(input_size, hidden_size)
optim = torch.optim.Adam(rnn.parameters(), lr=lr, weight_decay=1e-6)
if use_cuda:
    rnn.cuda()

rnn_epoch = 0
total_passes = 0
train_loss = []
test_loss = []

if args.rnn_model_loadpath is not None:
    if os.path.exists(args.rnn_model_loadpath):
        rnn_model_dict = torch.load(args.rnn_model_loadpath)
        rnn.load_state_dict(rnn_model_dict['state_dict'])
        optim.load_state_dict(rnn_model_dict['optimizer'])
        rnn_epoch = rnn_model_dict['epoch']
        try:
            total_passes = rnn_model_dict['total_passes']
            train_loss = rnn_model_dict['train_loss']
            test_loss = rnn_model_dict['test_loss']
        except KeyError:
            # older checkpoints may not store these keys; keep the defaults
            print("could not load total passes")
        print("loaded rnn from %s at epoch %s" % (args.rnn_model_loadpath, rnn_epoch))
    else:
        print("could not find model at %s" % args.rnn_model_loadpath)
        sys.exit()
else:
    print("creating new model")
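# A possible save-side counterpart to the RNN loading code above, writing the
# same keys it reads back ('state_dict', 'optimizer', 'epoch', 'total_passes',
# 'train_loss', 'test_loss'). The function name and argument order are
# assumptions; only the key names come from the snippet above.
import torch

def save_rnn_checkpoint(rnn, optim, epoch, total_passes, train_loss, test_loss, path):
    torch.save({
        'state_dict': rnn.state_dict(),
        'optimizer': optim.state_dict(),
        'epoch': epoch,
        'total_passes': total_passes,  # optional keys guarded by try/except on load
        'train_loss': train_loss,
        'test_loss': test_loss,
    }, path)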