def commit_insert(self):
    if self.keys is None:
        self.keys = Parameter(self.keys_to_be_inserted)
        self.values = Parameter(self.values_to_be_inserted)
    elif self.keys_to_be_inserted is not None:
        keys = torch.cat([self.keys.detach(), self.keys_to_be_inserted], 0)
        self.keys = Parameter(keys)
        values = [self.values.detach(), self.values_to_be_inserted]
        values = torch.cat(values, 0)
        self.values = Parameter(values)

    # Move most recently used key-value pairs to the back
    if len(self.move_to_back) != 0:
        unmoved_ids = list(set(range(len(self.keys))) - self.move_to_back)
        moved_ids = list(self.move_to_back)
        unmoved_keys = self.keys.detach()[unmoved_ids]
        moved_keys = self.keys.detach()[moved_ids]
        self.keys = Parameter(torch.cat([unmoved_keys, moved_keys], 0))
        unmoved_values = self.values.detach()[unmoved_ids]
        moved_values = self.values.detach()[moved_ids]
        self.values = Parameter(torch.cat([unmoved_values, moved_values], 0))
        self.move_to_back = set()

    if len(self.keys) > self.max_memory:
        # Expel oldest keys to maintain total memory
        for key in self.keys[:-self.max_memory]:
            del self.key_cache[tuple(key.detach().cpu().numpy())]
        self.keys = Parameter(self.keys[-self.max_memory:].detach())
        self.values = Parameter(self.values[-self.max_memory:].detach())

    self.keys_to_be_inserted = None
    self.values_to_be_inserted = None
    params = [self.keys, self.values]
    self.optimizer = get_optimizer(self.opt_name, params, self.lr)
    self.kdtree.build_index(self.keys.detach().cpu().numpy())
    self.stale_index = False
def update(self, value, index):
    """Set self.values[index] = value."""
    values = self.values.detach()
    values[index] = value[0].detach()
    self.values = Parameter(values)
    params = [self.keys, self.values]
    self.optimizer = get_optimizer(self.opt_name, params, self.lr)
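# Every snippet in this section leans on a `get_optimizer` helper whose body is
# not shown, and whose signature varies from snippet to snippet (some pass
# (name, params, lr), others (config, model) or (params, config)). The factory
# below is only a minimal sketch of the (name, params, lr) variant used by the
# DND code above: an assumption, not the actual helper from any of these repos.
import torch.optim as optim


def get_optimizer(opt_name, params, lr):
    """Map an optimizer name to a torch.optim instance (hypothetical sketch)."""
    if opt_name == 'sgd':
        return optim.SGD(params, lr=lr)
    elif opt_name == 'adam':
        return optim.Adam(params, lr=lr)
    elif opt_name == 'rmsprop':
        return optim.RMSprop(params, lr=lr)
    raise ValueError('unknown optimizer: {}'.format(opt_name))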
def run(cfg):
    '''Load save path'''
    cfg.log_string('Data save path: %s' % (cfg.save_path))
    checkpoint = CheckpointIO(cfg)

    '''Load device'''
    cfg.log_string('Loading device settings.')
    device = load_device(cfg)

    '''Load data'''
    cfg.log_string('Loading dataset.')
    train_loader = get_dataloader(cfg.config, mode='train')
    test_loader = get_dataloader(cfg.config, mode='test')

    '''Load net'''
    cfg.log_string('Loading model.')
    net = get_model(cfg.config, device=device)
    if isinstance(net, list):
        checkpoint.register_modules(voxnet=net[0])
        checkpoint.register_modules(refnet=net[1])
    else:
        checkpoint.register_modules(voxnet=net)

    cfg.log_string('loading loss function')
    loss_func = get_loss(cfg.config, device)

    '''Load optimizer'''
    cfg.log_string('Loading optimizer.')
    optimizer = get_optimizer(config=cfg.config, net=net)
    if isinstance(net, list):
        checkpoint.register_modules(voxopt=optimizer[0])
        checkpoint.register_modules(refopt=optimizer[1])
    else:
        checkpoint.register_modules(voxopt=optimizer)

    '''Load scheduler'''
    cfg.log_string('Loading optimizer scheduler.')
    scheduler = load_scheduler(config=cfg.config, optimizer=optimizer)
    if isinstance(net, list):
        checkpoint.register_modules(voxsch=scheduler[0])
        checkpoint.register_modules(refsch=scheduler[1])
    else:
        checkpoint.register_modules(voxsch=scheduler)

    '''Load trainer'''
    cfg.log_string('Loading trainer.')
    trainer = get_trainer(cfg.config)

    '''Start to train'''
    cfg.log_string('Start to train.')
    # cfg.log_string('Total number of parameters in {0:s}: {1:d}.'.format(
    #     cfg.config['method'], sum(p.numel() for p in net.parameters())))
    trainer(cfg, net, loss_func, optimizer, scheduler,
            train_loader=train_loader, test_loader=test_loader,
            device=device, checkpoint=checkpoint)

    cfg.log_string('Training finished.')
def __init__(self, env, args, device='cpu'):
    """
    Instantiate an NEC Agent

    Parameters
    ----------
    env: gym.Env
        gym environment to train on
    args: args class from argparser
        args are from train.py; see train.py for help with each arg
    device: string
        'cpu' or 'cuda:0' depending on use_cuda flag from train.py
    """
    self.environment_type = args.environment_type
    self.env = env
    self.device = device
    # Hyperparameters
    self.epsilon = args.initial_epsilon
    self.final_epsilon = args.final_epsilon
    self.epsilon_decay = args.epsilon_decay
    self.gamma = args.gamma
    self.N = args.N
    # Transition queue and replay memory
    self.transition_queue = []
    self.replay_every = args.replay_every
    self.replay_buffer_size = args.replay_buffer_size
    self.replay_memory = ReplayMemory(self.replay_buffer_size)
    # CNN for state embedding network
    self.frames_to_stack = args.frames_to_stack
    self.embedding_size = args.embedding_size
    self.in_height = args.in_height
    self.in_width = args.in_width
    self.cnn = CNN(self.frames_to_stack, self.embedding_size,
                   self.in_height, self.in_width).to(self.device)
    # Differentiable Neural Dictionary (DND): one for each action
    self.kernel = inverse_distance
    self.num_neighbors = args.num_neighbors
    self.max_memory = args.max_memory
    self.lr = args.lr
    self.dnd_list = []
    for i in range(env.action_space.n):
        self.dnd_list.append(DND(self.kernel, self.num_neighbors,
                                 self.max_memory, args.optimizer, self.lr))
    # Optimizer for state embedding CNN
    self.q_lr = args.q_lr
    self.batch_size = args.batch_size
    self.optimizer = get_optimizer(args.optimizer,
                                   self.cnn.parameters(), self.lr)
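# The agent above passes an `inverse_distance` kernel into each DND. The helper
# itself is not shown; below is a minimal sketch following the Neural Episodic
# Control kernel k(h, h_i) = 1 / (dist(h, h_i) + delta), with `epsilon`
# standing in for the small constant delta (an assumption about this codebase):
import torch


def inverse_distance(h, h_i, epsilon=1e-3):
    """Kernel weight between a query embedding h and a stored key h_i."""
    return 1 / (torch.dist(h, h_i) + epsilon)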
def create_experiment(config):
    """Creates an experiment based on config."""
    device = torch.device(config.device)
    logging.info("using {}".format(config.device))

    experiment = Experiment(config.name, config.save_dir)
    experiment.register_config(config)

    logger = None
    if config.use_tflogger:
        logger = Logger(config.tflog_dir)
        experiment.register_logger(logger)

    torch.manual_seed(config.rseed)

    model = NRU(device, config.input_size, config.output_size,
                num_layers=config.num_layers,
                layer_size=config.layer_size,
                output_activation="linear",
                layer_norm=config.layer_norm,
                use_relu=config.use_relu,
                memory_size=config.memory_size,
                k=config.k).to(device)
    experiment.register_model(model)

    data_iterator = get_data_iterator(config)
    experiment.register_data_iterator(data_iterator)

    optimizer = get_optimizer(model.parameters(), config)
    model.register_optimizer(optimizer)

    tr = MyContainer()
    tr.updates_done = 0
    tr.epochs_done = 0
    tr.ce = {}
    tr.ce["train"] = []
    tr.accuracy = {}
    tr.accuracy["valid"] = []
    tr.accuracy["test"] = []
    tr.grad_norm = []
    experiment.register_train_statistics(tr)

    return experiment, model, data_iterator, tr, logger, device
def _train(self):
    print(f"\n({self.experim_name}) training...\n")
    model = get_model(self.args).to(self.device)
    optimizer = get_optimizer(self.args, model)
    lr_scheduler = get_lr_scheduler(self.args, optimizer=optimizer,
                                    iters_per_epoch=len(self.dataloader))

    for e in range(1, 1 + self.n_epochs):
        model, optimizer, lr_scheduler = self._train_epoch(
            e, model, optimizer, lr_scheduler)
        self._val(e, model)
        if self.debug:
            break

    self.best_miou = -1.0
    return model
def train(model, dataloader, device, optimizer_name, loss_name, lr, verbose):
    optimizer_object = get_optimizer(optimizer_name)
    optimizer = optimizer_object(model.parameters(), lr=lr)
    loss_fn = get_loss(loss_name)

    model.train()
    running_loss = 0.0
    running_corrects = 0

    for inputs, targets in dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        bs = len(targets)
        classes = torch.zeros((bs, 10))
        for i in range(bs):
            classes[i][targets[i]] = 1
        classes = classes.to(device)

        outputs = model(inputs)
        loss = loss_fn()(outputs, classes)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        _, preds = torch.max(outputs.data, 1)
        running_loss += loss.item()
        running_corrects += torch.sum(preds == targets.data)

    loss = running_loss / 60000
    acc = running_corrects.data.item() / 60000
    if verbose:
        print(f'Training results: Loss: {loss:.4f} Acc: {acc:.4f}')
    return acc
def train(model, dataloader, device, optimizer_name, loss_name, lr):
    optimizer_object = get_optimizer(optimizer_name)
    optimizer = optimizer_object(model.parameters(), lr=lr)
    loss_fn = get_loss(loss_name)

    model.train()
    running_loss = 0.0
    running_corrects = 0

    for inputs, targets in dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        bs = len(targets)
        classes = torch.zeros((bs, 10))
        for i in range(bs):
            classes[i][targets[i]] = 1
        classes = classes.to(device)

        outputs = model(inputs)
        loss = loss_fn()(outputs, classes)  # LeCun et al. used maximum log-likelihood

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        _, preds = torch.max(outputs.data, 1)

        # statistics
        running_loss += loss.item()
        running_corrects += torch.sum(preds == targets.data)

    loss = running_loss / 60000
    acc = running_corrects.data.item() / 60000
    print('Training results: Loss: {:.4f} Acc: {:.4f}'.format(loss, acc))
    return acc
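# Both train() variants above build the one-hot target matrix with an explicit
# per-sample Python loop. A vectorized equivalent, assuming integer class
# targets in [0, 10), is sketched here:
import torch.nn.functional as F


def to_one_hot(targets, num_classes=10):
    """targets: LongTensor (batch,); returns FloatTensor (batch, num_classes)."""
    return F.one_hot(targets, num_classes=num_classes).float()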
def main():
    args = parse_args()
    reset_config(config, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    backbone_model = eval('models.' + config.BACKBONE_MODEL + '.get_pose_net')(
        config, is_train=True)
    model = eval('models.' + config.MODEL + '.get_multiview_pose_net')(
        backbone_model, config)
    print(model)

    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../../lib/models', config.MODEL + '.py'),
        final_output_dir)
    shutil.copy2(args.cfg, final_output_dir)
    logger.info(pprint.pformat(model))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    gpus = [int(i) for i in config.GPUS.split(',')]
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    criterion = JointsMSELoss(
        use_target_weight=config.LOSS.USE_TARGET_WEIGHT).cuda()

    optimizer = get_optimizer(config, model)
    start_epoch = config.TRAIN.BEGIN_EPOCH
    if config.TRAIN.RESUME:
        start_epoch, model, optimizer = load_checkpoint(
            model, optimizer, final_output_dir)

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR)

    # Data loading code
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + config.DATASET.TRAIN_DATASET)(
        config, config.DATASET.TRAIN_SUBSET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + config.DATASET.TEST_DATASET)(
        config, config.DATASET.TEST_SUBSET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE * len(gpus),
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE * len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True)

    best_perf = 0.0
    best_model = False
    for epoch in range(start_epoch, config.TRAIN.END_EPOCH):
        lr_scheduler.step()

        train(config, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, writer_dict)

        perf_indicator = validate(config, valid_loader, valid_dataset, model,
                                  criterion, final_output_dir, writer_dict)

        if perf_indicator > best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint({
            'epoch': epoch + 1,
            'model': get_model_name(config),
            'state_dict': model.module.state_dict(),
            'perf': perf_indicator,
            'optimizer': optimizer.state_dict(),
        }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir,
                                          'final_state.pth.tar')
    logger.info('saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
def main():
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model_p, model_d = eval('models.' + cfg.MODEL.NAME +
                            '.get_adaptive_pose_net')(cfg, is_train=True)

    if cfg.TRAIN.CHECKPOINT:
        logger.info('=> loading model from {}'.format(cfg.TRAIN.CHECKPOINT))
        model_p.load_state_dict(torch.load(cfg.TRAIN.CHECKPOINT))
    else:
        model_state_file = os.path.join(final_output_dir, 'checkpoint.pth')
        logger.info('=> loading model from {}'.format(model_state_file))
        model_p.load_state_dict(torch.load(model_state_file))

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'pre_train_global_steps': 0,
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model_p, (dump_input, ), verbose=False)
    logger.info(get_model_summary(model_p, dump_input))

    model_p = torch.nn.DataParallel(model_p, device_ids=cfg.GPUS).cuda()
    model_d = torch.nn.DataParallel(model_d, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion) and optimizer for pose_net
    criterion_p = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()
    optimizer_p = get_optimizer(cfg, model_p)

    # define loss function (criterion) and optimizer for domain classifier
    criterion_d = torch.nn.BCEWithLogitsLoss().cuda()
    optimizer_d = get_optimizer(cfg, model_d)

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_pre_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_PRE_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_pre_loader = torch.utils.data.DataLoader(
        train_pre_dataset,
        batch_size=cfg.TRAIN.PRE_BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    syn_labels = train_dataset._load_syrip_syn_annotations()
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        sampler=BalancedBatchSampler(train_dataset, syn_labels),
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    '''
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY
    )
    '''
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model_p.load_state_dict(checkpoint['state_dict'])
        optimizer_p.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    # freeze some layers
    idx = 0
    print('Parameters:')
    for param in model_p.parameters():
        # freeze the first 108 parameter tensors (stage 2 + bottleneck);
        # use 483 for stage 3 + stage 2 + bottleneck
        if idx <= 108:
            param.requires_grad = False
            # print(param.data.shape)
        idx = idx + 1

    lr_scheduler_p = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_p, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
        last_epoch=last_epoch)
    lr_scheduler_d = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_d, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR)

    epoch_D = cfg.TRAIN.PRE_EPOCH
    losses_D_list = []
    acces_D_list = []
    acc_num_total = 0
    num = 0
    losses_d = AverageMeter()

    # Pretraining stage
    print('Pretraining Stage:')
    print('Start to train Domain Classifier-------')
    for epoch_d in range(epoch_D):  # epoch
        model_d.train()
        model_p.train()
        for i, (input, target, target_weight, meta) in enumerate(train_pre_loader):  # iteration
            # compute output for pose_net
            feature_outputs, outputs = model_p(input)
            # print(feature_outputs.size())

            # compute output for domain classifier
            domain_logits = model_d(feature_outputs.detach())
            domain_label = (meta['synthetic'].unsqueeze(-1) * 1.0).cuda(
                non_blocking=True)
            # print(domain_label)
            loss_d = criterion_d(domain_logits, domain_label)
            loss_d.backward(retain_graph=True)
            optimizer_d.step()

            # compute accuracy of the domain classifier
            acc_num = 0
            for j in range(len(domain_label)):
                if (domain_logits[j] > 0 and domain_label[j] == 1.0) or (
                        domain_logits[j] < 0 and domain_label[j] == 0.0):
                    acc_num += 1
                    acc_num_total += 1
                num += 1
            acc_d = acc_num * 1.0 / input.size(0)
            acces_D_list.append(acc_d)
            optimizer_d.zero_grad()
            losses_d.update(loss_d.item(), input.size(0))

            if i % cfg.PRINT_FREQ == 0:
                msg = 'Epoch: [{0}][{1}/{2}]\t' \
                      'Accuracy_d: {3} ({4})\t' \
                      'Loss_d: {loss_d.val:.5f} ({loss_d.avg:.5f})'.format(
                          epoch_d, i, len(train_pre_loader), acc_d,
                          acc_num_total * 1.0 / num, loss_d=losses_d)
                logger.info(msg)

                writer = writer_dict['writer']
                pre_global_steps = writer_dict['pre_train_global_steps']
                writer.add_scalar('pre_train_loss_D', losses_d.val,
                                  pre_global_steps)
                writer.add_scalar('pre_train_acc_D', acc_d, pre_global_steps)
                writer_dict['pre_train_global_steps'] = pre_global_steps + 1
                losses_D_list.append(losses_d.val)

    print('Training Stage (Step I and II):')
    losses_P_list = []
    acces_P_list = []
    losses_p = AverageMeter()
    acces_p = AverageMeter()
    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler_p.step()

        # train for one epoch
        losses_P_list, losses_D_list, acces_P_list, acces_D_list = train_adaptive(
            cfg, train_loader, model_p, model_d, criterion_p, criterion_d,
            optimizer_p, optimizer_d, epoch, final_output_dir, tb_log_dir,
            writer_dict, losses_P_list, losses_D_list, acces_P_list,
            acces_D_list, acc_num_total, num, losses_p, acces_p, losses_d)

        # evaluate on validation set
        perf_indicator = validate_adaptive(cfg, valid_loader, valid_dataset,
                                           model_p, criterion_p,
                                           final_output_dir, tb_log_dir,
                                           writer_dict)

        if perf_indicator > best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model_p.state_dict(),
                'best_state_dict': model_p.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer_p.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        'saving final model state to {}'.format(final_model_state_file))
    torch.save(model_p.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()

    np.save('./losses_D.npy', np.array(losses_D_list))  # Adversarial-D
    np.save('./losses_P.npy', np.array(losses_P_list))  # P
    np.save('./acces_P.npy', np.array(acces_P_list))    # P
    np.save('./acces_D.npy', np.array(acces_D_list))    # D
def train(use_cuda: bool, n_epochs: int, validate_every: int,
          use_dropout: bool, partitions: Partitions, optimizer_name: str,
          lr: float, wd: float, momentum: bool):
    logger = logging.getLogger('logger')

    no_test = True
    model_path = "./model_output/pairwise/model_{0}"

    partitions.generate_partitions(PairPartition, no_test=no_test)
    training_data = Balanced(partitions.train)

    if validate_every > 0:
        balanced_validation = Balanced(partitions.val)
        training_pairs = AllPairs(partitions.train)
        search_length = training_pairs.n_references
        validation_pairs = AllPairs(partitions.val)
        testing_pairs = AllPairs(partitions.test) if not no_test else None
    else:
        balanced_validation = None
        training_pairs = None
        validation_pairs = None
        testing_pairs = None
        search_length = None

    # get a siamese network, see Siamese class for architecture
    siamese = Siamese(dropout=use_dropout)
    siamese = initialize_weights(siamese, use_cuda)
    if use_cuda:
        siamese = siamese.cuda()

    criterion = BCELoss()
    optimizer = get_optimizer(siamese, optimizer_name, lr, wd, momentum)

    try:
        logger.info("Training network with pairwise loss...")
        progress = TrainingProgress()
        models = training.train_siamese_network(siamese, training_data,
                                                criterion, optimizer,
                                                n_epochs, use_cuda)
        for epoch, (model, training_batch_losses) in enumerate(models):
            utils.network.save_model(model, model_path.format(epoch))

            training_loss = training_batch_losses.mean()
            if validate_every != 0 and epoch % validate_every == 0:
                validation_batch_losses = inference.siamese_loss(
                    model, balanced_validation, criterion, use_cuda)
                validation_loss = validation_batch_losses.mean()

                training_mrr, training_rank = inference.mean_reciprocal_ranks(
                    model, training_pairs, use_cuda)
                val_mrr, val_rank = inference.mean_reciprocal_ranks(
                    model, validation_pairs, use_cuda)

                progress.add_mrr(train=training_mrr, val=val_mrr)
                progress.add_rank(train=training_rank, val=val_rank)
                progress.add_loss(train=training_loss, val=validation_loss)
            else:
                progress.add_mrr(train=np.nan, val=np.nan)
                progress.add_rank(train=np.nan, val=np.nan)
                progress.add_loss(train=training_loss, val=np.nan)

            progress.graph("Siamese", search_length)

        # load weights from best model if we validated throughout
        if validate_every > 0:
            siamese = siamese.train()
            utils.network.load_model(
                siamese, model_path.format(np.argmax(progress.val_mrr)))

        # otherwise just save most recent model
        utils.network.save_model(siamese, model_path.format('best'))
        utils.network.save_model(
            siamese,
            './output/{0}/pairwise'.format(utilities.get_trial_number()))

        if not no_test:
            logger.info(
                "Results from best model generated during training, "
                "evaluated on test data:")
            rrs = inference.reciprocal_ranks(siamese, testing_pairs, use_cuda)
            utilities.log_final_stats(rrs)

        progress.pearson(log=True)
        progress.save("./output/{0}/pairwise.pickle".format(
            utilities.get_trial_number()))
        return siamese
    except Exception as e:
        utils.network.save_model(siamese, model_path.format('crash_backup'))
        logger.critical("Exception occurred while training: {0}".format(
            str(e)))
        logger.critical(traceback.print_exc())
        sys.exit()
# Load weights into the decoder
decoder = copy_weights(hfvae, decoder)

SECOND_STAGE = False
if SECOND_STAGE:
    z_train = encoder.predict(x_train)[0][0]
    latent_dim = np.prod(z_train.shape[1:])
    z_train = np.reshape(z_train, (-1, latent_dim))
    second_vae, second_encoder, second_decoder = two_stage.get_second_stage(
        latent_dim)

    # Compile model
    optimizer = utils.get_optimizer(z_train.shape[0] // batch_size,
                                    initial_lr=1e-3)
    second_vae.compile(optimizer=optimizer, loss=None,
                       metrics=[utils.cos_sim])
    second_vae.fit(z_train, None, batch_size=batch_size, epochs=epochs)
    second_vae.save_weights('saved_weights/secondstage_NVAE_' + data + '.h5')

GMM = True
if GMM:
    from sklearn.mixture import GaussianMixture

    # we may only work on z_mean of the innermost layer
    z_train = encoder.predict(x_train)[0][0]
    # print("latent dim =", z_train.shape[1])
    z_density = GaussianMixture(n_components=10, max_iter=100)
    z_density.fit(z_train)
def main_per_worker(process_index, ngpus_per_node, args):
    update_config(cfg, args)

    # torch seed
    torch.cuda.manual_seed(random.random())

    # cudnn
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # proc_rank
    proc_rank = args.rank * ngpus_per_node + process_index

    # create logger
    logger, output_dir = create_logger(cfg, proc_rank)
    # logger.info(pprint.pformat(args))
    # logger.info(cfg)

    model = get_model(cfg, cfg.MODEL.FILE, cfg.MODEL.NAME)
    emb = InceptionResnetV1(pretrained='vggface2', classify=False)
    assert cfg.MODEL.APPEARANCE.WEIGHTS != ''
    load_eval_model(cfg.MODEL.APPEARANCE.WEIGHTS, emb)

    # TODO: change based on the paper
    optimizer = get_optimizer(cfg, model)
    model, optimizer, last_iter = load_checkpoint(cfg, model, optimizer)
    lr_scheduler = get_lr_scheduler(cfg, optimizer, last_iter)

    transform = FacenetInferenceTransform(size=(cfg.TRAIN.INPUT_MIN,
                                                cfg.TRAIN.INPUT_MAX))
    train_dataset = TrackletpairDataset(cfg.DATASET.ROOT,
                                        transform=transform, is_train=True)
    eval_dataset = TrackletpairDataset(cfg.DATASET.ROOT,
                                       transform=transform, is_train=False)

    # distribution
    if args.distributed:
        logger.info(
            f'Init process group: dist_url: {args.dist_url}, '
            f'world_size: {args.world_size}, '
            f'machine: {args.rank}, '
            f'rank: {proc_rank}'
        )
        dist.init_process_group(
            backend=cfg.DIST_BACKEND,
            init_method=args.dist_url,
            world_size=args.world_size,
            rank=proc_rank
        )
        torch.cuda.set_device(process_index)
        model.cuda()
        emb.cuda()
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[process_index]
        )
        emb = torch.nn.parallel.DistributedDataParallel(
            emb, device_ids=[process_index]
        )
        train_sampler = BalancedBatchSampler(train_dataset)
        batch_size = cfg.DATASET.IMG_NUM_PER_GPU
    else:
        assert proc_rank == 0, ('proc_rank != 0, it will influence '
                                'the evaluation procedure')
        model = torch.nn.DataParallel(model).cuda()
        emb = torch.nn.DataParallel(emb).cuda()
        train_sampler = BalancedBatchSampler(train_dataset)
        batch_size = cfg.DATASET.IMG_NUM_PER_GPU * ngpus_per_node

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=(train_sampler is None),
        drop_last=False,
        collate_fn=tracklet_pair_collect,
        num_workers=cfg.WORKERS,
        pin_memory=True,
        sampler=train_sampler
    )
    eval_loader = torch.utils.data.DataLoader(
        eval_dataset,
        batch_size=batch_size,
        shuffle=False,
        drop_last=False,
        collate_fn=tracklet_pair_collect,
        num_workers=cfg.WORKERS
    )

    criterion = nn.CrossEntropyLoss()

    Trainer = trackletpairConnectTrainer(
        cfg,
        model,
        optimizer,
        lr_scheduler,
        criterion,
        output_dir,
        'acc',
        last_iter,
        proc_rank,
        pre_ap_model=emb,
    )

    while True:
        Trainer.train(train_loader, eval_loader)
        # eval
        Trainer.evaluate(eval_loader)
args = parser.parse_args()

with open(args.config) as f:
    config = yaml.load(f)
config['config_file'] = args.config.replace('/', '.').split('.')[-2]

seed = config['seed']
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

n_epochs = config['optimization']['n_epochs']

if not args.disable_cuda and torch.cuda.is_available():
    device = torch.device('cuda:{}'.format(args.gpu))
else:
    device = torch.device('cpu')

logger = Logger(config)
model = get_model(config['model'])
optim = get_optimizer(model.parameters(), config['optimization'])
train_loader, valid_loader, test_loader = get_data(config['data'])

## Train
for i in range(n_epochs):
    for data, label in train_loader:
        break
def main():
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model_builder = importlib.import_module(
        "models." + cfg.MODEL.NAME).get_fovea_net
    model = model_builder(cfg, is_train=True)

    # xiaofeng: added to load parameters
    if cfg.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False)

    # copy model file -- xiaofeng commented it out
    # this_dir = os.path.dirname(__file__)
    # shutil.copy2(os.path.join(this_dir, '../models', cfg.MODEL.NAME + '.py'),
    #              final_output_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion) and optimizer
    criterion = HybridLoss(roi_weight=cfg.LOSS.ROI_WEIGHT,
                           regress_weight=cfg.LOSS.REGRESS_WEIGHT,
                           use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT,
                           hrnet_only=cfg.TRAIN.HRNET_ONLY).cuda()

    # Data loading code
    # normalize = transforms.Normalize(
    #     mean=[0.134, 0.207, 0.330], std=[0.127, 0.160, 0.239]
    # )
    # train_dataset = importlib.import_module(
    #     'dataset.' + cfg.DATASET.DATASET).Dataset(
    #     cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
    #     transforms.Compose([
    #         transforms.ToTensor(),
    #         normalize,
    #     ])
    # )
    # valid_dataset = importlib.import_module(
    #     'dataset.' + cfg.DATASET.DATASET).Dataset(
    #     cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
    #     transforms.Compose([
    #         transforms.ToTensor(),
    #         normalize,
    #     ])
    # )
    #
    # train_loader = torch.utils.data.DataLoader(
    #     train_dataset,
    #     batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
    #     shuffle=cfg.TRAIN.SHUFFLE,
    #     num_workers=cfg.WORKERS,
    #     pin_memory=cfg.PIN_MEMORY
    # )
    # valid_loader = torch.utils.data.DataLoader(
    #     valid_dataset,
    #     batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
    #     shuffle=False,
    #     num_workers=cfg.WORKERS,
    #     pin_memory=cfg.PIN_MEMORY
    # )

    db_trains = []
    db_vals = []
    final_full_test = cfg.TRAIN.FULL_DATA

    normalize_1 = transforms.Normalize(mean=[0.282, 0.168, 0.084],
                                       std=[0.189, 0.110, 0.062])
    train_dataset_1 = importlib.import_module(
        'dataset.' + cfg.DATASET.DATASET).Dataset(
            cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET_1, True,
            transforms.Compose([
                transforms.ToTensor(),
                normalize_1,
            ]))
    db_trains.append(train_dataset_1)

    normalize_2 = transforms.Normalize(mean=[0.409, 0.270, 0.215],
                                       std=[0.288, 0.203, 0.160])
    train_dataset_2 = importlib.import_module(
        'dataset.' + cfg.DATASET.DATASET).Dataset(
            cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET_2, True,
            transforms.Compose([
                transforms.ToTensor(),
                normalize_2,
            ]))
    db_trains.append(train_dataset_2)

    if final_full_test is True:
        normalize_3 = transforms.Normalize(mean=[0.404, 0.271, 0.222],
                                           std=[0.284, 0.202, 0.163])
        train_dataset_3 = importlib.import_module(
            'dataset.' + cfg.DATASET.DATASET).Dataset(
                cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, True,
                transforms.Compose([
                    transforms.ToTensor(),
                    normalize_3,
                ]))
        db_trains.append(train_dataset_3)

    train_dataset = ConcatDataset(db_trains)
    logger.info("Combined Dataset: Total {} images".format(len(train_dataset)))

    train_batch_size = cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=train_batch_size,
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    normalize = transforms.Normalize(mean=[0.404, 0.271, 0.222],
                                     std=[0.284, 0.202, 0.163])
    val_dataset_1 = importlib.import_module(
        'dataset.' + cfg.DATASET.DATASET).Dataset(
            cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
            transforms.Compose([
                transforms.ToTensor(),
                normalize,
            ]))
    db_vals.append(val_dataset_1)

    if final_full_test is True:
        normalize_1 = transforms.Normalize(mean=[0.282, 0.168, 0.084],
                                           std=[0.189, 0.110, 0.062])
        val_dataset_2 = importlib.import_module(
            'dataset.' + cfg.DATASET.DATASET).Dataset(
                cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET_1, False,
                transforms.Compose([
                    transforms.ToTensor(),
                    normalize_1,
                ]))
        db_vals.append(val_dataset_2)

        normalize_2 = transforms.Normalize(mean=[0.409, 0.270, 0.215],
                                           std=[0.288, 0.203, 0.160])
        val_dataset_3 = importlib.import_module(
            'dataset.' + cfg.DATASET.DATASET).Dataset(
                cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET_2, False,
                transforms.Compose([
                    transforms.ToTensor(),
                    normalize_2,
                ]))
        db_vals.append(val_dataset_3)

    valid_dataset = ConcatDataset(db_vals)
    logger.info("Val Dataset: Total {} images".format(len(valid_dataset)))

    test_batch_size = cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=test_batch_size,
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    logger.info("Train len: {}, batch_size: {}; Test len: {}, batch_size: {}"
                .format(len(train_loader), train_batch_size,
                        len(valid_loader), test_batch_size))

    best_metric = 1e6
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    if cfg.TEST.MODEL_FILE:
        checkpoint_file = cfg.TEST.MODEL_FILE
    else:
        checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        # begin_epoch = checkpoint['epoch']
        begin_epoch = 0  # xiaofeng changed it
        best_metric = checkpoint['metric']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    if cfg.TRAIN.LR_EXP:
        # lr = lr * gamma ** epoch
        lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
            optimizer, cfg.TRAIN.GAMMA1, last_epoch=-1)
    else:
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
            last_epoch=last_epoch)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        start_time = timer()
        lr_scheduler.step()

        # evaluate on validation set before training
        # lr_metric, hr_metric, final_metric = validate(
        #     cfg, valid_loader, valid_dataset, model, criterion,
        #     final_output_dir, tb_log_dir, writer_dict, db_vals
        # )
        # print("validation before training spent time:")
        # timer(start_time)  # timing ends here for "start_time"

        # train for one epoch
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)
        print("epoch %d train spent time:" % (epoch))
        train_time = timer(start_time)  # timing ends here for "start_time"

        # if epoch >= int(cfg.TRAIN.END_EPOCH / 10):
        # evaluate on validation set
        lr_metric, hr_metric, final_metric = validate(
            cfg, valid_loader, valid_dataset, model, criterion,
            final_output_dir, tb_log_dir, writer_dict, db_vals)
        print("validation spent time:")
        val_time = timer(train_time)  # timing ends here for "train_time"

        min_metric = min(lr_metric, hr_metric, final_metric)
        if min_metric <= best_metric:
            best_metric = min_metric
            best_model = True
            logger.info('=> epoch [{}] best model result: {}'.format(
                epoch, best_metric))
        else:
            best_model = False

        # xiaofeng changed it
        if best_model is True:
            logger.info('=> saving checkpoint to {}'.format(final_output_dir))
            # transfer the model to CPU before saving to fix unstable bug:
            # github.com/pytorch/pytorch/issues/10577
            model = model.cpu()
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'model': cfg.MODEL.NAME,
                    'state_dict': model.state_dict(),
                    'best_state_dict': model.module.state_dict(),
                    'metric': final_metric,
                    'optimizer': optimizer.state_dict(),
                }, best_model, final_output_dir)
            model = model.cuda()
            print("saving spent time:")
            end_time = timer(val_time)  # timing ends here for "val_time"
        elif (epoch % 60 == 0) and (epoch != 0):
            logger.info('=> saving epoch {} checkpoint to {}'.format(
                epoch, final_output_dir))
            # transfer the model to CPU before saving to fix unstable bug:
            # github.com/pytorch/pytorch/issues/10577
            time_str = time.strftime('%Y-%m-%d-%H-%M')
            if cfg.TRAIN.HRNET_ONLY:
                checkpoint_filename = 'checkpoint_HRNET_epoch%d_%s.pth' % (
                    epoch, time_str)
            else:
                checkpoint_filename = 'checkpoint_Hybrid_epoch%d_%s.pth' % (
                    epoch, time_str)
            model = model.cpu()
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'model': cfg.MODEL.NAME,
                    'state_dict': model.state_dict(),
                    'best_state_dict': model.module.state_dict(),
                    'metric': final_metric,
                    'optimizer': optimizer.state_dict(),
                }, best_model, final_output_dir, checkpoint_filename)
            model = model.cuda()

    # xiaofeng change
    time_str = time.strftime('%Y-%m-%d-%H-%M')
    if cfg.TRAIN.HRNET_ONLY:
        model_name = 'final_state_HRNET_%s.pth' % (time_str)
    else:
        model_name = 'final_state_Hybrid_%s.pth' % (time_str)
    final_model_state_file = os.path.join(final_output_dir, model_name)
    logger.info(
        '=> saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()

    # save a final checkpoint
    model = model.cpu()
    save_checkpoint(
        {
            'epoch': epoch + 1,
            'model': cfg.MODEL.NAME,
            'state_dict': model.state_dict(),
            'best_state_dict': model.module.state_dict(),
            'metric': final_metric,
            'optimizer': optimizer.state_dict(),
        }, best_model, final_output_dir, "checkpoint_final_state.pth")
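# Several of the training scripts in this section call a save_checkpoint(states,
# is_best, output_dir, filename) helper that is never defined here. A minimal
# sketch of the common pattern, saving the full training state every time and
# copying out the best weights, is given as an assumption about that helper:
import os
import torch


def save_checkpoint(states, is_best, output_dir, filename='checkpoint.pth'):
    """Persist the full training state; additionally keep the best weights."""
    torch.save(states, os.path.join(output_dir, filename))
    if is_best and 'best_state_dict' in states:
        torch.save(states['best_state_dict'],
                   os.path.join(output_dir, 'model_best.pth'))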
def build_model(self, eval=False):
    if self.train_mode:
        with tf.variable_scope(tf.get_variable_scope()) as outer_scope:
            self.learning_rate = tf.constant(self.hparams.learning_rate)
            # self.learning_rate = self._get_learning_rate_warmup(self.hparams)
            # self.learning_rate = self._get_learning_rate_decay()
            opt = utils.get_optimizer(self.hparams, self.learning_rate)

            tower_grads = []
            losses = []
            controller = "/cpu:0"
            self._train_models = []
            for i, id in enumerate(gpu_utils.get_available_gpus()):
                name = 'tower_%d' % i
                with tf.device(gpu_utils.assign_to_device(id, controller)), \
                        tf.name_scope(name):
                    model = self.Model()
                    model(
                        self.hparams,
                        tf.estimator.ModeKeys.TRAIN,
                        self._batched_input_train)

                    loss = model.loss
                    with tf.name_scope("compute_gradients"):
                        grad_and_vars = opt.compute_gradients(
                            loss,
                            var_list=model.trainable_variables(),
                            colocate_gradients_with_ops=self.hparams.colocate_gradients_with_ops)
                        vars = [var for _, var in grad_and_vars]
                        grads, _, _ = model_utils.gradient_clip(
                            [grad for grad, var in grad_and_vars],
                            max_gradient_norm=MAX_GRADIENT_NORM)
                        tower_grads.append(zip(grads, vars))
                    losses.append(loss)
                outer_scope.reuse_variables()
                self._train_models.append(model)

            self._train_model = model
            self.params = model.trainable_variables()

        with tf.name_scope("apply_gradients"), tf.device(controller):
            average_grads = []
            for grad_and_vars in zip(*tower_grads):
                grads = [g for g, _ in grad_and_vars]
                for g, v in grad_and_vars:
                    print(g, v)
                grad = tf.reduce_mean(grads, 0)
                v = grad_and_vars[0][1]
                grad_and_var = (grad, v)
                average_grads.append(grad_and_var)
            self.update = opt.apply_gradients(average_grads, self._global_step)
            self.loss = tf.reduce_mean(losses)

        self._summary = tf.summary.merge([
            tf.summary.scalar('train_loss', self.loss),
            tf.summary.scalar("learning_rate", self.learning_rate),
        ])

        # init dev model
        if self.hparams.dev_data is not None:
            with tf.variable_scope(tf.get_variable_scope(),
                                   reuse=tf.AUTO_REUSE):
                self.dev_model = self.Model()
                self.hparams.batch_size = self.hparams.eval_batch_size
                self.dev_model(
                    self.hparams,
                    tf.estimator.ModeKeys.EVAL,
                    self._batched_input_dev)

    if eval or self.eval_mode:
        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            self.test_model = self.Model()
            self.hparams.batch_size = self.hparams.eval_batch_size
            self.test_model(
                self.hparams,
                tf.estimator.ModeKeys.EVAL,
                self._batched_input_test)
            self._eval_summary = tf.no_op()

    self.print_logs()
def main():
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    args = parse_args()
    print('out')
    print(args)
    reset_config(config, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = eval('models.' + config.MODEL.NAME + '.get_pose_net')(
        config, is_train=True)

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', config.MODEL.NAME + '.py'),
        final_output_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    dump_input = torch.rand(
        (config.TRAIN.BATCH_SIZE, 3, config.MODEL.IMAGE_SIZE[1],
         config.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model, (dump_input, ), verbose=False)

    gpus = [int(i) for i in config.GPUS.split(',')]
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=config.LOSS.USE_TARGET_WEIGHT).cuda()

    optimizer = get_optimizer(config, model)

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR)

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + config.DATASET.DATASET)(
        config, config.DATASET.ROOT, config.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + config.DATASET.DATASET)(
        config, config.DATASET.ROOT, config.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE * len(gpus),
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE * len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True)

    best_perf = 0.0
    best_model = False
    for epoch in range(config.TRAIN.BEGIN_EPOCH, config.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # print("model check")
        # for i, p in enumerate(model.parameters()):
        #     print(p.requires_grad)

        # train for one epoch
        train(config, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)

        # evaluate on validation set
        perf_indicator = validate(config, valid_loader, valid_dataset, model,
                                  criterion, final_output_dir, tb_log_dir,
                                  writer_dict)

        if perf_indicator > best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': get_model_name(config),
                'state_dict': model.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir,
                                          'final_state.pth.tar')
    logger.info(
        'saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
def main():
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    t_checkpoints = cfg.KD.TEACHER  # note: set this in the student config file
    train_type = cfg.KD.TRAIN_TYPE  # note: set this in the student config file
    train_type = get_train_type(train_type, t_checkpoints)
    logger.info('=> train type is {} '.format(train_type))

    if train_type == 'FPD':
        cfg_name = 'student_' + os.path.basename(args.cfg).split('.')[0]
    else:
        cfg_name = os.path.basename(args.cfg).split('.')[0]
    save_yaml_file(cfg_name, cfg, final_output_dir)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=True)

    # fpd method, default NORMAL
    if train_type == 'FPD':
        tcfg = cfg.clone()
        tcfg.defrost()
        tcfg.merge_from_file(args.tcfg)
        tcfg.freeze()
        tcfg_name = 'teacher_' + os.path.basename(args.tcfg).split('.')[0]
        save_yaml_file(tcfg_name, tcfg, final_output_dir)

        # teacher model
        tmodel = eval('models.' + tcfg.MODEL.NAME + '.get_pose_net')(
            tcfg, is_train=False)
        load_checkpoint(t_checkpoints, tmodel, strict=True,
                        model_info='teacher_' + tcfg.MODEL.NAME)
        tmodel = torch.nn.DataParallel(tmodel, device_ids=cfg.GPUS).cuda()

        # define kd_pose loss function (criterion) and optimizer
        kd_pose_criterion = JointsMSELoss(
            use_target_weight=tcfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model, (dump_input, ))
    logger.info(get_model_summary(model, dump_input))

    if cfg.TRAIN.CHECKPOINT:
        load_checkpoint(cfg.TRAIN.CHECKPOINT, model, strict=True,
                        model_info='student_' + cfg.MODEL.NAME)

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # you can choose or replace the pose_loss and kd_pose_loss types,
    # including mse, kl, ohkm loss, etc.
    # define pose loss function (criterion) and optimizer
    pose_criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
        last_epoch=last_epoch)

    # evaluate teacher and student on the validation set before training
    validate(cfg, valid_loader, valid_dataset, tmodel, pose_criterion,
             final_output_dir, tb_log_dir, writer_dict)
    validate(cfg, valid_loader, valid_dataset, model, pose_criterion,
             final_output_dir, tb_log_dir, writer_dict)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # fpd method, default NORMAL
        if train_type == 'FPD':
            # train for one epoch
            fpd_train(cfg, train_loader, model, tmodel, pose_criterion,
                      kd_pose_criterion, optimizer, epoch, final_output_dir,
                      tb_log_dir, writer_dict)
        else:
            # train for one epoch
            train(cfg, train_loader, model, pose_criterion, optimizer, epoch,
                  final_output_dir, tb_log_dir, writer_dict)

        # evaluate on validation set
        perf_indicator = validate(cfg, valid_loader, valid_dataset, model,
                                  pose_criterion, final_output_dir,
                                  tb_log_dir, writer_dict)

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        '=> saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
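# fpd_train() above trains the student against both the ground-truth heatmaps
# (pose_criterion) and the teacher's predicted heatmaps (kd_pose_criterion).
# Below is a sketch of how the two terms are typically combined in Fast Pose
# Distillation with an alpha-weighted sum; the weight name `alpha` and this
# exact composition are assumptions, not the repository's actual fpd_train:
def fpd_loss(pose_criterion, kd_pose_criterion, output, teacher_output,
             target, target_weight, alpha=0.5):
    hard_loss = pose_criterion(output, target, target_weight)  # vs. labels
    soft_loss = kd_pose_criterion(output, teacher_output, target_weight)  # vs. teacher
    return (1 - alpha) * hard_loss + alpha * soft_loss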
def main_per_worker(process_index, ngpus_per_node, args):
    update_config(cfg, args)

    # torch seed
    torch.cuda.manual_seed(random.random())

    # cudnn
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # proc_rank
    proc_rank = args.rank * ngpus_per_node + process_index

    # create logger
    logger, output_dir = create_logger(cfg, proc_rank)
    # logger.info(pprint.pformat(args))
    # logger.info(cfg)

    model = InceptionResnetV1(pretrained='vggface2', classify=False,
                              path=[cfg.MODEL.FEATURE_PATH,
                                    cfg.MODEL.LOGITS_PATH])
    optimizer = get_optimizer(cfg, model)
    model, optimizer, last_iter = load_checkpoint(cfg, model, optimizer)
    lr_scheduler = get_lr_scheduler(cfg, optimizer, last_iter)

    train_transform = FacenetTransform(size=(cfg.TRAIN.INPUT_MIN,
                                             cfg.TRAIN.INPUT_MAX))
    train_dataset = FacenetTripletDataset(cfg.DATASET.ROOT,
                                          transform=train_transform,
                                          is_train=True)

    eval_transform = FacenetTransform(size=cfg.TEST.TEST_SIZE)
    eval_dataset = FacenetTripletDataset(cfg.DATASET.ROOT,
                                         transform=eval_transform,
                                         is_train=False)

    # distribution
    if args.distributed:
        logger.info(
            f'Init process group: dist_url: {args.dist_url}, '
            f'world_size: {args.world_size}, '
            f'machine: {args.rank}, '
            f'rank: {proc_rank}'
        )
        dist.init_process_group(
            backend=cfg.DIST_BACKEND,
            init_method=args.dist_url,
            world_size=args.world_size,
            rank=proc_rank
        )
        torch.cuda.set_device(process_index)
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[process_index]
        )
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset
        )
        batch_size = cfg.DATASET.IMG_NUM_PER_GPU
    else:
        assert proc_rank == 0, ('proc_rank != 0, it will influence '
                                'the evaluation procedure')
        model = torch.nn.DataParallel(model).cuda()
        train_sampler = None
        batch_size = cfg.DATASET.IMG_NUM_PER_GPU * ngpus_per_node

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=(train_sampler is None),
        drop_last=True,
        collate_fn=facenet_triplet_collect,
        num_workers=cfg.WORKERS,
        pin_memory=True,
        sampler=train_sampler
    )
    eval_loader = torch.utils.data.DataLoader(
        eval_dataset,
        batch_size=batch_size,
        shuffle=False,
        drop_last=False,
        collate_fn=facenet_triplet_collect,
        num_workers=cfg.WORKERS
    )

    criterion = triplet_loss

    Trainer = get_trainer(
        cfg,
        model,
        optimizer,
        lr_scheduler,
        criterion,
        output_dir,
        last_iter,
        proc_rank,
    )

    while True:
        Trainer.train(train_loader, eval_loader)
        # eval
        Trainer.evaluate(eval_loader)
def trainIters(args):
    epoch_resume = 0
    model_dir = os.path.join('../models/',
                             args.model_name + '_prev_inference_mask')

    if args.resume:
        # will resume training the model with name args.model_name
        encoder_dict, decoder_dict, enc_opt_dict, dec_opt_dict, load_args = \
            load_checkpoint(args.model_name, args.use_gpu)
        epoch_resume = load_args.epoch_resume
        encoder = FeatureExtractor(load_args)
        decoder = RSISMask(load_args)
        encoder_dict, decoder_dict = check_parallel(encoder_dict, decoder_dict)
        encoder.load_state_dict(encoder_dict)
        decoder.load_state_dict(decoder_dict)
        args = load_args
    elif args.transfer:
        # load model from args and replace last fc layer
        encoder_dict, decoder_dict, _, _, load_args = load_checkpoint(
            args.transfer_from, args.use_gpu)
        encoder = FeatureExtractor(load_args)
        decoder = RSISMask(args)
        encoder_dict, decoder_dict = check_parallel(encoder_dict, decoder_dict)
        encoder.load_state_dict(encoder_dict)
        decoder.load_state_dict(decoder_dict)
    else:
        encoder = FeatureExtractor(args)
        decoder = RSISMask(args)

    # model checkpoints will be saved here
    make_dir(model_dir)

    # save parameters for future use
    pickle.dump(args, open(os.path.join(model_dir, 'args.pkl'), 'wb'))

    encoder_params = get_base_params(args, encoder)
    skip_params = get_skip_params(encoder)
    decoder_params = list(decoder.parameters()) + list(skip_params)

    dec_opt = get_optimizer(args.optim, args.lr, decoder_params,
                            args.weight_decay)
    enc_opt = get_optimizer(args.optim_cnn, args.lr_cnn, encoder_params,
                            args.weight_decay_cnn)

    if args.resume:
        enc_opt.load_state_dict(enc_opt_dict)
        dec_opt.load_state_dict(dec_opt_dict)
        from collections import defaultdict
        dec_opt.state = defaultdict(dict, dec_opt.state)

    if not args.log_term:
        print("Training logs will be saved to:",
              os.path.join(model_dir, 'train.log'))
        sys.stdout = open(os.path.join(model_dir, 'train.log'), 'w')
        sys.stderr = open(os.path.join(model_dir, 'train.err'), 'w')

    print(args)

    # objective function for mask
    mask_siou = softIoULoss()

    if args.use_gpu:
        encoder.cuda()
        decoder.cuda()
        mask_siou.cuda()

    crits = mask_siou
    optims = [enc_opt, dec_opt]
    if args.use_gpu:
        torch.cuda.synchronize()
    start = time.time()

    # vars for early stopping
    best_val_loss = args.best_val_loss
    acc_patience = 0
    mt_val = -1

    # keep track of the number of batches in each epoch for continuity when
    # plotting curves
    loaders = init_dataloaders(args)
    num_batches = {'train': 0, 'val': 0}
    # area_range = [[0 ** 2, 1e5 ** 2], [0 ** 2, 20 ** 2],
    #               [20 ** 2, 59 ** 2], [59 ** 2, 1e5 ** 2]]
    area_range = [[0 ** 2, 1e5 ** 2], [0 ** 2, 30 ** 2],
                  [30 ** 2, 90 ** 2], [90 ** 2, 1e5 ** 2]]  # for (287, 950)
    resolution = 0

    for e in range(args.max_epoch):
        print("Epoch", e + epoch_resume)
        # store losses in lists to display average since beginning
        epoch_losses = {
            'train': {'total': [], 'iou': []},
            'val': {'total': [], 'iou': []}
        }
        # total mean for epoch will be saved here to display at the end
        total_losses = {'total': [], 'iou': []}

        # check if it's time to do some changes here
        if e + epoch_resume >= args.finetune_after \
                and not args.update_encoder \
                and not args.finetune_after == -1:
            print("Starting to update encoder")
            args.update_encoder = True
            acc_patience = 0
            mt_val = -1

        if args.loss_penalization:
            if e < 10:
                resolution = area_range[2]
            else:
                resolution = area_range[0]

        # we validate after each epoch
        for split in ['train', 'val']:
            if args.dataset == 'davis2017' or args.dataset == 'youtube' \
                    or args.dataset == 'kittimots':
                loaders[split].dataset.set_epoch(e)

            for batch_idx, (inputs, targets, seq_name, starting_frame) in \
                    enumerate(loaders[split]):
                # send batch to GPU
                prev_hidden_temporal_list = None
                loss = None
                last_frame = False
                max_ii = min(len(inputs), args.length_clip)

                for ii in range(max_ii):
                    # If we are on the last frame of a clip, we will have to
                    # backpropagate the loss back to the beginning of the clip.
                    if ii == max_ii - 1:
                        last_frame = True

                    # x: input images (N consecutive frames from M different sequences)
                    # y_mask: ground truth annotations (some of them are zeros to
                    #     have a fixed length in number of object instances)
                    # sw_mask: this mask indicates which masks from y_mask are valid
                    x, y_mask, sw_mask = batch_to_var(args, inputs[ii],
                                                      targets[ii])

                    if ii == 0:
                        prev_mask = y_mask

                    # From one frame to the following frame the
                    # prev_hidden_temporal_list is updated.
                    loss, losses, outs, hidden_temporal_list = runIter(
                        args, encoder, decoder, x, y_mask, sw_mask,
                        resolution, crits, optims, split, loss,
                        prev_hidden_temporal_list, prev_mask, last_frame)

                    # Hidden temporal state from time instant ii is saved to be
                    # used when processing the next time instant ii+1
                    if args.only_spatial == False:
                        prev_hidden_temporal_list = hidden_temporal_list

                    prev_mask = outs

                # store loss values in dictionary separately
                epoch_losses[split]['total'].append(losses[0])
                epoch_losses[split]['iou'].append(losses[1])

                # print after some iterations
                if (batch_idx + 1) % args.print_every == 0:
                    mt = np.mean(epoch_losses[split]['total'])
                    mi = np.mean(epoch_losses[split]['iou'])
                    te = time.time() - start
                    print("iter %d:\ttotal:%.4f\tiou:%.4f\ttime:%.4f"
                          % (batch_idx, mt, mi, te))
                    if args.use_gpu:
                        torch.cuda.synchronize()
                    start = time.time()

            num_batches[split] = batch_idx + 1

            # compute mean val losses within epoch
            if split == 'val' and args.smooth_curves:
                if mt_val == -1:
                    mt = np.mean(epoch_losses[split]['total'])
                else:
                    mt = 0.9 * mt_val + 0.1 * np.mean(
                        epoch_losses[split]['total'])
                mt_val = mt
            else:
                mt = np.mean(epoch_losses[split]['total'])
            mi = np.mean(epoch_losses[split]['iou'])

            # save train and val losses for the epoch
            total_losses['iou'].append(mi)
            total_losses['total'].append(mt)

            args.epoch_resume = e + epoch_resume
            print("Epoch %d:\ttotal:%.4f\tiou:%.4f\t(%s)" % (e, mt, mi, split))

        if mt < (best_val_loss - args.min_delta):
            print("Saving checkpoint.")
            best_val_loss = mt
            args.best_val_loss = best_val_loss
            # saves model, params, and optimizers
            save_checkpoint_prev_inference_mask(args, encoder, decoder,
                                                enc_opt, dec_opt)
            acc_patience = 0
        else:
            acc_patience += 1

        if acc_patience > args.patience and not args.update_encoder \
                and not args.finetune_after == -1:
            print("Starting to update encoder")
            acc_patience = 0
            args.update_encoder = True
            # reset because adding a loss term will increase the total value
            best_val_loss = 1000
            mt_val = -1
            encoder_dict, decoder_dict, enc_opt_dict, dec_opt_dict, _ = \
                load_checkpoint(args.model_name, args.use_gpu)
            encoder.load_state_dict(encoder_dict)
            decoder.load_state_dict(decoder_dict)
            enc_opt.load_state_dict(enc_opt_dict)
            dec_opt.load_state_dict(dec_opt_dict)

        # early stopping after N epochs without improvement
        if acc_patience > args.patience_stop:
            break
def main():
    args = parse_args()

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url)

    logger, final_output_dir, tb_log_dir = create_logger(config, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = eval('models.' + config.MODEL.NAME + '.get_cls_net')(config)

    dump_input = torch.rand(
        (1, 3, config.MODEL.IMAGE_SIZE[1], config.MODEL.IMAGE_SIZE[0]))
    logger.info(get_model_summary(model, dump_input))

    # copy model file
    # this_dir = os.path.dirname(__file__)
    # models_dst_dir = os.path.join(final_output_dir, 'models')
    # if os.path.exists(models_dst_dir):
    #     shutil.rmtree(models_dst_dir)
    # shutil.copytree(os.path.join(this_dir, '../lib/models'), models_dst_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    gpus = list(config.GPUS)

    # Changed from DataParallel to DistributedDataParallel:
    # model = torch.nn.DataParallel(model, device_ids=gpus).cuda()
    torch.cuda.set_device(args.local_rank)
    model = model.to(args.local_rank)
    model = torch.nn.parallel.DistributedDataParallel(
        model, device_ids=[args.local_rank], output_device=args.local_rank)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss().cuda()

    optimizer = get_optimizer(config, model)

    best_perf = 0.0
    best_model = False
    last_epoch = config.TRAIN.BEGIN_EPOCH
    if config.TRAIN.RESUME:
        model_state_file = os.path.join(final_output_dir, 'checkpoint.pth.tar')
        if os.path.isfile(model_state_file):
            checkpoint = torch.load(model_state_file)
            last_epoch = checkpoint['epoch']
            best_perf = checkpoint['perf']
            model.module.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("=> loaded checkpoint (epoch {})".format(
                checkpoint['epoch']))
            best_model = True

    if isinstance(config.TRAIN.LR_STEP, list):
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR,
            last_epoch - 1)
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR,
            last_epoch - 1)

    # Data loading code
    traindir = os.path.join(config.DATASET.ROOT, config.DATASET.TRAIN_SET)
    valdir = os.path.join(config.DATASET.ROOT, config.DATASET.TEST_SET)

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # Changed from an ImageFolder to a TSV dataset instance:
    # train_dataset = datasets.ImageFolder(
    #     traindir,
    #     transforms.Compose([
    #         transforms.RandomResizedCrop(config.MODEL.IMAGE_SIZE[0]),
    #         transforms.RandomHorizontalFlip(),
    #         transforms.ToTensor(),
    #         normalize,
    #     ]))
    train_dataset = TSVInstance(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(config.MODEL.IMAGE_SIZE[0]),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    # DDP requires DistributedSampler
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU,
        shuffle=(train_sampler is None),
        num_workers=config.WORKERS,
        pin_memory=True,
        sampler=train_sampler)

    valid_loader = torch.utils.data.DataLoader(
        TSVInstance(
            valdir,
            transforms.Compose([
                transforms.Resize(int(config.MODEL.IMAGE_SIZE[0] / 0.875)),
                transforms.CenterCrop(config.MODEL.IMAGE_SIZE[0]),
                transforms.ToTensor(),
                normalize,
            ])),
        batch_size=config.TEST.BATCH_SIZE_PER_GPU,
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True)

    for epoch in range(last_epoch, config.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # train for one epoch
        train(config, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)

        # evaluate on validation set
        perf_indicator = validate(config, valid_loader, model, criterion,
                                  final_output_dir, tb_log_dir, writer_dict)

        if perf_indicator > best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': config.MODEL.NAME,
                'state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir, filename='checkpoint.pth.tar')

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth.tar')
    logger.info('saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
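# One thing the DDP loop above omits: without calling set_epoch on the
# DistributedSampler at the start of each epoch, every epoch reuses the same
# shuffling order across ranks. A minimal sketch of the usual pattern, reusing
# the names from the loop above:
for epoch in range(last_epoch, config.TRAIN.END_EPOCH):
    train_sampler.set_epoch(epoch)  # re-seed so each epoch gets a fresh permutation
    train(config, train_loader, model, criterion, optimizer, epoch,
          final_output_dir, tb_log_dir, writer_dict)
    lr_scheduler.step()  # PyTorch >= 1.1: step the scheduler after training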
import os

import torch
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

from trainer.Trainer import Trainer
from models.loss import PixWiseBCELoss
from datasets.PixWiseDataset import PixWiseDataset
from utils.utils import read_cfg, get_optimizer, build_network, get_device

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

cfg = read_cfg(cfg_file='config/densenet_161_adam_lr1e-3.yaml')

device = get_device(cfg)

network = build_network(cfg)

optimizer = get_optimizer(cfg, network)

loss = PixWiseBCELoss(beta=cfg['train']['loss']['beta'])

writer = SummaryWriter(cfg['log_dir'])

dump_input = torch.randn(1, 3, 224, 224)
writer.add_graph(network, (dump_input, ))

# Without the Resize transform, images come in different sizes and batching fails
train_transform = transforms.Compose([
    transforms.Resize(cfg['model']['image_size']),
    transforms.RandomRotation(cfg['dataset']['augmentation']['rotation']),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
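# Every snippet in this collection funnels through some variant of
# get_optimizer(cfg, network). The helper in utils.utils is not shown; this is
# only a plausible reconstruction of the dispatch-on-config-string pattern,
# and the cfg keys below ('train' -> 'optimizer' / 'lr') are assumptions.
import torch

def get_optimizer_sketch(cfg, network):
    name = cfg['train']['optimizer']  # assumed key
    lr = cfg['train']['lr']           # assumed key
    if name == 'adam':
        return torch.optim.Adam(network.parameters(), lr=lr)
    if name == 'sgd':
        return torch.optim.SGD(network.parameters(), lr=lr, momentum=0.9)
    raise ValueError('unsupported optimizer: {}'.format(name))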
def main():
    args = parse_args()
    reset_config(config, args)

    logger, final_output_dir, tb_log_dir = create_logger(config, args.cfg, "train")

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = eval("models." + config.MODEL.NAME + ".get_pose_net")(
        config, is_train=True)

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, "../lib/models", config.MODEL.NAME + ".py"),
        final_output_dir,
    )

    writer_dict = {
        "writer": SummaryWriter(log_dir=tb_log_dir),
        "train_global_steps": 0,
        "valid_global_steps": 0,
    }

    dump_input = torch.rand((
        config.TRAIN.BATCH_SIZE,
        3,
        config.MODEL.IMAGE_SIZE[1],
        config.MODEL.IMAGE_SIZE[0],
    ))
    writer_dict["writer"].add_graph(model, (dump_input, ), verbose=False)

    gpus = [int(i) for i in config.GPUS.split(",")]
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=config.LOSS.USE_TARGET_WEIGHT).cuda()

    optimizer = get_optimizer(config, model)

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR)

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval("dataset." + config.DATASET.DATASET)(
        config,
        config.DATASET.ROOT,
        config.DATASET.TRAIN_SET,
        True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]),
    )
    valid_dataset = eval("dataset." + config.DATASET.DATASET)(
        config,
        config.DATASET.ROOT,
        config.DATASET.TEST_SET,
        False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]),
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE * len(gpus),
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=True,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE * len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True,
    )

    best_perf = 0.0
    best_model = False
    for epoch in range(config.TRAIN.BEGIN_EPOCH, config.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # train for one epoch
        train(
            config, train_loader, model, criterion, optimizer, epoch,
            final_output_dir, tb_log_dir, writer_dict,
        )

        # evaluate on validation set
        perf_indicator = validate(
            config, valid_loader, valid_dataset, model, criterion,
            final_output_dir, tb_log_dir, writer_dict,
        )

        if perf_indicator > best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info("=> saving checkpoint to {}".format(final_output_dir))
        save_checkpoint(
            {
                "epoch": epoch + 1,
                "model": get_model_name(config),
                "state_dict": model.state_dict(),
                "perf": perf_indicator,
                "optimizer": optimizer.state_dict(),
            },
            best_model,
            final_output_dir,
        )

    final_model_state_file = os.path.join(final_output_dir, "final_state.pth.tar")
    logger.info("saving final model state to {}".format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict["writer"].close()
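# Note the asymmetry above: the rolling checkpoint stores model.state_dict()
# from the DataParallel wrapper (keys prefixed "module."), while the final
# state stores model.module.state_dict() (no prefix). A small sketch of
# loading the prefixed checkpoint into an unwrapped model:
import torch

def load_dataparallel_checkpoint(model, path):
    checkpoint = torch.load(path, map_location="cpu")
    state_dict = {k.replace("module.", "", 1): v
                  for k, v in checkpoint["state_dict"].items()}
    model.load_state_dict(state_dict)
    return checkpoint["epoch"], checkpoint["perf"]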
def main_worker(gpu, ngpus_per_node, args, final_output_dir, tb_log_dir):
    args.gpu = gpu
    args.rank = args.rank * ngpus_per_node + gpu
    print('Init process group: dist_url: {}, world_size: {}, rank: {}'.format(
        cfg.DIST_URL, args.world_size, args.rank))
    dist.init_process_group(backend=cfg.DIST_BACKEND, init_method=cfg.DIST_URL,
                            world_size=args.world_size, rank=args.rank)

    update_config(cfg, args)

    # setup logger
    logger, _ = setup_logger(final_output_dir, args.rank, 'train')

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=True)
    logger.info(get_model_summary(model, torch.zeros(1, 3, *cfg.MODEL.IMAGE_SIZE)))

    # copy model file
    if not cfg.MULTIPROCESSING_DISTRIBUTED or (
            cfg.MULTIPROCESSING_DISTRIBUTED and args.rank % ngpus_per_node == 0):
        this_dir = os.path.dirname(__file__)
        shutil.copy2(
            os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
            final_output_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    if not cfg.MULTIPROCESSING_DISTRIBUTED or (
            cfg.MULTIPROCESSING_DISTRIBUTED and args.rank % ngpus_per_node == 0):
        dump_input = torch.rand(
            (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
        writer_dict['writer'].add_graph(model, (dump_input, ))
        # logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.MODEL.SYNC_BN:
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)

    torch.cuda.set_device(args.gpu)
    model.cuda(args.gpu)
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda(args.gpu)

    # Data loading code
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
    )
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
    )

    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=(train_sampler is None),
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY,
        sampler=train_sampler
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY
    )
    logger.info(train_loader.dataset)

    best_perf = -1
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')
    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR, last_epoch=last_epoch)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        # train for one epoch
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)
        # In PyTorch 1.1.0 and later, `lr_scheduler.step()` should be called
        # after `optimizer.step()`.
        lr_scheduler.step()

        # evaluate on validation set
        perf_indicator = validate(
            args, cfg, valid_loader, valid_dataset, model, criterion,
            final_output_dir, tb_log_dir, writer_dict
        )

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        if not cfg.MULTIPROCESSING_DISTRIBUTED or (
                cfg.MULTIPROCESSING_DISTRIBUTED and args.rank == 0):
            logger.info('=> saving checkpoint to {}'.format(final_output_dir))
            save_checkpoint({
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(
        final_output_dir, 'final_state{}.pth.tar'.format(gpu))
    logger.info('saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
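# The checkpoint above keeps both model.state_dict() (DDP wrapper, keys
# prefixed 'module.') and model.module.state_dict() (bare model): the former
# reloads cleanly into a wrapped model on resume, the latter deploys without
# DDP. A CPU-runnable demo of the prefix difference (DataParallel adds the
# same 'module.' prefix as DistributedDataParallel):
import torch

bare = torch.nn.Linear(4, 2)
wrapped = torch.nn.DataParallel(bare)

print(list(wrapped.state_dict())[0])         # 'module.weight'
print(list(wrapped.module.state_dict())[0])  # 'weight'

wrapped.load_state_dict(wrapped.state_dict())      # resume a wrapped run
bare.load_state_dict(wrapped.module.state_dict())  # deploy unwrapped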
def main():
    args = parse_args()

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = eval('models.' + config.MODEL.NAME + '.get_nnb')(config)

    writer_dict = {
        'writer': SummaryWriter(log_dir='./output/facexray'),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    gpus = list(config.GPUS)
    model = torch.nn.DataParallel(model)

    # define loss function (criterion) and optimizer
    criterion = Loss()
    optimizer = get_optimizer(config, model)

    last_epoch = config.TRAIN.BEGIN_EPOCH
    if isinstance(config.TRAIN.LR_STEP, list):
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR,
            last_epoch - 1)
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR,
            last_epoch - 1)

    # Data loading code
    # list_name is not separately specified in the .yaml file
    # the transform cannot handle other input sizes yet; inputs should be [256, 256, 3]
    train_dataset = eval('dataset.' + config.DATASET.DATASET + '.' +
                         config.DATASET.DATASET)(
        config.DATASET.ROOT, config.DATASET.TRAIN_SET, None,
        transforms.Compose([transforms.ToTensor()]))
    valid_dataset = eval('dataset.' + config.DATASET.DATASET + '.' +
                         config.DATASET.DATASET)(
        config.DATASET.ROOT, config.DATASET.TEST_SET, None,
        transforms.Compose([transforms.ToTensor()]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU,
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE_PER_GPU,
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY)

    for epoch in range(last_epoch, config.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # For the first 50,000 iterations the original hrnet layers are frozen;
        # the later iterations train all parameters.
        if epoch == 150000:
            for k, v in model.named_parameters():
                v.requires_grad = True

        # train for one epoch
        train(config, train_loader, model, criterion, optimizer, epoch,
              writer_dict)

        # evaluate on validation set
        validate(config, valid_loader, model, criterion, writer_dict)

    torch.save(model.module.state_dict(), './output/BI_dataset/faceXray.pth')

    writer_dict['writer'].close()
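# The unfreeze step above flips requires_grad back on partway through
# training. A minimal sketch of the matching freeze at setup time, so the
# optimizer only receives trainable parameters; the backbone/head split here
# is hypothetical:
import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 2))

for param in model[0].parameters():  # freeze the "backbone"
    param.requires_grad = False

# Hand the optimizer only trainable parameters; after unfreezing, the frozen
# ones must be added back (e.g. optimizer.add_param_group) to get updates.
optimizer = torch.optim.SGD(
    (p for p in model.parameters() if p.requires_grad), lr=0.01)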
model = get_model(cfg, [l1_cls_num, l2_cls_num], device, logger)

if cfg.TRAIN_STAGE == 2:
    last_stage_weight_path = os.path.join(model_dir, 'best_model_stage1.pth')
    load_weight(model, last_stage_weight_path)
    model.module.freeze_backbone()
    model.module.freeze_classifer(0)
elif cfg.TRAIN_STAGE == 1:
    last_stage_weight_path = os.path.join(args.pretrained_path)
    load_weight(model, last_stage_weight_path)
    model.module.freeze_backbone()
    model.module.freeze_classifer(1)
    # load_pretrained_weight(model, args.pretrained_path)

combiner = Combiner(cfg, device)

optimizer = get_optimizer(cfg, model)
scheduler = get_scheduler(cfg, optimizer)

# ----- END MODEL BUILDER -----

trainLoader = DataLoader(
    train_set,
    batch_size=cfg.TRAIN.BATCH_SIZE,
    shuffle=cfg.TRAIN.SHUFFLE,
    num_workers=cfg.TRAIN.NUM_WORKERS,
    pin_memory=cfg.PIN_MEMORY,
    drop_last=True
)

validLoader = DataLoader(
    valid_set,
    batch_size=cfg.TEST.BATCH_SIZE,
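# load_weight is not defined in this collection; a plausible sketch, assuming
# it tolerates checkpoints whose classifier heads no longer match the current
# stage (the tolerant behavior is an assumption, not the confirmed helper):
import torch

def load_weight(model, weight_path):
    state = torch.load(weight_path, map_location='cpu')
    state = state.get('state_dict', state)  # tolerate wrapped checkpoints
    own = model.state_dict()
    filtered = {k: v for k, v in state.items()
                if k in own and own[k].shape == v.shape}
    model.load_state_dict(filtered, strict=False)
    print('loaded {} tensors, {} left at init'.format(
        len(filtered), len(own) - len(filtered)))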
def main_worker(gpu, ngpus_per_node, args, final_output_dir, tb_log_dir):
    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    if cfg.FP16.ENABLED:
        assert torch.backends.cudnn.enabled, \
            "fp16 mode requires cudnn backend to be enabled."

    if cfg.FP16.STATIC_LOSS_SCALE != 1.0:
        if not cfg.FP16.ENABLED:
            print("Warning: if --fp16 is not used, static_loss_scale will be ignored.")

    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if cfg.MULTIPROCESSING_DISTRIBUTED:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        print('Init process group: dist_url: {}, world_size: {}, rank: {}'.format(
            args.dist_url, args.world_size, args.rank))
        dist.init_process_group(
            backend=cfg.DIST_BACKEND,
            init_method=args.dist_url,
            world_size=args.world_size,
            rank=args.rank
        )

    update_config(cfg, args)

    # setup logger
    logger, _ = setup_logger(final_output_dir, args.rank, 'train')

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=True)

    # copy model file
    if not cfg.MULTIPROCESSING_DISTRIBUTED or (
            cfg.MULTIPROCESSING_DISTRIBUTED and args.rank % ngpus_per_node == 0):
        this_dir = os.path.dirname(__file__)
        shutil.copy2(
            os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
            final_output_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    if not cfg.MULTIPROCESSING_DISTRIBUTED or (
            cfg.MULTIPROCESSING_DISTRIBUTED and args.rank % ngpus_per_node == 0):
        dump_input = torch.rand(
            (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE))
        writer_dict['writer'].add_graph(model, (dump_input, ))
        # logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if args.distributed:
        # For multiprocessing distributed, the DistributedDataParallel
        # constructor should always set the single device scope; otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            # args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to
            # all available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    loss_factory = MultiLossFactory(cfg).cuda()

    # Data loading code
    train_loader = make_dataloader(cfg, is_train=True, distributed=args.distributed)
    logger.info(train_loader.dataset)

    best_perf = -1
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)

    if cfg.FP16.ENABLED:
        optimizer = FP16_Optimizer(
            optimizer,
            static_loss_scale=cfg.FP16.STATIC_LOSS_SCALE,
            dynamic_loss_scale=cfg.FP16.DYNAMIC_LOSS_SCALE
        )

    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth.tar')
    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    if cfg.FP16.ENABLED:
        # FP16_Optimizer wraps the real optimizer, so the scheduler must be
        # pointed at the inner optimizer.
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer.optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
            last_epoch=last_epoch
        )
    else:
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
            last_epoch=last_epoch
        )

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # train one epoch
        do_train(cfg, model, train_loader, loss_factory, optimizer, epoch,
                 final_output_dir, tb_log_dir, writer_dict,
                 fp16=cfg.FP16.ENABLED)

        # no validation pass here: the epoch index doubles as the performance
        # indicator, so the "best" model is simply the latest one.
        perf_indicator = epoch
        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        if not cfg.MULTIPROCESSING_DISTRIBUTED or (
                cfg.MULTIPROCESSING_DISTRIBUTED and args.rank == 0):
            logger.info('=> saving checkpoint to {}'.format(final_output_dir))
            save_checkpoint({
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(
        final_output_dir, 'final_state{}.pth.tar'.format(gpu))
    logger.info('saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
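# The FP16_Optimizer / network_to_half pair predates native mixed precision;
# current PyTorch ships the same loss-scaling machinery as torch.cuda.amp.
# A rough sketch of the equivalent training step (not a drop-in replacement
# for the code above):
import torch

scaler = torch.cuda.amp.GradScaler()  # dynamic loss scaling

def amp_step(model, loss_fn, optimizer, images, targets):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():    # forward in mixed precision
        loss = loss_fn(model(images), targets)
    scaler.scale(loss).backward()      # scale loss to avoid fp16 underflow
    scaler.step(optimizer)             # unscales grads, then optimizer.step()
    scaler.update()                    # adapt the scale factor
    return loss.detach()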
def build_optimizer(self):
    # Derive one optimizer attribute per sub-network by rewriting the module
    # name, e.g. 'encoder_net' -> 'encoder_optimizer'.
    for name, module in self.named_modules():
        optim_name = name.replace('net', 'optimizer')
        setattr(self, optim_name, utils.get_optimizer(self.opt, module))
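# A usage sketch of the name-rewriting pattern above, with a hypothetical
# container module whose sub-network attributes end in '_net' (note the
# original loop does not filter, so it also visits the root module with an
# empty name):
import torch
import torch.nn as nn

class TwoNetModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder_net = nn.Linear(8, 4)
        self.decoder_net = nn.Linear(4, 8)

    def build_optimizer(self, lr=1e-3):
        for name, module in self.named_modules():
            if name.endswith('_net'):
                setattr(self, name.replace('net', 'optimizer'),
                        torch.optim.Adam(module.parameters(), lr=lr))

model = TwoNetModel()
model.build_optimizer()
print(type(model.encoder_optimizer).__name__)  # Adam
print(type(model.decoder_optimizer).__name__)  # Adam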
def main():
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    # benchmark speeds up training; the deterministic flag below counters
    # benchmark's nondeterminism
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # eval() executes a string expression and returns its value
    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=True)

    # copy model file
    this_dir = os.path.dirname(__file__)  # directory of this script
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    writer_dict['writer'].add_graph(model, (dump_input, ))

    logger.info(get_model_summary(model, dump_input))  # log the model summary

    # multi-GPU training
    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    # model = torch.nn.DataParallel(model, device_ids=[0]).cuda()

    # define loss functions (criterion) and optimizer
    criterion = JointsMSELoss(use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()
    regress_loss = RegLoss(use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
    # normalize with the ImageNet mean and standard deviation
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY,
    )

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR, last_epoch=last_epoch)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # train for one epoch
        train(cfg, train_loader, model, criterion, regress_loss, optimizer,
              epoch, final_output_dir, tb_log_dir, writer_dict)

        # evaluate on validation set
        perf_indicator = validate(cfg, valid_loader, valid_dataset, model,
                                  criterion, regress_loss, final_output_dir,
                                  tb_log_dir, writer_dict)

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint({
            'epoch': epoch + 1,
            'model': cfg.MODEL.NAME,
            'state_dict': model.state_dict(),
            'best_state_dict': model.module.state_dict(),
            'perf': perf_indicator,
            'optimizer': optimizer.state_dict(),
        }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info('=> saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
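# save_checkpoint itself is not included in this collection; a plausible
# sketch matching how the trainers above call it (state dict, is_best flag,
# output dir). The model_best.pth name and the use of 'best_state_dict' are
# assumptions:
import os
import torch

def save_checkpoint(state, is_best, output_dir, filename='checkpoint.pth'):
    torch.save(state, os.path.join(output_dir, filename))
    if is_best and 'best_state_dict' in state:
        torch.save(state['best_state_dict'],
                   os.path.join(output_dir, 'model_best.pth'))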
    shuffle=config.TEST.SHUFFLE,
    num_workers=config.WORKERS,
    pin_memory=config.PIN_MEMORY,
)

# get device
if torch.cuda.is_available():
    device = torch.device("cuda:{}".format(config.GPUID))
else:
    device = torch.device("cpu:0")

model = crnn.get_crnn(config)
model = model.to(device)
model_info(model)
print(model)

optimizer = get_optimizer(config, model)

last_epoch = config.TRAIN.BEGIN_EPOCH
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR, last_epoch - 1)

if config.ATTENTION.ENABLE:
    criterion = torch.nn.NLLLoss()
else:
    criterion = torch.nn.CTCLoss()

# training
best_acc = 0.0
for epoch in range(last_epoch, config.TRAIN.END_EPOCH):
    model.train()
    for i, (inp, idx) in enumerate(train_loader):
        # forward pass and loss computation
        inp = inp.to(device)
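# For the non-attention branch, torch.nn.CTCLoss has strict input conventions
# that are easy to get wrong; a self-contained shape check (all sizes are
# illustrative, class 0 is the blank):
import torch

ctc = torch.nn.CTCLoss(blank=0)

T, N, C = 32, 4, 37  # time steps, batch size, classes (incl. blank)
log_probs = torch.randn(T, N, C).log_softmax(2)            # (T, N, C)
targets = torch.randint(1, C, (N, 10), dtype=torch.long)   # labels, no blanks
input_lengths = torch.full((N,), T, dtype=torch.long)
target_lengths = torch.full((N,), 10, dtype=torch.long)

loss = ctc(log_probs, targets, input_lengths, target_lengths)
print(loss.item())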