def _init_model(self, models):
    models = [m.to(self.device) for m in models]

    # Flow model: load pre-trained weights or initialize from scratch.
    if self.cfg.pretrained_model:
        self._log.info("=> Flow model using pre-trained weights {}.".format(
            self.cfg.pretrained_model))
        epoch, weights = load_checkpoint(self.cfg.pretrained_model)
        models[0].load_state_dict(weights)
    else:
        self._log.info("=> Train flow model from scratch.")
        models[0].init_weights()

    # Depth model: load pre-trained weights or initialize from scratch.
    if self.cfg.pretrained_model_depth:
        self._log.info("=> Depth model using pre-trained weights {}.".format(
            self.cfg.pretrained_model_depth))
        epoch, weights = load_checkpoint(self.cfg.pretrained_model_depth)
        models[1].load_state_dict(weights)
    else:
        self._log.info("=> Train depth model from scratch.")
        models[1].init_weights()

    models = [torch.nn.DataParallel(m, device_ids=self.device_ids)
              for m in models]
    return models
def _init_model(self, model):
    from collections import OrderedDict

    # Move the model to the target device.
    n_gpu_use = self.cfg.train.n_gpu
    if self.cfg.mp.enabled and n_gpu_use > 0:
        torch.cuda.set_device(self.device)
    model = model.to(self.device)

    # Load pre-trained weights, remapping checkpoint keys onto the model's
    # state-dict keys by position (assumes both enumerate parameters in the
    # same order).
    if self.cfg.train.pretrained_model:
        self._log.info(self.id, "=> using pre-trained weights {}.".format(
            self.cfg.train.pretrained_model))
        epoch, weights = load_checkpoint(self.cfg.train.pretrained_model)
        self.i_epoch = epoch
        new_weights = OrderedDict()
        for a, b in zip(model.state_dict().keys(), weights.keys()):
            new_weights[a] = weights[b]
        model.load_state_dict(new_weights)
    else:
        self._log.info(self.id, "=> Train from scratch.")
        model.init_weights()

    # Optionally initialize from custom weights (non-strict load).
    if getattr(self.cfg.train, 'init_model', None):
        self._log.info(self.id, "=> using init weights {}.".format(
            self.cfg.train.init_model))
        epoch, weights = load_checkpoint(self.cfg.train.init_model)
        new_weights = OrderedDict()
        for a, b in zip(model.state_dict().keys(), weights.keys()):
            new_weights[a] = weights[b]
        model.load_state_dict(new_weights, strict=False)

    # Wrap for distributed (DDP) or single-process (DataParallel) training.
    if self.cfg.mp.enabled:
        if self.cfg.var.bn_avg:
            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model, 0)
        if n_gpu_use > 0:
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=self.device_ids, find_unused_parameters=True)
        else:
            model = torch.nn.parallel.DistributedDataParallel(
                model, find_unused_parameters=True)
    else:
        if n_gpu_use > 0:
            model = torch.nn.DataParallel(model, device_ids=self.device_ids)
        else:
            model = torch.nn.DataParallel(model).to(self.device)
    return model
def main(cfg, _log):
    init_seed(cfg.seed)

    _log.info("=> fetching img pairs.")
    train_set, valid_set = get_dataset(cfg)
    _log.info('{} samples found, {} train samples and {} test samples.'.format(
        len(valid_set) + len(train_set), len(train_set), len(valid_set)))

    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=cfg.train.batch_size,
        num_workers=cfg.train.workers, pin_memory=True, shuffle=True)

    max_test_batch = 4
    if isinstance(valid_set, torch.utils.data.ConcatDataset):
        valid_loader = [
            torch.utils.data.DataLoader(
                s, batch_size=min(max_test_batch, cfg.train.batch_size),
                num_workers=min(4, cfg.train.workers),
                pin_memory=True, shuffle=False)
            for s in valid_set.datasets
        ]
        valid_size = sum(len(l) for l in valid_loader)
    else:
        valid_loader = torch.utils.data.DataLoader(
            valid_set, batch_size=min(max_test_batch, cfg.train.batch_size),
            num_workers=min(4, cfg.train.workers),
            pin_memory=True, shuffle=False)
        valid_size = len(valid_loader)

    if cfg.train.epoch_size == 0:
        cfg.train.epoch_size = len(train_loader)
    if cfg.train.valid_size == 0:
        cfg.train.valid_size = valid_size
    cfg.train.epoch_size = min(cfg.train.epoch_size, len(train_loader))
    cfg.train.valid_size = min(cfg.train.valid_size, valid_size)

    model = get_model(cfg.model)
    loss = get_loss(cfg.loss)
    trainer = get_trainer(cfg.trainer)(
        train_loader, valid_loader, model, loss, _log, cfg.save_root, cfg.train)

    # Freeze everything except the pyramid layers.
    for name, param in model.named_parameters():
        if "pyramid" not in name:
            param.requires_grad = False
        else:
            print(name, param.requires_grad)

    # NOTE: the loaded weights are not applied to the model here.
    epoch, weights = load_checkpoint('checkpoints/Sintel/pwclite_ar.tar')

    trainer.model = model
    trainer.train()
def _init_model(self, model):
    model = model.to(self.device)

    if self.cfg.pretrained_model:
        self._log.info("=> using pre-trained weights {}.".format(
            self.cfg.pretrained_model))
        epoch, weights = load_checkpoint(self.cfg.pretrained_model)
        # Remap checkpoint keys onto the model's state-dict keys by position
        # (assumes both dicts enumerate parameters in the same order).
        from collections import OrderedDict
        new_weights = OrderedDict()
        model_keys = list(model.state_dict().keys())
        weight_keys = list(weights.keys())
        for a, b in zip(model_keys, weight_keys):
            new_weights[a] = weights[b]
        model.load_state_dict(new_weights)
    else:
        self._log.info("=> Train from scratch.")
        model.init_weights()

    model = torch.nn.DataParallel(model, device_ids=self.device_ids)
    return model
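# The positional key-remapping loop above recurs in other loaders in this
# section. A small helper could factor it out; this is a sketch under the
# same assumption the inline loops make (checkpoint and model enumerate
# parameters in the same order, e.g. keys differing only by a 'module.'
# prefix), and remap_state_dict is a name introduced here, not one from the
# codebase.
from collections import OrderedDict


def remap_state_dict(model, weights):
    # Rename checkpoint keys to the model's own state-dict keys by position.
    new_weights = OrderedDict()
    for model_key, ckpt_key in zip(model.state_dict().keys(), weights.keys()):
        new_weights[model_key] = weights[ckpt_key]
    return new_weights

# Usage: epoch, weights = load_checkpoint(path)
#        model.load_state_dict(remap_state_dict(model, weights))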
def main():
    # Last layer of convs is 28, maxpool is 30.
    target_layer = 4
    on_conv = True
    no_imgs = 10
    toy = False
    feat = True
    label = 3  # 0,1 for feat; 0,6 for actions of qnet; 0,5 for cats
    if not feat:
        combi = True
        # inputsize = 2704
        # hiddensize = 512
        inputsize = 25088
        hiddensize = 1024
        outputsize = 7
        hidden_rec = 0
    else:
        combi = False
    recurrent = -1
    model_type = 'resnet'
    with_pos = False

    # Alternative feature checkpoints:
    # checkpoint_dir_feat = os.path.abspath('/Users/lisa/Documents/Uni/ThesisDS/local_python/checkpoints/resnet_toy_01')
    # checkpoint_dir_feat = os.path.abspath('../checkpoints/res_pos_01')
    # checkpoint_filename_feat = os.path.join(checkpoint_dir_feat, 'checkpoint_resnet.pth.tar')
    checkpoint_dir_feat = os.path.abspath(
        '../checkpoints_from_18_07_18/resnet_cats_01')
    checkpoint_filename_feat = os.path.join(
        checkpoint_dir_feat, 'warmup_model_{}.pth.tar'.format(model_type))
    # checkpoint_filename_feat = os.path.join(checkpoint_dir_feat, 'checkpoint_vgg.pth.tar')
    # checkpoint_filename_feat = os.path.join(checkpoint_dir_feat, 'model_best_resnet.pth.tar')
    # vgg = models.vgg16(pretrained=True)
    # Q-net checkpoints; these must be uncommented when feat is False:
    # checkpoint_dir_q = os.path.abspath('../checkpoints_from_18_07_18/qnet_simple_05_1')
    # checkpoint_filename_q = os.path.join(checkpoint_dir_q, 'model_best_qnet.pth.tar')
    # checkpoint_dir_feat = os.path.abspath('../checkpoints_from_18_07_18/resnet_auto_test_adam_1_2')
    # checkpoint_filename_feat = os.path.join(checkpoint_dir_feat, 'warmup_model_{}.pth.tar'.format(model_type))
    # checkpoint_dir_q = os.path.abspath('../checkpoints_from_18_07_18/qnet_auto_03')
    # checkpoint_filename_q = os.path.join(checkpoint_dir_q, 'model_best_qnet.pth.tar')

    lr = 0.01
    model = get_feature_model(model_type, model_type, load_pretrained=False,
                              opti='optim.Adam', lr=lr, mom=None,
                              checkpoint_pretrained=None, learn_pos=False,
                              force_on_cpu=False, cat=True)
    if torch.cuda.is_available():
        model.cuda()
    print(model)

    if not feat:
        if model_type == 'auto' or model_type == 'resnet':
            model = res.ResNetFeatures(model)
        else:
            model = m.NetNoDecisionLayer(model)
        model = get_q_model(combi=combi, recurrent=recurrent, toy=toy,
                            inputsize=inputsize, hiddensize=hiddensize,
                            outputsize=outputsize, feature_model=model,
                            hidden_rec=hidden_rec)
        print(model)
        if torch.cuda.is_available():
            model.cuda()

    criterion = nn.SmoothL1Loss()

    # Backbone layers are fine-tuned at lr/10, the Q-head at full lr.
    if combi and recurrent <= 0:
        if model_type == 'auto' or model_type == 'resnet':
            model_params = [
                {'params': model.conv1.parameters(), 'lr': lr / 10},
                {'params': model.bn1.parameters(), 'lr': lr / 10},
                {'params': model.relu.parameters(), 'lr': lr / 10},
                {'params': model.maxpool.parameters(), 'lr': lr / 10},
                {'params': model.layer1.parameters(), 'lr': lr / 10},
                {'params': model.layer2.parameters(), 'lr': lr / 10},
                {'params': model.layer3.parameters(), 'lr': lr / 10},
                {'params': model.layer4.parameters(), 'lr': lr / 10},
                {'params': model.qnet.parameters()},
            ]
        else:
            model_params = [
                {'params': model.features.parameters(), 'lr': lr / 10},
                {'params': model.qnet.parameters()},
            ]
    else:
        model_params = model.parameters()

    optimizer = get_optimizer(model_params, 'optim.Adam', 0.01, None)

    # checkpoint_filename_feat is already a full path, so the join below
    # simply returns it unchanged.
    if feat:
        to_load = os.path.join(checkpoint_dir_feat, checkpoint_filename_feat)
    else:
        to_load = os.path.join(checkpoint_dir_q, checkpoint_filename_q)
    if os.path.exists(to_load):
        model, _, _ = load_checkpoint(model, optimizer, filename=to_load,
                                      force_on_cpu=False)
    else:
        raise ValueError('Checkpoint {} does not exist.'.format(to_load))

    val_loader = get_valloader_only(toy=toy, rsyncing=False, batch_size=1,
                                    num_workers=0, cat=True)

    # Grad-CAM. The target layer doesn't matter for the classifier hook:
    # target_layer = 0
    # on_conv = False
    grad_cam = GradCam(model, target_layer=target_layer,
                       model_type=model_type, feat=feat)
    checkpoint_dir_feat = os.path.join(checkpoint_dir_feat,
                                       'Layer_{}'.format(target_layer))
    if not os.path.isdir(checkpoint_dir_feat):
        os.makedirs(checkpoint_dir_feat)

    # Generate a CAM mask for each image.
    for i, batch in enumerate(val_loader):
        if len(batch['image_batch']) > 1:
            for j in range(len(batch['image_batch'])):
                print('---', flush=True)
                print('{}_{}, label {}, has lesion {}'.format(
                    batch['image_id_batch'][j], j, batch['label_batch'][j],
                    batch['has_lesion_batch'][j]), flush=True)
                print(batch['image_batch'][j].size(), flush=True)
                print(batch['image_batch'][j].type(), flush=True)
                cam = grad_cam.generate_cam(
                    torch.unsqueeze(batch['image_batch'][j], 0),
                    label=label, on_conv=on_conv, pos=with_pos,
                    row=batch['center_row_batch'][j],
                    col=batch['center_col_batch'][j])
                save_class_activation_on_image(
                    checkpoint_dir_feat,
                    tensor_to_numpy(batch['image_batch'][j]), cam,
                    '{}_{}'.format(batch['image_id_batch'][j], j))
            one_img_figure(checkpoint_dir_feat, batch['image_id_batch'][0],
                           batch['original_batch'][0], four_bbs=True)
            save_orig_with_bbs(checkpoint_dir_feat, batch['image_id_batch'][0],
                               batch['original_batch'][0], batch['bbox_batch'])
        elif len(batch['image_batch']) == 1:
            print('---', flush=True)
            print(batch['image_batch'][0].shape, flush=True)
            print('{}_{}, label {}, has lesion {}'.format(
                batch['image_id_batch'][0], 0, batch['label_batch'][0],
                batch['has_lesion_batch'][0]), flush=True)
            cam = grad_cam.generate_cam(
                torch.unsqueeze(batch['image_batch'][0], 0),
                label=label, on_conv=on_conv, pos=with_pos,
                row=batch['center_row_batch'][0],
                col=batch['center_col_batch'][0])
            save_class_activation_on_image(
                checkpoint_dir_feat,
                tensor_to_numpy(batch['image_batch'][0]), cam,
                '{}_{}'.format(batch['image_id_batch'][0], 0))
        if i >= no_imgs - 1:
            break

    print('Grad cam completed', flush=True)
def test(cfg_dict, feat_model_string, rsyncing, toy=False):
    # checkpoint_dir, experiment_name = 'qnet', opti = 'optim.RMSprop', lr = 0.001, mom = 0.1, combi = False
    checkpoint_dir, log_dir, experiment_name = \
        get_q_save_check_tensorB_expName(cfg_dict)
    opti_feat, lr, mom, _ = get_f_train_opti_lr_mom_epochs(cfg_dict)
    inputsize, hiddensize, outputsize = get_q_net_input_hidden_output(cfg_dict)
    _, _, _, double, combi, param_noise, recurrent, hidden_rec, _, _, _ = \
        get_q_variants_oneImg_maxNumImgsT_maxNumImgsV_double_combi_paramNoise_recurrent_recSize_distReward_distFactor_hist(
            cfg_dict)
    _, max_steps, replaysize = get_q_hyper_cloneFreq_maxSteps_replaysize(
        cfg_dict)
    test_tau = get_q_explo_kappa_epochsEps_targetEps_tau_testTau_tauEpochs(
        cfg_dict)[4]
    cat = get_f_variants_selectiveS_checkPretrained_cat(cfg_dict)[2]
    featnet_checkpoint = get_f_save_check_tensorB_expName(cfg_dict)[0]

    print('-------------')
    print('feat_model_string', feat_model_string)
    print('-------------')

    feature_model = get_feature_model(feat_model_string, feat_model_string,
                                      load_pretrained=True, opti='optim.Adam',
                                      lr=lr, mom=mom,
                                      checkpoint_pretrained=featnet_checkpoint,
                                      cat=cat)
    if feat_model_string == 'auto' or feat_model_string == 'resnet':
        feature_model = res.ResNetFeatures(feature_model)
    else:
        feature_model = m.NetNoDecisionLayer(feature_model)
    if torch.cuda.is_available():
        feature_model.cuda()

    model = get_q_model(combi, recurrent, toy, inputsize, hiddensize,
                        outputsize, feature_model=feature_model,
                        hidden_rec=hidden_rec)
    if torch.cuda.is_available():
        model.cuda()

    criterion = nn.MSELoss()

    # Backbone layers are fine-tuned at lr/10, the Q-net layers at full lr.
    if combi and recurrent <= 0:
        if feat_model_string == 'auto' or feat_model_string == 'resnet':
            model_params = [
                {'params': model.conv1.parameters(), 'lr': lr / 10},
                {'params': model.bn1.parameters(), 'lr': lr / 10},
                {'params': model.relu.parameters(), 'lr': lr / 10},
                {'params': model.maxpool.parameters(), 'lr': lr / 10},
                {'params': model.layer1.parameters(), 'lr': lr / 10},
                {'params': model.layer2.parameters(), 'lr': lr / 10},
                {'params': model.layer3.parameters(), 'lr': lr / 10},
                {'params': model.layer4.parameters(), 'lr': lr / 10},
                {'params': model.qnet.parameters()},
            ]
        else:
            model_params = [
                {'params': model.features.parameters(), 'lr': lr / 10},
                {'params': model.qnet.parameters()},
            ]
    elif combi and recurrent > 0:
        if toy:
            model_params = [
                {'params': model.features.parameters(), 'lr': lr / 10},
                {'params': model.ll1.parameters()},
                {'params': model.ll2.parameters()},
                {'params': model.ll3.parameters()},
                {'params': model.relu2.parameters()},
                {'params': model.lstm.parameters()},
            ]
        else:
            model_params = [
                {'params': model.conv1.parameters(), 'lr': lr / 10},
                {'params': model.bn1.parameters(), 'lr': lr / 10},
                {'params': model.relu.parameters(), 'lr': lr / 10},
                {'params': model.maxpool.parameters(), 'lr': lr / 10},
                {'params': model.layer1.parameters(), 'lr': lr / 10},
                {'params': model.layer2.parameters(), 'lr': lr / 10},
                {'params': model.layer3.parameters(), 'lr': lr / 10},
                {'params': model.layer4.parameters(), 'lr': lr / 10},
                {'params': model.ll1.parameters()},
                {'params': model.ll2.parameters()},
                {'params': model.ll3.parameters()},
                {'params': model.relu2.parameters()},
                {'params': model.lstm.parameters()},
            ]
    else:
        model_params = model.parameters()

    optimizer = get_optimizer(model_params, opti_feat, lr, mom)
    print(model, flush=True)

    # checkpoint_filename = os.path.join(checkpoint_dir, 'checkpoint_{}.pth.tar'.format(experiment_name))
    # checkpoint_filename = os.path.join(checkpoint_dir, 'model_best_{}.pth.tar'.format(experiment_name))
    checkpoint_filename = os.path.join(
        checkpoint_dir, 'warmup_model_{}.pth.tar'.format(experiment_name))
    print('Load checkpoint from {}'.format(
        os.path.abspath(checkpoint_filename)))

    # TODO this if/else should come before the model setup above.
    if os.path.exists(checkpoint_filename):
        if not os.path.isdir('{}/trajectories_test'.format(checkpoint_dir)):
            os.makedirs('{}/trajectories_test'.format(checkpoint_dir))

        # Don't load the optimizer state; its LR might already be too low (yes? TODO).
        model, _, initial_epoch = load_checkpoint(
            model, optimizer, filename=checkpoint_filename)

        # Rebuild the parameter groups on the reloaded model.
        if combi and recurrent <= 0:
            if feat_model_string == 'auto' or feat_model_string == 'resnet':
                model_params = [
                    {'params': model.conv1.parameters(), 'lr': lr / 10},
                    {'params': model.bn1.parameters(), 'lr': lr / 10},
                    {'params': model.relu.parameters(), 'lr': lr / 10},
                    {'params': model.maxpool.parameters(), 'lr': lr / 10},
                    {'params': model.layer1.parameters(), 'lr': lr / 10},
                    {'params': model.layer2.parameters(), 'lr': lr / 10},
                    {'params': model.layer3.parameters(), 'lr': lr / 10},
                    {'params': model.layer4.parameters(), 'lr': lr / 10},
                    {'params': model.qnet.parameters()},
                ]
            else:
                model_params = [
                    {'params': model.features.parameters(), 'lr': lr / 10},
                    {'params': model.qnet.parameters()},
                ]
        else:
            model_params = model.parameters()

        assert opti_feat in ['optim.Adam', 'optim.SGD', 'optim.RMSprop']
        print('Using optimizer {}'.format(opti_feat))
        if opti_feat == 'optim.Adam':
            optimizer = eval(opti_feat)(model_params, lr=lr)
        else:
            optimizer = eval(opti_feat)(model_params, lr=lr, momentum=mom)

        model_path = 'model_best_{}.pth.tar'.format(experiment_name)
        print('Loading model checkpointed at epoch {}'.format(initial_epoch))

        print('Get val env', flush=True)
        val_env = get_val_env_only(cfg_dict, feature_model, rsyncing=rsyncing,
                                   toy=toy, f_one=True)
        # val_env = get_val_env_only(cfg_dict, feature_model, rsyncing=rsyncing, toy=toy, f_one=False)

        warmup_trainer = QNetTrainer(cfg_dict, model, val_env,
                                     experiment_name=experiment_name,
                                     log_dir='default',
                                     checkpoint_dir=checkpoint_dir,
                                     checkpoint_filename=model_path,
                                     for_testing=True, tau_schedule=0,
                                     recurrent=recurrent)
        warmup_trainer.compile(loss=criterion, optimizer=optimizer)

        print('Evaluate', flush=True)
        val_metrics_arr, trajectory_all_imgs, triggered_all_imgs, Q_s_all_imgs, \
            all_imgs, all_gt, actions_all_imgs = warmup_trainer.evaluate(
                val_env, 0, save_trajectory=True)
        print('Val_metrics_arr', val_metrics_arr)

        width = 2
        steps_until_detection = []
        print('Save', flush=True)
        if len(trajectory_all_imgs) > 1:
            for i, img in enumerate(trajectory_all_imgs):
                orig_img = all_imgs[i]
                if all_gt[i] is None:
                    orig_r = orig_c = orig_rn = orig_cn = 0
                else:
                    orig_r, orig_c, orig_rn, orig_cn = get_nums_from_bbox(
                        all_gt[i])
                for j, starts in enumerate(img):
                    for k, step in enumerate(starts):
                        r, c, rn, cn = step
                        # A trajectory counts as triggered on its last step.
                        triggered = (triggered_all_imgs[i][j] == 1
                                     and k == len(starts) - 1)
                        if triggered:
                            steps_until_detection.append(k)
                        if i < 10:
                            # Triggered steps get a '_trigger' filename suffix.
                            suffix = '_trigger' if triggered else ''
                            save_image_with_orig_plus_current_bb(
                                orig_img,
                                '{}/trajectories_test/{}_{}_{}{}.png'.format(
                                    checkpoint_dir, i, j, k, suffix),
                                bbox_flag=True, r=r, c=c, rn=rn, cn=cn,
                                ro=orig_r, co=orig_c, rno=orig_rn, cno=orig_cn,
                                lwidth=width, Q_s=Q_s_all_imgs[i][j][k],
                                eps=-1, action=actions_all_imgs[i][j][k])
        else:
            orig_img = all_imgs[0]
            orig_r, orig_c, orig_rn, orig_cn = get_nums_from_bbox(all_gt[0])
            for j, starts in enumerate(trajectory_all_imgs[0]):
                for k, step in enumerate(starts):
                    r, c, rn, cn = step
                    triggered = (triggered_all_imgs[0][j] == 1
                                 and k == len(starts) - 1)
                    if triggered:
                        steps_until_detection.append(k)
                    suffix = '_trigger' if triggered else ''
                    save_image_with_orig_plus_current_bb(
                        orig_img,
                        '{}/trajectories_test/0_{}_{}{}.png'.format(
                            checkpoint_dir, j, k, suffix),
                        bbox_flag=True, r=r, c=c, rn=rn, cn=cn,
                        ro=orig_r, co=orig_c, rno=orig_rn, cno=orig_cn,
                        lwidth=width, Q_s=Q_s_all_imgs[0][j][k],
                        action=actions_all_imgs[0][j][k])

        pkl.dump(steps_until_detection,
                 open('{}/trajectories_test/steps_until_detection.pkl'.format(
                     checkpoint_dir), 'wb'))
        pkl.dump(val_metrics_arr,
                 open('{}/trajectories_test/val_metrics_arr.pkl'.format(
                     checkpoint_dir), 'wb'))
    else:
        print('For testing, the checkpoint file has to exist')
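# Offline inspection sketch for the two pickle files test() writes above.
# summarize_test_run is a helper introduced here, not part of the project;
# pass it the same checkpoint_dir that test() used.
import pickle as pkl


def summarize_test_run(checkpoint_dir):
    # Paths mirror the pkl.dump calls at the end of test().
    with open('{}/trajectories_test/steps_until_detection.pkl'.format(
            checkpoint_dir), 'rb') as f:
        steps = pkl.load(f)
    with open('{}/trajectories_test/val_metrics_arr.pkl'.format(
            checkpoint_dir), 'rb') as f:
        val_metrics = pkl.load(f)
    if steps:
        print('mean steps until detection: {:.2f}'.format(
            sum(steps) / len(steps)))
    print('val metrics:', val_metrics)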
# Decoder
if parameters["decoder"] == "crf":
    decoder = CRFDecoder(output_dim, train_data["vocab"]["tag"],
                         dropout=parameters["dropout"])
elif parameters["decoder"] == "softmax":
    decoder = SoftmaxDecoder(output_dim, train_data["vocab"]["tag"],
                             dropout=parameters["dropout"])

# Model
model = EmbedderEncoderDecoder(embedder, encoder, decoder)
model.to(parameters["device"])

# Reload best checkpoint
state = load_checkpoint(parameters["run_dir"] + "ner_best.pth.tar")
model.load_state_dict(state["model"])

# Change Glove Embedder
if "word_embedder" in model.embedder.embedders.keys():
    logging.info(
        "Changing embedder to OOD data and intersecting with trained embeddings..."
    )
    # Find words of the OOD vocabulary that also occur in the train vocabulary.
    inter_vocab = intersect(train_data["vocab"]["word"][1],
                            ood_data["vocab"]["word"][1])
    # Trained embedding values to copy over for the intersected words.
    train_embeddings = model.embedder.embedders[
        "word_embedder"].embedder.word_embeddings.weight
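# The snippet above stops after grabbing the trained embedding weights. A
# possible continuation, sketched here: build an OOD-vocab-sized matrix and
# copy trained vectors for words present in both vocabularies. It assumes
# intersect() returns (train_index, ood_index) pairs; that layout, and the
# ood_embeddings name, are assumptions rather than the project's actual code.
import torch

embedding_dim = train_embeddings.size(1)
ood_vocab_size = len(ood_data["vocab"]["word"][1])
# Small random init for OOD-only words; trained vectors where available.
ood_embeddings = torch.randn(ood_vocab_size, embedding_dim) * 0.01
with torch.no_grad():
    for train_idx, ood_idx in inter_vocab:
        ood_embeddings[ood_idx] = train_embeddings[train_idx]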
def prepare_and_start_training(cfg_dict, feat_model_string, rsyncing,
                               toy=False):
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    # Retrieve Q-net params
    opti, lr, mom, num_epochs, transitions_per_learning = \
        get_q_train_opti_lr_mom_epochs_transPerUp(cfg_dict)
    checkpoint_dir, tensorboard_dir, experiment_name = \
        get_q_save_check_tensorB_expName(cfg_dict)
    _, _, _, double, combi, param_noise, recurrent, hidden_rec, _, _, _ = \
        get_q_variants_oneImg_maxNumImgsT_maxNumImgsV_double_combi_paramNoise_recurrent_recSize_distReward_distFactor_hist(
            cfg_dict)
    _, resume, lr_schedule, _ = get_seed_resume_lrSchedule_root(cfg_dict)
    inputsize, hiddensize, outputsize = get_q_net_input_hidden_output(cfg_dict)
    kappa, decreasing_eps, target_eps, tau, test_tau, tau_schedule_epochs = \
        get_q_explo_kappa_epochsEps_targetEps_tau_testTau_tauEpochs(cfg_dict)
    clone_freq, max_steps, replaysize = \
        get_q_hyper_cloneFreq_maxSteps_replaysize(cfg_dict)
    gamma, _, _, _, clip_val = get_q_rewards_gamma_zeta_eta_iota_clipVal(
        cfg_dict)

    # Retrieve feature-net params
    feat_checkpoint_dir, _, feat_experiment_name = \
        get_f_save_check_tensorB_expName(cfg_dict)
    opti_feat = get_f_train_opti_lr_mom_epochs(cfg_dict)[0]

    # Load feature model and get environments. cat must be defined here,
    # since it is passed to get_feature_model and get_q_model below.
    cat = get_f_variants_selectiveS_checkPretrained_cat(cfg_dict)[2]
    feature_model = get_feature_model(feat_model_string, feat_experiment_name,
                                      load_pretrained=True, opti='optim.Adam',
                                      lr=lr, mom=mom,
                                      checkpoint_pretrained=feat_checkpoint_dir,
                                      cat=cat)
    if feat_experiment_name == 'auto' or feat_experiment_name == 'resnet':
        feature_model = res.ResNetFeatures(feature_model)
    elif feat_experiment_name == 'resnet_pool':
        feature_model = res.ResNetFeaturesPool(feature_model)
    else:
        feature_model = m.NetNoDecisionLayer(feature_model)
    if torch.cuda.is_available():
        feature_model.cuda()

    is_simple = feat_experiment_name == 'simple'
    with_pool = feat_experiment_name == 'resnet_pool'

    model = get_q_model(combi, recurrent, toy, inputsize, hiddensize,
                        outputsize, feature_model=feature_model,
                        hidden_rec=hidden_rec, cat=cat, simple=is_simple,
                        with_pool=with_pool)
    if torch.cuda.is_available():
        model.cuda()

    # Huber loss when values are clipped, otherwise plain MSE.
    criterion = nn.SmoothL1Loss() if clip_val else nn.MSELoss()

    # Backbone layers are fine-tuned at lr/10, the Q-net layers at full lr.
    if combi and recurrent <= 0:
        if feat_experiment_name in ('auto', 'resnet', 'resnet_pool'):
            model_params = [
                {'params': model.conv1.parameters(), 'lr': lr / 10},
                {'params': model.bn1.parameters(), 'lr': lr / 10},
                {'params': model.relu.parameters(), 'lr': lr / 10},
                {'params': model.maxpool.parameters(), 'lr': lr / 10},
                {'params': model.layer1.parameters(), 'lr': lr / 10},
                {'params': model.layer2.parameters(), 'lr': lr / 10},
                {'params': model.layer3.parameters(), 'lr': lr / 10},
                {'params': model.layer4.parameters(), 'lr': lr / 10},
                {'params': model.qnet.parameters()},
            ]
        else:
            model_params = [
                {'params': model.features.parameters(), 'lr': lr / 10},
                {'params': model.qnet.parameters()},
            ]
    elif combi and recurrent > 0:
        if toy:
            model_params = [
                {'params': model.features.parameters(), 'lr': lr / 10},
                {'params': model.ll1.parameters()},
                {'params': model.ll2.parameters()},
                {'params': model.ll3.parameters()},
                {'params': model.relu2.parameters()},
                {'params': model.lstm.parameters()},
            ]
        else:
            model_params = [
                {'params': model.conv1.parameters(), 'lr': lr / 10},
                {'params': model.bn1.parameters(), 'lr': lr / 10},
                {'params': model.relu.parameters(), 'lr': lr / 10},
                {'params': model.maxpool.parameters(), 'lr': lr / 10},
                {'params': model.layer1.parameters(), 'lr': lr / 10},
                {'params': model.layer2.parameters(), 'lr': lr / 10},
                {'params': model.layer3.parameters(), 'lr': lr / 10},
                {'params': model.layer4.parameters(), 'lr': lr / 10},
                {'params': model.ll1.parameters()},
                {'params': model.ll2.parameters()},
                {'params': model.ll3.parameters()},
                {'params': model.relu2.parameters()},
                {'params': model.lstm.parameters()},
            ]
    else:
        model_params = model.parameters()

    optimizer = get_optimizer(model_params, opti, lr, mom)
    print(model, flush=True)

    # checkpoint_filename = os.path.join(checkpoint_dir, 'warmup_model_{}.pth.tar'.format(experiment_name))
    checkpoint_filename = os.path.join(
        checkpoint_dir, 'model_best_{}.pth.tar'.format(experiment_name))

    if resume and os.path.exists(checkpoint_filename):
        print('======', flush=True)
        # Don't load the optimizer state; its LR might already be too low (yes? TODO).
        model, _, initial_epoch, replay_mem = load_checkpoint(
            model, optimizer, filename=checkpoint_filename, load_mem=True,
            checkpoint_dir=checkpoint_dir, experiment_name=experiment_name)

        # TODO better solution? Rebuild criterion, parameter groups, and
        # optimizer on the reloaded model.
        criterion = nn.SmoothL1Loss() if clip_val else nn.MSELoss()
        if combi and recurrent <= 0:
            if feat_experiment_name in ('auto', 'resnet', 'resnet_pool'):
                model_params = [
                    {'params': model.conv1.parameters(), 'lr': lr / 10},
                    {'params': model.bn1.parameters(), 'lr': lr / 10},
                    {'params': model.relu.parameters(), 'lr': lr / 10},
                    {'params': model.maxpool.parameters(), 'lr': lr / 10},
                    {'params': model.layer1.parameters(), 'lr': lr / 10},
                    {'params': model.layer2.parameters(), 'lr': lr / 10},
                    {'params': model.layer3.parameters(), 'lr': lr / 10},
                    {'params': model.layer4.parameters(), 'lr': lr / 10},
                    {'params': model.qnet.parameters()},
                ]
            else:
                model_params = [
                    {'params': model.features.parameters(), 'lr': lr / 10},
                    {'params': model.qnet.parameters()},
                ]
        elif combi and recurrent > 0:
            if toy:
                model_params = [
                    {'params': model.features.parameters(), 'lr': lr / 10},
                    {'params': model.ll1.parameters()},
                    {'params': model.ll2.parameters()},
                    {'params': model.ll3.parameters()},
                    {'params': model.relu2.parameters()},
                    {'params': model.lstm.parameters()},
                ]
            else:
                model_params = [
                    {'params': model.conv1.parameters(), 'lr': lr / 10},
                    {'params': model.bn1.parameters(), 'lr': lr / 10},
                    {'params': model.relu.parameters(), 'lr': lr / 10},
                    {'params': model.maxpool.parameters(), 'lr': lr / 10},
                    {'params': model.layer1.parameters(), 'lr': lr / 10},
                    {'params': model.layer2.parameters(), 'lr': lr / 10},
                    {'params': model.layer3.parameters(), 'lr': lr / 10},
                    {'params': model.layer4.parameters(), 'lr': lr / 10},
                    {'params': model.ll1.parameters()},
                    {'params': model.ll2.parameters()},
                    {'params': model.ll3.parameters()},
                    {'params': model.relu2.parameters()},
                    {'params': model.lstm.parameters()},
                ]
        else:
            model_params = model.parameters()
        optimizer = get_optimizer(model_params, opti, lr, mom)

        model_path = 'checkpoint_{}.pth.tar'.format(experiment_name)
        print('Loading model checkpointed at epoch {}/{}'.format(
            initial_epoch, num_epochs))
        print('======', flush=True)
    else:
        print('======', flush=True)
        print('First things first', flush=True)
        model_path = 'checkpoint_{}.pth.tar'.format(experiment_name)
        replay_mem = None
        initial_epoch = 1
        print('======', flush=True)

    # Always start training from the beginning
    # initial_epoch = 1

    if combi:
        train_env, val_env = get_envs(cfg_dict, model, rsyncing, toy,
                                      f_one=True)
    else:
        train_env, val_env = get_envs(cfg_dict, feature_model, rsyncing, toy,
                                      f_one=True)

    warmup_trainer = QNetTrainer(cfg_dict, model, train_env,
                                 experiment_name=experiment_name,
                                 log_dir=tensorboard_dir,
                                 checkpoint_dir=checkpoint_dir,
                                 checkpoint_filename=model_path,
                                 tau_schedule=tau_schedule_epochs,
                                 recurrent=recurrent, replay_mem=replay_mem)
    warmup_trainer.compile(loss=criterion, optimizer=optimizer)

    # Start training
    warmup_trainer.train(val_env=val_env, initial_epoch=initial_epoch,
                         num_epochs=num_epochs, decreasing_eps=decreasing_eps,
                         target_eps=target_eps,
                         transitions_per_learning=transitions_per_learning,
                         lr_schedule=lr_schedule)

    # Keep the best model under the warmup name for later stages.
    best_model_path = os.path.join(
        warmup_trainer.checkpoint_dir,
        'model_best_{}.pth.tar'.format(experiment_name))
    warmup_model_path = os.path.join(
        warmup_trainer.checkpoint_dir,
        'warmup_model_{}.pth.tar'.format(experiment_name))
    shutil.move(best_model_path, warmup_model_path)
def __init__(self):
    self.transforms = None
    self.index = 0
    self.prev_index = -1
    self.bridge = CvBridge()
    self.prev_seq = None
    self.just_started = True
    self.mode = rospy.get_param('~mode', 'stereo')
    self.plan = rospy.get_param('~plan', 0)
    self.return_mode = rospy.get_param('~return_mode', 0)
    print(self.mode, self.plan)
    params_file = 'real_sensor.json'

    # Planner
    self.planner = None
    if self.plan:
        self.planner = Planner(mode="real", params_file=params_file)

    # Params
    with open(params_file) as f:
        self.param = json.load(f)
    self.param["d_candi"] = img_utils.powerf(self.param["s_range"],
                                             self.param["e_range"], 64, 1.)

    # Gen Model Datum
    intrinsics = torch.tensor(self.param["intr_rgb"]).unsqueeze(0) / 4
    intrinsics[0, 2, 2] = 1.
    intrinsics_up = torch.tensor(self.param["intr_rgb"]).unsqueeze(0)
    s_width = self.param["size_rgb"][0] / 4
    s_height = self.param["size_rgb"][1] / 4
    focal_length = np.mean([intrinsics_up[0, 0, 0], intrinsics_up[0, 1, 1]])
    h_fov = math.degrees(
        math.atan(intrinsics_up[0, 0, 2] / intrinsics_up[0, 0, 0]) * 2)
    v_fov = math.degrees(
        math.atan(intrinsics_up[0, 1, 2] / intrinsics_up[0, 1, 1]) * 2)
    pixel_to_ray_array = View.normalised_pixel_to_ray_array(
        width=int(s_width), height=int(s_height),
        hfov=h_fov, vfov=v_fov, normalize_z=True)
    pixel_to_ray_array_2dM = np.reshape(
        np.transpose(pixel_to_ray_array, axes=[2, 0, 1]), [3, -1])
    pixel_to_ray_array_2dM = torch.from_numpy(
        pixel_to_ray_array_2dM.astype(np.float32)).unsqueeze(0)

    left_2_right = torch.tensor(self.param["left_2_right"])
    if self.mode == "stereo" or self.mode == "stereo_lc":
        src_cam_poses = torch.cat(
            [left_2_right.unsqueeze(0),
             torch.eye(4).unsqueeze(0)]).unsqueeze(0)
    elif self.mode == "mono" or self.mode == "mono_lc":
        src_cam_poses = torch.cat(
            [torch.eye(4).unsqueeze(0),
             torch.eye(4).unsqueeze(0)]).unsqueeze(0)

    self.model_datum = dict()
    self.model_datum["intrinsics"] = intrinsics.cuda()
    self.model_datum["intrinsics_up"] = intrinsics_up.cuda()
    self.model_datum["unit_ray"] = pixel_to_ray_array_2dM.cuda()
    self.model_datum["src_cam_poses"] = src_cam_poses.cuda()
    self.model_datum["d_candi"] = self.param["d_candi"]
    self.model_datum["d_candi_up"] = self.param["d_candi"]
    self.model_datum["rgb"] = None
    self.model_datum["prev_output"] = None
    self.model_datum["prev_lc"] = None

    # Pre-allocated pinned buffers for fast host-device transfer.
    self.rgb_pinned = torch.zeros(
        (1, 2, 3, self.param["size_rgb"][1],
         self.param["size_rgb"][0])).float().pin_memory()
    self.dpv_pinned = torch.zeros(
        (1, 64, int(self.param["size_rgb"][1]),
         int(self.param["size_rgb"][0]))).float().pin_memory()
    self.pred_depth_pinned = torch.zeros(
        (int(self.param["size_rgb"][1]),
         int(self.param["size_rgb"][0]))).float().pin_memory()
    self.true_depth_pinned = torch.zeros(
        (int(self.param["size_rgb"][1]),
         int(self.param["size_rgb"][0]))).float().pin_memory()
    self.unc_pinned = torch.zeros(
        1, 64, int(self.param["size_rgb"][0])).float().pin_memory()

    __imagenet_stats = {'mean': [0.485, 0.456, 0.406],
                        'std': [0.229, 0.224, 0.225]}
    self.transformer = transforms.Normalize(**__imagenet_stats)

    # Load Model
    if self.mode == "stereo":
        model_name = 'default_stereo_ilim'
    elif self.mode == "mono":
        model_name = 'default_ilim'
    elif self.mode == "mono_lc":
        model_name = 'default_exp7_lc_ilim'
    elif self.mode == 'stereo_lc':
        model_name = 'default_stereo_exp7_lc_ilim'
    cfg_path = 'configs/' + model_name + '.json'
    model_path = ''
    with open(cfg_path) as f:
        self.cfg = EasyDict(json.load(f))
    self.model = get_model(self.cfg, 0)
    epoch, weights = load_checkpoint('outputs/checkpoints/' + model_name +
                                     '/' + model_name + '_model_best.pth.tar')
    # Remap checkpoint keys onto the model's state-dict keys by position.
    from collections import OrderedDict
    new_weights = OrderedDict()
    model_keys = list(self.model.state_dict().keys())
    weight_keys = list(weights.keys())
    for a, b in zip(model_keys, weight_keys):
        new_weights[a] = weights[b]
    self.model.load_state_dict(new_weights)
    self.model = self.model.cuda()
    self.model.eval()
    print("Model Loaded")

    # ROS
    self.q_msg = deque([], 1)
    lth = ConsumerThread(self.q_msg, self.handle_msg)
    lth.setDaemon(True)
    lth.start()

    self.queue_size = 3
    self.sync = functools.partial(ApproximateTimeSynchronizer, slop=0.01)
    self.left_camsub = message_filters.Subscriber(
        '/left_camera_resized/image_color_rect', sensor_msgs.msg.Image)
    self.right_camsub = message_filters.Subscriber(
        'right_camera_resized/image_color_rect', sensor_msgs.msg.Image)
    self.depth_sub = message_filters.Subscriber(
        '/left_camera_resized/depth', sensor_msgs.msg.Image
    )  # , queue_size=self.queue_size, buff_size=2**24
    self.ts = self.sync([self.left_camsub, self.right_camsub, self.depth_sub],
                        self.queue_size)
    self.ts.registerCallback(self.callback)
    self.prev_left_cammsg = None

    self.depth_pub = rospy.Publisher('ros_net/depth', sensor_msgs.msg.Image,
                                     queue_size=self.queue_size)
    self.depth_color_pub = rospy.Publisher('ros_net/depth_color',
                                           sensor_msgs.msg.Image,
                                           queue_size=self.queue_size)
    self.depth_lc_pub = rospy.Publisher('ros_net/depth_lc',
                                        sensor_msgs.msg.Image,
                                        queue_size=self.queue_size)
    self.dpv_pub = rospy.Publisher('ros_net/dpv_pub', TensorMsg,
                                   queue_size=self.queue_size)
    self.unc_pub = rospy.Publisher('ros_net/unc_pub', TensorMsg,
                                   queue_size=self.queue_size)
    self.debug_pub = rospy.Publisher('ros_net/debug', sensor_msgs.msg.Image,
                                     queue_size=self.queue_size)
    self.debug2_pub = rospy.Publisher('ros_net/debug2', sensor_msgs.msg.Image,
                                      queue_size=self.queue_size)
    self.sensed_pub = rospy.Publisher('ros_net/sensed_pub', TensorMsg,
                                      queue_size=self.queue_size)
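# Hypothetical entry point for the node whose __init__ appears above; the
# RosNet class name is an assumption (only __init__ is shown in this section).
import rospy

if __name__ == '__main__':
    rospy.init_node('ros_net')
    node = RosNet()  # assumed class name
    rospy.spin()     # the registered subscriber callbacks drive the pipeline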