def __init__(self, args, student_model, teacher_model, src_loader, trg_loader, val_loader, optimizer, teacher_optimizer):
    """Set up a student/teacher (distillation-style) trainer.

    Stores models, domain dataloaders and optimizers, and creates the
    LR scheduler, metric evaluator, checkpoint Saver and TensorBoard writer.
    """
    self.args = args
    self.student_model = student_model
    self.teacher_model = teacher_model
    self.src_loader = src_loader
    self.trg_loader = trg_loader
    self.val_loader = val_loader
    self.optimizer = optimizer
    self.teacher_optimizer = teacher_optimizer
    # Confusion-matrix based segmentation metrics.
    # BUGFIX: Evaluator was instantiated twice (again at the end of __init__);
    # the duplicate assignment has been removed.
    self.evaluator = Evaluator(args.nclass)
    # Halve the student LR once after epoch 20.
    self.scheduler = torch.optim.lr_scheduler.MultiStepLR(
        self.optimizer, milestones=[20], gamma=0.5)
    self.best_pred = 0
    # Blending weight for teacher updates -- presumably an EMA factor; TODO confirm.
    self.init_weight = 0.98
    # Experiment bookkeeping: config snapshot + TensorBoard summary writer.
    self.saver = Saver(self.args)
    self.saver.save_experiment_config()
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()
def main():
    """Evaluate a pretrained tri-net re-id model on a dataset's query/gallery splits."""
    conf = Conf()
    conf.suppress_random()  # fix RNG state for reproducible evaluation
    device = conf.get_device()
    args = parse(conf)
    # ---- SAVER OLD NET TO RESTORE PARAMS
    saver_trinet = Saver(Path(args.trinet_folder).parent, Path(args.trinet_folder).name)
    old_params, old_hparams = saver_trinet.load_logs()
    # Reuse the backbone/metric the checkpoint was trained with.
    args.backbone = old_params['backbone']
    args.metric = old_params['metric']
    train_loader, query_loader, gallery_loader, queryimg_loader, galleryimg_loader = \
        get_dataloaders(args.dataset_name, conf.nas_path, device, args)
    num_pids = train_loader.dataset.get_num_pids()
    # Classifier head size must match the checkpointed model.
    assert num_pids == old_hparams['num_classes']
    net = get_model(args, num_pids).to(device)
    state_dict = torch.load(Path(args.trinet_folder) / 'chk' / args.trinet_chk_name)
    net.load_state_dict(state_dict)
    e = Evaluator(net, query_loader, gallery_loader, queryimg_loader, galleryimg_loader,
                  device=device, data_conf=DATA_CONFS[args.dataset_name])
    # One-off evaluation: no saver iteration, no TensorBoard output.
    e.eval(None, 0, verbose=True, do_tb=False)
def read(folder):
    """Read all weights of a pretrained attention model from a checkpoint folder.

    Returns a dict mapping parameter key names (as used by attn_model) to
    their numpy values, fetched in a single session run.
    """
    log.info('Reading pretrained network from {}'.format(folder))
    saver = Saver(folder)
    ckpt_info = saver.get_ckpt_info()
    model_opt = ckpt_info['model_opt']
    ckpt_fname = ckpt_info['ckpt_fname']
    model_id = ckpt_info['model_id']
    model = attn_model.get_model(model_opt)
    # Layer counts for each sub-network, taken from the stored model options.
    ctrl_cnn_nlayers = len(model_opt['ctrl_cnn_filter_size'])
    ctrl_mlp_nlayers = model_opt['num_ctrl_mlp_layers']
    attn_cnn_nlayers = len(model_opt['attn_cnn_filter_size'])
    attn_mlp_nlayers = model_opt['num_attn_mlp_layers']
    attn_dcnn_nlayers = len(model_opt['attn_dcnn_filter_size'])
    timespan = model_opt['timespan']
    glimpse_mlp_nlayers = model_opt['num_glimpse_mlp_layers']
    score_mlp_nlayers = 1  # score MLP is a single layer by construction
    weights = {}
    sess = tf.Session()
    saver.restore(sess, ckpt_fname)
    output_list = []
    # Collect weight/bias keys for every layer of every sub-network; the
    # (d)CNN nets additionally carry per-timestep batch-norm beta/gamma.
    for net, nlayers in zip(['ctrl_cnn', 'ctrl_mlp', 'glimpse_mlp', 'score_mlp',
                             'attn_cnn', 'attn_mlp', 'attn_dcnn'],
                            [ctrl_cnn_nlayers, ctrl_mlp_nlayers, glimpse_mlp_nlayers,
                             score_mlp_nlayers, attn_cnn_nlayers, attn_mlp_nlayers,
                             attn_dcnn_nlayers]):
        for ii in xrange(nlayers):
            for w in ['w', 'b']:
                key = '{}_{}_{}'.format(net, w, ii)
                log.info(key)
                output_list.append(key)
            if net == 'ctrl_cnn' or net == 'attn_cnn' or net == 'attn_dcnn':
                for tt in xrange(timespan):
                    for w in ['beta', 'gamma']:
                        key = '{}_{}_{}_{}'.format(net, ii, tt, w)
                        log.info(key)
                        output_list.append(key)
    # LSTM gate parameters (input/forget/update/output gates).
    for net in ['ctrl_lstm']:
        for w in ['w_xi', 'w_hi', 'b_i', 'w_xf', 'w_hf', 'b_f',
                  'w_xu', 'w_hu', 'b_u', 'w_xo', 'w_ho', 'b_o']:
            key = '{}_{}'.format(net, w)
            log.info(key)
            output_list.append(key)
    output_var = []
    for key in output_list:
        output_var.append(model[key])
    # Fetch every tensor in one run, then pair values back with their keys.
    output_var_value = sess.run(output_var)
    for key, value in zip(output_list, output_var_value):
        weights[key] = value
        log.info(key)
        log.info(value.shape)
    return weights
def __init__(self, args):
    """Two-stream (temporal/flow + spatial/RGB) action-recognition trainer.

    Builds the saver, TensorBoard writer, dataloaders, one VGG16-BN model per
    stream, Adam optimizers, BCE losses, and an accuracy evaluator.
    """
    self.args = args
    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()
    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()
    # Define Dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(
        args, **kwargs)
    # Per-stream loss weights used when combining the two streams.
    self.temporal_weight = args.temporal_weight
    self.spatial_weight = args.spatial_weight
    # Define network: one VGG16-BN per stream (101 classes -- presumably UCF-101).
    temporal_model = Model(name='vgg16_bn', num_classes=101, is_flow=True).get_model()
    spatial_model = Model(name='vgg16_bn', num_classes=101, is_flow=False).get_model()
    # Define Optimizer
    temporal_optimizer = torch.optim.Adam(temporal_model.parameters(), lr=args.temporal_lr)
    spatial_optimizer = torch.optim.Adam(spatial_model.parameters(), lr=args.spatial_lr)
    # Define Criterion (note: .cuda() is applied unconditionally here,
    # even though DataParallel wrapping below is gated on args.cuda).
    self.temporal_criterion = nn.BCELoss().cuda()
    self.spatial_criterion = nn.BCELoss().cuda()
    self.temporal_model, self.temporal_optimizer = temporal_model, temporal_optimizer
    self.spatial_model, self.spatial_optimizer = spatial_model, spatial_optimizer
    # Define Evaluator
    self.top1_eval = Evaluator(self.nclass)
    # Using cuda
    if args.cuda:
        self.temporal_model = torch.nn.DataParallel(
            self.temporal_model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.temporal_model)
        self.temporal_model = self.temporal_model.cuda()
        self.spatial_model = torch.nn.DataParallel(
            self.spatial_model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.spatial_model)
        self.spatial_model = self.spatial_model.cuda()
    # Resuming checkpoint
    self.best_accuracy = 0.0
    # BUGFIX: removed a stray unterminated ''' delimiter that followed this
    # line; left in place it would turn the remainder of the module into a
    # string (or raise SyntaxError if never closed).
def __init__(self, args):
    """Bind run arguments and wire up experiment persistence plus TensorBoard logging."""
    self.args = args
    # Checkpoint/config writer for this experiment run.
    saver = Saver(args)
    saver.save_experiment_config()
    self.saver = saver
    # TensorBoard summary rooted at the experiment directory.
    summary = TensorboardSummary(saver.experiment_dir)
    self.summary = summary
    self.writer = summary.create_summary()
def __init__(self):
    """Validation-only harness for a Darknet/YOLO model.

    Seeds RNGs, builds the validation dataloader, constructs the model,
    restores weights from `opt.pre` if present, and optionally enables
    apex mixed precision and multi-GPU DataParallel.
    """
    init_seeds(opt.seed)
    self.best_pred = 0.
    self.cutoff = -1  # backbone reaches to cutoff layer
    # Define Saver
    self.saver = Saver(opt, hyp, mode='val')
    # visualize
    if opt.visualize:
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()
    if 'pw' not in opt.arc:  # remove BCELoss positive weights
        hyp['cls_pw'] = 1.
        hyp['obj_pw'] = 1.
    self.img_size = opt.img_size
    # Define Dataloader
    self.val_dataset, self.val_loader = make_data_loader(opt, hyp, train=False)
    self.num_classes = self.val_dataset.num_classes
    self.vnb = len(self.val_loader)  # number of validation batches
    # Initialize model
    self.model = Darknet(opt.cfg, self.img_size, opt.arc).to(opt.device)
    self.model.nc = self.num_classes  # attach number of classes to model
    self.model.arc = opt.arc  # attach yolo architecture
    self.model.hyp = hyp  # attach hyperparameters to model
    # load weight
    if os.path.isfile(opt.pre):
        print("=> loading checkpoint '{}'".format(opt.pre))
        checkpoint = torch.load(opt.pre)
        self.epoch = checkpoint['epoch']
        self.best_pred = checkpoint['best_pred']
        self.model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}' (epoch {})".format(
            opt.pre, checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(opt.pre))
    # Mixed precision https://github.com/NVIDIA/apex
    if mixed_precision:
        # BUGFIX: this val-only class never defines self.optimizer, so the old
        # call amp.initialize(self.model, self.optimizer, ...) raised
        # AttributeError. apex supports model-only initialization for inference.
        self.model = amp.initialize(self.model, opt_level='O1', verbosity=0)
    # Initialize distributed training
    if len(opt.gpu_id) > 1:
        print("Using multiple gpu")
        self.model = torch.nn.DataParallel(self.model, device_ids=opt.gpu_id)
def __init__(self): self.best_pred = 1e6 # Define Saver self.saver = Saver(opt) self.saver.save_experiment_config() # visualize if opt.visualize: # vis_legend = ["Loss", "MAE"] # batch_plot = create_vis_plot(vis, 'Batch', 'Loss', 'batch loss', vis_legend[0:1]) # val_plot = create_vis_plot(vis, 'Epoch', 'result', 'val result', vis_legend[1:2]) # Define Tensorboard Summary self.summary = TensorboardSummary(self.saver.experiment_dir) self.writer = self.summary.create_summary() # Dataset dataloader self.train_dataset = SHTDataset(opt.train_dir, train=True) self.train_loader = DataLoader(self.train_dataset, num_workers=opt.workers, shuffle=True, batch_size=opt.batch_size) # must be 1 self.test_dataset = SHTDataset(opt.test_dir, train=False) self.test_loader = torch.utils.data.DataLoader( self.test_dataset, shuffle=False, batch_size=opt.batch_size ) # must be 1, because per image size is different torch.cuda.manual_seed(opt.seed) model = CSRNet() self.model = model.to(opt.device) if opt.resume: if os.path.isfile(opt.pre): print("=> loading checkpoint '{}'".format(opt.pre)) checkpoint = torch.load(opt.pre) opt.start_epoch = checkpoint['epoch'] self.best_pred = checkpoint['best_pred'] self.model.load_state_dict(checkpoint['state_dict']) print("=> loaded checkpoint '{}' (epoch {})".format( opt.pre, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(opt.pre)) if opt.use_mulgpu: self.model = torch.nn.DataParallel(self.model, device_ids=opt.gpu_id) self.criterion = nn.MSELoss(reduction='mean').to(opt.device) self.optimizer = torch.optim.SGD(self.model.parameters(), opt.lr, momentum=opt.momentum, weight_decay=opt.decay) # Define lr scheduler self.scheduler = lr_scheduler.MultiStepLR( self.optimizer, milestones=[round(opt.epochs * x) for x in opt.steps], gamma=opt.scales) self.scheduler.last_epoch = opt.start_epoch - 1
def __init__(self, args):
    """EDCNet (RGB + event) segmentation trainer.

    Builds saver/logger/TensorBoard, dataloaders, the EDCNet model with
    per-parameter-group LRs, optional DataParallel, class-balanced loss
    weights, two segmentation losses, LR scheduler, evaluator, and resume.
    """
    self.args = args
    self.saver = Saver(args)
    self.saver.save_experiment_config()
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()
    self.logger = self.saver.create_logger()
    kwargs = {'num_workers': args.workers, 'pin_memory': False}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(args, **kwargs)
    self.model = EDCNet(args.rgb_dim, args.event_dim, num_classes=self.nclass, use_bn=True)
    # Randomly-initialized parts train at 10x the LR/decay of the fine-tuned backbone.
    train_params = [{'params': self.model.random_init_params(),
                     'lr': 10 * args.lr, 'weight_decay': 10 * args.weight_decay},
                    {'params': self.model.fine_tune_params(),
                     'lr': args.lr, 'weight_decay': args.weight_decay}]
    self.optimizer = torch.optim.Adam(train_params, lr=args.lr, weight_decay=args.weight_decay)
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.to(self.args.device)
    if args.use_balanced_weights:
        # db_root_dir may return a list of roots; use the first in that case.
        root_dir = Path.db_root_dir(args.dataset)[0] if isinstance(Path.db_root_dir(args.dataset), list) else Path.db_root_dir(args.dataset)
        classes_weights_path = os.path.join(root_dir, args.dataset + '_classes_weights.npy')
        # Reuse cached class weights if present, otherwise compute and cache them.
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass, classes_weights_path)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    self.criterion_event = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode='event')
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                  len(self.train_loader), warmup_epochs=5)
    self.evaluator = Evaluator(self.nclass, self.logger)
    self.saver.save_model_summary(self.model)
    self.best_pred = 0.0
    # Resume from checkpoint if requested.
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
        checkpoint = torch.load(args.resume, map_location='cuda:0')
        args.start_epoch = checkpoint['epoch']
        # DataParallel wraps the real model in .module when CUDA is enabled.
        if args.cuda:
            self.model.module.load_state_dict(checkpoint['state_dict'])
        else:
            self.model.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
    # Fine-tuning restarts the epoch counter.
    if args.ft:
        args.start_epoch = 0
def main():
    """Distributed training entry point.

    Loads data and the classifier, optionally resumes or evaluates-and-exits,
    then runs the epoch loop: train, validate, log, and checkpoint on new best.
    """
    tb.log('sizes/world', dist_utils.env_world_size())
    dist_utils.setup_dist_backend(args)
    # load datasets, initialize classifiers, load model of natural variation
    trn_loader, val_loader, trn_samp, val_samp = get_loaders(args)
    model, criterion, optimizer = init_classifier(args)
    G = load_model(args, reverse=False)
    # create directory to save outputs and save images
    os.makedirs(args.save_path, exist_ok=True)
    save_images(trn_loader, val_loader, G, args)
    if args.delta_dim == 2:
        save_grid(trn_loader, G)
    # global start time for training
    start_time = datetime.now()
    # reload classifier from checkpoint if --resume flag is given
    if args.resume:
        reload_from_cpkt(model, optimizer, args)
    # Evaluate classifier on validation set and quit
    if args.evaluate:
        top1, top5 = validate(val_loader, model, criterion, 0, start_time)
        if args.local_rank == 0:
            save_eval_df(top1, top5, args)
            print(f'Top1: {top1} | Top5: {top5}')
        return
    if args.distributed:
        dist_utils.sync_processes(args)
    scheduler = Scheduler(optimizer, args, tb, log)
    saver = Saver(args, scheduler.tot_epochs)
    # main training loop
    best_top1 = 0.
    for epoch in range(args.start_epoch, scheduler.tot_epochs):
        # BUGFIX: was `args.distributed is True` (and `is_rank0 is True` below);
        # identity comparison against True silently fails for truthy non-bool
        # values (e.g. 1) -- PEP 8 explicitly forbids it.
        if args.distributed:
            trn_samp.set_epoch(epoch)
            val_samp.set_epoch(epoch)
        train(trn_loader, model, criterion, optimizer, scheduler, epoch, G, args)
        top1, top5 = validate(val_loader, model, criterion, epoch, start_time)
        saver.update(top1, top5)
        time_diff = (datetime.now() - start_time).total_seconds() / 3600.0
        log.event("~~epoch\t\thours\t\ttop1\t\ttop5")
        log.event(
            f"~~{epoch}\t\t{time_diff:.5f}\t\t{top1:.3f}\t\t{top5:.3f}\n")
        if top1 > best_top1:
            if is_rank0:
                save_checkpoint(epoch, model, optimizer, args)
            best_top1 = top1
def __init__(self, name, opt, data_opt=None, model_opt=None, seed=1234):
    """Experiment session: restore a prior run (opt['restore']) or start fresh.

    Either way, ends with a Saver, a TF session, the model, and the dataset
    fully constructed and logging initialized.
    """
    # Restore previously saved checkpoints.
    self.opt = opt
    self.name = name
    self.new_model_opt = None
    if self.opt['restore']:
        self.restore_options(opt, data_opt)
        if model_opt is not None:
            # NOTE(review): nesting of the following statements was ambiguous in
            # the original layout; this grouping assumes the finetune override,
            # new_model_opt and step reset all apply only when finetuning -- confirm.
            if 'finetune' in model_opt and model_opt['finetune']:
                self.model_opt['finetune'] = model_opt['finetune']
                self.new_model_opt = model_opt
                self.step.reset()
        self.model_id = self.get_model_id()
        self.exp_folder = os.path.join(self.opt['results'], self.model_id)
        self.saver = Saver(self.exp_folder, model_opt=self.model_opt,
                           data_opt=self.data_opt)
        # Point exp_folder back at the restored run's directory.
        self.exp_folder = opt['restore']
    else:
        if self.opt['model_id']:
            self.model_id = self.opt['model_id']
        else:
            self.model_id = self.get_model_id()
        if model_opt is None or data_opt is None:
            raise Exception(
                'You need to specify model options and data options')
        self.model_opt = model_opt
        self.data_opt = data_opt
        self.step = StepCounter()
        self.exp_folder = os.path.join(self.opt['results'], self.model_id)
        self.saver = Saver(self.exp_folder, model_opt=self.model_opt,
                           data_opt=self.data_opt)
    self.init_cmd_logger()
    self.sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True))
    # Log arguments
    self.log.log_args()
    # Train loop options
    self.log.info('Building model')
    self.model = self.get_model()
    # Load dataset
    self.log.info('Loading dataset')
    self.dataset_name = self.data_opt['dataset']
    self.dataset = self.get_dataset()
    self.init_model()
    self.init_logs()
def __init__(self, args): self.args = args # Define Saver self.saver = Saver(args) self.saver.save_experiment_config() # Define Tensorboard Summary self.summary = TensorboardSummary(self.saver.experiment_dir) self.writer = self.summary.create_summary() # PATH = args.path # Define Dataloader kwargs = {'num_workers': args.workers, 'pin_memory': True} self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(args, **kwargs) # Define network model = SCNN(nclass=self.nclass,backbone=args.backbone,output_stride=args.out_stride,cuda = args.cuda) # Define Optimizer optimizer = torch.optim.SGD(model.parameters(),args.lr, momentum=args.momentum, weight_decay=args.weight_decay, nesterov=args.nesterov) # Define Criterion weight = None self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type) self.model, self.optimizer = model, optimizer # Define Evaluator self.evaluator = Evaluator(self.nclass) # Define lr scheduler self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs, len(self.train_loader)) # Using cuda if args.cuda: self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids) # patch_replication_callback(self.model) self.model = self.model.cuda() # Resuming checkpoint self.best_pred = 0.0 if args.resume is not None: if not os.path.isfile(args.resume): raise RuntimeError("=> no checkpoint found at '{}'" .format(args.resume)) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] if args.cuda: self.model.module.load_state_dict(checkpoint['state_dict']) else: self.model.load_state_dict(checkpoint['state_dict']) if not args.ft: self.optimizer.load_state_dict(checkpoint['optimizer']) self.best_pred = checkpoint['best_pred'] print("=> loaded checkpoint '{}' (epoch {})" .format(args.resume, checkpoint['epoch']))
def restore_options(self, opt, data_opt):
    """Restore model/data options and the step counter from a checkpoint folder.

    Args:
        opt: run options; opt['restore'] names the checkpoint folder.
        data_opt: optional data options; when None the checkpointed ones are reused.
    """
    self.saver = Saver(opt['restore'])
    self.ckpt_info = self.saver.get_ckpt_info()
    self.model_opt = self.ckpt_info['model_opt']
    # Prefer caller-supplied data options, else fall back to the checkpoint's.
    self.data_opt = self.ckpt_info['data_opt'] if data_opt is None else data_opt
    self.ckpt_fname = self.ckpt_info['ckpt_fname']
    self.step = StepCounter(self.ckpt_info['step'])
    self.model_id = self.ckpt_info['model_id']
    # (Removed a redundant trailing `pass` -- it was dead code.)
def __init__(self, args, ori_img_lst, init_mask_lst): self.args = args # Define Saver self.saver = Saver(args) self.saver.save_experiment_config() # Define Tensorboard Summary self.summary = TensorboardSummary(self.saver.experiment_dir) self.writer = self.summary.create_summary() self.ori_img_lst = ori_img_lst # Define Dataloader kwargs = {'num_workers': args.workers, 'pin_memory': True} self.test_loader, self.nclass = make_data_loader_demo( args, args.test_folder, ori_img_lst, init_mask_lst, **kwargs) # Define network model = DeepLab(num_classes=self.nclass, backbone=args.backbone, output_stride=args.out_stride, sync_bn=args.sync_bn, freeze_bn=args.freeze_bn, use_iou=args.use_maskiou) self.model = model # Using cuda if args.cuda: self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids) patch_replication_callback(self.model) self.model = self.model.cuda() # Resuming checkpoint self.best_pred = 0.0 if args.resume is not None: if not os.path.isfile(args.resume): raise RuntimeError("=> no checkpoint found at '{}'".format( args.resume)) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] if args.cuda: self.model.module.load_state_dict(checkpoint['state_dict'], strict=False) else: self.model.load_state_dict(checkpoint['state_dict'], strict=False) self.best_pred = checkpoint['best_pred'] print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) # Clear start epoch if fine-tuning if args.ft: args.start_epoch = 0
class buildModel(object):
    """Assembles everything needed to train DeepLab: dataloaders, model with
    per-group learning rates, SGD optimizer, loss, evaluator, LR scheduler,
    and a DataParallel-wrapped CUDA model."""

    def __init__(self, para):
        self.args = para
        # Define Saver
        self.saver = Saver(para)
        self.saver.save_experiment_config()
        # Define Tensorboard Summary
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()
        self.train_loader, self.val_loader, self.test_loader, self.nclass = dataloader(
            para)
        # Define network
        model = DeepLab(num_classes=self.nclass,
                        backbone=para.backbone,
                        output_stride=para.out_stride,
                        sync_bn=para.sync_bn,
                        freeze_bn=para.freeze_bn)
        # Backbone trains at the base LR; the head at 10x.
        train_params = [{
            'params': model.get_1x_lr_params(),
            'lr': para.lr
        }, {
            'params': model.get_10x_lr_params(),
            'lr': para.lr * 10
        }]
        # Define Optimizer
        optimizer = torch.optim.SGD(train_params,
                                    momentum=para.momentum,
                                    weight_decay=para.weight_decay,
                                    nesterov=para.nesterov)
        # Define Criterion (unweighted, CUDA enabled unconditionally).
        self.criterion = SegmentationLosses(
            weight=None, cuda=True).build_loss(mode=para.loss_type)
        self.model, self.optimizer = model, optimizer
        # Define Evaluator
        self.evaluator = Evaluator(self.nclass)
        # Define lr scheduler
        self.scheduler = LR_Scheduler(para.lr_scheduler, para.lr,
                                      para.epochs, len(self.train_loader))
        self.model = torch.nn.DataParallel(self.model)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()
        # Resuming checkpoint
        self.best_pred = 0.0
def read(folder):
    """Read all weights of a pretrained box model from a checkpoint folder.

    Returns a dict mapping parameter key names (as used by box_model) to
    their numpy values, fetched in a single session run.
    """
    log.info('Reading pretrained network from {}'.format(folder))
    saver = Saver(folder)
    ckpt_info = saver.get_ckpt_info()
    model_opt = ckpt_info['model_opt']
    ckpt_fname = ckpt_info['ckpt_fname']
    model_id = ckpt_info['model_id']
    model = box_model.get_model(model_opt)
    # Layer counts for each sub-network, taken from the stored model options.
    ctrl_cnn_nlayers = len(model_opt['ctrl_cnn_filter_size'])
    ctrl_mlp_nlayers = model_opt['num_ctrl_mlp_layers']
    timespan = model_opt['timespan']
    glimpse_mlp_nlayers = model_opt['num_glimpse_mlp_layers']
    weights = {}
    sess = tf.Session()
    saver.restore(sess, ckpt_fname)
    output_list = []
    # Collect weight/bias keys per layer; the control CNN also carries
    # per-timestep batch-norm beta/gamma parameters.
    for net, nlayers in zip(['ctrl_cnn', 'ctrl_mlp', 'glimpse_mlp'],
                            [ctrl_cnn_nlayers, ctrl_mlp_nlayers,
                             glimpse_mlp_nlayers]):
        for ii in xrange(nlayers):
            for w in ['w', 'b']:
                key = '{}_{}_{}'.format(net, w, ii)
                log.info(key)
                output_list.append(key)
            if net == 'ctrl_cnn':
                for tt in xrange(timespan):
                    for w in ['beta', 'gamma']:
                        key = '{}_{}_{}_{}'.format(net, ii, tt, w)
                        log.info(key)
                        output_list.append(key)
    # LSTM gate parameters (input/forget/update/output gates).
    for net in ['ctrl_lstm']:
        for w in ['w_xi', 'w_hi', 'b_i', 'w_xf', 'w_hf', 'b_f',
                  'w_xu', 'w_hu', 'b_u', 'w_xo', 'w_ho', 'b_o']:
            key = '{}_{}'.format(net, w)
            log.info(key)
            output_list.append(key)
    output_var = []
    for key in output_list:
        output_var.append(model[key])
    # Fetch every tensor in one run, then pair values back with their keys.
    output_var_value = sess.run(output_var)
    for key, value in zip(output_list, output_var_value):
        weights[key] = value
        log.info(key)
        log.info(value.shape)
    return weights
def __init__(self):
    """Robot control session: motors, vision, data saver, and startup timers."""
    self.session_key = utils.random_key()  # unique id for this session's data
    self.screen_id = 0
    self.recording = False
    # Track motors driven by forward/backward pin pairs.
    self.right_track = Motor(RT_FWD_PIN, RT_BWD_PIN)
    self.left_track = Motor(LT_FWD_PIN, LT_BWD_PIN)
    self.vision = Vision()
    self.saver = Saver('data', self.session_key)
    self.vision.start()
    self.saver.start()
    # Start recording slightly before the first status save so the save
    # captures an active recording state.
    threading.Timer(startup_time - 0.1, self.switch_recording).start()
    threading.Timer(startup_time, self.save_status).start()
def eval(self, saver: Saver, iteration: int, verbose: bool, do_tb: bool = True):
    """Run every enabled retrieval evaluation and optionally log mAP/CMC to TensorBoard.

    Evaluation order matches the established protocol: i2v, v2v (when
    perform_x2v), then i2i, v2i (when perform_x2i).
    """
    # (enabled-flag, [(tag, evaluation callable), ...]) pairs.
    task_groups = [
        (self.perform_x2v, [('i2v', self.evaluate_i2v), ('v2v', self.evaluate_v2v)]),
        (self.perform_x2i, [('i2i', self.evaluate_i2i), ('v2i', self.evaluate_v2i)]),
    ]
    for enabled, tasks in task_groups:
        if not enabled:
            continue
        for tag, evaluate in tasks:
            cmc_scores, mAP = evaluate(verbose=verbose)
            if do_tb:
                saver.dump_metric_tb(mAP, iteration, tag, 'mAP')
                self.tb_cmc(saver, cmc_scores, iteration, tag)
def __init__(self, args):
    """Binary-segmentation trainer: loaders, model, SGD, optionally class-balanced
    loss, evaluator, LR scheduler, CUDA, and checkpoint resume."""
    self.args = args
    self.saver = Saver(args)
    self.saver.save_experiment_config()
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(
        args, **kwargs)
    # self.model = OCRNet(self.nclass)
    self.model = build_model(2, [32, 32], '44330020')
    self.optimizer = torch.optim.SGD(self.model.parameters(),
                                     lr=args.lr,
                                     momentum=args.momentum,
                                     weight_decay=args.weight_decay,
                                     nesterov=args.nesterov)
    # Fixed 2-class weighting favoring the (presumably rarer) positive class.
    if args.use_balanced_weights:
        weight = torch.tensor([0.2, 0.8], dtype=torch.float32)
    else:
        weight = None
    self.criterion = SegmentationLosses(
        weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    self.evaluator = Evaluator(self.nclass)
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr,
                                  args.epochs, len(self.train_loader))
    if args.cuda:
        self.model = self.model.cuda()
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(
                args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        # BUGFIX: the model is never wrapped in DataParallel in this class, so
        # the old CUDA branch (self.model.module.load_state_dict) raised
        # AttributeError. Load into the wrapper only if one actually exists.
        target = self.model.module if hasattr(self.model, 'module') else self.model
        target.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))
    if args.ft:
        args.start_epoch = 0
def __init__(self, args):
    """FPN segmentation trainer for Cityscapes: loader, ResNet-101 FPN,
    Adam/SGD optimizer, CE loss, evaluator, and staged-LR bookkeeping."""
    self.args = args
    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()
    # Define Dataloader
    if args.dataset == 'Cityscapes':
        kwargs = {'num_workers': args.num_workers, 'pin_memory': True}
        self.train_loader, self.val_loader, self.test_loader, self.num_class = make_data_loader(args, **kwargs)
    # Define network
    if args.net == 'resnet101':
        blocks = [2, 4, 23, 3]  # ResNet-101 stage depths
        fpn = FPN(blocks, self.num_class, back_bone=args.net)
    # Define Optimizer
    self.lr = self.args.lr
    if args.optimizer == 'adam':
        # Adam typically needs a smaller LR than SGD.
        self.lr = self.lr * 0.1
        # BUGFIX: torch.optim.Adam has no `momentum` argument -- passing it
        # raised TypeError -- and the 0.1-scaled self.lr was being ignored in
        # favor of the unscaled args.lr.
        optimizer = torch.optim.Adam(fpn.parameters(), lr=self.lr,
                                     weight_decay=args.weight_decay)
    elif args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(fpn.parameters(), lr=self.lr, momentum=0,
                                    weight_decay=args.weight_decay)
    # Define Criterion
    if args.dataset == 'Cityscapes':
        weight = None  # no class re-weighting
        self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode='ce')
    self.model = fpn
    self.optimizer = optimizer
    # Define Evaluator
    self.evaluator = Evaluator(self.num_class)
    # multiple mGPUs
    if args.mGPUs:
        self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
    # Using cuda
    if args.cuda:
        self.model = self.model.cuda()
    # Resuming checkpoint
    self.best_pred = 0.0
    # Epochs at which the LR is stepped; index tracks the next pending stage.
    self.lr_stage = [68, 93]
    self.lr_staget_ind = 0
def __init__(self):
    """Grasping experiment session: camera, gripper, robot arm, and episode storage."""
    self.camera = Camera(camera_suffixes=Config.camera_suffixes)
    self.history = EpisodeHistory()
    # Hardware endpoints configured via the project Config.
    self.gripper = Gripper('172.16.0.2', Config.gripper_speed, Config.gripper_force)
    self.robot = Robot('panda_arm', Config.general_dynamics_rel)
    self.saver = Saver(Config.database_url, Config.collection)
    self.current_bin = Config.start_bin
    # Motion profile at full relative dynamics.
    self.md = MotionData().with_dynamics(1.0)
    self.overall_start = 0
    # Images captured after the most recent action, if any.
    self.last_after_images: Optional[List[OrthographicImage]] = None
def main(args):
    """Train a GMM model on the Lehnert gridworld: set up saving/logging,
    assemble runner and model configs, train, then evaluate and clean up."""
    np.random.seed(2019)
    matplotlib.use("pdf")  # headless backend; figures are written to files
    if args.gpus is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
    saver = Saver(None)
    if args.save:
        # Output directory name encodes the key hyperparameters of this run.
        dir_variables = [
            SAVE_PREFIX.format(args.base_dir), args.learning_rate, args.optimizer,
            args.num_steps, args.num_blocks, args.num_components, args.beta1, args.beta2
        ]
        dir_switches = [
            "oversample"
        ]
        saver.create_dir_name(SAVE_TEMPLATE, dir_variables, dir_switches, args)
        saver.create_dir(add_run_subdir=True)
        saver.save_by_print(args, "settings")
    logger = Logger(save_file=saver.get_save_file("main", "log"), print_logs=True)
    logger.silence_tensorflow()
    # Run-level settings (data handling, validation, checkpoints).
    runner_config = {
        cc.LOAD_PATH: args.load_path,
        cc.SAVER: saver,
        cc.LOGGER: logger,
        cc.OVERSAMPLE: args.oversample,
        cc.VALIDATION_FRACTION: args.validation_fraction,
        cc.VALIDATION_FREQ: args.validation_freq,
        cc.BATCH_SIZE: args.batch_size,
        cc.LOAD_MODEL_PATH: args.load_model_path,
        cc.NUM_STEPS: args.num_steps,
        cc.DATA_LIMIT: args.data_limit
    }
    # Model architecture/hyperparameters (30x3 is the gridworld input size).
    model_config = {
        cc.HEIGHT: 30,
        cc.WIDTH: 3,
        cc.WEIGHT_DECAY: args.weight_decay,
        cc.OPTIMIZER: args.optimizer,
        cc.LEARNING_RATE: args.learning_rate,
        cc.NUM_BLOCKS: args.num_blocks,
        cc.NUM_COMPONENTS: args.num_components,
        cc.BETA0: args.beta0,
        cc.BETA1: args.beta1,
        cc.BETA2: args.beta2,
        cc.MIXTURES_MU_INIT_SD: args.mixtures_mu_init_sd,
        cc.MIXTURES_SD_INIT_MU: args.mixtures_sd_init_mu,
        cc.MIXTURES_SD_INIT_SD: args.mixtures_sd_init_sd,
        cc.ONE_Q_VALUE: False,
        cc.FC_ONLY: True
    }
    runner = LehnertGridworldGMMRunner(runner_config, model_config)
    runner.setup()
    runner.main_training_loop()
    if args.save_model:
        runner.save_model()
    runner.evaluate_and_visualize()
    runner.close_model_session()
def main(args):
    """Train a Q-network on a MinAtar game: set up saving/logging, build the
    runner, train, then evaluate and clean up."""
    np.random.seed(2019)
    if not args.show_graphs:
        matplotlib.use("pdf")  # headless backend unless interactive graphs requested
    if args.gpus is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
    saver = Saver(None)
    if args.save:
        # Output directory name encodes the key hyperparameters of this run.
        dir_variables = [
            SAVE_PREFIX, args.game, args.num_blocks, args.encoder_learning_rate,
            args.encoder_optimizer, args.num_steps
        ]
        # BUGFIX: a missing comma after "only_one_q_value" caused implicit
        # string concatenation, fusing it with "disable_batch_norm" into the
        # single switch "only_one_q_valuedisable_batch_norm".
        dir_switches = [
            "no_sample",
            "only_one_q_value",
            "disable_batch_norm"
        ]
        saver.create_dir_name(SAVE_TEMPLATE, dir_variables, dir_switches, args)
        saver.create_dir(add_run_subdir=True)
        saver.save_by_print(args, "settings")
    logger = LearnExpectationContinuousLogger(
        save_file=saver.get_save_file("main", "log"), print_logs=True)
    logger.silence_tensorflow()
    if args.game == constants.GAME_BREAKOUT:
        num_actions = 6
    else:
        raise ValueError("Unknown game.")
    runner = QRunnerMinAtar(
        args.load_path, num_actions, logger, saver, args.num_blocks,
        args.encoder_learning_rate, args.weight_decay, args.encoder_optimizer,
        args.num_steps, disable_batch_norm=args.disable_batch_norm,
        disable_softplus=args.disable_softplus, no_sample=args.no_sample,
        only_one_q_value=args.only_one_q_value,
        validation_freq=args.validation_freq,
        validation_fraction=args.validation_fraction,
        summaries=args.summaries, load_model_path=args.load_model_path,
        zero_sd_after_training=args.zero_sd_after_training
    )
    runner.setup()
    runner.main_training_loop()
    if args.save_model:
        runner.save_model()
    runner.evaluate_and_visualize()
    runner.close_model_session()
def read(folder):
    """Read all weights of a pretrained patch model from a checkpoint folder.

    Returns a dict mapping parameter key names (as used by patch_model) to
    their numpy values, fetched in a single session run.
    """
    log.info('Reading pretrained network from {}'.format(folder))
    saver = Saver(folder)
    ckpt_info = saver.get_ckpt_info()
    model_opt = ckpt_info['model_opt']
    ckpt_fname = ckpt_info['ckpt_fname']
    model_id = ckpt_info['model_id']
    model = patch_model.get_model(model_opt)
    # Layer counts for each sub-network, taken from the stored model options.
    attn_cnn_nlayers = len(model_opt['attn_cnn_filter_size'])
    attn_mlp_nlayers = model_opt['num_attn_mlp_layers']
    attn_dcnn_nlayers = len(model_opt['attn_dcnn_filter_size'])
    timespan = model_opt['timespan']
    weights = {}
    sess = tf.Session()
    saver.restore(sess, ckpt_fname)
    output_list = []
    # Collect weight/bias keys per layer; the (d)CNN nets additionally carry
    # per-timestep batch-norm beta/gamma, guarded by membership in the model
    # dict since not every timestep parameter necessarily exists.
    for net, nlayers in zip(
            ['attn_cnn', 'attn_mlp', 'attn_dcnn'],
            [attn_cnn_nlayers, attn_mlp_nlayers, attn_dcnn_nlayers]):
        for ii in xrange(nlayers):
            for w in ['w', 'b']:
                key = '{}_{}_{}'.format(net, w, ii)
                log.info(key)
                output_list.append(key)
            if net == 'attn_cnn' or net == 'attn_dcnn':
                for tt in xrange(timespan):
                    for w in ['beta', 'gamma']:
                        key = '{}_{}_{}_{}'.format(net, ii, tt, w)
                        if key in model:
                            log.info(key)
                            output_list.append(key)
    output_var = []
    for key in output_list:
        output_var.append(model[key])
    # Fetch every tensor in one run, then pair values back with their keys.
    output_var_value = sess.run(output_var)
    for key, value in zip(output_list, output_var_value):
        weights[key] = value
        log.info(key)
        log.info(value.shape)
    return weights
class Trainer(object):
    """Segmentation trainer for a retrained Auto-DeepLab backbone.

    Only the 'cityscapes' and 'marsh' datasets and the 'autodeeplab'
    backbone are wired up; anything else raises.
    """

    def __init__(self, args):
        # Silence library warnings; a GPU is mandatory; let cuDNN autotune.
        warnings.filterwarnings('ignore')
        assert torch.cuda.is_available()
        torch.backends.cudnn.benchmark = True

        # Checkpoint filename template ('%d' is filled with the epoch later).
        # NOTE(review): model_fname is built but not used in this block.
        model_fname = 'data/deeplab_{0}_{1}_v3_{2}_epoch%d.pth'.format(args.backbone, args.dataset, args.exp)

        if args.dataset == 'pascal':
            raise NotImplementedError
        elif args.dataset == 'cityscapes':
            # NOTE(review): this branch only defines dataset_loader, yet
            # val_loader/test_loader are consumed unconditionally below --
            # cityscapes would raise NameError there; confirm intended use.
            kwargs = {'num_workers': args.workers, 'pin_memory': True, 'drop_last': True}
            dataset_loader, num_classes = dataloaders.make_data_loader(args, **kwargs)
            args.num_classes = num_classes
        elif args.dataset == 'marsh':
            kwargs = {'num_workers': args.workers, 'pin_memory': True, 'drop_last': True}
            dataset_loader, val_loader, test_loader, num_classes = dataloaders.make_data_loader(args, **kwargs)
            args.num_classes = num_classes
        else:
            raise ValueError('Unknown dataset: {}'.format(args.dataset))

        if args.backbone == 'autodeeplab':
            model = Retrain_Autodeeplab(args)
            # Warm-start from the best previous checkpoint; strict=False
            # tolerates missing/extra keys in the state dict.
            model.load_state_dict(torch.load(r"./run/marsh/deeplab-autodeeplab/model_best.pth.tar")['state_dict'], strict=False)
        else:
            raise ValueError('Unknown backbone: {}'.format(args.backbone))

        # NOTE(review): parameters are taken via model.module BEFORE the
        # nn.DataParallel wrap below; this assumes Retrain_Autodeeplab itself
        # exposes a .module attribute -- confirm.
        optimizer = optim.SGD(model.module.parameters(), lr=args.base_lr, momentum=0.9, weight_decay=0.0001)

        if args.criterion == 'Ohem':
            # Ohem loss needs a confidence threshold and a minimum kept-pixel
            # count derived from batch size, GPU count and crop area.
            args.thresh = 0.7
            args.crop_size = [args.crop_size, args.crop_size] if isinstance(args.crop_size, int) else args.crop_size
            args.n_min = int((args.batch_size / len(args.gpu) * args.crop_size[0] * args.crop_size[1]) // 16)
        criterion = build_criterion(args)

        model = nn.DataParallel(model).cuda()
        ##mergee
        self.args = args
        # Define Saver
        self.saver = Saver(args)
        self.saver.save_experiment_config()
        # Define Tensorboard Summary
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()
        # Define Dataloader
        #kwargs = {'num_workers': args.workers, 'pin_memory': True}
        self.train_loader, self.val_loader, self.test_loader, self.nclass = dataset_loader, val_loader, test_loader, num_classes
        self.criterion = criterion
        self.model, self.optimizer = model, optimizer
        # Define Evaluator
        self.evaluator = Evaluator(self.nclass)
        # Define lr scheduler
        #self.scheduler = scheduler
        self.scheduler = LR_Scheduler("poly", args.lr, args.epochs, len(self.train_loader))  #removed None from second parameter.
def __init__(self, weight_path, resume, gpu_id):
    """Build the DeepLab training harness (data, model, optimizer, logging).

    :param weight_path: path used later when saving/loading model weights.
    :param resume:      if truthy, restore weights via __resume_model_weights().
    :param gpu_id:      device selector passed to gpu.select_device().
    """
    init_seeds(1)          # deterministic seeding
    init_dirs("result")    # make sure the output directory tree exists
    self.device = gpu.select_device(gpu_id)
    self.start_epoch = 0
    self.best_mIoU = 0.    # best validation mIoU seen so far
    self.epochs = cfg.TRAIN["EPOCHS"]
    self.weight_path = weight_path
    # Third element (test loader) is unused here.
    self.train_loader, self.val_loader, _, self.num_class = make_data_loader(
    )
    self.model = DeepLab(num_classes=self.num_class,
                         backbone="resnet",
                         output_stride=16,
                         sync_bn=False,
                         freeze_bn=False).to(self.device)
    # Backbone trains at the base LR, the head at 10x -- standard DeepLab recipe.
    train_params = [{
        'params': self.model.get_1x_lr_params(),
        'lr': cfg.TRAIN["LR_INIT"]
    }, {
        'params': self.model.get_10x_lr_params(),
        'lr': cfg.TRAIN["LR_INIT"] * 10
    }]
    self.optimizer = optim.SGD(train_params,
                               momentum=cfg.TRAIN["MOMENTUM"],
                               weight_decay=cfg.TRAIN["WEIGHT_DECAY"])
    self.criterion = SegmentationLosses().build_loss(
        mode=cfg.TRAIN["LOSS_TYPE"])
    self.scheduler = LR_Scheduler(mode=cfg.TRAIN["LR_SCHEDULER"],
                                  base_lr=cfg.TRAIN["LR_INIT"],
                                  num_epochs=self.epochs,
                                  iters_per_epoch=len(self.train_loader))
    self.evaluator = Evaluator(self.num_class)
    self.saver = Saver()
    self.summary = TensorboardSummary(os.path.join("result", "run"))
    if resume:
        self.__resume_model_weights()
def main(args):
    """Train a Lehnert-gridworld model end to end, optionally saving results."""
    np.random.seed(2019)
    matplotlib.use("pdf")

    if args.gpus is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus

    saver = Saver(None)

    if args.save:
        # Encode the data limit into the directory name only when it is set.
        limit_suffix = "" if args.data_limit is None else "_{:d}_dl".format(args.data_limit)
        name_fields = [
            SAVE_PREFIX.format(args.base_dir),
            args.learning_rate,
            args.optimizer,
            args.num_steps,
            limit_suffix
        ]
        saver.create_dir_name(SAVE_TEMPLATE, name_fields, ["oversample"], args)
        saver.create_dir(add_run_subdir=True)
        saver.save_by_print(args, "settings")

    logger = Logger(save_file=saver.get_save_file("main", "log"), print_logs=True)
    logger.silence_tensorflow()

    # Split settings between the runner (training loop) and the model itself.
    runner_config = {
        cc.LOAD_PATH: args.load_path,
        cc.SAVER: saver,
        cc.LOGGER: logger,
        cc.OVERSAMPLE: args.oversample,
        cc.VALIDATION_FRACTION: args.validation_fraction,
        cc.VALIDATION_FREQ: args.validation_freq,
        cc.BATCH_SIZE: args.batch_size,
        cc.LOAD_MODEL_PATH: args.load_model_path,
        cc.NUM_STEPS: args.num_steps,
        cc.DATA_LIMIT: args.data_limit
    }
    model_config = {
        cc.HEIGHT: args.height,
        cc.WIDTH: args.width,
        cc.WEIGHT_DECAY: args.weight_decay,
        cc.OPTIMIZER: args.optimizer,
        cc.LEARNING_RATE: args.learning_rate,
        cc.FC_ONLY: not args.conv,
        cc.DROPOUT_PROB: args.dropout_prob
    }

    runner = LehnertGridworldRunner(runner_config, model_config)
    runner.setup()
    runner.main_training_loop()

    if args.save_model:
        runner.save_model()

    runner.close_model_session()
def main(args):
    """Run approximate state-partitioning on a trained Lehnert-gridworld model."""
    # CPU only: hide every GPU from TensorFlow and use a headless plot backend.
    os.environ["CUDA_VISIBLE_DEVICES"] = ""
    matplotlib.use("pdf")

    saver = Saver(None)

    if args.save:
        limit_suffix = "" if args.data_limit is None else "_{:d}_dl".format(args.data_limit)
        name_fields = [
            SAVE_PREFIX.format(args.base_dir),
            args.round_to,
            args.epsilon,
            limit_suffix
        ]
        saver.create_dir_name(SAVE_TEMPLATE, name_fields, ["hard_t"], args)
        saver.create_dir(add_run_subdir=True)
        saver.save_by_print(args, "settings")

    logger = Logger(save_file=saver.get_save_file("main", "log"), print_logs=True)
    logger.silence_tensorflow()

    # Fixed model hyperparameters for the 30x3 gridworld encoder.
    model_config = {
        cc.HEIGHT: 30,
        cc.WIDTH: 3,
        cc.WEIGHT_DECAY: 0.0001,
        cc.OPTIMIZER: constants.OPT_ADAM,
        cc.LEARNING_RATE: 0.0005,
        cc.FC_ONLY: True,
        cc.DROPOUT_PROB: 0.0
    }
    runner_config = {
        cc.LOAD_MODEL_PATH: args.load_model_path,
        cc.ROUND_TO: args.round_to,
        cc.HARD_T: args.hard_t,
        cc.SAVER: saver,
        cc.LOGGER: logger,
        cc.EPSILON: args.epsilon
    }

    runner = LehnertGridworldApproxPartitionRunner(runner_config, model_config)
    runner.setup()
    runner.main_training_loop()
    runner.evaluate_and_visualize()
def main(args):
    """Train a GMM-prior Q-value encoder on the pucks environment and evaluate it."""
    np.random.seed(2019)

    # Headless plotting unless graphs were explicitly requested.
    if not args.show_graphs:
        matplotlib.use("pdf")

    if args.gpus is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus

    saver = Saver(None)

    if args.save:
        name_fields = [
            SAVE_PREFIX.format(args.base_dir),
            args.num_pucks, args.grid_size, args.grid_size,
            args.num_blocks, args.num_components,
            args.beta0, args.beta1, args.beta2,
            args.encoder_learning_rate, args.encoder_optimizer, args.num_steps
        ]
        name_switches = [
            "no_sample", "only_one_q_value", "gt_q_values", "disable_batch_norm"
        ]
        saver.create_dir_name(SAVE_TEMPLATE, name_fields, name_switches, args)
        saver.create_dir(add_run_subdir=True)
        saver.save_by_print(args, "settings")

    logger = LearnExpectationContinuousLogger(
        save_file=saver.get_save_file("main", "log"), print_logs=True
    )
    logger.silence_tensorflow()

    runner = QGMMPriorRunner(
        args.load_path, args.grid_size, args.num_pucks, logger, saver,
        args.num_blocks, args.num_components, args.hiddens,
        args.encoder_learning_rate, args.beta0, args.beta1, args.beta2,
        args.weight_decay, args.encoder_optimizer, args.num_steps,
        disable_batch_norm=args.disable_batch_norm,
        disable_softplus=args.disable_softplus,
        no_sample=args.no_sample,
        only_one_q_value=args.only_one_q_value,
        gt_q_values=args.gt_q_values,
        disable_resize=args.disable_resize,
        oversample=args.oversample,
        validation_freq=args.validation_freq,
        validation_fraction=args.validation_fraction,
        summaries=args.summaries,
        load_model_path=args.load_model_path,
        include_goal_states=args.include_goal_states,
        q_values_noise_sd=args.q_values_noise_sd,
        new_dones=args.new_dones
    )

    runner.setup()
    runner.main_training_loop()

    if args.save_model:
        runner.save_model()

    runner.evaluate_and_visualize()
    runner.close_model_session()
def initialize_model(self, args):
    """Build the evaluation model and restore it from ``args.resume``.

    Creates the saver/tensorboard writer, the dataloaders, a DeepLab network
    and an Evaluator, loads the checkpoint given by args.resume, and puts
    the model into eval mode.

    :param args:  argument namespace; must provide resume (checkpoint path),
                  cuda/gpu_ids, dataloader and DeepLab settings.
    :raises RuntimeError: if args.resume does not point to a file.
    """
    self.args = args

    # Define Saver
    self.saver = Saver(self.args)
    self.saver.save_experiment_config()

    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Define Dataloader
    # BUG FIX: the key was 'num_worker'; torch's DataLoader expects
    # 'num_workers' (sibling trainers in this file use that spelling).
    # NOTE(review): the value is read from args.worker here while siblings
    # read args.workers -- confirm which attribute the arg parser defines.
    kwargs = {'num_workers': self.args.worker, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(
        self.args, **kwargs)

    # Define network
    model = DeepLab(num_classes=self.nclass,
                    backbone=self.args.backbone,
                    output_stride=self.args.out_stride,
                    sync_bn=self.args.sync_bn,
                    freeze_bn=self.args.freeze_bn)
    # BUG FIX: the original never assigned the freshly built network to
    # self.model, so both the DataParallel wrap below and the non-CUDA
    # checkpoint-loading path operated on an attribute that was never set.
    self.model = model

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)

    # Using cuda
    if self.args.cuda:
        self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()

    # Restore the checkpoint (mandatory for this evaluation path).
    if not os.path.isfile(self.args.resume):
        raise RuntimeError("=> no checkpoint found at '{}'".format(
            self.args.resume))
    checkpoint = torch.load(self.args.resume)
    self.args.start_epoch = checkpoint['epoch']
    if self.args.cuda:
        self.model.module.load_state_dict(checkpoint['state_dict'])
    else:
        self.model.load_state_dict(checkpoint['state_dict'])

    self.model.eval()
    self.evaluator.reset()
def main(merge_list=None):
    """Parse CLI args, build the merged config, set up logging and the GPU.

    :param merge_list: optional extra key/value list merged into cfg last
                       (overrides both the config file and CLI opts).
    :return:           tuple (cfg, saver) for the caller to train/test with.
    """
    parser = argparse.ArgumentParser(description="ReID Baseline Training")
    parser.add_argument("--config_file", default="", help="path to config file", type=str)
    parser.add_argument("opts", help="Modify config options using the command-line",
                        default=None, nargs=argparse.REMAINDER)
    args = parser.parse_args()

    # Merge order: config file < CLI opts < explicit merge_list.
    if args.config_file != "":
        cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    if merge_list:
        cfg.merge_from_list(merge_list)

    saver = Saver(cfg)

    if cfg.TEST.IF_ON:
        log_file = 'test-log.txt'
    else:
        log_file = 'train-log.txt'
    logger = setup_logger("reid_baseline", saver.save_dir, log_file)
    logger.setLevel(logging.INFO)

    if args.config_file != "":
        logger.info("Loaded configuration file {}".format(args.config_file))
    logger.info("Running with config:\n{}".format(cfg))
    logger.info("=" * 20)

    # os.environ['CUDA_VISIBLE_DEVICES'] = '3'
    torch.cuda.set_device(cfg.GPU.DEVICE_ID)
    logger.info(f"Using GPU: {cfg.GPU.DEVICE_ID}")
    logger.info(f"CUDNN VERSION: {cudnn.version()}")
    cudnn.enabled = True
    cudnn.benchmark = True
    if cfg.GPU.IF_DETERMINISTIC:  # using cuDNN
        # Reproducible mode: disable autotune and seed every RNG in play.
        cudnn.benchmark = False
        cudnn.deterministic = True
        # BUG FIX: torch.random.manual_seed(1024) was called twice in a row;
        # the duplicate call was redundant and has been removed.
        torch.random.manual_seed(1024)
        torch.cuda.manual_seed(1024)  # gpu
        torch.cuda.manual_seed_all(1024)
        np.random.seed(1024)  # numpy
        random.seed(1024)  # random and transforms
        torch.set_printoptions(precision=10)
    return cfg, saver
def testing_entropy(self):
    """Run the entropy/confidence evaluation pass over the validation set.

    For each validation batch, computes the loss, the model's average and
    max confidence, and the normalized Shannon entropy of the first head's
    output, logging one tensorboard point per batch index.
    """
    self.saver = Saver(self.args)
    self.saver.save_experiment_config()

    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    self.model.eval()
    self.evaluator_1.reset()
    self.evaluator_2.reset()
    tbar = tqdm(self.val_loader, desc='\r')

    for i, sample in enumerate(tbar):
        image, target = sample['image'], sample['label']
        if self.args.cuda:
            image, target = image.cuda(), target.cuda()
        with torch.no_grad():
            output_1, avg_confidence, max_confidence = self.model.forward_testing_entropy(
                image)
        loss_1 = self.criterion(output_1, target)
        entropy = normalized_shannon_entropy(output_1)

        self.writer.add_scalar('avg_confidence/i', avg_confidence.item(), i)
        self.writer.add_scalar('max_confidence/i', max_confidence.item(), i)
        self.writer.add_scalar('entropy/i', entropy.item(), i)
        self.writer.add_scalar('loss/i', loss_1.item(), i)
        # BUG FIX: the original passed undefined names (target_show, output_2,
        # global_step), which raised NameError on the first batch; visualize
        # the tensors this loop actually computes instead.
        self.summary.visualize_image(self.writer, self.args.dataset, image,
                                     target, output_1, i)

    print('testing confidence')
    self.writer.close()
# NOTE(review): tail of the argument-parsing helper defined above this chunk
# (its `def` line is outside this view).
return args


if __name__ == '__main__':
    # Command-line arguments
    args = parse_args()
    tf.set_random_seed(1234)
    saver = None
    # Option dicts for the training loop, model construction and data loading.
    train_opt = trainer.make_train_opt(args)
    model_opt_read = make_model_opt(args)
    data_opt = trainer.make_data_opt(args)

    # Restore previously saved checkpoints.
    if train_opt['restore']:
        saver = Saver(train_opt['restore'])
        ckpt_info = saver.get_ckpt_info()
        # Checkpoint options override the freshly parsed ones on resume.
        model_opt = ckpt_info['model_opt']
        data_opt = ckpt_info['data_opt']
        ckpt_fname = ckpt_info['ckpt_fname']
        step = ckpt_info['step']
        model_id = ckpt_info['model_id']
        exp_folder = train_opt['restore']
        # Never re-run CNN pretraining when resuming from a checkpoint.
        model_opt['pretrain_cnn'] = None
    else:
        # Fresh run: honour an explicit model id, otherwise generate one.
        if train_opt['model_id']:
            model_id = train_opt['model_id']
        else:
            model_id = trainer.get_model_id('ris_box')
        model_opt = model_opt_read
        step = 0
# NOTE(review): tail of the model-id helper defined above this chunk (its
# `def` line is outside this view): builds "<task>-YYYYMMDDHHMMSS".
model_id = timestr = '{}-{:04d}{:02d}{:02d}{:02d}{:02d}{:02d}'.format(
    task_name, time_obj.year, time_obj.month, time_obj.day,
    time_obj.hour, time_obj.minute, time_obj.second)
return model_id


if __name__ == '__main__':
    # Command-line arguments
    args = _parse_args()
    tf.set_random_seed(1234)
    saver = None

    # Restore previously saved checkpoints.
    if args.restore:
        saver = Saver(args.restore)
        ckpt_info = saver.get_ckpt_info()
        # Checkpoint options override the freshly parsed ones on resume.
        model_opt = ckpt_info['model_opt']
        data_opt = ckpt_info['data_opt']
        ckpt_fname = ckpt_info['ckpt_fname']
        step = ckpt_info['step']
        model_id = ckpt_info['model_id']
        exp_folder = args.restore
    else:
        model_id = get_model_id('rec_ins_segm')

    # Per-layer CNN filter sizes gathered from the individual CLI flags.
    cnn_filter_size_all = [args.cnn_1_filter_size, args.cnn_2_filter_size,
                           args.cnn_3_filter_size, args.cnn_4_filter_size,
                           args.cnn_5_filter_size]
def __init__(self, args):
    """Standard DeepLab trainer setup: saver, tensorboard, dataloaders,
    network, optimizer, loss, evaluator, LR schedule, CUDA wrapping and
    optional checkpoint resume.

    :param args:  fully populated argument namespace (dataset, backbone,
                  lr, cuda/gpu_ids, resume path, etc.).
    :raises RuntimeError: if args.resume is set but is not a file.
    """
    self.args = args

    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Define Dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(args, **kwargs)

    # Define network
    model = DeepLab(num_classes=self.nclass,
                    backbone=args.backbone,
                    output_stride=args.out_stride,
                    sync_bn=args.sync_bn,
                    freeze_bn=args.freeze_bn)

    # Backbone params at the base LR, decoder params at 10x -- standard recipe.
    train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                    {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]

    # Define Optimizer
    optimizer = torch.optim.SGD(train_params, momentum=args.momentum,
                                weight_decay=args.weight_decay, nesterov=args.nesterov)

    # Define Criterion
    # whether to use class balanced weights
    if args.use_balanced_weights:
        classes_weights_path = os.path.join(Path.db_root_dir(args.dataset),
                                            args.dataset+'_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            # First run: compute the class weights from the training set.
            weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    self.model, self.optimizer = model, optimizer

    # Define Evaluator
    self.evaluator = Evaluator(self.nclass)

    # Define lr scheduler
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr,
                                  args.epochs, len(self.train_loader))

    # Using cuda
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()

    # Resuming checkpoint
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'"
                               .format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        if args.cuda:
            self.model.module.load_state_dict(checkpoint['state_dict'])
        else:
            self.model.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            # Fine-tuning restarts the optimizer; full resume restores it.
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})"
              .format(args.resume, checkpoint['epoch']))

    # Clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0
class Trainer(object):
    """DeepLab segmentation trainer: setup, per-epoch training and validation."""

    def __init__(self, args):
        """Build saver, tensorboard writer, dataloaders, network, optimizer,
        criterion, evaluator, LR scheduler; wrap for CUDA and resume if asked.

        :param args:  fully populated argument namespace.
        :raises RuntimeError: if args.resume is set but is not a file.
        """
        self.args = args

        # Define Saver
        self.saver = Saver(args)
        self.saver.save_experiment_config()

        # Define Tensorboard Summary
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()

        # Define Dataloader
        kwargs = {'num_workers': args.workers, 'pin_memory': True}
        self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(args, **kwargs)

        # Define network
        model = DeepLab(num_classes=self.nclass,
                        backbone=args.backbone,
                        output_stride=args.out_stride,
                        sync_bn=args.sync_bn,
                        freeze_bn=args.freeze_bn)

        # Backbone at the base LR, decoder at 10x -- standard DeepLab recipe.
        train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                        {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]

        # Define Optimizer
        optimizer = torch.optim.SGD(train_params, momentum=args.momentum,
                                    weight_decay=args.weight_decay, nesterov=args.nesterov)

        # Define Criterion: optionally class-balanced weights (cached on disk).
        if args.use_balanced_weights:
            classes_weights_path = os.path.join(Path.db_root_dir(args.dataset),
                                                args.dataset+'_classes_weights.npy')
            if os.path.isfile(classes_weights_path):
                weight = np.load(classes_weights_path)
            else:
                weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass)
            weight = torch.from_numpy(weight.astype(np.float32))
        else:
            weight = None
        self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
        self.model, self.optimizer = model, optimizer

        # Define Evaluator
        self.evaluator = Evaluator(self.nclass)

        # Define lr scheduler
        self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr,
                                      args.epochs, len(self.train_loader))

        # Using cuda
        if args.cuda:
            self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
            patch_replication_callback(self.model)
            self.model = self.model.cuda()

        # Resuming checkpoint
        self.best_pred = 0.0
        if args.resume is not None:
            if not os.path.isfile(args.resume):
                raise RuntimeError("=> no checkpoint found at '{}'"
                                   .format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            if args.cuda:
                self.model.module.load_state_dict(checkpoint['state_dict'])
            else:
                self.model.load_state_dict(checkpoint['state_dict'])
            if not args.ft:
                # Fine-tuning restarts the optimizer; full resume restores it.
                self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.best_pred = checkpoint['best_pred']
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))

        # Clear start epoch if fine-tuning
        if args.ft:
            args.start_epoch = 0

    def training(self, epoch):
        """Run one training epoch: step the scheduler per batch, log the loss
        and periodically visualize predictions; checkpoint when no_val is set.

        :param epoch:  0-based epoch index.
        """
        train_loss = 0.0
        self.model.train()
        tbar = tqdm(self.train_loader)
        num_img_tr = len(self.train_loader)
        # BUG FIX: the original computed `i % (num_img_tr // 10)`, which raises
        # ZeroDivisionError whenever the loader has fewer than 10 batches;
        # clamp the visualization interval to at least 1.
        vis_interval = max(num_img_tr // 10, 1)
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            self.scheduler(self.optimizer, i, epoch, self.best_pred)
            self.optimizer.zero_grad()
            output = self.model(image)
            loss = self.criterion(output, target)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            tbar.set_description('Train loss: %.3f' % (train_loss / (i + 1)))
            self.writer.add_scalar('train/total_loss_iter', loss.item(), i + num_img_tr * epoch)

            # Show 10 * 3 inference results each epoch
            if i % vis_interval == 0:
                global_step = i + num_img_tr * epoch
                self.summary.visualize_image(self.writer, self.args.dataset, image, target, output, global_step)

        self.writer.add_scalar('train/total_loss_epoch', train_loss, epoch)
        print('[Epoch: %d, numImages: %5d]' % (epoch, i * self.args.batch_size + image.data.shape[0]))
        print('Loss: %.3f' % train_loss)

        if self.args.no_val:
            # save checkpoint every epoch
            is_best = False
            self.saver.save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': self.model.module.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'best_pred': self.best_pred,
            }, is_best)

    def validation(self, epoch):
        """Evaluate on the validation set, log metrics, and checkpoint when
        mIoU improves on the best seen so far.

        :param epoch:  0-based epoch index.
        """
        self.model.eval()
        self.evaluator.reset()
        tbar = tqdm(self.val_loader, desc='\r')
        test_loss = 0.0
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            with torch.no_grad():
                output = self.model(image)
            loss = self.criterion(output, target)
            test_loss += loss.item()
            pred = output.data.cpu().numpy()
            target = target.cpu().numpy()
            pred = np.argmax(pred, axis=1)
            # Add batch sample into evaluator
            self.evaluator.add_batch(target, pred)

        # Fast test during the training
        Acc = self.evaluator.Pixel_Accuracy()
        Acc_class = self.evaluator.Pixel_Accuracy_Class()
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
        self.writer.add_scalar('val/total_loss_epoch', test_loss, epoch)
        self.writer.add_scalar('val/mIoU', mIoU, epoch)
        self.writer.add_scalar('val/Acc', Acc, epoch)
        self.writer.add_scalar('val/Acc_class', Acc_class, epoch)
        self.writer.add_scalar('val/fwIoU', FWIoU, epoch)
        print('Validation:')
        print('[Epoch: %d, numImages: %5d]' % (epoch, i * self.args.batch_size + image.data.shape[0]))
        print("Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}".format(Acc, Acc_class, mIoU, FWIoU))
        print('Loss: %.3f' % test_loss)

        new_pred = mIoU
        if new_pred > self.best_pred:
            is_best = True
            self.best_pred = new_pred
            self.saver.save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': self.model.module.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'best_pred': self.best_pred,
            }, is_best)