def train(cfg, logger):
    model = build_model(cfg)
    device = cfg.MODEL.DEVICE
    optimizer, lr_schedule = make_optimizer(cfg, model)
    metric_fc = None
    loss_fn = get_loss_fn(cfg, logger)
    logger.info("----------------------------------------------")
    train_loader = make_data_loader(cfg, is_train=True)
    val_loader = make_data_loader(cfg, is_train=False)
    loss_fn2 = torch.nn.MSELoss()
    do_train(
        cfg,
        model,
        metric_fc,
        train_loader,
        val_loader,
        optimizer,
        lr_schedule,
        loss_fn,
        loss_fn2,
        logger,
    )
def main():
    # Read the config file
    with open('config/default.yml') as fin:
        config = yaml.load(fin, Loader=yaml.SafeLoader)

    # Select the device used by the model and dataloaders
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Build the train and valid datasets
    train_config = config['dataset']['train']
    train_df = pd.read_csv(train_config['data_path'], sep='\t')
    train_df = train_df.sample(frac=1)  # shuffle rows; the result must be reassigned
    train, valid = train_test_split(train_df, test_size=config['train_valid_split'])
    train_dataset = build_dataloader(train, train_config, device=device)
    valid_dataset = build_dataloader(valid, train_config, device=device)

    # Build the model
    model_config = config['model']
    model = BertClassifier(model_config)
    model.to(device)
    optimizer = build_optimizer(model, config['optimizer'])

    # Compute the number of training steps
    num_train_steps = int(
        len(train_dataset) / train_dataset.batch_size * config['num_epochs'])
    num_warmup_steps = int(num_train_steps * config['optimizer']['warmup_proportion'])
    scheduler = build_scheduler(optimizer, num_train_steps, num_warmup_steps)

    # Train
    trainer.do_train(model,
                     train_loader=train_dataset,
                     valid_loader=valid_dataset,
                     optimizer=optimizer,
                     scheduler=scheduler,
                     cfg=config)
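# build_scheduler above is project code; a minimal sketch, assuming it follows the usual
# BERT fine-tuning recipe (linear warmup to the base LR, then linear decay to zero).
# The helper name linear_warmup_decay is hypothetical.
from torch.optim.lr_scheduler import LambdaLR

def linear_warmup_decay(optimizer, num_train_steps, num_warmup_steps):
    def lr_lambda(step):
        if step < num_warmup_steps:
            return step / max(1, num_warmup_steps)  # ramp up during warmup
        # decay linearly from 1.0 to 0.0 over the remaining steps
        return max(0.0, (num_train_steps - step) / max(1, num_train_steps - num_warmup_steps))
    return LambdaLR(optimizer, lr_lambda)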
def train(cfg):
    logger = setup_logger(name='Train', level=cfg.LOGGER.LEVEL)
    logger.info(cfg)

    model = build_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    criterion = build_loss(cfg)
    optimizer = build_optimizer(cfg, model)
    scheduler = build_lr_scheduler(cfg, optimizer)

    train_loader = build_data(cfg, is_train=True)
    val_loader = build_data(cfg, is_train=False)
    logger.info(train_loader.dataset)
    logger.info(val_loader.dataset)

    arguments = dict()
    arguments["iteration"] = 0
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    checkpointer = Checkpointer(model, optimizer, scheduler, cfg.SAVE_DIR)

    do_train(cfg, model, train_loader, val_loader, optimizer, scheduler, criterion,
             checkpointer, device, checkpoint_period, arguments, logger)
def train(cfg):
    # prepare dataset
    train_loader, val_loader, num_query, num_classes = make_data_loader(cfg)

    # prepare model
    model = build_model(cfg, num_classes)
    if cfg.SOLVER.FINETUNE:
        model.load_state_dict(torch.load(cfg.TEST.WEIGHT).module.state_dict())
    model = nn.DataParallel(model)

    optimizer = make_optimizer(cfg, model)
    scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                  cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                  cfg.SOLVER.WARMUP_METHOD)
    # scheduler = WarmupStepLR(optimizer, 3, 9, cfg.SOLVER.WARMUP_FACTOR,
    #                          cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD)
    loss_func = make_loss(cfg)

    arguments = {}

    do_train(
        cfg,
        model,
        train_loader,
        val_loader,
        optimizer,
        scheduler,
        loss_func,
        num_query
    )
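# WarmupMultiStepLR is defined in the surrounding project, not in torch; a minimal sketch,
# assuming the behaviour this scheduler usually has in re-ID baselines: linear (or constant)
# warmup over the first warmup_iters epochs, then the standard multi-step gamma decay.
from bisect import bisect_right
import torch

class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler):
    def __init__(self, optimizer, milestones, gamma=0.1, warmup_factor=1.0 / 3,
                 warmup_iters=500, warmup_method="linear", last_epoch=-1):
        if list(milestones) != sorted(milestones):
            raise ValueError("Milestones should be increasing, got {}".format(milestones))
        self.milestones = milestones
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        warmup_factor = 1
        if self.last_epoch < self.warmup_iters:
            if self.warmup_method == "constant":
                warmup_factor = self.warmup_factor
            else:  # "linear"
                alpha = self.last_epoch / self.warmup_iters
                warmup_factor = self.warmup_factor * (1 - alpha) + alpha
        # scale every base LR by the warmup factor and the multi-step decay
        return [base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch)
                for base_lr in self.base_lrs]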
def train(cfg): # prepare dataset train_loader, val_loader, num_query, num_classes, clustering_loader = make_data_loader( cfg) # prepare model model = build_model(cfg, num_classes) if cfg.MODEL.IF_WITH_CENTER == 'on': loss_func, center_criterion_part, center_criterion_global, center_criterion_fore = make_loss_with_center( cfg, num_classes) optimizer, optimizer_center = make_optimizer_with_center( cfg, model, center_criterion_part, center_criterion_global, center_criterion_fore) else: loss_func = make_loss(cfg, num_classes) optimizer = make_optimizer(cfg, model) # Add for using self trained model if cfg.MODEL.PRETRAIN_CHOICE == 'imagenet': start_epoch = 0 scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD) else: print('Only support pretrain_choice for imagenet, but got {}'.format( cfg.MODEL.PRETRAIN_CHOICE)) if cfg.MODEL.IF_WITH_CENTER == 'on': do_train_with_center( cfg, model, center_criterion_part, center_criterion_global, center_criterion_fore, train_loader, val_loader, optimizer, optimizer_center, scheduler, # modify for using self trained model loss_func, num_query, start_epoch, # add for using self trained model clustering_loader) else: do_train( cfg, model, train_loader, val_loader, optimizer, scheduler, # modify for using self trained model loss_func, num_query, start_epoch, # add for using self trained model clustering_loader)
def train(cfg, local_rank, distributed, logger=None, tblogger=None, transfer_weight=False, change_lr=False): device = torch.device('cuda') # create model logger.info('Creating model "{}"'.format(cfg.MODEL.ARCHITECTURE)) model = build_model(cfg).to(device) criterion = torch.nn.CrossEntropyLoss(ignore_index=255).to(device) optimizer = make_optimizer(cfg, model) # model, optimizer = apex.amp.initialize(model, optimizer, opt_level='O2') scheduler = make_lr_scheduler(cfg, optimizer) if distributed: # model = apex.parallel.DistributedDataParallel(model) model = torch.nn.parallel.DistributedDataParallel( model, device_ids=[local_rank], output_device=local_rank, broadcast_buffers=True, ) save_to_disk = get_rank() == 0 # checkpoint arguments = {} arguments['iteration'] = 0 arguments['best_iou'] = 0 checkpointer = Checkpointer(model, optimizer, scheduler, cfg.LOGS.DIR, save_to_disk, logger) extra_checkpoint_data = checkpointer.load( f=cfg.MODEL.WEIGHT, model_weight_only=transfer_weight, change_scheduler=change_lr) arguments.update(extra_checkpoint_data) # data_loader logger.info('Loading dataset "{}"'.format(cfg.DATASETS.TRAIN)) data_loader = make_data_loader(cfg, 'train', distributed) data_loader_val = make_data_loader(cfg, 'val', distributed) do_train(cfg, model=model, data_loader=data_loader, optimizer=optimizer, scheduler=scheduler, criterion=criterion, checkpointer=checkpointer, device=device, arguments=arguments, tblogger=tblogger, data_loader_val=data_loader_val, distributed=distributed)
def train(cfg): # prepare dataset train_loader, val_loader, num_query, num_classes = make_data_loader(cfg) # prepare model model = build_model(cfg, num_classes) total = sum([param.nelement() for param in model.parameters()]) print("Number of parameter: %.2fM" % (total / 1e6)) if cfg.MODEL.METRIC_LOSS_TYPE == 'triplet': print('Train without center loss, the loss type is', cfg.MODEL.METRIC_LOSS_TYPE) optimizer = make_optimizer(cfg, model) loss_func = make_loss(cfg, num_classes) # Add for using self trained model if cfg.MODEL.PRETRAIN_CHOICE == 'self': start_epoch = eval( cfg.MODEL.PRETRAIN_PATH.split('/')[-1].split('.')[0].split('_') [-1]) print('Start epoch:', start_epoch) path_to_optimizer = cfg.MODEL.PRETRAIN_PATH.replace( 'model', 'optimizer') print('Path to the checkpoint of optimizer:', path_to_optimizer) model.load_state_dict(torch.load(cfg.MODEL.PRETRAIN_PATH)) optimizer.load_state_dict(torch.load(path_to_optimizer)) scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD, start_epoch) elif cfg.MODEL.PRETRAIN_CHOICE == 'imagenet': start_epoch = 0 scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD) else: print( 'Only support pretrain_choice for imagenet and self, but got {}' .format(cfg.MODEL.PRETRAIN_CHOICE)) do_train( cfg, model, train_loader, val_loader, optimizer, scheduler, # modify for using self trained model loss_func, num_query, start_epoch # add for using self trained model )
def train(cfg, local_rank):
    # prepare dataset
    tng_loader, val_loader, num_classes, num_query = get_dataloader(cfg)
    do_train(
        cfg,
        local_rank,
        tng_loader,
        val_loader,
        num_classes,
        num_query,
    )
def train(cfg):
    model = build_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = build_optimizer(cfg, model)
    scheduler = build_lr_scheduler(cfg, optimizer)

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR
    save_to_disk = cfg.SAVE_TO_DISK
    checkpointer = Checkpointer(model, optimizer, scheduler, output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader = build_data_loader(
        cfg,
        is_train=True,
        start_iter=arguments["iteration"],
    )

    test_period = cfg.SOLVER.TEST_PERIOD
    if test_period > 0:
        data_loader_val = build_data_loader(cfg, is_train=False, is_for_period=True)
    else:
        data_loader_val = None

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    do_train(
        cfg,
        model,
        data_loader,
        data_loader_val,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        test_period,
        arguments,
    )
    return model
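# Checkpointer above is project code. A minimal sketch of the contract this snippet relies
# on: save() serialises model/optimizer/scheduler state plus extra fields, and load()
# restores them and returns the leftover fields (e.g. {"iteration": N}) so training can
# resume. MinimalCheckpointer and its field names are assumptions, not the project's class.
import os
import torch

class MinimalCheckpointer:
    def __init__(self, model, optimizer=None, scheduler=None, save_dir="", save_to_disk=True):
        self.model = model
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.save_dir = save_dir
        self.save_to_disk = save_to_disk

    def save(self, name, **extra):
        if not (self.save_dir and self.save_to_disk):
            return
        data = {"model": self.model.state_dict(), **extra}
        if self.optimizer is not None:
            data["optimizer"] = self.optimizer.state_dict()
        if self.scheduler is not None:
            data["scheduler"] = self.scheduler.state_dict()
        torch.save(data, os.path.join(self.save_dir, "{}.pth".format(name)))

    def load(self, f=None):
        if not f or not os.path.isfile(f):
            return {}  # nothing to resume from
        data = torch.load(f, map_location="cpu")
        self.model.load_state_dict(data.pop("model"))
        if self.optimizer is not None and "optimizer" in data:
            self.optimizer.load_state_dict(data.pop("optimizer"))
        if self.scheduler is not None and "scheduler" in data:
            self.scheduler.load_state_dict(data.pop("scheduler"))
        return data  # leftover keys such as {"iteration": ...}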
def train(cfg, cfg_hr): # prepare dataset train_loader, val_loader, num_query, num_classes = make_data_loader(cfg) # prepare model model = build_model(cfg, cfg_hr, num_classes) model = nn.DataParallel(model) if cfg.MODEL.IF_WITH_CENTER == 'no': print('Train without center loss, the loss type is', cfg.MODEL.METRIC_LOSS_TYPE) optimizer = make_optimizer(cfg, model) print(cfg.SOLVER.MARGIN) scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD) loss_func = make_loss(cfg, num_classes) # modified by gu arguments = {} do_train( cfg, model, train_loader, val_loader, optimizer, scheduler, loss_func, num_query ) elif cfg.MODEL.IF_WITH_CENTER == 'yes': print('Train with center loss, the loss type is', cfg.MODEL.METRIC_LOSS_TYPE) loss_func, center_criterion = make_loss_with_center(cfg, num_classes) # modified by gu optimizer, optimizer_center = make_optimizer_with_center(cfg, model, center_criterion) scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD) arguments = {} do_train_with_center( cfg, model, center_criterion, train_loader, val_loader, optimizer, optimizer_center, scheduler, loss_func, num_query ) else: print("Unsupported value for cfg.MODEL.IF_WITH_CENTER {}, only support yes or no!\n".format(cfg.MODEL.IF_WITH_CENTER))
def train(cfg): # prepare dataset train_loader, val_loader, num_query, num_classes = make_data_loader(cfg) # prepare model model = build_model(cfg, num_classes) print('Train without center loss, the loss type is', cfg.MODEL.METRIC_LOSS_TYPE) optimizer = make_optimizer(cfg, model) loss_func = make_loss(cfg, num_classes) if cfg.MODEL.PRETRAIN_CHOICE == 'self': start_epoch = eval( cfg.MODEL.PRETRAIN_PATH.split('/')[-1].split('.')[0].split('_') [-1]) print('Start epoch:', start_epoch) path_to_optimizer = cfg.MODEL.PRETRAIN_PATH.replace( 'model', 'optimizer') print('Path to the checkpoint of optimizer:', path_to_optimizer) model.load_state_dict(torch.load(cfg.MODEL.PRETRAIN_PATH)) optimizer.load_state_dict(torch.load(path_to_optimizer)) if cfg.MODEL.DEVICE == "cuda": for state in optimizer.state.values(): for k, v in state.items(): if isinstance(v, torch.Tensor): state[k] = v.cuda() scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD, start_epoch) elif cfg.MODEL.PRETRAIN_CHOICE == 'imagenet': start_epoch = 0 scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD) else: start_epoch = 0 print('Only support pretrain_choice for imagenet and self, but got {}'. format(cfg.MODEL.PRETRAIN_CHOICE)) arguments = {} do_train(cfg, model, train_loader, val_loader, optimizer, scheduler, loss_func, num_query, start_epoch)
def train(cfg):
    model = build_model(cfg)
    data_rows_num = get_data_rows_num(cfg)
    k_fold = KFold(n_splits=10, shuffle=True, random_state=1)
    n_fold = 1
    for train_idx, val_idx in k_fold.split([i for i in range(1, data_rows_num)]):
        optimizer = make_optimizer(cfg, model)
        train_loader = make_data_loader(cfg, train_idx, is_train=True)
        val_loader = make_data_loader(cfg, val_idx, is_train=True)
        loss_functions = [bce_with_logits_loss, bce_with_logits_loss]
        do_train(cfg, model, train_loader, val_loader, optimizer, loss_functions, n_fold)
        n_fold += 1
def train(cfg):
    model = build_model(cfg)
    device = cfg.MODEL.DEVICE
    optimizer = make_optimizer(cfg, model)
    train_loader = make_data_loader(cfg, csv=cfg.DATASETS.TRAIN, is_train=True)
    do_train(
        cfg,
        model,
        train_loader,
        optimizer,
        losses=[F.mse_loss, F.nll_loss],
    )
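# do_train above receives a list of criteria (losses=[F.mse_loss, F.nll_loss]); how they are
# combined is internal to do_train. A hypothetical reduction such a trainer might apply,
# assuming one (prediction, target) pair per criterion and equal weights unless given:
def combine_losses(loss_fns, preds, targets, weights=None):
    weights = weights if weights is not None else [1.0] * len(loss_fns)
    total = 0.0
    for fn, w, pred, target in zip(loss_fns, weights, preds, targets):
        total = total + w * fn(pred, target)
    return total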
def train(cfg, saver):
    dataset_name = [cfg.DATASET.NAME]
    if cfg.JOINT.IF_ON:
        for name in cfg.JOINT.DATASET_NAME:
            dataset_name.append(name)
        train_loader, num_classes = make_train_data_loader_with_expand(cfg, dataset_name)
    else:
        train_loader, num_classes = make_train_data_loader(cfg, dataset_name[0])

    valid = make_multi_valid_data_loader(cfg, dataset_name)

    train_component = TrainComponent(cfg, num_classes)

    do_train(cfg, train_loader, valid, train_component, saver)
def train(cfg): # prepare dataset train_loader, val_loader, num_query, num_classes, dataset = make_data_loader(cfg) # prepare model model = build_model(cfg, num_classes) print('Train without center loss, the loss type is', cfg['MODEL.METRIC_LOSS_TYPE']) optimizer = make_optimizer(cfg, model) loss_func = make_loss(cfg, num_classes, model.in_planes) # modified by gu # Add for using self trained model if cfg['MODEL.PRETRAIN_CHOICE'] == 'continue': #start_epoch = eval(cfg['MODEL.PRETRAIN_PATH'].split('/')[-1].split('.')[0].split('_')[-1]) start_epoch = 0 print('Start epoch:', start_epoch) path_to_optimizer = cfg['MODEL.PRETRAIN_PATH'].replace('model', 'optimizer') print('Path to the checkpoint of optimizer:', path_to_optimizer) model.load_param(cfg['MODEL.PRETRAIN_PATH']) #optimizer = make_optimizer(cfg, model) scheduler = WarmupMultiStepLR(optimizer, cfg['SOLVER.STEPS'], cfg['SOLVER.GAMMA'], cfg['SOLVER.WARMUP_FACTOR'], cfg['SOLVER.WARMUP_ITERS'], cfg['SOLVER.WARMUP_METHOD']) elif cfg['MODEL.PRETRAIN_CHOICE'] == 'imagenet': start_epoch = 0 scheduler = WarmupMultiStepLR(optimizer, cfg['SOLVER.STEPS'], cfg['SOLVER.GAMMA'], cfg['SOLVER.WARMUP_FACTOR'], cfg['SOLVER.WARMUP_ITERS'], cfg['SOLVER.WARMUP_METHOD']) elif cfg['MODEL.PRETRAIN_CHOICE'] == 'self' or cfg['MODEL.PRETRAIN_CHOICE'] == 'self-no-head': start_epoch = 0 scheduler = WarmupMultiStepLR(optimizer, cfg['SOLVER.STEPS'], cfg['SOLVER.GAMMA'], cfg['SOLVER.WARMUP_FACTOR'], cfg['SOLVER.WARMUP_ITERS'], cfg['SOLVER.WARMUP_METHOD']) else: print('Only support pretrain_choice for imagenet and self, but got {}'.format(cfg['MODEL.PRETRAIN_CHOICE'])) do_train( cfg, model, train_loader, val_loader, optimizer, scheduler, # modify for using self trained model loss_func, num_query, start_epoch, # add for using self trained model dataset )
def train(cfg):
    # prepare dataset
    train_loader, val_loader, num_query, num_classes = make_data_loader(cfg)

    # prepare model
    model = build_model(cfg, num_classes)
    optimizer = make_optimizer(cfg, model)
    scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                  cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                  cfg.SOLVER.WARMUP_METHOD)
    loss_func = make_loss(cfg)

    arguments = {}

    do_train(cfg, model, train_loader, val_loader, optimizer, scheduler, loss_func, num_query)
def train(cfg, saver):
    dataset_name = [cfg.DATASET.NAME]
    if cfg.JOINT.IF_ON:
        for name in cfg.JOINT.DATASET_NAME:
            dataset_name.append(name)
        train_loader, num_classes = make_train_data_loader_with_expand(cfg, dataset_name)
    else:
        train_loader, num_classes = make_train_data_loader(cfg, dataset_name[0])

    valid_dict = make_multi_valid_data_loader(cfg, dataset_name)

    train_component = TrainComponent(cfg, num_classes)
    saver.checkpoint_params['model'] = train_component.model

    do_train(cfg, train_loader, valid_dict, train_component, saver)
def train(cfg):
    model = build_fcn_model(cfg)
    optimizer = make_optimizer(cfg, model)
    arguments = {}

    data_loader = make_data_loader(cfg, is_train=True)
    val_loader = make_data_loader(cfg, is_train=False)

    do_train(
        cfg,
        model,
        data_loader,
        val_loader,
        optimizer,
        cross_entropy2d,
    )
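# cross_entropy2d is passed as the criterion above but defined elsewhere in the project.
# A minimal sketch of the per-pixel cross-entropy it is commonly implemented as; the exact
# signature and the ignore_index value are assumptions.
import torch.nn.functional as F

def cross_entropy2d(logits, target, weight=None, ignore_index=255):
    # logits: (N, C, H, W) raw class scores; target: (N, H, W) integer label map
    if logits.shape[2:] != target.shape[1:]:
        # upsample the prediction if the network output is smaller than the label map
        logits = F.interpolate(logits, size=target.shape[1:], mode="bilinear", align_corners=False)
    return F.cross_entropy(logits, target, weight=weight, ignore_index=ignore_index)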
def cluster_train(self, train_loader, num_classes):
    tr_comp = TrainComponent(self.cfg, num_classes)
    load_weight(self.model, tr_comp.model)
    del self.model
    if self.device == 'cuda':
        torch.cuda.empty_cache()

    do_train(self.cfg,
             tr_comp.model,
             train_loader,
             self.val_loader,
             tr_comp.optimizer,
             tr_comp.scheduler,
             tr_comp.loss_function,
             self.num_query,
             self.saver,
             center_criterion=tr_comp.loss_center,
             optimizer_center=tr_comp.optimizer_center)
    self.model = tr_comp.model
def train(cfg):
    # prepare dataset: training loader, validation loader, query-set size, number of person-ID classes
    train_loader, val_loader, num_query, num_classes = get_data(cfg)

    # prepare model
    model = Baseline(num_classes, cfg.MODEL.LAST_STRIDE, cfg.MODEL.PRETRAIN_PATH,
                     cfg.MODEL.NAME, cfg.MODEL.PRETRAIN_CHOICE, cfg.MODEL.BREACH)

    print('Train with the loss type is', cfg.MODEL.METRIC_LOSS_TYPE)  # the loss function is ranked_loss
    optimizer = make_optimizer(cfg, model)
    loss_func = make_loss(cfg, num_classes)

    # Add for using self trained model
    if cfg.MODEL.PRETRAIN_CHOICE == 'self':
        start_epoch = eval(
            cfg.MODEL.PRETRAIN_PATH.split('/')[-1].split('.')[0].split('_')[-1])
        print('Start epoch:', start_epoch)
        path_to_optimizer = cfg.MODEL.PRETRAIN_PATH.replace('model', 'optimizer')
        print('Path to the checkpoint of optimizer:', path_to_optimizer)
        model.load_state_dict(torch.load(cfg.MODEL.PRETRAIN_PATH))
        optimizer.load_state_dict(torch.load(path_to_optimizer))
        scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                      cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                      cfg.SOLVER.WARMUP_METHOD, start_epoch)
    elif cfg.MODEL.PRETRAIN_CHOICE == 'imagenet':
        start_epoch = 0
        scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                      cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                      cfg.SOLVER.WARMUP_METHOD)
    else:
        print('Only support pretrain_choice for imagenet and self, but got {}'.format(
            cfg.MODEL.PRETRAIN_CHOICE))

    do_train(cfg, model, train_loader, val_loader, optimizer, scheduler, loss_func,
             num_query, start_epoch)
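# The 'self' branch above recovers the resume epoch by calling eval() on the last
# underscore-separated token of the checkpoint filename (e.g. 'resnet50_model_120.pth' -> 120).
# A hypothetical, safer equivalent of that parsing step:
import os

def parse_start_epoch(pretrain_path):
    stem = os.path.splitext(os.path.basename(pretrain_path))[0]
    return int(stem.split('_')[-1])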
def train(cfg):
    model = build_model(cfg).to(cfg.MODEL.DEVICE)
    device = cfg.MODEL.DEVICE
    optimizer = make_optimizer(cfg, model)
    scheduler = None
    arguments = {}

    train_loader = make_data_loader(cfg, is_train=True)
    val_loader = make_data_loader(cfg, is_train=False)

    do_train(
        cfg,
        model,
        train_loader,
        val_loader,
        optimizer,
        scheduler,
        F.mse_loss,
    )
def train(cfg): # prepare dataset data_bunch, test_labels, num_query = get_data_bunch(cfg) # prepare model model = build_model(cfg, data_bunch.c) if cfg.SOLVER.OPT == 'adam': opt_fns = partial(torch.optim.Adam) elif cfg.SOLVER.OPT == 'sgd': opt_fns = partial(torch.optim.SGD, momentum=0.9) else: raise NameError(f'optimizer {cfg.SOLVER.OPT} not support') def lr_multistep(start: float, end: float, pct: float): warmup_factor = 1 gamma = cfg.SOLVER.GAMMA milestones = [ 1.0 * s / cfg.SOLVER.MAX_EPOCHS for s in cfg.SOLVER.STEPS ] warmup_iter = 1.0 * cfg.SOLVER.WARMUP_ITERS / cfg.SOLVER.MAX_EPOCHS if pct < warmup_iter: alpha = pct / warmup_iter warmup_factor = cfg.SOLVER.WARMUP_FACTOR * (1 - alpha) + alpha return start * warmup_factor * gamma**bisect_right(milestones, pct) lr_sched = Scheduler(cfg.SOLVER.BASE_LR, cfg.SOLVER.MAX_EPOCHS, lr_multistep) loss_func = reidLoss(cfg.SOLVER.LOSSTYPE, cfg.SOLVER.MARGIN, data_bunch.c) do_train( cfg, model, data_bunch, test_labels, opt_fns, lr_sched, loss_func, num_query, )
def train(cfg): # prepare dataset train_loader, val_loader, num_query, num_classes = make_data_loader(cfg) # prepare model model = build_model(cfg, num_classes) if cfg.MODEL.IF_WITH_CENTER == 'no': print('Train without center loss, the loss type is', cfg.MODEL.METRIC_LOSS_TYPE) optimizer = make_optimizer(cfg, model) # scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, # cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD) loss_func = make_loss(cfg, num_classes) # modified by gu # Add for using self trained model if cfg.MODEL.PRETRAIN_CHOICE == 'self': start_epoch = eval( cfg.MODEL.PRETRAIN_PATH.split('/')[-1].split('.')[0].split('_') [-1]) print('Start epoch:', start_epoch) path_to_optimizer = cfg.MODEL.PRETRAIN_PATH.replace( 'model', 'optimizer') print('Path to the checkpoint of optimizer:', path_to_optimizer) model.load_state_dict(torch.load(cfg.MODEL.PRETRAIN_PATH)) optimizer.load_state_dict(torch.load(path_to_optimizer)) scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD, start_epoch) elif cfg.MODEL.PRETRAIN_CHOICE == 'imagenet': start_epoch = 0 scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD) else: print( 'Only support pretrain_choice for imagenet and self, but got {}' .format(cfg.MODEL.PRETRAIN_CHOICE)) arguments = {} do_train( cfg, model, train_loader, val_loader, optimizer, scheduler, # modify for using self trained model loss_func, num_query, start_epoch # add for using self trained model ) elif cfg.MODEL.IF_WITH_CENTER == 'yes': print('Train with center loss, the loss type is', cfg.MODEL.METRIC_LOSS_TYPE) loss_func, center_criterion = make_loss_with_center( cfg, num_classes) # modified by gu optimizer, optimizer_center = make_optimizer_with_center( cfg, model, center_criterion) # scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, # cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD) arguments = {} # Add for using self trained model if cfg.MODEL.PRETRAIN_CHOICE == 'self': start_epoch = eval( cfg.MODEL.PRETRAIN_PATH.split('/')[-1].split('.')[0].split('_') [-1]) print('Start epoch:', start_epoch) path_to_optimizer = cfg.MODEL.PRETRAIN_PATH.replace( 'model', 'optimizer') print('Path to the checkpoint of optimizer:', path_to_optimizer) path_to_center_param = cfg.MODEL.PRETRAIN_PATH.replace( 'model', 'center_param') print('Path to the checkpoint of center_param:', path_to_center_param) path_to_optimizer_center = cfg.MODEL.PRETRAIN_PATH.replace( 'model', 'optimizer_center') print('Path to the checkpoint of optimizer_center:', path_to_optimizer_center) model.load_state_dict(torch.load(cfg.MODEL.PRETRAIN_PATH)) optimizer.load_state_dict(torch.load(path_to_optimizer)) center_criterion.load_state_dict(torch.load(path_to_center_param)) optimizer_center.load_state_dict( torch.load(path_to_optimizer_center)) scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD, start_epoch) elif cfg.MODEL.PRETRAIN_CHOICE == 'imagenet': start_epoch = 0 scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD) else: print( 'Only support pretrain_choice for imagenet and self, but got {}' 
.format(cfg.MODEL.PRETRAIN_CHOICE)) do_train_with_center( cfg, model, center_criterion, train_loader, val_loader, optimizer, optimizer_center, scheduler, # modify for using self trained model loss_func, num_query, start_epoch # add for using self trained model ) else: print( "Unsupported value for cfg.MODEL.IF_WITH_CENTER {}, only support yes or no!\n" .format(cfg.MODEL.IF_WITH_CENTER))
def train(cfg):
    # prepare dataset
    train_loader, val_loader, num_query, num_classes, num_classes2, image_map_label2 = make_data_loader(cfg)

    # prepare model
    model = build_model(cfg, num_classes, num_classes2)

    if len(cfg.MODEL.PRETRAIN_PATH2) > 5:
        print('--- resume from ', cfg.MODEL.PRETRAIN_PATH2)
        if cfg.MODEL.ONCE_LOAD == 'yes':
            print('\n---ONCE_LOAD...\n')
            model.load_state_dict(
                torch.load(cfg.MODEL.PRETRAIN_PATH2,
                           map_location=lambda storage, loc: storage))
        else:
            functions.load_state_dict_distill(model, cfg.MODEL.PRETRAIN_PATH2,
                                              cfg.MODEL.ONLY_BASE, cfg.MODEL.WITHOUT_FC)
        print('**** Successfully load ', cfg.MODEL.PRETRAIN_PATH2)

        if cfg.MODEL.FREEZE_BASE:
            functions.freeze_global_model(model, False)

    if cfg.MODEL.IF_WITH_CENTER == 'no':
        print('Train without center loss, the loss type is', cfg.MODEL.METRIC_LOSS_TYPE)

        if cfg.SOLVER.MY_OPTIMIZER == "yes":
            print('---* my optimizer:', cfg.SOLVER.MY_OPTIMIZER_NAME)
            other_params = [p for n, p in model.named_parameters()
                            if not n.startswith('base')]
            optimizer = optim.SGD([{'params': model.base.parameters(), 'lr': cfg.SOLVER.LR / 10},
                                   {'params': other_params, 'lr': cfg.SOLVER.LR}],
                                  momentum=0.9, weight_decay=5e-4, nesterov=True)
        else:
            print('---* not my optimizer')
            optimizer = make_optimizer(cfg, model)

        loss_func = make_loss(cfg, num_classes)  # modified by gu

        # Add for using self trained model
        if cfg.MODEL.PRETRAIN_CHOICE == 'self':
            start_epoch = eval(
                cfg.MODEL.PRETRAIN_PATH.split('/')[-1].split('.')[0].split('_')[-1])
            print('Start epoch:', start_epoch)
            path_to_optimizer = cfg.MODEL.PRETRAIN_PATH.replace('model', 'optimizer')
            print('Path to the checkpoint of optimizer:', path_to_optimizer)
            model.load_state_dict(torch.load(cfg.MODEL.PRETRAIN_PATH))
            optimizer.load_state_dict(torch.load(path_to_optimizer))
            scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                          cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                          cfg.SOLVER.WARMUP_METHOD, start_epoch)
        elif cfg.MODEL.PRETRAIN_CHOICE == 'imagenet':
            start_epoch = 0
            if cfg.SOLVER.MY_SCHEDULER == "yes":
                print('cfg.SOLVER.MY_SCHEDULER_STEP:', cfg.SOLVER.MY_SCHEDULER_STEP)
                print('---* my scheduler: ', cfg.SOLVER.MY_SCHEDULER_NAME)
                if cfg.SOLVER.MY_SCHEDULER_NAME == 'SL':
                    scheduler = lr_scheduler.StepLR(optimizer,
                                                    step_size=cfg.SOLVER.MY_SCHEDULER_STEP[0],
                                                    gamma=0.1)
                elif cfg.SOLVER.MY_SCHEDULER_NAME == 'MSL':
                    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                                         cfg.SOLVER.MY_SCHEDULER_STEP,
                                                         gamma=0.1)
                else:
                    print(cfg.SOLVER.MY_SCHEDULER_NAME, ' not found!')
                    exit(0)
            else:
                print('---* not my scheduler')
                scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                              cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                              cfg.SOLVER.WARMUP_METHOD)
        else:
            print('Only support pretrain_choice for imagenet and self, but got {}'.format(
                cfg.MODEL.PRETRAIN_CHOICE))

        arguments = {}

        print('************ do_train')
        do_train(
            cfg,
            model,
            train_loader,
            val_loader,
            optimizer,
            scheduler,        # modify for using self trained model
            loss_func,
            num_query,
            start_epoch,      # add for using self trained model
            image_map_label2,
            num_classes2)
    else:
        print("Unsupported value for cfg.MODEL.IF_WITH_CENTER {}, only support yes or no!\n".format(
            cfg.MODEL.IF_WITH_CENTER))
def train(cfg): # prepare dataset # train_loader, val_loader, num_query, num_classes = make_data_loader(cfg) train_loader, val_loader, num_query, num_classes = make_data_loader_train( cfg) # prepare model if 'prw' in cfg.DATASETS.NAMES: num_classes = 483 elif "market1501" in cfg.DATASETS.NAMES: num_classes = 751 elif "duke" in cfg.DATASETS.NAMES: num_classes = 702 elif "cuhk" in cfg.DATASETS.NAMES: num_classes = 5532 model = build_model(cfg, num_classes) if cfg.MODEL.IF_WITH_CENTER == 'no': print('Train without center loss, the loss type is', cfg.MODEL.METRIC_LOSS_TYPE) optimizer = make_optimizer(cfg, model) # scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, # cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD) loss_func = make_loss(cfg, num_classes) # modified by gu # Add for using self trained model if cfg.MODEL.PRETRAIN_CHOICE == 'self': # start_epoch = eval(cfg.MODEL.PRETRAIN_PATH.split('/')[-1].split('.')[0].split('_')[-1]) start_epoch = 0 print('Start epoch:', start_epoch) path_to_optimizer = cfg.MODEL.PRETRAIN_PATH.replace( 'model', 'optimizer') print('Path to the checkpoint of optimizer:', path_to_optimizer) pretrained_dic = torch.load(cfg.MODEL.PRETRAIN_PATH).state_dict() model_dict = model.state_dict() model_dict.update(pretrained_dic) model.load_state_dict(model_dict) if cfg.MODEL.WHOLE_MODEL_TRAIN == "no": for name, value in model.named_parameters(): if "Query_Guided_Attention" not in name and "non_local" not in name and "classifier_attention" not in name: value.requires_grad = False optimizer = make_optimizer(cfg, model) # else: # cfg.SOLVER.BASE_LR = 0.0000035 # optimizer.load_state_dict(torch.load(path_to_optimizer)) # ##### # for state in optimizer.state.values(): # for k, v in state.items(): # if isinstance(v, torch.Tensor): # state[k] = v.cuda() # ##### scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD) elif cfg.MODEL.PRETRAIN_CHOICE == 'imagenet': start_epoch = 0 scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD) else: print( 'Only support pretrain_choice for imagenet and self, but got {}' .format(cfg.MODEL.PRETRAIN_CHOICE)) arguments = {} do_train( cfg, model, train_loader, val_loader, optimizer, scheduler, # modify for using self trained model loss_func, num_query, start_epoch # add for using self trained model ) elif cfg.MODEL.IF_WITH_CENTER == 'yes': print('Train with center loss, the loss type is', cfg.MODEL.METRIC_LOSS_TYPE) loss_func, center_criterion = make_loss_with_center( cfg, num_classes) # modified by gu optimizer, optimizer_center = make_optimizer_with_center( cfg, model, center_criterion) # scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, # cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD) arguments = {} # Add for using self trained model if cfg.MODEL.PRETRAIN_CHOICE == 'self': start_epoch = eval( cfg.MODEL.PRETRAIN_PATH.split('/')[-1].split('.')[0].split('_') [-1]) print('Start epoch:', start_epoch) path_to_optimizer = cfg.MODEL.PRETRAIN_PATH.replace( 'model', 'optimizer') print('Path to the checkpoint of optimizer:', path_to_optimizer) path_to_center_param = cfg.MODEL.PRETRAIN_PATH.replace( 'model', 'center_param') print('Path to the checkpoint of center_param:', path_to_center_param) path_to_optimizer_center = cfg.MODEL.PRETRAIN_PATH.replace( 
'model', 'optimizer_center') print('Path to the checkpoint of optimizer_center:', path_to_optimizer_center) model.load_state_dict(torch.load(cfg.MODEL.PRETRAIN_PATH)) optimizer.load_state_dict(torch.load(path_to_optimizer)) ##### for state in optimizer.state.values(): for k, v in state.items(): if isinstance(v, torch.Tensor): state[k] = v.cuda() ##### center_criterion.load_state_dict(torch.load(path_to_center_param)) optimizer_center.load_state_dict( torch.load(path_to_optimizer_center)) scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD, start_epoch) elif cfg.MODEL.PRETRAIN_CHOICE == 'imagenet': start_epoch = 0 scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD) else: print( 'Only support pretrain_choice for imagenet and self, but got {}' .format(cfg.MODEL.PRETRAIN_CHOICE)) do_train_with_center( cfg, model, center_criterion, train_loader, val_loader, optimizer, optimizer_center, scheduler, # modify for using self trained model loss_func, num_query, start_epoch # add for using self trained model ) else: print( "Unsupported value for cfg.MODEL.IF_WITH_CENTER {}, only support yes or no!\n" .format(cfg.MODEL.IF_WITH_CENTER))
if not os.path.exists(args.tensorboard_logs_path):
    os.mkdir(args.tensorboard_logs_path)
# else:
#     clean_dir(args.tensorboard_logs_path)

# dataloader
train_loader, test_loader = make_data_loader(
    args.dataset, args.datapath, preprocess_all[cfg['preprocess']],
    args.num_workers, args.batch_size, cfg['ratio'])

logger = setup_logger('face_recognition', args.output_dir)
logger.info('pid {}, {}'.format(threading.currentThread().ident, args))
logger.info('pid {} running with config {}'.format(
    threading.currentThread().ident, cfg))

# model
criterion = make_creterion(cfg, args)
model = make_model(cfg['model']['name'], cfg['model']['layers'],
                   cfg['num_embeddings'], cfg['num_class'])
optimizer = optim.SGD(model.parameters(),
                      lr=args.lr,
                      momentum=args.momentum,
                      weight_decay=args.weight_decay)

if args.resume is not None:
    model, optimizer, cfg['start_iter'] = load_model(model, optimizer, args)
# else:
#     model.backbone.apply(weights_init)

do_train(model, criterion, optimizer, train_loader, test_loader, args, cfg)
def train(cfg, local_rank, distributed): logger = logging.getLogger(cfg.NAME) # build model model = build_model(cfg) device = torch.device(cfg.MODEL.DEVICE) model.to(device) # build solver optimizer = make_optimizer(cfg, model) scheduler = make_lr_scheduler(cfg, optimizer) if distributed: model = DistributedDataParallel( model, device_ids=[local_rank], output_device=local_rank, # this should be removed if we update BatchNorm stats broadcast_buffers=False, ) arguments = {"iteration": 0} save_dir = os.path.join(cfg.CHECKPOINTER.DIR, cfg.CHECKPOINTER.NAME) save_to_disk = get_rank() == 0 checkpointer = Checkpointer( model=model, optimizer=optimizer, scheduler=scheduler, save_dir=save_dir, save_to_disk=save_to_disk, logger=logger ) extra_checkpoint_data = checkpointer.load(cfg.CHECKPOINTER.LOAD_NAME) arguments.update(extra_checkpoint_data) data_loader = make_data_loader( cfg, is_train=True, is_distributed=distributed, start_iter=arguments["iteration"], ) evaluate = cfg.SOLVER.EVALUATE if evaluate: synchronize() data_loader_val = make_data_loader(cfg, is_train=False, is_distributed=distributed, is_for_period=True) synchronize() else: data_loader_val = None save_to_disk = get_rank() == 0 if cfg.SUMMARY_WRITER and save_to_disk: save_dir = os.path.join(cfg.CHECKPOINTER.DIR, cfg.CHECKPOINTER.NAME) summary_writer = make_summary_writer(cfg.SUMMARY_WRITER, save_dir, model_name=cfg.MODEL.NAME) else: summary_writer = None do_train( cfg, model, data_loader, data_loader_val, optimizer, scheduler, checkpointer, device, arguments, summary_writer ) return model
def main():
    parser = argparse.ArgumentParser(description="Baidu URFC")
    parser.add_argument("-p", dest="root_path", help="path to data")
    parser.add_argument("-o", dest="output_dir", help="output dir", default="outputs")
    parser.add_argument("-bs", dest="batch_size", default=64, type=int)
    parser.add_argument("-n", dest="nepochs", default=30, type=int)
    parser.add_argument("-cp", dest="checkpoint_period", default=1, type=int)
    parser.add_argument("-lr", dest="lr", default=0.01, type=float)
    parser.add_argument("-ckt", dest="ckt")
    parser.add_argument("-name", dest="name", default="URFC")
    parser.add_argument("-s", type=int, dest="split", default=0)
    args = parser.parse_args()

    output_dir = os.path.join(args.output_dir, args.name)
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    logger = setupLogger(args.name, output_dir, filename=args.name + "_log.txt")
    logger.info(args)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = MultiModalNet("se_resnext101_32x4d", "dpn26", 0.5, num_classes=9, pretrained=True)
    # model = MultiModalNet_MS("se_resnext101_32x4d", "dpn26", 0.5, num_classes=9, pretrained=True)
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=1e-4)
    criterion = torch.nn.CrossEntropyLoss().to(device)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
    # if torch.cuda.device_count() > 1:
    #     model = torch.nn.DataParallel(model)
    model.to(device)

    train_files = pd.read_csv("train.csv")
    test_files = pd.read_csv("test.csv")
    # random_state only takes effect when shuffle=True
    kf = KFold(n_splits=10, shuffle=True, random_state=2050)
    splits = []
    for train_list, test_list in kf.split(train_files):
        splits.append((train_list, test_list))
    val_files = train_files.drop(splits[args.split][0])
    train_files = train_files.drop(splits[args.split][1])
    # train_datalist, val_datalist = train_test_split(all_files, test_size=0.1, random_state=2050)

    train_img = os.path.join(args.root_path, "train")
    test_img = os.path.join(args.root_path, "test")
    train_visit = os.path.join(args.root_path, "npy", "train_visit")
    test_visit = os.path.join(args.root_path, "npy", "test_visit")

    trndatasets = MMDataset(train_files, train_img, train_visit, mode="train")
    trndata_loader = DataLoader(trndatasets, batch_size=args.batch_size, shuffle=True,
                                pin_memory=True, num_workers=1)
    valdatasets = MMDataset(val_files, train_img, train_visit, augment=False, mode="val")
    valdata_loader = DataLoader(valdatasets, batch_size=args.batch_size, shuffle=False,
                                pin_memory=True, num_workers=1)
    testdatasets = MMDataset(test_files, test_img, test_visit, augment=False, mode="test")
    test_loader = DataLoader(testdatasets, batch_size=args.batch_size, shuffle=False,
                             pin_memory=True, num_workers=1)

    checkpointer = Checkpointer(
        model,
        optimizer,
        criterion,
        scheduler,
        output_dir,
    )

    do_train(args.name, model, device, trndata_loader, valdata_loader, optimizer,
             criterion, scheduler, args.nepochs, args.checkpoint_period, checkpointer)

    checkpoint = torch.load(os.path.join(output_dir, "best_model.pth"),
                            map_location=torch.device("cpu"))
    model.load_state_dict(checkpoint["model"])
    logger.info("load model from " + os.path.join(output_dir, "best_model.pth"))
    test_submit(model, test_loader, device, output_dir)
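# ReduceLROnPlateau (created above) is stepped with a monitored validation metric rather than
# once per iteration; the trainer is expected to call scheduler.step(val_metric) after each
# validation pass. A self-contained illustration of that contract with dummy values:
import torch

_params = [torch.nn.Parameter(torch.zeros(1))]
_opt = torch.optim.SGD(_params, lr=0.01)
_plateau = torch.optim.lr_scheduler.ReduceLROnPlateau(_opt, mode="min", factor=0.1, patience=2)
for _val_loss in [1.0, 0.9, 0.9, 0.9, 0.9]:
    _opt.step()
    _plateau.step(_val_loss)  # LR is reduced once the metric stops improving for `patience` epochs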
def train(cfg):
    # prepare dataset
    train_loader, val_loader, num_query, num_classes = make_data_loader(cfg)

    # prepare model
    model = build_model(cfg, num_classes)

    print('Train with the loss type is', cfg.MODEL.METRIC_LOSS_TYPE)
    optimizer = make_optimizer(cfg, model)
    # scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR,
    #                               cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD)
    loss_func = make_loss(cfg, num_classes)  # modified by gu

    # Add for using self trained model
    if cfg.MODEL.PRETRAIN_CHOICE == 'self':
        start_epoch = 120
        print('Start epoch:', start_epoch)
        model.load_state_dict(
            torch.load('work_space_tri/se_resnet101_ibn_a_model_120.pth'))
        optimizer.load_state_dict(
            torch.load('work_space_tri/se_resnet101_ibn_a_optimizer_120.pth'))
        scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                      cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                      cfg.SOLVER.WARMUP_METHOD, start_epoch)
    elif cfg.MODEL.PRETRAIN_CHOICE == 'imagenet':
        start_epoch = 0
        scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                      cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                      cfg.SOLVER.WARMUP_METHOD)
    else:
        print('Only support pretrain_choice for imagenet and self, but got {}'.format(
            cfg.MODEL.PRETRAIN_CHOICE))

    arguments = {}

    do_train(
        cfg,
        model,
        train_loader,
        val_loader,
        optimizer,
        scheduler,       # modify for using self trained model
        loss_func,
        num_query,
        start_epoch      # add for using self trained model
    )
writer = SummaryWriter(log_dir=os.path.join(output_dir, 'tensorboard'))
logger = setup_logger("rendering_model", output_dir, 0)
logger.info("Running with config:\n{}".format(cfg))
shutil.copy('../configs/config.yml', os.path.join(cfg.OUTPUT_DIR, 'configs.yml'))

train_loader, dataset = make_data_loader(cfg, is_train=True, is_center=cfg.DATASETS.CENTER)
model = build_model(cfg)

optimizer = make_optimizer(cfg, model)
scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                              cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                              cfg.SOLVER.WARMUP_METHOD)
# scheduler = build_scheduler(optimizer, cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.START_ITERS,
#                             cfg.SOLVER.END_ITERS, cfg.SOLVER.LR_SCALE)
loss_fn = make_loss(cfg)

model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

do_train(cfg, model, train_loader, None, optimizer, scheduler, loss_fn, writer)