def main(args):
    path = os.path.join(os.getcwd(), 'soft_label', 'soft_label_resnet50.txt')
    if not os.path.isfile(path):
        print('soft label file does not exist')
    train_loader = getTrainLoader(args, path)
    _, val_loader, num_query, num_classes, train_size = make_data_loader(args)
    # train_loader, val_loader, num_query, num_classes, train_size = make_data_loader(args)
    model = build_model(args, num_classes)
    optimizer = make_optimizer(args, model)
    scheduler = WarmupMultiStepLR(optimizer, [30, 55], 0.1, 0.01, 5, "linear")
    loss_func = make_loss(args)
    model.to(device)

    for epoch in range(args.Epochs):
        model.train()
        running_loss = 0.0
        running_klloss = 0.0
        running_softloss = 0.0
        running_corrects = 0.0
        for index, data in enumerate(tqdm(train_loader)):
            img, target, soft_target = data
            img = img.cuda()
            target = target.cuda()
            soft_target = soft_target.cuda()
            score, _ = model(img)
            preds = torch.max(score.data, 1)[1]
            loss, klloss, softloss = loss_func(score, target, soft_target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            running_klloss += klloss.item()
            running_softloss += softloss.item()
            running_corrects += float(torch.sum(preds == target.data))
        scheduler.step()
        epoch_loss = running_loss / train_size
        epoch_klloss = running_klloss / train_size
        epoch_softloss = running_softloss / train_size
        epoch_acc = running_corrects / train_size
        print("Epoch {} Loss : {:.4f} KLLoss:{:.8f} SoftLoss:{:.4f} Acc:{:.4f}"
              .format(epoch, epoch_loss, epoch_klloss, epoch_softloss, epoch_acc))
        if (epoch + 1) % args.n_save == 0:
            evaluator = Evaluator(model, val_loader, num_query)
            cmc, mAP = evaluator.run()
            print('---------------------------')
            print("CMC Curve:")
            for r in [1, 5, 10]:
                print("Rank-{} : {:.1%}".format(r, cmc[r - 1]))
            print("mAP : {:.1%}".format(mAP))
            print('---------------------------')
            save_model(args, model, optimizer, epoch)
def train(cfg):
    # prepare dataset
    train_loader, val_loader, num_query, num_classes = make_data_loader(cfg)

    # prepare model
    model = build_model(cfg, num_classes)
    if cfg.SOLVER.FINETUNE:
        model.load_state_dict(torch.load(cfg.TEST.WEIGHT).module.state_dict())
    model = nn.DataParallel(model)

    optimizer = make_optimizer(cfg, model)
    scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                  cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                  cfg.SOLVER.WARMUP_METHOD)
    # scheduler = WarmupStepLR(optimizer, 3, 9, cfg.SOLVER.WARMUP_FACTOR,
    #                          cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD)

    loss_func = make_loss(cfg)

    arguments = {}

    do_train(
        cfg,
        model,
        train_loader,
        val_loader,
        optimizer,
        scheduler,
        loss_func,
        num_query
    )
def train(cfg):
    model = build_model(cfg)
    # model.fix_bn()
    model = nn.DataParallel(model)
    torch.backends.cudnn.benchmark = True

    optimizer = make_optimizer(cfg, model)
    criterion = make_criterion(cfg)
    scheduler = make_lr_scheduler(cfg, optimizer)
    metrics = make_metrics(cfg)

    train_loader = make_dataloader(cfg, is_train=True)
    val_loader = make_dataloader(cfg, is_train=False)

    cfg.TOOLS.image_n = 3
    # image_3_dataloader = make_inference_dataloader(cfg=cfg)
    image_3_dataloader = None
    cfg.TOOLS.image_n = 4
    # image_4_dataloader = make_inference_dataloader(cfg=cfg)
    image_4_dataloader = None

    do_train(cfg,
             model=model,
             train_loader=train_loader,
             val_loader=val_loader,
             optimizer=optimizer,
             scheduler=scheduler,
             loss_fn=criterion,
             metrics=metrics,
             image_3_dataloader=image_3_dataloader,
             image_4_dataloader=image_4_dataloader)
def train(cfg, logger):
    model = build_model(cfg)
    device = cfg.MODEL.DEVICE

    optimizer, lr_schedule = make_optimizer(cfg, model)
    metric_fc = None
    loss_fn = get_loss_fn(cfg, logger)
    logger.info("----------------------------------------------")

    train_loader = make_data_loader(cfg, is_train=True)
    val_loader = make_data_loader(cfg, is_train=False)
    loss_fn2 = torch.nn.MSELoss()

    do_train(
        cfg,
        model,
        metric_fc,
        train_loader,
        val_loader,
        optimizer,
        lr_schedule,
        loss_fn,
        loss_fn2,
        logger,
    )
def __init__(self, cfg, num_classes):
    self.device = cfg.MODEL.DEVICE
    self.model = build_model(cfg, num_classes)
    self.loss = Loss(cfg, num_classes, self.model.in_planes)
    self.optimizer = make_optimizer(cfg, self.model)
    self.scheduler = WarmupMultiStepLR(self.optimizer, cfg.WARMUP.STEPS, cfg.WARMUP.GAMMA,
                                       cfg.WARMUP.FACTOR, cfg.WARMUP.MAX_EPOCHS, cfg.WARMUP.METHOD)

    if cfg.APEX.IF_ON:
        logger.info("Using apex")
        try:
            import apex
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to run this example.")
        assert torch.backends.cudnn.enabled, "Amp requires cudnn backend to be enabled."
        # if cfg.APEX.IF_SYNC_BN:
        #     logger.info("Using apex synced BN")
        #     self.module = apex.parallel.convert_syncbn_model(self.module)

    if self.device == 'cuda':  # 'is' compared identity, not string equality
        self.model = self.model.cuda()
        if cfg.APEX.IF_ON:
            from apex import amp
            self.model, self.optimizer = amp.initialize(self.model,
                                                        self.optimizer,
                                                        opt_level=cfg.APEX.OPT_LEVEL,
                                                        keep_batchnorm_fp32=None if cfg.APEX.OPT_LEVEL == 'O1' else True,
                                                        loss_scale=cfg.APEX.LOSS_SCALE[0])
def main():
    output_dir = cfg.MODEL.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    logger = setup_logger("tracker", output_dir, 0)
    logger.info("Running with config:\n{}".format(cfg))

    torch.backends.cudnn.benchmark = True

    train_loader, val_loader = make_data_loader(cfg)
    model = build_model(cfg)

    optimizer = make_optimizer(cfg, model)
    scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                  cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                  cfg.SOLVER.WARMUP_METHOD)
    loss_func = make_loss(cfg)

    do_train(
        cfg,
        model,
        train_loader,
        val_loader,
        optimizer,
        scheduler,
        loss_func
    )
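# Every snippet in this collection calls a make_optimizer(cfg, model) factory that is never
# shown; some variants return extra values (a scheduler, fixed-lr indices, a center-loss
# optimizer). Below is a minimal sketch of the simplest single-return variant, in the style
# common to these ReID baselines. The cfg keys (cfg.SOLVER.OPTIMIZER_NAME, BASE_LR,
# WEIGHT_DECAY, BIAS_LR_FACTOR, WEIGHT_DECAY_BIAS, MOMENTUM) and the name
# make_optimizer_sketch are assumptions for illustration, not the authors' exact code.
import torch


def make_optimizer_sketch(cfg, model):
    """Build per-parameter groups (bias terms get their own lr / weight decay) and return an optimizer."""
    params = []
    for key, value in model.named_parameters():
        if not value.requires_grad:
            continue
        lr = cfg.SOLVER.BASE_LR
        weight_decay = cfg.SOLVER.WEIGHT_DECAY
        if "bias" in key:
            lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR
            weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS
        params.append({"params": [value], "lr": lr, "weight_decay": weight_decay})
    if cfg.SOLVER.OPTIMIZER_NAME == 'SGD':
        # per-group lr is already set above, so no global lr argument is needed
        return torch.optim.SGD(params, momentum=cfg.SOLVER.MOMENTUM)
    return getattr(torch.optim, cfg.SOLVER.OPTIMIZER_NAME)(params)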
def train(train_loader, num_classes):
    parser = argparse.ArgumentParser(description="ReID Baseline Training")
    parser.add_argument("--config_file", default="", help="path to config file", type=str)
    parser.add_argument("opts", help="Modify config options using the command-line",
                        default=None, nargs=argparse.REMAINDER)
    args = parser.parse_args()

    if args.config_file != "":
        cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    set_seed(cfg.SOLVER.SEED)

    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    logger = setup_logger("reid_baseline", output_dir, if_train=True)
    logger.info("Saving model in the path :{}".format(cfg.OUTPUT_DIR))
    logger.info(args)

    if args.config_file != "":
        logger.info("Loaded configuration file {}".format(args.config_file))
        with open(args.config_file, 'r') as cf:
            config_str = "\n" + cf.read()
            logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    os.environ['CUDA_VISIBLE_DEVICES'] = cfg.MODEL.DEVICE_ID

    if cfg.MODEL.PRETRAIN_CHOICE == 'finetune':
        model = make_model(cfg, num_class=num_classes)
        model.load_param_finetune(cfg.MODEL.PRETRAIN_PATH)
        print('Loading pretrained model for finetuning......')
    else:
        model = make_model(cfg, num_class=num_classes)

    loss_func = make_loss(cfg, num_classes=num_classes)
    optimizer = make_optimizer(cfg, model)

    scheduler = WarmupCosineAnnealingLR(optimizer, cfg.SOLVER.MAX_EPOCHS, cfg.SOLVER.DELAY_ITERS,
                                        cfg.SOLVER.ETA_MIN_LR, cfg.SOLVER.WARMUP_FACTOR,
                                        cfg.SOLVER.WARMUP_EPOCHS, cfg.SOLVER.WARMUP_METHOD)
    logger.info("use WarmupCosineAnnealingLR, delay_step:{}".format(cfg.SOLVER.DELAY_ITERS))

    do_train(
        cfg,
        model,
        train_loader,
        optimizer,
        scheduler,  # modify for using self trained model
        loss_func
    )
def train(cfg):
    logger = setup_logger("reid_baseline", cfg.OUTPUT_DIR)
    logger.info("Running with config:\n{}".format(cfg))

    # prepare camstyle dataset
    train_loader, train_camstyle_loader, val_loader, num_query, num_classes = make_camstyle_data_loader(cfg)
    num_classes.append(-1)

    # prepare model
    model = build_model(cfg, num_classes)
    optimizer, _ = make_optimizer(cfg, model)
    loss_fn = make_loss(cfg, num_classes)

    # Add for using self trained model
    if cfg.MODEL.PRETRAIN_CHOICE == 'resume':
        start_epoch = eval(cfg.MODEL.PRETRAIN_PATH.split('/')[-1].split('.')[0].split('_')[-1])
        logger.info('Start epoch:%d' % start_epoch)
        path_to_optimizer = cfg.MODEL.PRETRAIN_PATH.replace('model', 'optimizer')
        logger.info('Path to the checkpoint of optimizer:%s' % path_to_optimizer)
        model.load_state_dict(torch.load(cfg.MODEL.PRETRAIN_PATH))
        optimizer.load_state_dict(torch.load(path_to_optimizer))
        scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                      cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                      cfg.SOLVER.WARMUP_METHOD, start_epoch)
    elif cfg.MODEL.PRETRAIN_CHOICE == 'self' or cfg.MODEL.PRETRAIN_CHOICE == 'imagenet':
        start_epoch = 0
        model.load_param(cfg.MODEL.PRETRAIN_PATH, cfg.MODEL.PRETRAIN_CHOICE)
        scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                      cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                      cfg.SOLVER.WARMUP_METHOD)
    else:
        logger.info('Only support pretrain_choice for imagenet and self, but got {}'.format(
            cfg.MODEL.PRETRAIN_CHOICE))

    do_train(
        cfg,
        model,
        train_loader,
        val_loader,
        optimizer,
        scheduler,  # modify for using self trained model
        loss_fn,
        num_query,
        start_epoch,  # add for using self trained model
        0,
        train_camstyle_loader)
def train(cfg):
    # prepare dataset
    train_loader, val_loader, num_query, num_classes, clustering_loader = make_data_loader(cfg)

    # prepare model
    model = build_model(cfg, num_classes)

    if cfg.MODEL.IF_WITH_CENTER == 'on':
        loss_func, center_criterion_part, center_criterion_global, center_criterion_fore = make_loss_with_center(
            cfg, num_classes)
        optimizer, optimizer_center = make_optimizer_with_center(
            cfg, model, center_criterion_part, center_criterion_global, center_criterion_fore)
    else:
        loss_func = make_loss(cfg, num_classes)
        optimizer = make_optimizer(cfg, model)

    # Add for using self trained model
    if cfg.MODEL.PRETRAIN_CHOICE == 'imagenet':
        start_epoch = 0
        scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                      cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                      cfg.SOLVER.WARMUP_METHOD)
    else:
        print('Only support pretrain_choice for imagenet, but got {}'.format(cfg.MODEL.PRETRAIN_CHOICE))

    if cfg.MODEL.IF_WITH_CENTER == 'on':
        do_train_with_center(
            cfg,
            model,
            center_criterion_part,
            center_criterion_global,
            center_criterion_fore,
            train_loader,
            val_loader,
            optimizer,
            optimizer_center,
            scheduler,  # modify for using self trained model
            loss_func,
            num_query,
            start_epoch,  # add for using self trained model
            clustering_loader)
    else:
        do_train(
            cfg,
            model,
            train_loader,
            val_loader,
            optimizer,
            scheduler,  # modify for using self trained model
            loss_func,
            num_query,
            start_epoch,  # add for using self trained model
            clustering_loader)
def get_autoencoder_component(self, cfg):
    self.device = cfg.MODEL.DEVICE
    self.model = AutoEncoder(cfg.EBLL.IN_PLANES, cfg.EBLL.CODE_SIZE)
    self.loss = AutoEncoderLoss(cfg)
    copy_cfg = copy.deepcopy(cfg)
    copy_cfg['OPTIMIZER']['BASE_LR'] = copy_cfg.EBLL.OPTIMIZER_BASE_LR
    copy_cfg['OPTIMIZER']['NAME'] = 'SGD'
    self.optimizer = make_optimizer(copy_cfg, self.model)
    self.scheduler = ExponentialLR(self.optimizer, gamma=0.9)
def train(cfg, local_rank, distributed, logger=None, tblogger=None, transfer_weight=False, change_lr=False):
    device = torch.device('cuda')

    # create model
    logger.info('Creating model "{}"'.format(cfg.MODEL.ARCHITECTURE))
    model = build_model(cfg).to(device)
    criterion = torch.nn.CrossEntropyLoss(ignore_index=255).to(device)
    optimizer = make_optimizer(cfg, model)
    # model, optimizer = apex.amp.initialize(model, optimizer, opt_level='O2')
    scheduler = make_lr_scheduler(cfg, optimizer)
    if distributed:
        # model = apex.parallel.DistributedDataParallel(model)
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            broadcast_buffers=True,
        )
    save_to_disk = get_rank() == 0

    # checkpoint
    arguments = {}
    arguments['iteration'] = 0
    arguments['best_iou'] = 0
    checkpointer = Checkpointer(model, optimizer, scheduler, cfg.LOGS.DIR, save_to_disk, logger)
    extra_checkpoint_data = checkpointer.load(f=cfg.MODEL.WEIGHT,
                                              model_weight_only=transfer_weight,
                                              change_scheduler=change_lr)
    arguments.update(extra_checkpoint_data)

    # data_loader
    logger.info('Loading dataset "{}"'.format(cfg.DATASETS.TRAIN))
    data_loader = make_data_loader(cfg, 'train', distributed)
    data_loader_val = make_data_loader(cfg, 'val', distributed)

    do_train(cfg,
             model=model,
             data_loader=data_loader,
             optimizer=optimizer,
             scheduler=scheduler,
             criterion=criterion,
             checkpointer=checkpointer,
             device=device,
             arguments=arguments,
             tblogger=tblogger,
             data_loader_val=data_loader_val,
             distributed=distributed)
def train(cfg):
    # prepare dataset
    train_loader, val_loader, num_query, num_classes = make_data_loader(cfg)

    # prepare model
    model = build_model(cfg, num_classes)
    total = sum([param.nelement() for param in model.parameters()])
    print("Number of parameters: %.2fM" % (total / 1e6))

    if cfg.MODEL.METRIC_LOSS_TYPE == 'triplet':
        print('Train without center loss, the loss type is', cfg.MODEL.METRIC_LOSS_TYPE)
        optimizer = make_optimizer(cfg, model)
        loss_func = make_loss(cfg, num_classes)

        # Add for using self trained model
        if cfg.MODEL.PRETRAIN_CHOICE == 'self':
            start_epoch = eval(cfg.MODEL.PRETRAIN_PATH.split('/')[-1].split('.')[0].split('_')[-1])
            print('Start epoch:', start_epoch)
            path_to_optimizer = cfg.MODEL.PRETRAIN_PATH.replace('model', 'optimizer')
            print('Path to the checkpoint of optimizer:', path_to_optimizer)
            model.load_state_dict(torch.load(cfg.MODEL.PRETRAIN_PATH))
            optimizer.load_state_dict(torch.load(path_to_optimizer))
            scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                          cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                          cfg.SOLVER.WARMUP_METHOD, start_epoch)
        elif cfg.MODEL.PRETRAIN_CHOICE == 'imagenet':
            start_epoch = 0
            scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                          cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                          cfg.SOLVER.WARMUP_METHOD)
        else:
            print('Only support pretrain_choice for imagenet and self, but got {}'.format(
                cfg.MODEL.PRETRAIN_CHOICE))

        do_train(
            cfg,
            model,
            train_loader,
            val_loader,
            optimizer,
            scheduler,  # modify for using self trained model
            loss_func,
            num_query,
            start_epoch  # add for using self trained model
        )
def get_component(self, cfg, num_classes):
    self.device = cfg.MODEL.DEVICE
    self.model = build_model(cfg, num_classes)
    self.loss = Loss(cfg, num_classes, self.model.in_planes)
    self.optimizer = make_optimizer(cfg, self.model)
    self.scheduler = WarmupMultiStepLR(self.optimizer, cfg.WARMUP.STEPS, cfg.WARMUP.GAMMA,
                                       cfg.WARMUP.FACTOR, cfg.WARMUP.MAX_EPOCHS, cfg.WARMUP.METHOD)
def train(cfg):
    model = build_model(cfg)
    device = cfg.MODEL.DEVICE

    optimizer = make_optimizer(cfg, model)
    scheduler = None

    arguments = {}

    train_loader = make_data_loader(cfg, is_train=True)
    val_loader = make_data_loader(cfg, is_train=False)

    do_train(cfg, model, train_loader, val_loader, optimizer, F.cross_entropy)
def main(args):
    sys.stdout = Logger(
        os.path.join(args.log_path, args.log_description,
                     'log' + time.strftime(".%m_%d_%H:%M:%S") + '.txt'))
    train_loader, val_loader, num_query, num_classes, train_size = make_data_loader(args)
    model = build_model(args, num_classes)
    print(model)
    optimizer = make_optimizer(args, model)
    scheduler = WarmupMultiStepLR(optimizer, [30, 55], 0.1, 0.01, 5, "linear")
    loss_func = make_loss(args)
    model.to(device)

    for epoch in range(args.Epochs):
        model.train()
        running_loss = 0.0
        running_corrects = 0.0
        for index, data in enumerate(tqdm(train_loader)):
            img, target = data
            img = img.cuda()
            target = target.cuda()
            score, _ = model(img)
            preds = torch.max(score.data, 1)[1]
            loss = loss_func(score, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            running_corrects += float(torch.sum(preds == target.data))
        scheduler.step()
        epoch_loss = running_loss / train_size
        epoch_acc = running_corrects / train_size
        print("Epoch {} Loss : {:.6f} Acc:{:.4f}".format(epoch, epoch_loss, epoch_acc))
        if (epoch + 1) % args.n_save == 0:
            evaluator = Evaluator(model, val_loader, num_query)
            cmc, mAP = evaluator.run()
            print('---------------------------')
            print("CMC Curve:")
            for r in [1, 5, 10]:
                print("Rank-{} : {:.1%}".format(r, cmc[r - 1]))
            print("mAP : {:.1%}".format(mAP))
            print('---------------------------')
            save_model(args, model, optimizer, epoch)
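# Both training loops above end an evaluation epoch with save_model(args, model, optimizer, epoch),
# which is not defined in any snippet here. A minimal sketch of what such a helper plausibly does;
# the attributes args.save_path and args.log_description and the file-naming scheme are
# hypothetical, chosen only to mirror the resume logic seen elsewhere in this collection
# (model_*.pth / optimizer_*.pth pairs).
import os
import torch


def save_model_sketch(args, model, optimizer, epoch):
    """Persist model and optimizer state dicts so training can later be resumed from this epoch."""
    save_dir = os.path.join(args.save_path, args.log_description)
    os.makedirs(save_dir, exist_ok=True)
    torch.save(model.state_dict(), os.path.join(save_dir, 'model_{}.pth'.format(epoch)))
    torch.save(optimizer.state_dict(), os.path.join(save_dir, 'optimizer_{}.pth'.format(epoch)))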
def train(cfg, cfg_hr):
    # prepare dataset
    train_loader, val_loader, num_query, num_classes = make_data_loader(cfg)

    # prepare model
    model = build_model(cfg, cfg_hr, num_classes)
    model = nn.DataParallel(model)

    if cfg.MODEL.IF_WITH_CENTER == 'no':
        print('Train without center loss, the loss type is', cfg.MODEL.METRIC_LOSS_TYPE)
        optimizer = make_optimizer(cfg, model)
        print(cfg.SOLVER.MARGIN)
        scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                      cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                      cfg.SOLVER.WARMUP_METHOD)

        loss_func = make_loss(cfg, num_classes)  # modified by gu

        arguments = {}

        do_train(
            cfg,
            model,
            train_loader,
            val_loader,
            optimizer,
            scheduler,
            loss_func,
            num_query
        )
    elif cfg.MODEL.IF_WITH_CENTER == 'yes':
        print('Train with center loss, the loss type is', cfg.MODEL.METRIC_LOSS_TYPE)
        loss_func, center_criterion = make_loss_with_center(cfg, num_classes)  # modified by gu
        optimizer, optimizer_center = make_optimizer_with_center(cfg, model, center_criterion)
        scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                      cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                      cfg.SOLVER.WARMUP_METHOD)

        arguments = {}

        do_train_with_center(
            cfg,
            model,
            center_criterion,
            train_loader,
            val_loader,
            optimizer,
            optimizer_center,
            scheduler,
            loss_func,
            num_query
        )
    else:
        print("Unsupported value for cfg.MODEL.IF_WITH_CENTER {}, only support yes or no!\n".format(
            cfg.MODEL.IF_WITH_CENTER))
def train(cfg):
    logger = setup_logger("reid_baseline", cfg.OUTPUT_DIR)
    logger.info("Running with config:\n{}".format(cfg))

    # prepare dataset
    val_data_loader, num_query = make_val_data_loader(cfg)

    num_classes = np.zeros(len(cfg.DATALOADER.SAMPLER_PROB)).astype(int) - 1
    source_dataset = init_dataset(cfg.SRC_DATA.NAMES, root_train=cfg.SRC_DATA.TRAIN_DIR,
                                  transfered=cfg.SRC_DATA.TRANSFERED)
    num_classes[0] = source_dataset.num_train_pids
    num_classes[1] = cfg.TGT_UNSUPDATA.CLUSTER_TOPK
    if cfg.MODEL.FINETUNE:
        num_classes[1] += 200

    # prepare model
    model = build_model(cfg, num_classes)
    optimizer, fixed_lr_idxs = make_optimizer(cfg, model)
    loss_fn = make_loss(cfg, num_classes)

    # Add for using self trained model
    if cfg.MODEL.PRETRAIN_CHOICE == 'resume':
        start_epoch = eval(cfg.MODEL.PRETRAIN_PATH.split('/')[-1].split('.')[0].split('_')[-1])
        logger.info('Start epoch:%d' % start_epoch)
        path_to_optimizer = cfg.MODEL.PRETRAIN_PATH.replace('model', 'optimizer')
        logger.info('Path to the checkpoint of optimizer:%s' % path_to_optimizer)
        model.load_state_dict(torch.load(cfg.MODEL.PRETRAIN_PATH))
        optimizer.load_state_dict(torch.load(path_to_optimizer))
        scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                      cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                      cfg.SOLVER.WARMUP_METHOD, start_epoch, fixed_lr_idxs)
    elif cfg.MODEL.PRETRAIN_CHOICE == 'self' or cfg.MODEL.PRETRAIN_CHOICE == 'imagenet':
        start_epoch = 0
        model.load_param(cfg.MODEL.PRETRAIN_PATH, cfg.MODEL.PRETRAIN_CHOICE)
        scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                      cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                      cfg.SOLVER.WARMUP_METHOD, -1, fixed_lr_idxs)
        camera_model = build_camera_model(cfg, num_classes=5)
        camera_model.load_param(cfg.TEST.CAMERA_WEIGHT, cfg.MODEL.PRETRAIN_CHOICE)
    else:
        logger.info('Only support pretrain_choice for imagenet and self, but got {}'.format(
            cfg.MODEL.PRETRAIN_CHOICE))

    do_train(cfg,
             model,
             camera_model,
             val_data_loader,
             optimizer,
             scheduler,  # modify for using self trained model
             loss_fn,
             num_query,
             start_epoch,  # add for using self trained model
             0
             )
def train(cfg):
    # prepare dataset
    train_loader, val_loader, num_query, num_classes = make_data_loader(cfg)

    # prepare model
    model = build_model(cfg, num_classes)
    print('Train without center loss, the loss type is', cfg.MODEL.METRIC_LOSS_TYPE)
    optimizer = make_optimizer(cfg, model)
    loss_func = make_loss(cfg, num_classes)

    if cfg.MODEL.PRETRAIN_CHOICE == 'self':
        start_epoch = eval(cfg.MODEL.PRETRAIN_PATH.split('/')[-1].split('.')[0].split('_')[-1])
        print('Start epoch:', start_epoch)
        path_to_optimizer = cfg.MODEL.PRETRAIN_PATH.replace('model', 'optimizer')
        print('Path to the checkpoint of optimizer:', path_to_optimizer)
        model.load_state_dict(torch.load(cfg.MODEL.PRETRAIN_PATH))
        optimizer.load_state_dict(torch.load(path_to_optimizer))
        if cfg.MODEL.DEVICE == "cuda":
            # move the resumed optimizer state tensors onto the GPU
            for state in optimizer.state.values():
                for k, v in state.items():
                    if isinstance(v, torch.Tensor):
                        state[k] = v.cuda()
        scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                      cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                      cfg.SOLVER.WARMUP_METHOD, start_epoch)
    elif cfg.MODEL.PRETRAIN_CHOICE == 'imagenet':
        start_epoch = 0
        scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                      cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                      cfg.SOLVER.WARMUP_METHOD)
    else:
        start_epoch = 0
        print('Only support pretrain_choice for imagenet and self, but got {}'.format(
            cfg.MODEL.PRETRAIN_CHOICE))

    arguments = {}

    do_train(cfg, model, train_loader, val_loader, optimizer, scheduler, loss_func,
             num_query, start_epoch)
def train(cfg):
    model = build_model(cfg)
    device = cfg.MODEL.DEVICE

    optimizer = make_optimizer(cfg, model)

    train_loader = make_data_loader(cfg, csv=cfg.DATASETS.TRAIN, is_train=True)

    do_train(
        cfg,
        model,
        train_loader,
        optimizer,
        losses=[F.mse_loss, F.nll_loss],
    )
def main():
    init_env('1')
    loaders = make_data_loaders(cfg)
    model = build_model(cfg)
    model = model.cuda()

    task_name = 'base_unet'
    log_dir = os.path.join(cfg.LOG_DIR, task_name)
    cfg.TASK_NAME = task_name
    mkdir(log_dir)
    logger = setup_logger('train', log_dir, filename='train.log')
    logger.info(cfg)
    logger = setup_logger('eval', log_dir, filename='eval.log')

    optimizer, scheduler = make_optimizer(cfg, model)
    metrics = get_metrics(cfg)
    losses = get_losses(cfg)

    train_val(model, loaders, optimizer, scheduler, losses, metrics)
def train(cfg):
    # prepare dataset
    train_loader, val_loader, num_query, num_classes, dataset = make_data_loader(cfg)

    # prepare model
    model = build_model(cfg, num_classes)

    print('Train without center loss, the loss type is', cfg['MODEL.METRIC_LOSS_TYPE'])
    optimizer = make_optimizer(cfg, model)
    loss_func = make_loss(cfg, num_classes, model.in_planes)  # modified by gu

    # Add for using self trained model
    if cfg['MODEL.PRETRAIN_CHOICE'] == 'continue':
        # start_epoch = eval(cfg['MODEL.PRETRAIN_PATH'].split('/')[-1].split('.')[0].split('_')[-1])
        start_epoch = 0
        print('Start epoch:', start_epoch)
        path_to_optimizer = cfg['MODEL.PRETRAIN_PATH'].replace('model', 'optimizer')
        print('Path to the checkpoint of optimizer:', path_to_optimizer)
        model.load_param(cfg['MODEL.PRETRAIN_PATH'])
        # optimizer = make_optimizer(cfg, model)
        scheduler = WarmupMultiStepLR(optimizer, cfg['SOLVER.STEPS'], cfg['SOLVER.GAMMA'],
                                      cfg['SOLVER.WARMUP_FACTOR'], cfg['SOLVER.WARMUP_ITERS'],
                                      cfg['SOLVER.WARMUP_METHOD'])
    elif cfg['MODEL.PRETRAIN_CHOICE'] == 'imagenet':
        start_epoch = 0
        scheduler = WarmupMultiStepLR(optimizer, cfg['SOLVER.STEPS'], cfg['SOLVER.GAMMA'],
                                      cfg['SOLVER.WARMUP_FACTOR'], cfg['SOLVER.WARMUP_ITERS'],
                                      cfg['SOLVER.WARMUP_METHOD'])
    elif cfg['MODEL.PRETRAIN_CHOICE'] == 'self' or cfg['MODEL.PRETRAIN_CHOICE'] == 'self-no-head':
        start_epoch = 0
        scheduler = WarmupMultiStepLR(optimizer, cfg['SOLVER.STEPS'], cfg['SOLVER.GAMMA'],
                                      cfg['SOLVER.WARMUP_FACTOR'], cfg['SOLVER.WARMUP_ITERS'],
                                      cfg['SOLVER.WARMUP_METHOD'])
    else:
        print('Only support pretrain_choice for imagenet and self, but got {}'.format(
            cfg['MODEL.PRETRAIN_CHOICE']))

    do_train(
        cfg,
        model,
        train_loader,
        val_loader,
        optimizer,
        scheduler,  # modify for using self trained model
        loss_func,
        num_query,
        start_epoch,  # add for using self trained model
        dataset
    )
def train(cfg):
    model = build_fcn_model(cfg)

    optimizer = make_optimizer(cfg, model)

    arguments = {}

    data_loader = make_data_loader(cfg, is_train=True)
    val_loader = make_data_loader(cfg, is_train=False)

    do_train(
        cfg,
        model,
        data_loader,
        val_loader,
        optimizer,
        cross_entropy2d,
    )
def train(cfg):
    # prepare dataset
    train_loader, val_loader, num_query, num_classes = make_data_loader(cfg)

    # prepare model
    model = build_model(cfg, num_classes)
    optimizer = make_optimizer(cfg, model)
    scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                  cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                  cfg.SOLVER.WARMUP_METHOD)
    loss_func = make_loss(cfg)

    arguments = {}

    do_train(cfg, model, train_loader, val_loader, optimizer, scheduler, loss_func, num_query)
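# Nearly every snippet constructs a WarmupMultiStepLR(optimizer, steps, gamma, warmup_factor,
# warmup_iters, warmup_method) without showing the class itself. Below is a minimal sketch of
# such a scheduler in the reid-strong-baseline style: scale the base lr by a (constant or
# linearly increasing) warmup factor for the first warmup_iters steps, then decay it by gamma
# at each milestone. This is an illustrative re-implementation under those assumptions, not
# necessarily the exact code behind the calls above.
from bisect import bisect_right

import torch


class WarmupMultiStepLRSketch(torch.optim.lr_scheduler._LRScheduler):
    def __init__(self, optimizer, milestones, gamma=0.1, warmup_factor=1.0 / 3,
                 warmup_iters=500, warmup_method="linear", last_epoch=-1):
        if list(milestones) != sorted(milestones):
            raise ValueError("milestones should be increasing, got {}".format(milestones))
        self.milestones = list(milestones)
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        warmup_factor = 1.0
        if self.last_epoch < self.warmup_iters:
            if self.warmup_method == "constant":
                warmup_factor = self.warmup_factor
            else:  # "linear": ramp from warmup_factor up to 1.0
                alpha = self.last_epoch / self.warmup_iters
                warmup_factor = self.warmup_factor * (1 - alpha) + alpha
        # multi-step decay: gamma^(number of milestones already passed)
        return [base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch)
                for base_lr in self.base_lrs]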
def train(cfg, local_rank, distributed):
    pretrained_base = os.path.join(cfg.TRAIN.model_root, cfg.TRAIN.backbone + '.pth')
    model = get_model(cfg.MODEL.model, pretrained_base=pretrained_base)
    device = torch.device(cfg.MODEL.device)
    model.to(device)
    if distributed:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank,
        )

    arguments = {}
    arguments["iteration"] = 0
    output_dir = cfg.CONFIG.output_dir
    save_to_disk = ptutil.get_rank() == 0
    checkpointer = ptutil.CheckPointer(model, optimizer, scheduler, output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.TRAIN.weight)
    arguments.update(extra_checkpoint_data)

    data_loader = build_dataloader(
        cfg,
        train=True,
        distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.TRAIN.checkpoint_period
    training(model, data_loader, optimizer, scheduler, checkpointer, device,
             checkpoint_period, arguments)

    return model
def train_net(cfg):
    print(cfg.NAME)
    print(cfg.DESCRIPTION)
    if not os.path.exists(cfg.OUTPUT_DIR):
        os.mkdir(cfg.OUTPUT_DIR)

    model = Network(cfg)
    device = torch.device(cfg.DEVICE)
    if cfg.PRETRAINED:
        model_path = cfg.PRETRAIN_MODEL
        model.load_state_dict(torch.load(model_path))
        print('loading weight from ', model_path)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)
    # output_dir = cfg.OUTPUT_DIR

    data_loader = make_data_loader(cfg, is_train=True)

    args = {}
    args['iteration'] = 0
    do_train(model, data_loader, optimizer, scheduler, device, args, cfg)
    print(cfg.NAME)

    return model
logger.info(config_str)
logger.info("Running with config:\n{}".format(cfg))

os.environ['CUDA_VISIBLE_DEVICES'] = cfg.MODEL.DEVICE_ID
train_loader, val_loader, num_query, num_classes = make_dataloader(cfg)

if cfg.MODEL.PRETRAIN_CHOICE == 'finetune':
    model = make_model(cfg, num_class=num_classes)
    model.load_param_finetune(cfg.MODEL.PRETRAIN_PATH)
    print('Loading pretrained model for finetuning......')
else:
    model = make_model(cfg, num_class=num_classes)

loss_func, center_criterion = make_loss(cfg, num_classes=num_classes)

optimizer, optimizer_center = make_optimizer(cfg, model, center_criterion)
scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                              cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_EPOCHS,
                              cfg.SOLVER.WARMUP_METHOD)

do_train(
    cfg,
    model,
    center_criterion,
    train_loader,
    val_loader,
    optimizer,
    optimizer_center,
    scheduler,  # modify for using self trained model
    loss_func,
def train(cfg):
    # prepare dataset
    train_loader, val_loader, num_query, num_classes = make_data_loader(cfg)

    # prepare model
    model = build_model(cfg, num_classes)

    if cfg.MODEL.IF_WITH_CENTER == 'no':
        print('Train without center loss, the loss type is', cfg.MODEL.METRIC_LOSS_TYPE)
        optimizer = make_optimizer(cfg, model)
        # scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR,
        #                               cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD)

        loss_func = make_loss(cfg, num_classes)  # modified by gu

        # Add for using self trained model
        if cfg.MODEL.PRETRAIN_CHOICE == 'self':
            start_epoch = eval(cfg.MODEL.PRETRAIN_PATH.split('/')[-1].split('.')[0].split('_')[-1])
            print('Start epoch:', start_epoch)
            path_to_optimizer = cfg.MODEL.PRETRAIN_PATH.replace('model', 'optimizer')
            print('Path to the checkpoint of optimizer:', path_to_optimizer)
            model.load_state_dict(torch.load(cfg.MODEL.PRETRAIN_PATH))
            optimizer.load_state_dict(torch.load(path_to_optimizer))
            scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                          cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                          cfg.SOLVER.WARMUP_METHOD, start_epoch)
        elif cfg.MODEL.PRETRAIN_CHOICE == 'imagenet':
            start_epoch = 0
            scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                          cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                          cfg.SOLVER.WARMUP_METHOD)
        else:
            print('Only support pretrain_choice for imagenet and self, but got {}'.format(
                cfg.MODEL.PRETRAIN_CHOICE))

        arguments = {}

        do_train(
            cfg,
            model,
            train_loader,
            val_loader,
            optimizer,
            scheduler,  # modify for using self trained model
            loss_func,
            num_query,
            start_epoch  # add for using self trained model
        )
    elif cfg.MODEL.IF_WITH_CENTER == 'yes':
        print('Train with center loss, the loss type is', cfg.MODEL.METRIC_LOSS_TYPE)
        loss_func, center_criterion = make_loss_with_center(cfg, num_classes)  # modified by gu
        optimizer, optimizer_center = make_optimizer_with_center(cfg, model, center_criterion)
        # scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR,
        #                               cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD)

        arguments = {}

        # Add for using self trained model
        if cfg.MODEL.PRETRAIN_CHOICE == 'self':
            start_epoch = eval(cfg.MODEL.PRETRAIN_PATH.split('/')[-1].split('.')[0].split('_')[-1])
            print('Start epoch:', start_epoch)
            path_to_optimizer = cfg.MODEL.PRETRAIN_PATH.replace('model', 'optimizer')
            print('Path to the checkpoint of optimizer:', path_to_optimizer)
            path_to_center_param = cfg.MODEL.PRETRAIN_PATH.replace('model', 'center_param')
            print('Path to the checkpoint of center_param:', path_to_center_param)
            path_to_optimizer_center = cfg.MODEL.PRETRAIN_PATH.replace('model', 'optimizer_center')
            print('Path to the checkpoint of optimizer_center:', path_to_optimizer_center)
            model.load_state_dict(torch.load(cfg.MODEL.PRETRAIN_PATH))
            optimizer.load_state_dict(torch.load(path_to_optimizer))
            center_criterion.load_state_dict(torch.load(path_to_center_param))
            optimizer_center.load_state_dict(torch.load(path_to_optimizer_center))
            scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                          cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                          cfg.SOLVER.WARMUP_METHOD, start_epoch)
        elif cfg.MODEL.PRETRAIN_CHOICE == 'imagenet':
            start_epoch = 0
            scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                          cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                          cfg.SOLVER.WARMUP_METHOD)
        else:
            print('Only support pretrain_choice for imagenet and self, but got {}'.format(
                cfg.MODEL.PRETRAIN_CHOICE))

        do_train_with_center(
            cfg,
            model,
            center_criterion,
            train_loader,
            val_loader,
            optimizer,
            optimizer_center,
            scheduler,  # modify for using self trained model
            loss_func,
            num_query,
            start_epoch  # add for using self trained model
        )
    else:
        print("Unsupported value for cfg.MODEL.IF_WITH_CENTER {}, only support yes or no!\n".format(
            cfg.MODEL.IF_WITH_CENTER))
if __name__ == '__main__':
    Cfg.freeze()
    log_dir = Cfg.DATALOADER.LOG_DIR
    logger = setup_logger('pose-transfer-gan.train', log_dir)
    logger.info("Running with config:\n{}".format(Cfg))

    os.environ['CUDA_VISIBLE_DEVICES'] = Cfg.MODEL.DEVICE_ID
    # This flag allows you to enable the inbuilt cudnn auto-tuner to find the best algorithm to use for your hardware.
    cudnn.benchmark = True

    train_loader, val_loader = make_dataloader(Cfg)
    model_G, model_Dip, model_Dii, model_D_reid = make_model(Cfg)

    optimizerG = make_optimizer(Cfg, model_G)
    optimizerDip = make_optimizer(Cfg, model_Dip)
    optimizerDii = make_optimizer(Cfg, model_Dii)

    schedulerG = WarmupMultiStepLR(optimizerG, Cfg.SOLVER.STEPS, Cfg.SOLVER.GAMMA,
                                   Cfg.SOLVER.WARMUP_FACTOR, Cfg.SOLVER.WARMUP_EPOCHS,
                                   Cfg.SOLVER.WARMUP_METHOD)
    schedulerDip = WarmupMultiStepLR(optimizerDip, Cfg.SOLVER.STEPS, Cfg.SOLVER.GAMMA,
                                     Cfg.SOLVER.WARMUP_FACTOR, Cfg.SOLVER.WARMUP_EPOCHS,
                                     Cfg.SOLVER.WARMUP_METHOD)
    schedulerDii = WarmupMultiStepLR(optimizerDii, Cfg.SOLVER.STEPS, Cfg.SOLVER.GAMMA,
                                     Cfg.SOLVER.WARMUP_FACTOR,
cfg.merge_from_file('../configs/config.yml')
cfg.freeze()

train_loader, dataset = make_data_loader(cfg, is_train=True)
val_loader, dataset_val = make_data_loader_view(cfg, is_train=True)

model = build_model(cfg).cuda()
maxs = torch.max(dataset.bbox[0], dim=0).values.cuda() + 0.5
mins = torch.min(dataset.bbox[0], dim=0).values.cuda() - 0.5
model.set_max_min(maxs, mins)

optimizer = make_optimizer(cfg, model)
scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                              cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                              cfg.SOLVER.WARMUP_METHOD)
loss_fn = make_loss(cfg)

model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

beg = time.time()
for batch in train_loader:
    beg = time.time()
time_str = datetime.datetime.strftime(curr_time, '%Y%m%d_')
save_dir = os.path.join(cfg['save_dir'], time_str + cfg['tag'])
log_dir = os.path.join(cfg['log_dir'], "log_" + time_str + cfg['tag'])
cfg['save_dir'] = save_dir
cfg['log_dir'] = log_dir

if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
if not os.path.isdir(log_dir):
    os.makedirs(log_dir)
print("Save_dir :", save_dir)
print("Log_dir :", log_dir)

# import pdb; pdb.set_trace()
model = build_model(cfg, pretrain_path=arg['load_path'])
optimizer = make_optimizer(cfg['optimizer'], model)
lr_scheduler = wrapper_lr_scheduler(cfg['lr_scheduler'], optimizer)

if arg['device']:  # device ids specified on the command line
    free_device_ids = arg['device']
else:
    free_device_ids = get_free_device_ids()

max_num_devices = cfg['max_num_devices']
if len(free_device_ids) >= max_num_devices:
    free_device_ids = free_device_ids[:max_num_devices]

master_device = free_device_ids[0]
model.cuda(master_device)
model = nn.DataParallel(model, device_ids=free_device_ids).cuda(master_device)