def __init__(self, bm_ws_market, eventQueue, apiKey, apiSecret, symbols):
    """Set up the order-management component: logger, event queue, market/trading
    websockets and a REST client.

    Blocks until the trading websocket has delivered its initial
    position/order snapshot.

    :param bm_ws_market: external market-data websocket; shared because the
        DataHandler also uses it (or it *is* the DataHandler)
    :param eventQueue: event queue; it only carries TARGET_POSITION_EVENT,
        pushed from a separate thread
    :param apiKey: BitMEX API key
    :param apiSecret: BitMEX API secret
    :param symbols: instruments to subscribe to / trade
    """
    # Dedicated logger for this component.
    self.logger = generate_logger('OMS')
    # Bind the event queue (TARGET_POSITION_EVENT only; another thread pushes target positions).
    self.eventQueue = eventQueue
    # Target position per symbol: {symbol: pos}
    self.target_position = {}
    # Instruments.
    self.symbols = symbols
    # websocket-market: external, because the DataHandler also uses it, or it is the DataHandler.
    self.bm_ws_market = bm_ws_market
    # websocket-trading: connect, subscribe, then wait for the initial status snapshot.
    self.bm_ws_trading = bitmexWSTrading(apiKey, apiSecret)
    self.bm_ws_trading.connect()
    self.bm_ws_trading.subscribe(self.symbols)
    self.bm_ws_trading.wait_for_initial_status()  # wait for the initial status
    # Actual position, derived from websocket `position` messages.
    self.actual_position = self.bm_ws_trading.actual_position
    # Unfilled order quantity, derived from websocket `order` messages.
    self.unfilled_qty = self.bm_ws_trading.unfilled_qty
    # REST client (order placement etc.).
    self.bm_rest = bitmexREST(apiKey, apiSecret)
def main_worker(gpu, args):
    """Per-process worker: model training, testing, JIT conversion and
    distillation-file creation.

    :param gpu: id of the GPU this worker runs on
    :param args: run-time hyper-parameters / options
    """
    args.gpu = gpu

    # One log file per worker, stamped with timestamp and GPU id.
    utils.generate_logger(f"{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}-{gpu}.log")
    logging.info(f'args: {args}')

    # Reproducibility: seeding enables deterministic CUDNN (slower).
    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        logging.warning('You have chosen to seed training. '
                        'This will turn on the CUDNN deterministic setting, '
                        'which can slow down your training considerably! '
                        'You may see unexpected behavior when restarting '
                        'from checkpoints.')

    if args.cuda:
        logging.info(f"Use GPU: {args.gpu} ~")
        if args.distributed:
            # Global rank = node rank * GPUs per node + local GPU id.
            args.rank = args.rank * args.gpus + gpu
            dist.init_process_group(backend='nccl',
                                    init_method=args.init_method,
                                    world_size=args.world_size,
                                    rank=args.rank)
    else:
        logging.info(f"Use CPU ~")

    # Create/load the model. When using a pretrained model, download it in
    # advance into the `pretrained` folder, named after the network.
    logging.info(f"=> creating model '{args.arch}'")
    model = my_models.get_model(args.arch, args.pretrained,
                                num_classes=args.num_classes)

    # Reload a previously trained checkpoint (loaded on CPU first).
    if args.resume:
        if os.path.isfile(args.resume):
            logging.info(f"=> loading checkpoint '{args.resume}'")
            checkpoint = torch.load(args.resume,
                                    map_location=torch.device('cpu'))
            acc = model.load_state_dict(checkpoint['state_dict'], strict=True)
            logging.info(f'missing keys of models: {acc.missing_keys}')
            del checkpoint
        else:
            raise Exception(f"No checkpoint found at '{args.resume}' to be resumed")

    # Model info.
    image_height, image_width = args.image_size
    logging.info(f'Model {args.arch} input size: ({image_height}, {image_width})')
    utils.summary(size=(image_height, image_width), channel=3, model=model)

    # Model conversion: to torch.jit.script (requires a resumed checkpoint).
    if args.jit:
        if not args.resume:
            raise Exception('Option --resume must specified!')
        applications.convert_to_jit(model, args=args)
        return

    if args.criterion == 'softmax':
        criterion = criterions.HybridCELoss(args=args)  # hybrid-strategy multi-class loss
    elif args.criterion == 'bce':
        criterion = criterions.HybridBCELoss(args=args)  # hybrid-strategy multi-label binary loss
    else:
        raise NotImplementedError(f'Not loss function {args.criterion}')

    if args.cuda:
        if args.distributed and args.sync_bn:
            model = apex.parallel.convert_syncbn_model(model)
        torch.cuda.set_device(args.gpu)
        model.cuda(args.gpu)
        criterion = criterion.cuda(args.gpu)

    # Side tasks (distillation / curriculum / visualization) run and return early.
    if args.knowledge in ('train', 'test', 'val'):
        torch.set_flush_denormal(True)
        distill_loader = dataloader.load(args, name=args.knowledge)
        applications.distill(distill_loader, model, criterion, args,
                             is_confuse_matrix=True)
        return

    if args.make_curriculum in ('train', 'test', 'val'):
        torch.set_flush_denormal(True)
        curriculum_loader = dataloader.load(args, name=args.make_curriculum)
        applications.make_curriculum(curriculum_loader, model, criterion, args,
                                     is_confuse_matrix=True)
        return

    if args.visual_data in ('train', 'test', 'val'):
        torch.set_flush_denormal(True)
        test_loader = dataloader.load(args, name=args.visual_data)
        applications.Visualize.visualize(test_loader, model, args)
        return

    # Optimizer selection by name.
    opt_set = {
        'sgd': partial(torch.optim.SGD, momentum=args.momentum),
        'adam': torch.optim.Adam,
        'adamw': AdamW,
        'radam': RAdam,
        'ranger': Ranger,
        'lookaheadadam': LookaheadAdam,
        'ralamb': Ralamb,
        'rangerlars': RangerLars,
        'novograd': Novograd,
    }
    optimizer = opt_set[args.opt](model.parameters(), lr=args.lr)  # weight decay was moved into the train step
    # Stochastic weight averaging optimizer (optional).
    # from optim.swa import SWA
    # optimizer = SWA(optimizer, swa_start=10, swa_freq=5, swa_lr=0.05)

    # Mixed-precision training (apex amp O1).
    if args.cuda:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
        if args.distributed:
            model = apex.parallel.DistributedDataParallel(model)
        else:
            model = torch.nn.DataParallel(model)

    if args.train:
        train_loader = dataloader.load(args, 'train')
        val_loader = dataloader.load(args, 'val')
        scheduler = LambdaLR(optimizer,
                             lambda epoch: adjust_learning_rate(epoch, args=args))
        applications.train(train_loader, val_loader, model, criterion,
                           optimizer, scheduler, args)
        # After training, fall through to evaluation.
        args.evaluate = True

    if args.evaluate:
        torch.set_flush_denormal(True)
        test_loader = dataloader.load(args, name='test')
        acc, loss, paths_targets_preds_probs = applications.test(
            test_loader, model, criterion, args, is_confuse_matrix=True)
        logging.info(f'Evaluation: * Acc@1 {acc:.3f} and loss {loss:.3f}.')
        logging.info(f'Evaluation Result:\n')
        for path, target, pred, prob in paths_targets_preds_probs:
            logging.info(path + ' ' + str(target) + ' ' + str(pred) + ' ' +
                         ','.join([f'{num:.2f}' for num in prob]))
        logging.info('Evaluation Over~')
def main():
    """Run the CCA-based answer-ranking experiment end to end.

    Builds the vocabulary and word embeddings, loads train/eval features,
    fits CCA on the training views, projects them, evaluates candidate-answer
    recall (two- or three-view depending on --condition_vars), and appends
    the p/k grid-search result to a text file in the results directory.
    """
    opt = args.getopt()

    # set-up file structures and create logger
    datasetdir = os.path.join(opt.datasetdir, str(opt.datasetversion))
    # BUGFIX: the original line used `==`, a no-op comparison that silently
    # discarded the intended override. Saving ranks requires batch size 1.
    opt.batch_size = 1 if opt.save_ranks else opt.batch_size
    resultsdir = os.path.join(opt.resultsdir, 'experiment_id' + str(opt.id))
    # Portable directory creation instead of shelling out to `mkdir -p`.
    os.makedirs(resultsdir, exist_ok=True)
    log = utils.generate_logger(opt, resultsdir)

    # basic/cuda set-up
    torch.manual_seed(opt.seed)
    if opt.gpu >= 0:
        assert opt.gpu <= torch.cuda.device_count()
        torch.cuda.manual_seed(opt.seed)
        torch.cuda.set_device(opt.gpu)
        cudnn.enabled = True
        cudnn.benchmark = True
    else:
        log.warning('on cpu. you should probably use gpus with --gpu=GPU_idx')

    # print & save arguments
    log.info(opt)
    torch.save(opt, os.path.join(resultsdir, 'exp' + str(opt.id) + '_opt.pt'))

    ####################################### build vocab #####################################
    log.info('-' * 100)
    dictionary = dataset.build_vocabulary(opt, log)
    ntokens = len(dictionary)
    log.info('dictionary loaded successfully! vocabulary size: ' + str(ntokens))
    log.info('-' * 100)

    # get pre-trained word embeddings (from downloaded binary file saved at opt.wordmodel)
    word_vec_file = os.path.join(
        datasetdir, os.path.basename(opt.wordmodel) + '_vocab_vecs.pt')
    dictionary.loadwordmodel(opt.wordmodel, word_vec_file, opt.emsize, log, opt.gpu)
    log.info('-' * 100)

    ####################################### load data #####################################
    train_loader, flat_train_features = dataset.get_features(
        dictionary, opt, log, 'train')
    test_loader, flat_test_features = dataset.get_features(
        dictionary, opt, log, opt.evalset)

    # filter views by input_vars and condition_vars
    condition_vars = sorted(opt.condition_vars.split('_'))  # sorted alphabetically
    train_views, train_mus = dataset.filter_features(flat_train_features,
                                                     opt.input_vars,
                                                     condition_vars)
    test_views, _ = dataset.filter_features(flat_test_features, opt.input_vars,
                                            condition_vars)

    # do CCA
    lambdas, proj_mtxs = cca_utils.cca(train_views, log, k=opt.k)
    # lambdas, proj_mtxs = cca_utils.cca_mardia(train_views, log, opt.k, opt.eps, opt.r)

    # get (train) projections using learned weights
    train_projections = [
        cca_utils.get_projection(v, mtx, lambdas, opt.p)
        for (v, mtx) in zip(train_views, proj_mtxs)
    ]
    proj_train_mus = [
        proj.mean(dim=0).view(-1, 1) for proj in train_projections
    ]

    if len(condition_vars) == 2:  # three-view CCA
        # select answer & question proj_mtxs (rather than image)
        qa_proj_mtxs = [proj_mtxs[0], proj_mtxs[2]]
        qa_train_projections = [train_projections[0], train_projections[2]]
        qa_proj_train_mus = [proj_train_mus[0], proj_train_mus[2]]
        test_meters = evals.candidate_answers_recall(
            test_loader, lambdas, qa_proj_mtxs, qa_train_projections,
            qa_proj_train_mus, dictionary, opt, log, opt.evalset, train_loader)
    elif len(condition_vars) == 1:  # two-view CCA
        test_meters = evals.candidate_answers_recall(
            test_loader, lambdas, proj_mtxs, train_projections,
            proj_train_mus, dictionary, opt, log, opt.evalset, train_loader)
    else:
        print(
            'cannot handle this CCA architecture - check --input_vars and --condition_vars!'
        )
        return

    log.info('-' * 100)
    log.info(opt)
    log.info('-' * 100)
    s = utils.stringify_meters(test_meters)
    log.info(s)

    # saves ranking performance to text file with p,k parameters
    # (context manager guarantees the handle is closed even on error)
    with open(os.path.join(resultsdir, 'p_k_gridsearch.txt'), 'a') as fh:
        fh.write('k={k}\tp={p}\tranks: {ranks}\r\n'.format(k=str(opt.k),
                                                           p=str(opt.p),
                                                           ranks=s))
def main(config, mode, weights):
    """Entry point for ReID model training/testing.

    Loads the YAML config, wires up data generators, model, losses,
    optimizers and schedulers (all resolved dynamically by name from the
    config), optionally resumes from the last saved epoch, then trains or
    evaluates depending on *mode*.

    :param config: path to the YAML configuration file
    :param mode: 'train' or 'test'
    :param weights: model weights path ('' for none; full load in test mode,
        partial load in train mode)
    """
    cfg = kaptan.Kaptan(handler='yaml')
    config = cfg.import_config(config)

    # Derive save/checkpoint locations from the config and create the logger.
    MODEL_SAVE_NAME, MODEL_SAVE_FOLDER, LOGGER_SAVE_NAME, CHECKPOINT_DIRECTORY = utils.generate_save_names(
        config)
    os.makedirs(MODEL_SAVE_FOLDER, exist_ok=True)
    logger = utils.generate_logger(MODEL_SAVE_FOLDER, LOGGER_SAVE_NAME)

    logger.info("*" * 40)
    logger.info("")
    logger.info("")
    logger.info("Using the following configuration:")
    logger.info(config.export("yaml", indent=4))
    logger.info("")
    logger.info("")
    logger.info("*" * 40)

    NORMALIZATION_MEAN, NORMALIZATION_STD, RANDOM_ERASE_VALUE = utils.fix_generator_arguments(
        config)
    TRAINDATA_KWARGS = {
        "rea_value": config.get("TRANSFORMATION.RANDOM_ERASE_VALUE")
    }

    """ MODEL PARAMS """
    from utils import model_weights

    # Resolve base-model weights; download them if training and not cached.
    MODEL_WEIGHTS = None
    if config.get("MODEL.MODEL_BASE") in model_weights:
        if mode == "train":
            if os.path.exists(
                    model_weights[config.get("MODEL.MODEL_BASE")][1]):
                pass
            else:
                logger.info(
                    "Model weights file {} does not exist. Downloading.".
                    format(model_weights[config.get("MODEL.MODEL_BASE")][1]))
                # NOTE(review): argument order (target path, url) matches the
                # model_weights tuple layout — confirm against utils.web.download.
                utils.web.download(
                    model_weights[config.get("MODEL.MODEL_BASE")][1],
                    model_weights[config.get("MODEL.MODEL_BASE")][0])
            MODEL_WEIGHTS = model_weights[config.get("MODEL.MODEL_BASE")][1]
    else:
        raise NotImplementedError(
            "Model %s is not available. Please choose one of the following: %s"
            % (config.get("MODEL.MODEL_BASE"), str(model_weights.keys())))

    # ------------------ LOAD SAVED LOGGER IF EXISTS ----------------------------
    DRIVE_BACKUP = config.get("SAVE.DRIVE_BACKUP")
    if DRIVE_BACKUP:
        backup_logger = os.path.join(CHECKPOINT_DIRECTORY, LOGGER_SAVE_NAME)
        if os.path.exists(backup_logger):
            shutil.copy2(backup_logger, ".")
    else:
        backup_logger = None

    NUM_GPUS = torch.cuda.device_count()
    if NUM_GPUS > 1:
        raise RuntimeError("Not built for multi-GPU. Please start with single-GPU.")
    logger.info("Found %i GPUs" % NUM_GPUS)

    # --------------------- BUILD GENERATORS ------------------------
    # Crawler class is resolved by name from the crawlers package.
    data_crawler_ = config.get("EXECUTION.CRAWLER", "VeRiDataCrawler")
    data_crawler = __import__("crawlers." + data_crawler_,
                              fromlist=[data_crawler_])
    data_crawler = getattr(data_crawler, data_crawler_)

    from generators import SequencedGenerator
    logger.info("Crawling data folder %s" %
                config.get("DATASET.ROOT_DATA_FOLDER"))
    crawler = data_crawler(data_folder=config.get("DATASET.ROOT_DATA_FOLDER"),
                           train_folder=config.get("DATASET.TRAIN_FOLDER"),
                           test_folder=config.get("DATASET.TEST_FOLDER"),
                           query_folder=config.get("DATASET.QUERY_FOLDER"),
                           **{"logger": logger})

    # Training generator: full augmentation pipeline.
    train_generator = SequencedGenerator(gpus=NUM_GPUS, i_shape=config.get("DATASET.SHAPE"), \
                            normalization_mean=NORMALIZATION_MEAN, normalization_std=NORMALIZATION_STD, normalization_scale=1./config.get("TRANSFORMATION.NORMALIZATION_SCALE"), \
                            h_flip = config.get("TRANSFORMATION.H_FLIP"), t_crop=config.get("TRANSFORMATION.T_CROP"), rea=config.get("TRANSFORMATION.RANDOM_ERASE"),
                            **TRAINDATA_KWARGS)
    train_generator.setup(crawler,
                          mode='train',
                          batch_size=config.get("TRANSFORMATION.BATCH_SIZE"),
                          instance=config.get("TRANSFORMATION.INSTANCES"),
                          workers=config.get("TRANSFORMATION.WORKERS"))
    logger.info("Generated training data generator")
    TRAIN_CLASSES = config.get("MODEL.SOFTMAX_DIM",
                               train_generator.num_entities)

    # Test generator: augmentations disabled (no flip/crop/erase).
    test_generator = SequencedGenerator(
        gpus=NUM_GPUS,
        i_shape=config.get("DATASET.SHAPE"),
        normalization_mean=NORMALIZATION_MEAN,
        normalization_std=NORMALIZATION_STD,
        normalization_scale=1. / config.get("TRANSFORMATION.NORMALIZATION_SCALE"),
        h_flip=0,
        t_crop=False,
        rea=False)
    test_generator.setup(crawler,
                         mode='test',
                         batch_size=config.get("TRANSFORMATION.BATCH_SIZE"),
                         instance=config.get("TRANSFORMATION.INSTANCES"),
                         workers=config.get("TRANSFORMATION.WORKERS"))
    QUERY_CLASSES = test_generator.num_entities
    logger.info("Generated validation data/query generator")

    # --------------------- INSTANTIATE MODEL ------------------------
    model_builder = __import__("models", fromlist=["*"])
    model_builder = getattr(
        model_builder,
        config.get("EXECUTION.MODEL_BUILDER", "veri_model_builder"))
    logger.info("Loaded {} from {} to build ReID model".format(
        config.get("EXECUTION.MODEL_BUILDER", "veri_model_builder"), "models"))

    if type(
            config.get("MODEL.MODEL_KWARGS")
    ) is dict:  # Compatibility with old configs. TODO fix all old configs.
        model_kwargs_dict = config.get("MODEL.MODEL_KWARGS")
    else:
        model_kwargs_dict = json.loads(config.get("MODEL.MODEL_KWARGS"))

    reid_model = model_builder( arch = config.get("MODEL.MODEL_ARCH"), \
                            base=config.get("MODEL.MODEL_BASE"), \
                            weights=MODEL_WEIGHTS, \
                            soft_dimensions = config.get("MODEL.SOFTMAX",TRAIN_CLASSES), \
                            embedding_dimensions = config.get("MODEL.EMB_DIM"), \
                            normalization = config.get("MODEL.MODEL_NORMALIZATION"), \
                            **model_kwargs_dict)
    logger.info("Finished instantiating model with {} architecture".format(
        config.get("MODEL.MODEL_ARCH")))

    if mode == "test":
        # Full weight load; switch to eval mode for inference.
        reid_model.load_state_dict(torch.load(weights))
        reid_model.cuda()
        reid_model.eval()
    else:
        if weights != "":  # Load weights if train and starting from a another model base...
            logger.info(
                "Commencing partial model load from {}".format(weights))
            reid_model.partial_load(weights)
            logger.info("Completed partial model load from {}".format(weights))
        reid_model.cuda()
        logger.info(
            torchsummary.summary(reid_model,
                                 input_size=(3, *config.get("DATASET.SHAPE"))))

    # --------------------- INSTANTIATE LOSS ------------------------
    from loss import ReIDLossBuilder
    loss_function = ReIDLossBuilder(
        loss_functions=config.get("LOSS.LOSSES"),
        loss_lambda=config.get("LOSS.LOSS_LAMBDAS"),
        loss_kwargs=config.get("LOSS.LOSS_KWARGS"),
        **{"logger": logger})
    logger.info("Built loss function")

    # --------------------- INSTANTIATE LOSS OPTIMIZER --------------
    # LOSS_OPTIMIZER.* settings fall back to the OPTIMIZER.* equivalents.
    from optimizer.StandardLossOptimizer import StandardLossOptimizer as loss_optimizer
    LOSS_OPT = loss_optimizer(
        base_lr=config.get("LOSS_OPTIMIZER.BASE_LR",
                           config.get("OPTIMIZER.BASE_LR")),
        lr_bias=config.get("LOSS_OPTIMIZER.LR_BIAS_FACTOR",
                           config.get("OPTIMIZER.LR_BIAS_FACTOR")),
        weight_decay=config.get("LOSS_OPTIMIZER.WEIGHT_DECAY",
                                config.get("OPTIMIZER.WEIGHT_DECAY")),
        weight_bias=config.get("LOSS_OPTIMIZER.WEIGHT_BIAS_FACTOR",
                               config.get("OPTIMIZER.WEIGHT_BIAS_FACTOR")),
        gpus=NUM_GPUS)
    loss_optimizer = LOSS_OPT.build(
        loss_builder=loss_function,
        name=config.get("LOSS_OPTIMIZER.OPTIMIZER_NAME",
                        config.get("OPTIMIZER.OPTIMIZER_NAME")),
        **json.loads(
            config.get("LOSS_OPTIMIZER.OPTIMIZER_KWARGS",
                       config.get("OPTIMIZER.OPTIMIZER_KWARGS"))))
    logger.info("Built loss optimizer")

    # --------------------- INSTANTIATE OPTIMIZER ------------------------
    optimizer_builder = __import__("optimizer", fromlist=["*"])
    optimizer_builder = getattr(
        optimizer_builder,
        config.get("EXECUTION.OPTIMIZER_BUILDER", "OptimizerBuilder"))
    logger.info("Loaded {} from {} to build Optimizer model".format(
        config.get("EXECUTION.OPTIMIZER_BUILDER", "OptimizerBuilder"),
        "optimizer"))
    OPT = optimizer_builder(
        base_lr=config.get("OPTIMIZER.BASE_LR"),
        lr_bias=config.get("OPTIMIZER.LR_BIAS_FACTOR"),
        weight_decay=config.get("OPTIMIZER.WEIGHT_DECAY"),
        weight_bias=config.get("OPTIMIZER.WEIGHT_BIAS_FACTOR"),
        gpus=NUM_GPUS)
    optimizer = OPT.build(
        reid_model, config.get("OPTIMIZER.OPTIMIZER_NAME"),
        **json.loads(config.get("OPTIMIZER.OPTIMIZER_KWARGS")))
    logger.info("Built optimizer")

    # --------------------- INSTANTIATE SCHEDULER ------------------------
    try:  # We first check if scheduler is part of torch's provided schedulers.
        scheduler = __import__('torch.optim.lr_scheduler',
                               fromlist=['lr_scheduler'])
        scheduler = getattr(scheduler, config.get("SCHEDULER.LR_SCHEDULER"))
    except (
            ModuleNotFoundError, AttributeError
    ):  # If it fails, then we try to import from schedulers implemented in scheduler/ folder
        scheduler_ = config.get("SCHEDULER.LR_SCHEDULER")
        scheduler = __import__("scheduler." + scheduler_,
                               fromlist=[scheduler_])
        scheduler = getattr(scheduler, scheduler_)
    scheduler = scheduler(optimizer,
                          last_epoch=-1,
                          **json.loads(config.get("SCHEDULER.LR_KWARGS")))
    logger.info("Built scheduler")

    # ------------------- INSTANTIATE LOSS SCHEEDULER ---------------------
    loss_scheduler = None
    if loss_optimizer is not None:  # In case loss has no differentiable paramters
        try:
            loss_scheduler = __import__('torch.optim.lr_scheduler',
                                        fromlist=['lr_scheduler'])
            loss_scheduler = getattr(
                loss_scheduler,
                config.get("LOSS_SCHEDULER.LR_SCHEDULER",
                           config.get("SCHEDULER.LR_SCHEDULER")))
        except (ModuleNotFoundError, AttributeError):
            loss_scheduler_ = config.get("LOSS_SCHEDULER.LR_SCHEDULER",
                                         config.get("SCHEDULER.LR_SCHEDULER"))
            loss_scheduler = __import__("scheduler." + loss_scheduler_,
                                        fromlist=[loss_scheduler_])
            loss_scheduler = getattr(loss_scheduler, loss_scheduler_)
        loss_scheduler = loss_scheduler(
            loss_optimizer,
            last_epoch=-1,
            **json.loads(
                config.get("LOSS_SCHEDULER.LR_KWARGS",
                           config.get("SCHEDULER.LR_KWARGS"))))
        logger.info("Built loss scheduler")
    else:
        loss_scheduler = None

    # ---------------------------- SETUP BACKUP PATH -------------------------
    if DRIVE_BACKUP:
        fl_list = glob.glob(os.path.join(CHECKPOINT_DIRECTORY, "*.pth"))
    else:
        fl_list = glob.glob(os.path.join(MODEL_SAVE_FOLDER, "*.pth"))
    # Extract epoch numbers from saved checkpoint filenames to find where to resume.
    _re = re.compile(r'.*epoch([0-9]+)\.pth')
    previous_stop = [
        int(item[1]) for item in [_re.search(item) for item in fl_list]
        if item is not None
    ]
    if len(previous_stop) == 0:
        previous_stop = 0
        logger.info("No previous stop detected. Will start from epoch 0")
    else:
        previous_stop = max(previous_stop) + 1
        logger.info(
            "Previous stop detected. Will attempt to resume from epoch %i" %
            previous_stop)

    # --------------------- PERFORM TRAINING ------------------------
    trainer = __import__("trainer", fromlist=["*"])
    trainer = getattr(trainer, config.get("EXECUTION.TRAINER", "SimpleTrainer"))
    logger.info("Loaded {} from {} to build Trainer".format(
        config.get("EXECUTION.TRAINER", "SimpleTrainer"), "trainer"))

    loss_stepper = trainer(model=reid_model,
                           loss_fn=loss_function,
                           optimizer=optimizer,
                           loss_optimizer=loss_optimizer,
                           scheduler=scheduler,
                           loss_scheduler=loss_scheduler,
                           train_loader=train_generator.dataloader,
                           test_loader=test_generator.dataloader,
                           queries=QUERY_CLASSES,
                           epochs=config.get("EXECUTION.EPOCHS"),
                           logger=logger,
                           crawler=crawler)
    loss_stepper.setup(step_verbose=config.get("LOGGING.STEP_VERBOSE"),
                       save_frequency=config.get("SAVE.SAVE_FREQUENCY"),
                       test_frequency=config.get("EXECUTION.TEST_FREQUENCY"),
                       save_directory=MODEL_SAVE_FOLDER,
                       save_backup=DRIVE_BACKUP,
                       backup_directory=CHECKPOINT_DIRECTORY,
                       gpus=NUM_GPUS,
                       fp16=config.get("OPTIMIZER.FP16"),
                       model_save_name=MODEL_SAVE_NAME,
                       logger_file=LOGGER_SAVE_NAME)
    if mode == 'train':
        loss_stepper.train(continue_epoch=previous_stop)
    elif mode == 'test':
        loss_stepper.evaluate()
    else:
        raise NotImplementedError()
def __init__(self, *args, **kwargs):
    """Initialize the market websocket, then attach a component-specific logger."""
    super().__init__(*args, **kwargs)
    market_logger = generate_logger('bitmexWS_Market')
    self.logger = market_logger
def main_worker(gpu, args):
    """Per-process worker: model training, testing, JIT conversion and
    distillation-file creation (ranking/EMD/regression variant).

    :param gpu: id of the GPU this worker runs on
    :param args: run-time hyper-parameters / options
    """
    args.gpu = gpu

    # One log file per worker, stamped with timestamp and GPU id.
    utils.generate_logger(
        f"{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}-{gpu}.log")
    logging.info(f'args: {args}')

    # Reproducibility: seeding enables deterministic CUDNN (slower).
    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        logging.warning('You have chosen to seed training. '
                        'This will turn on the CUDNN deterministic setting, '
                        'which can slow down your training considerably! '
                        'You may see unexpected behavior when restarting '
                        'from checkpoints.')

    if args.cuda:
        logging.info(f"Use GPU: {args.gpu} ~")
        if args.distributed:
            # Global rank = node rank * GPUs per node + local GPU id.
            args.rank = args.rank * args.gpus + gpu
            dist.init_process_group(backend='nccl',
                                    init_method=args.init_method,
                                    world_size=args.world_size,
                                    rank=args.rank)
    else:
        logging.info(f"Use CPU ~")

    # Create/load the model. When using a pretrained model, download it in
    # advance into the `pretrained` folder, named after the network.
    logging.info(f"=> creating model '{args.arch}'")
    model = my_models.get_model(args.arch,
                                args.pretrained,
                                num_classes=args.num_classes)

    # Reload a previously trained checkpoint (loaded on CPU first).
    if args.resume:
        if os.path.isfile(args.resume):
            logging.info(f"=> loading checkpoint '{args.resume}'")
            checkpoint = torch.load(args.resume,
                                    map_location=torch.device('cpu'))
            acc = model.load_state_dict(checkpoint['state_dict'])
            logging.info(f'missing keys of models: {acc.missing_keys}')
            del checkpoint
        else:
            raise Exception(
                f"No checkpoint found at '{args.resume}' to be resumed")

    # Model info.
    image_height, image_width = args.image_size
    logging.info(
        f'Model {args.arch} input size: ({image_height}, {image_width})')
    utils.summary(size=(image_height, image_width), channel=3, model=model)

    # Model conversion: to torch.jit.script (requires a resumed checkpoint).
    if args.jit:
        if not args.resume:
            raise Exception('Option --resume must specified!')
        applications.convert_to_jit(model, args=args)
        return

    if args.criterion == 'rank':
        criterion = criterions.RankingLoss(args=args)  # contrastive ranking loss
    elif args.criterion == 'emd':
        criterion = criterions.EMDLoss()  # earth mover's distance loss
    elif args.criterion == 'regress':
        criterion = criterions.RegressionLoss()  # MSE regression loss
    else:
        raise NotImplementedError(
            f'Not loss function {args.criterion},only (rank, emd, regress)!')

    if args.cuda:
        if args.distributed and args.sync_bn:
            model = apex.parallel.convert_syncbn_model(model)
        torch.cuda.set_device(args.gpu)
        model.cuda(args.gpu)
        criterion = criterion.cuda(args.gpu)

    # Optimizer: Adam > SGD > SWA(SGD > Adam)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    # Alternative optimizers to try:
    # optimizer = torch.optim.SGD(model.parameters(),
    #                             args.lr, momentum=args.momentum,
    #                             weight_decay=args.weight_decay)
    # from optim.torchtools.optim import RangerLars, Ralamb, Novograd, LookaheadAdam, Ranger, RAdam, AdamW
    # optimizer = RangerLars(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    # optimizer = Ralamb(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    # optimizer = Novograd(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    # optimizer = LookaheadAdam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    # optimizer = Ranger(model_params, lr=args.lr, weight_decay=args.weight_decay)
    # optimizer = RAdam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    # optimizer = AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    # Stochastic weight averaging optimizer (optional).
    # from optim.swa import SWA
    # optimizer = SWA(optimizer, swa_start=10, swa_freq=5, swa_lr=0.05)

    # Mixed-precision training (apex amp O1).
    if args.cuda:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
        model = DDP(model)
    else:
        model = torch.nn.DataParallel(model)

    if args.train:
        train_loader = dataloader.load(args, 'train')
        val_loader = dataloader.load(args, 'val')
        scheduler = LambdaLR(
            optimizer, lambda epoch: adjust_learning_rate(epoch, args=args))
        applications.train(train_loader, val_loader, model, criterion,
                           optimizer, scheduler, args)
        # After training, fall through to evaluation.
        args.evaluate = True

    if args.evaluate:
        torch.set_flush_denormal(True)
        test_loader = dataloader.load(args, name='test')
        acc, loss, test_results = applications.test(test_loader, model,
                                                    criterion, args)
        logging.info(f'Evaluation: * Acc@1 {acc:.3f} and loss {loss:.3f}.')
        logging.info(f'Evaluation results:')
        for result in test_results:
            logging.info(' '.join([str(r) for r in result]))
        logging.info('Evaluation Over~')
def main():
    """Fit CCA on QA features and compute clusters for train and val splits.

    Depending on --cca, CCA is fit on the full dataset ('QA_full*') or on the
    human-relevance subset ('QA_human*'); the '*_trainval' variants also fold
    the validation features into the training views.
    """
    args = config.get_args()

    # set-up file structures and create logger
    log, args.results_dir = generate_logger(args, args.results_dir)
    print_and_log(log, args)

    # basic/cuda set-up
    torch.manual_seed(SEED)
    if args.gpu >= 0:
        assert args.gpu <= torch.cuda.device_count()
        torch.cuda.manual_seed(SEED)
        torch.cuda.set_device(args.gpu)
        cudnn.enabled = True
        cudnn.benchmark = True
    else:
        print_and_log(
            log, 'on cpu. you should probably use gpus with --gpu=GPU_idx')

    ####################################### load datasets #####################################
    print_and_log(log, '-' * 100)
    train_loader = get_dataloader('train', args)
    val_loader = get_dataloader(
        'val', args, shared_dictionary=train_loader.dataset.dictionary)
    print_and_log(log, '-' * 100)

    ####################################### do CCA #####################################
    # flatten features for CCA
    if 'QA_full' in args.cca:  # CCA on full dataset
        flat_train_features = get_flat_features(train_loader, args)
        if args.cca == 'QA_full_trainval':
            flat_val_features = get_flat_features(val_loader, args)
            flat_train_features = [
                torch.cat((flat_train_features[i], flat_val_features[i]),
                          dim=0) for i in range(len(flat_val_features))
            ]
    elif 'QA_human' in args.cca:  # CCA on H_t subset (i.e. subset with human relevance scores)
        flat_train_features = get_flat_features(train_loader,
                                                args,
                                                human_set_only=True)
        if args.cca == 'QA_human_trainval':
            flat_val_features = get_flat_features(val_loader,
                                                  args,
                                                  human_set_only=True)
            flat_train_features = [
                torch.cat((flat_train_features[i], flat_val_features[i]),
                          dim=0) for i in range(len(flat_val_features))
            ]
    else:
        # BUGFIX: previously an unrecognized --cca fell through with
        # flat_train_features unbound, crashing later with an opaque NameError.
        raise ValueError('cannot handle this CCA setting - check --cca! got: %s'
                         % args.cca)

    # do CCA
    lambdas, proj_mtxs = compute_cca(flat_train_features, k=args.k)

    # get train projections using learned weights
    train_projections = [
        get_projection(v, mtx, lambdas, args.p)
        for (v, mtx) in zip(flat_train_features, proj_mtxs)
    ]
    proj_train_mus = [
        proj.mean(dim=0).view(-1, 1) for proj in train_projections
    ]
    # Free the large intermediates before clustering.
    del flat_train_features, train_projections

    # compute clusters
    compute_clusters(train_loader, lambdas, proj_mtxs, proj_train_mus, args,
                     log)
    compute_clusters(val_loader, lambdas, proj_mtxs, proj_train_mus, args, log)
def main(config, mode, weights):
    """Entry point for VAEGAN training/testing.

    Loads the YAML config, builds class-filtered MNIST/CIFAR generators, a
    VAEGAN model, per-submodel optimizers and schedulers (resolved dynamically
    by name from the config), resumes from the last saved epoch if checkpoints
    exist, then trains or evaluates depending on *mode*.

    :param config: path to the YAML configuration file
    :param mode: 'train' or 'test'
    :param weights: model weights path (loaded fully in test mode)
    """
    # Generate configuration
    cfg = kaptan.Kaptan(handler='yaml')
    config = cfg.import_config(config)

    # Generate logger
    MODEL_SAVE_NAME, MODEL_SAVE_FOLDER, LOGGER_SAVE_NAME, CHECKPOINT_DIRECTORY = utils.generate_save_names(
        config)
    logger = utils.generate_logger(MODEL_SAVE_FOLDER, LOGGER_SAVE_NAME)

    logger.info("*" * 40)
    logger.info("")
    logger.info("")
    logger.info("Using the following configuration:")
    logger.info(config.export("yaml", indent=4))
    logger.info("")
    logger.info("")
    logger.info("*" * 40)

    """ SETUP IMPORTS """
    #from crawlers import ReidDataCrawler
    #from generators import SequencedGenerator
    #from loss import LossBuilder

    NORMALIZATION_MEAN, NORMALIZATION_STD, RANDOM_ERASE_VALUE = utils.fix_generator_arguments(
        config)
    TRAINDATA_KWARGS = {
        "rea_value": config.get("TRANSFORMATION.RANDOM_ERASE_VALUE")
    }

    """ Load previousely saved logger, if it exists """
    DRIVE_BACKUP = config.get("SAVE.DRIVE_BACKUP")
    if DRIVE_BACKUP:
        backup_logger = os.path.join(CHECKPOINT_DIRECTORY, LOGGER_SAVE_NAME)
        if os.path.exists(backup_logger):
            shutil.copy2(backup_logger, ".")
    else:
        backup_logger = None

    NUM_GPUS = torch.cuda.device_count()
    if NUM_GPUS > 1:
        raise RuntimeError("Not built for multi-GPU. Please start with single-GPU.")
    logger.info("Found %i GPUs" % NUM_GPUS)

    # --------------------- BUILD GENERATORS ------------------------
    # Supported integrated data sources --> MNIST, CIFAR
    # For BDD or others need a crawler and stuff...but we;ll deal with it later
    from generators import ClassedGenerator

    load_dataset = config.get("EXECUTION.DATASET_PRELOAD")
    if load_dataset in ["MNIST", "CIFAR10", "CIFAR100"]:
        # For built-in datasets, the dataset name itself stands in for a crawler.
        crawler = load_dataset
        #dataset = torchvision.datasets.MNIST(root="./MNIST", train=True,)
        logger.info("No crawler necessary when using %s dataset" % crawler)
    else:
        raise NotImplementedError()

    train_generator = ClassedGenerator( gpus=NUM_GPUS, i_shape=config.get("DATASET.SHAPE"), \
                            normalization_mean=NORMALIZATION_MEAN, normalization_std=NORMALIZATION_STD, normalization_scale=1./config.get("TRANSFORMATION.NORMALIZATION_SCALE"), \
                            h_flip = config.get("TRANSFORMATION.H_FLIP"), t_crop=config.get("TRANSFORMATION.T_CROP"), rea=config.get("TRANSFORMATION.RANDOM_ERASE"),
                            **TRAINDATA_KWARGS)
    train_generator.setup( crawler, preload_classes = config.get("EXECUTION.DATASET_PRELOAD_CLASS"), \
                            mode='train',batch_size=config.get("TRANSFORMATION.BATCH_SIZE"), \
                            workers = config.get("TRANSFORMATION.WORKERS"))
    logger.info("Generated training data generator")

    test_generator = ClassedGenerator( gpus=NUM_GPUS, i_shape=config.get("DATASET.SHAPE"), \
                            normalization_mean=NORMALIZATION_MEAN, normalization_std=NORMALIZATION_STD, normalization_scale=1./config.get("TRANSFORMATION.NORMALIZATION_SCALE"), \
                            h_flip = config.get("TRANSFORMATION.H_FLIP"), t_crop=config.get("TRANSFORMATION.T_CROP"), rea=config.get("TRANSFORMATION.RANDOM_ERASE"),
                            **TRAINDATA_KWARGS)
    test_generator.setup( crawler, preload_classes = config.get("EXECUTION.DATASET_TEST_PRELOAD_CLASS"), \
                            mode='test',batch_size=config.get("TRANSFORMATION.BATCH_SIZE"), \
                            workers = config.get("TRANSFORMATION.WORKERS"))
    logger.info("Generated testing data generator")

    # --------------------- INSTANTIATE MODEL ------------------------
    model_builder = __import__("models", fromlist=["*"])
    model_builder = getattr(model_builder,
                            config.get("EXECUTION.MODEL_BUILDER"))
    logger.info("Loaded {} from {} to build VAEGAN model".format(
        config.get("EXECUTION.MODEL_BUILDER"), "models"))

    vaegan_model = model_builder( arch=config.get("MODEL.ARCH"), base=config.get("MODEL.BASE"), \
                            latent_dimensions = config.get("MODEL.LATENT_DIMENSIONS"), \
                            **json.loads(config.get("MODEL.MODEL_KWARGS")))
    logger.info("Finished instantiating model")

    if mode == "test":
        # Full weight load; switch to eval mode for inference.
        vaegan_model.load_state_dict(torch.load(weights))
        vaegan_model.cuda()
        vaegan_model.eval()
    else:
        vaegan_model.cuda()
        # Summarize each VAEGAN submodel separately.
        #logger.info(torchsummary.summary(vaegan_model, input_size=(config.get("TRANSFORMATION.CHANNELS"), *config.get("DATASET.SHAPE"))))
        logger.info(
            torchsummary.summary(
                vaegan_model.Encoder,
                input_size=(config.get("TRANSFORMATION.CHANNELS"),
                            *config.get("DATASET.SHAPE"))))
        logger.info(
            torchsummary.summary(
                vaegan_model.Decoder,
                input_size=(config.get("MODEL.LATENT_DIMENSIONS"), 1)))
        logger.info(
            torchsummary.summary(
                vaegan_model.LatentDiscriminator,
                input_size=(config.get("MODEL.LATENT_DIMENSIONS"), 1)))
        logger.info(
            torchsummary.summary(
                vaegan_model.Discriminator,
                input_size=(config.get("TRANSFORMATION.CHANNELS"),
                            *config.get("DATASET.SHAPE"))))

    # --------------------- INSTANTIATE LOSS ------------------------
    # ----------- NOT NEEDED. VAEGAN WILL USE BCE LOSS THROUGHOUT
    # loss_function = LossBuilder(loss_functions=config.get("LOSS.LOSSES"), loss_lambda=config.get("LOSS.LOSS_LAMBDAS"), loss_kwargs=config.get("LOSS.LOSS_KWARGS"), **{"logger":logger})
    # logger.info("Built loss function")

    # --------------------- INSTANTIATE OPTIMIZER ------------------------
    optimizer_builder = __import__("optimizer", fromlist=["*"])
    optimizer_builder = getattr(optimizer_builder,
                                config.get("EXECUTION.OPTIMIZER_BUILDER"))
    logger.info("Loaded {} from {} to build VAEGAN model".format(
        config.get("EXECUTION.OPTIMIZER_BUILDER"), "optimizer"))
    OPT = optimizer_builder(base_lr=config.get("OPTIMIZER.BASE_LR"))
    optimizer = OPT.build(
        vaegan_model, config.get("OPTIMIZER.OPTIMIZER_NAME"),
        **json.loads(config.get("OPTIMIZER.OPTIMIZER_KWARGS")))
    logger.info("Built optimizer")

    # --------------------- INSTANTIATE SCHEDULER ------------------------
    # Try torch's built-in schedulers first; fall back to the local scheduler/ package.
    try:
        scheduler = __import__('torch.optim.lr_scheduler',
                               fromlist=['lr_scheduler'])
        scheduler_ = getattr(scheduler, config.get("SCHEDULER.LR_SCHEDULER"))
    except (ModuleNotFoundError, AttributeError):
        scheduler_ = config.get("SCHEDULER.LR_SCHEDULER")
        scheduler = __import__("scheduler." + scheduler_,
                               fromlist=[scheduler_])
        scheduler_ = getattr(scheduler, scheduler_)
    # One scheduler per VAEGAN submodel (optimizer is presumably a dict keyed
    # the same way — confirm against the optimizer builder).
    scheduler = {}
    for base_model in [
            "Encoder", "Decoder", "Discriminator", "Autoencoder",
            "LatentDiscriminator"
    ]:
        scheduler[base_model] = scheduler_(
            optimizer[base_model],
            last_epoch=-1,
            **json.loads(config.get("SCHEDULER.LR_KWARGS")))
        logger.info("Built scheduler for {}".format(base_model))

    # --------------------- SETUP CONTINUATION ------------------------
    if DRIVE_BACKUP:
        fl_list = glob.glob(os.path.join(CHECKPOINT_DIRECTORY, "*.pth"))
    else:
        fl_list = glob.glob(os.path.join(MODEL_SAVE_FOLDER, "*.pth"))
    # Extract epoch numbers from saved checkpoint filenames to find where to resume.
    _re = re.compile(r'.*epoch([0-9]+)\.pth')
    previous_stop = [
        int(item[1]) for item in [_re.search(item) for item in fl_list]
        if item is not None
    ]
    if len(previous_stop) == 0:
        previous_stop = 0
    else:
        previous_stop = max(previous_stop) + 1
        logger.info(
            "Previous stop detected. Will attempt to resume from epoch %i" %
            previous_stop)

    # --------------------- INSTANTIATE TRAINER ------------------------
    Trainer = __import__("trainer", fromlist=["*"])
    Trainer = getattr(Trainer, config.get("EXECUTION.TRAINER"))
    logger.info("Loaded {} from {} to build VAEGAN model".format(
        config.get("EXECUTION.TRAINER"), "trainer"))

    loss_stepper = Trainer(model=vaegan_model,
                           loss_fn=None,
                           optimizer=optimizer,
                           scheduler=scheduler,
                           train_loader=train_generator.dataloader,
                           test_loader=test_generator.dataloader,
                           epochs=config.get("EXECUTION.EPOCHS"),
                           batch_size=config.get("TRANSFORMATION.BATCH_SIZE"),
                           latent_size=config.get("MODEL.LATENT_DIMENSIONS"),
                           logger=logger)
    loss_stepper.setup(step_verbose=config.get("LOGGING.STEP_VERBOSE"),
                       save_frequency=config.get("SAVE.SAVE_FREQUENCY"),
                       test_frequency=config.get("EXECUTION.TEST_FREQUENCY"),
                       save_directory=MODEL_SAVE_FOLDER,
                       save_backup=DRIVE_BACKUP,
                       backup_directory=CHECKPOINT_DIRECTORY,
                       gpus=NUM_GPUS,
                       fp16=config.get("OPTIMIZER.FP16"),
                       model_save_name=MODEL_SAVE_NAME,
                       logger_file=LOGGER_SAVE_NAME)
    if mode == 'train':
        loss_stepper.train(continue_epoch=previous_stop)
    elif mode == 'test':
        loss_stepper.evaluate()
    else:
        raise NotImplementedError()