def main():
    """CLI entry point: load config, build the SPOS supernet from a checkpoint,
    and run (or test) the genetic architecture search."""
    cli = argparse.ArgumentParser(description="PyTorch Deep Learning")
    cli.add_argument("--config", default="", help="path to config file", type=str)
    cli.add_argument('--test', action='store_true', help='testing the algorithm')
    cli.add_argument("opts",
                     help="Modify config options using the command-line",
                     default=None,
                     nargs=argparse.REMAINDER)
    cli_args = cli.parse_args()

    # Config file first, then command-line overrides on top of it.
    if cli_args.config != "":
        cfg.merge_from_file(cli_args.config)
    cfg.merge_from_list(cli_args.opts)

    build_output(cfg, cli_args.config)
    log = setup_logger(cfg.OUTPUT_DIR)
    deploy_macro(cfg)

    loaders = LoaderFactory.produce(cfg)
    supernet = SPOS(cfg)
    # Search operates on a pre-trained supernet; cfg.RESUME points at its weights.
    supernet.load(path=cfg.RESUME)

    # Dispatch: --test runs the search's test variant, otherwise the real search.
    search = test_genetic_search if cli_args.test else genetic_search
    search(cfg, supernet, loaders['val'], loaders['train'], log)
def main():
    """Evaluate a trained MBMRL model on a (possibly unseen) test task.

    Reads the module-level ``cfgs`` dict, reloads the configs saved during
    training, builds the dynamics net + controller, and calls ``algo.test``.
    """
    global cfgs
    parser = ArgumentParser()
    parser.add_argument('--exp-name', type=str, default='default')
    parser.add_argument('--env', type=str)
    parser.add_argument('--controller', type=str, default='mppi')
    parser.add_argument('--train-seed', type=int, default=0)
    parser.add_argument('--snapshot-mode', type=str, default='last')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--iter', type=int, default=None)
    parser.add_argument('--render', default=False, action='store_true')
    parser.add_argument('--num-threads', type=int, default=1)
    parser.add_argument('--debug', default=False, action='store_true')
    args = parser.parse_args()

    # NOTE(review): 'test_task' is set BEFORE cfgs.update(load_cfgs(...)) —
    # if the saved configs also contain a 'test_task' key it would overwrite
    # the CLI value; confirm the saved configs never carry that key.
    cfgs['test_task'] = args.env
    # Locate the training run's log dir (first_time=False: reuse, don't create).
    train_log_dir = create_log_dir(first_time=False,
                                   exp_prefix=args.exp_name,
                                   seed=args.train_seed)
    cfgs.update(load_cfgs(train_log_dir))
    logger = setup_logger(first_time=False,
                          exp_prefix=args.exp_name,
                          seed=args.train_seed,
                          cfgs=cfgs,
                          snapshot_mode=args.snapshot_mode,
                          snapshot_gap=None)

    test_task = gym.make(env_dict[cfgs['test_task']])
    sample_train_task = gym.make(env_dict[cfgs['train_tasks'][0]])
    # check if test task has same dimensions of observation and action as training tasks
    ob_shape, ac_shape = check_task([test_task, sample_train_task])
    del sample_train_task

    # Dynamics model maps (obs, action) -> next obs.
    model = Net(input_shape=ob_shape + ac_shape,
                output_shape=ob_shape,
                **cfgs['net'])
    controller = controller_dict[args.controller](num_threads=args.num_threads,
                                                  **cfgs['controller'])
    # tasks=None: no training tasks needed at evaluation time.
    algo = MBMRL(None, model, controller, logger, num_threads=1, **cfgs['train'])
    algo.test(test_task,
              load_iter=args.iter,
              render=args.render,
              debug=args.debug,
              **cfgs['test'])
def main():
    """Smoke-test mixed-precision (apex O3) on a single validation batch:
    build graph + solver, wrap with amp, run one forward/backward/step."""
    ap = argparse.ArgumentParser(description="PyTorch Deep Learning")
    ap.add_argument("--config", default="", help="path to config file", type=str)
    ap.add_argument('--products', action='store_true', help='list available products in all factories')
    ap.add_argument("opts",
                    help="Modify config options using the command-line",
                    default=None,
                    nargs=argparse.REMAINDER)
    cli = ap.parse_args()

    if cli.products:
        show_products()
    if cli.config != "":
        cfg.merge_from_file(cli.config)
    cfg.merge_from_list(cli.opts)

    build_output(cfg, cli.config)
    logger = setup_logger(cfg.OUTPUT_DIR)
    deploy_macro(cfg)

    loader = LoaderFactory.produce(cfg)
    graph = GraphFactory.produce(cfg)
    solver = Solver(cfg, graph.model.named_parameters())

    # Model must be on GPU before amp.initialize.
    graph.model = graph.model.cuda()
    first_param = next(iter(graph.model.parameters()))
    print(first_param.is_cuda)

    graph.model, solver.opt = amp.initialize(graph.model,
                                             solver.opt,
                                             opt_level='O3',
                                             keep_batchnorm_fp32=True)
    # Multi-GPU wrapping happens AFTER amp.initialize, per apex convention.
    graph.use_multigpu()
    first_param = next(iter(graph.model.parameters()))
    print(first_param.dtype)

    # One batch is enough for the smoke test.
    batch = next(iter(loader['val']))
    batch = {name: tensor.cuda() for name, tensor in batch.items()}

    solver.zero_grad()
    outputs = graph.model(batch['inp'])
    loss, losses = graph.loss_head(outputs, batch)
    # amp scales the loss to avoid fp16 underflow before backward.
    with amp.scale_loss(loss, solver.opt) as scaled_loss:
        scaled_loss.backward()
    solver.step()
def train_with_tune(config, reporter):
    """Run one Ray-Tune trial: patch the sampled hyper-parameters into the
    global ``cfg``, train, and report the resulting accuracy.

    Args:
        config: dict of sampled hyper-parameters ('momentum', 'lr',
            'restart_period'); values may be numpy scalars.
        reporter: Tune reporter callback; called with mean_accuracy.
    """
    build_output(cfg, args.config)
    logger = setup_logger(cfg.OUTPUT_DIR)
    logger.info(cfg.OUTPUT_DIR)
    # Fix: np.asscalar() was deprecated in NumPy 1.16 and removed in 1.23.
    # float()/int() accept both numpy scalars and plain Python numbers.
    cfg.SOLVER.MOMENTUM = float(config['momentum'])
    cfg.SOLVER.BASE_LR = float(config['lr'])
    cfg.SOLVER.WARMRESTART_PERIOD = int(config['restart_period'])
    trainer = get_trainer(cfg.TRAINER)(cfg)
    trainer.train()
    acc = trainer.acc
    reporter(mean_accuracy=acc)
def main():
    """CLI entry point: build a trainer from config and run train (or test).

    On any training failure, optionally backs up the current checkpoint,
    logs the traceback, and exits with status 1.
    """
    parser = argparse.ArgumentParser(description="PyTorch Deep Learning")
    parser.add_argument("--config", default="", help="path to config file", type=str)
    parser.add_argument('--products', action='store_true', help='list available products in all factories')
    parser.add_argument('--cfg', action='store_true', help='list available configs in YAML')
    parser.add_argument("opts",
                        help="Modify config options using the command-line",
                        default=None,
                        nargs=argparse.REMAINDER)
    args = parser.parse_args()
    if args.products:
        show_products()
    if args.cfg:
        show_configs()

    # Config file first, then command-line overrides.
    if args.config != "":
        cfg.merge_from_file(args.config)
    cfg.merge_from_list(args.opts)

    build_output(cfg, args.config)
    logger = setup_logger(cfg.OUTPUT_DIR)
    deploy_macro(cfg)

    trainer = TrainerFactory.produce(cfg)
    logger.info("Running with config")
    if cfg.EVALUATE:
        trainer.test()
        sys.exit()
    try:
        trainer.train()
    # Fix: the original bare `except:` also swallowed SystemExit and
    # KeyboardInterrupt; catch Exception so Ctrl-C / sys.exit propagate.
    except Exception:
        logger.info("Unexpected Error Occurred")
        if cfg.SAVE:
            logger.info("Back up the Checkpoint")
            trainer.graph.save(trainer.graph.save_path, trainer.graph.model,
                               trainer.graph.sub_models, trainer.solvers,
                               trainer.engine.epoch, trainer.engine.accu)
        logger.info(traceback.format_exc())
        sys.exit(1)
def main():
    """Train MBMRL on a set of gym tasks given on the command line.

    Uses the module-level ``cfgs`` dict for net/controller/train settings;
    when --resume is given, configs are reloaded from the existing log dir.
    """
    global cfgs
    parser = ArgumentParser()
    parser.add_argument('--exp-name', type=str, default='default')
    parser.add_argument('--env', nargs='+', type=str)
    parser.add_argument('--controller', type=str, default='mppi')
    parser.add_argument('--snapshot-mode', type=str, default='last')
    parser.add_argument('--snapshot-gap', type=int, default=1)
    parser.add_argument('--resume', default=False, action='store_true')
    # NOTE(review): --iter is parsed but never used in this function — confirm
    # whether it was meant to be forwarded (cf. the test entry point).
    parser.add_argument('--iter', type=int, default=None)
    parser.add_argument('--num-threads', type=int, default=1)
    args = parser.parse_args()

    cfgs['train_tasks'] = args.env
    # NOTE(review): `train_cfg` is not defined in this function — presumably a
    # module-level config dict; verify it exists alongside `cfgs`.
    logger = setup_logger(first_time=not args.resume,
                          exp_prefix=args.exp_name,
                          seed=train_cfg['seed'],
                          cfgs=cfgs,
                          snapshot_mode=args.snapshot_mode,
                          snapshot_gap=args.snapshot_gap)
    # load configs when resume training
    if args.resume:
        cfgs = load_cfgs(logger.get_log_dir())

    train_tasks = [gym.make(env_dict[t]) for t in cfgs['train_tasks']]
    # get shape of task space
    ob_shape, ac_shape = check_task(train_tasks)
    # build dynamics network
    model = Net(input_shape=ob_shape + ac_shape,
                output_shape=ob_shape,
                **cfgs['net'])
    # build controller for adaptation
    controller = controller_dict[args.controller](**cfgs['controller'])
    # train model-based meta RL
    algo = MBMRL(tasks=train_tasks,
                 model=model,
                 controller=controller,
                 logger=logger,
                 num_threads=args.num_threads,
                 **cfgs['train'])
    # NOTE(review): calls algo.debug() rather than a train() method — confirm
    # this is intentional and not a leftover from debugging.
    algo.debug()
def test_benchmark(model_path, cfg, logger=None):
    """Evaluate a saved person-search checkpoint: detection + re-id retrieval.

    Args:
        model_path: path to a checkpoint containing a 'model' state dict.
        cfg: project config node (datasets, output dir, model definition).
        logger: optional logger; created under cfg.OUTPUT_DIR when omitted.

    Returns:
        (det_rate, det_ap, mAP, top1_acc) for the gallery/probe evaluation.
    """
    model = build_model.build(cfg)
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['model'])
    model = model.cuda()
    model.eval()
    model_name = model_path.split('/')[-1]
    dataset_name = cfg.DATASETS.NAMES
    # Fix: use identity comparison with None (`== None` is non-idiomatic and
    # can misbehave for objects overriding __eq__).
    if logger is None:
        logger = setup_logger("person search", cfg.OUTPUT_DIR,
                              'TEST-{}.txt'.format(model_name), 0)
        logger.info(cfg)
    print(Color('B') + '')
    logger.info("start test")
    logger.info("dataset:{0},model_path:{1}".format(dataset_name, model_path))

    gallery_loader = get_data_loader(cfg, mode='test')
    query_loader = get_data_loader(cfg, mode='probe')
    # Extract detections + features for gallery images and probe queries.
    imgnames_with_boxes, boxes_feats, probe_feats = inference(
        model, gallery_loader, query_loader, 'cuda')

    precision, recall, det_rate, det_ap = detection_performance_calc(
        gallery_loader.dataset,
        imgnames_with_boxes.values(),
        det_thresh=0.01,
        logger=logger)
    ret = gallery_loader.dataset.search_performance_calc(
        gallery_loader.dataset,
        query_loader.dataset,
        imgnames_with_boxes.values(),
        boxes_feats,
        probe_feats,
        det_thresh=0.5,
        gallery_size=gallery_loader.dataset.test_size,
        logger=logger)
    return det_rate, det_ap, ret['mAP'], ret['accs'][0]
def main():
    """Train the SPOS supernet while an Evolution process concurrently
    maintains a candidate-architecture pool in shared memory.

    Per epoch: spawn the pool-maintenance process, train one epoch against the
    shared pool, join the process, evaluate, and checkpoint.
    """
    parser = argparse.ArgumentParser(description="PyTorch Deep Learning")
    parser.add_argument("--config", default="", help="path to config file", type=str)
    parser.add_argument('--list', action='store_true', help='list available config in factories')
    parser.add_argument("opts",
                        help="Modify config options using the command-line",
                        default=None,
                        nargs=argparse.REMAINDER)
    args = parser.parse_args()
    if args.config != "":
        cfg.merge_from_file(args.config)
    cfg.merge_from_list(args.opts)
    build_output(cfg, args.config)
    logger = setup_logger(cfg.OUTPUT_DIR)
    deploy_macro(cfg)

    loader = LoaderFactory.produce(cfg)
    graph = SPOS(cfg)
    evolution = Evolution(cfg=cfg,
                          graph=graph,
                          num_batches=len(loader['train']),
                          logger=logger)
    graph.use_multigpu()
    solver = Solver(cfg, graph.model.named_parameters())

    # Manager-backed list/lock are shared between this process and the
    # evolution.maintain worker spawned each epoch.
    manager = multiprocessing.Manager()
    cand_pool = manager.list()
    lock = manager.Lock()
    best_accu = 0.0
    for epoch in range(cfg.SOLVER.START_EPOCH, cfg.SOLVER.MAX_EPOCHS):
        # Shared flag: train_once sets it so the pool process can stop.
        finished = Value(c_bool, False)
        pool_process = multiprocessing.Process(
            target=evolution.maintain,
            args=[
                # Candidate search only activates EPOCH_TO_CS epochs in.
                epoch - cfg.SPOS.EPOCH_TO_CS, cand_pool, lock, finished, logger
            ])
        pool_process.start()
        train_once(logger,
                   epoch,
                   graph,
                   loader['train'],
                   solver,
                   pool=cand_pool,
                   pool_lock=lock,
                   shared_finished_flag=finished)
        pool_process.join()
        test_accu = test_once(logger, epoch, graph, loader['val'],
                              loader['train'])
        if test_accu > best_accu:
            best_accu = test_accu
            logger.info(
                f"Epoch [{epoch:03}] Best Accuracy [{best_accu:3.3f}]")
        # NOTE(review): checkpoint is written every epoch with the current
        # metric (not only on improvement) — confirm this is intended.
        graph.save(graph.save_path,
                   graph.model,
                   solvers={'main': solver},
                   epoch=epoch,
                   metric=test_accu)
def main():
    """Distributed (torch.distributed.launch, NCCL) training entry point.

    Rank 0 owns output-dir creation, logging, and checkpointing; other ranks
    wait briefly, reuse the existing output path, and disable IO/SAVE.
    """
    parser = argparse.ArgumentParser(description="PyTorch Deep Learning")
    parser.add_argument(
        "--local_rank",
        type=int,
        help=
        "Local rank. Necessary for using the torch.distributed.launch utility."
    )
    parser.add_argument("--config", default="", help="path to config file", type=str)
    parser.add_argument('--products', action='store_true', help='list available products in all factories')
    parser.add_argument('--cfg', action='store_true', help='list available configs in YAML')
    parser.add_argument("opts",
                        help="Modify config options using the command-line",
                        default=None,
                        nargs=argparse.REMAINDER)
    args = parser.parse_args()
    if args.products:
        show_products()
    if args.cfg:
        show_configs()

    dist.init_process_group(backend='nccl')
    rank = dist.get_rank()
    # Fix: was `assert rank == args.local_rank` — asserts are stripped under
    # `python -O`; validate explicitly so the mismatch always fails loudly.
    if rank != args.local_rank:
        raise RuntimeError(
            "process rank {} does not match --local_rank {}".format(
                rank, args.local_rank))

    if args.config != "":
        cfg.merge_from_file(args.config)
    cfg.merge_from_list(args.opts)

    if args.local_rank != 0:
        # Give rank 0 time to create the output dir, then attach to it.
        time.sleep(5)
        cfg.IO = False
        cfg.SAVE = False
        build_output(cfg, args.config, find_existing_path=True)
        logger = logging.getLogger("Logger")
    else:
        build_output(cfg, args.config)
        logger = setup_logger(cfg.OUTPUT_DIR)
    deploy_macro(cfg)
    logger.info(f"Rank [{rank}] Start!")

    device = torch.device("cuda:{}".format(rank))
    torch.cuda.set_device(device)
    trainer = TrainerFactory.produce(cfg)
    if cfg.EVALUATE:
        trainer.test()
        sys.exit()
    try:
        trainer.train()
    # Fix: the original bare `except:` also swallowed SystemExit and
    # KeyboardInterrupt; catch Exception so Ctrl-C / sys.exit propagate.
    except Exception:
        logger.info("Unexpected Error Occurred")
        if cfg.SAVE:
            logger.info("Back up the Checkpoint")
            trainer.graph.save(trainer.graph.save_path, trainer.graph.model,
                               trainer.graph.sub_models, trainer.solvers,
                               trainer.engine.epoch, trainer.engine.accu)
        logger.info(traceback.format_exc())
        sys.exit(1)
def main(config):
    """Adaptation-ReID driver: warmup (reid → GAN → adaptation), periodic
    save/visualize/test, then iterative self-training; or pure test mode.
    """
    # loaders and base
    loaders = Loaders(config)
    base = Base(config, loaders)
    # make dirs
    make_dirs(config.save_images_path)
    make_dirs(config.save_wp_models_path)
    make_dirs(config.save_st_models_path)
    make_dirs(config.save_features_path)
    logger = setup_logger('adaptation_reid', config.output_path, if_train=True)

    if config.mode == 'train':
        if config.resume:
            # automatically resume model from the latest one
            if config.resume_epoch_num == 0:
                start_train_epoch = 0
                root, _, files = os_walk(config.save_models_path)
                if len(files) > 0:
                    # get indexes of saved models
                    indexes = []
                    for file in files:
                        indexes.append(
                            int(file.replace('.pkl', '').split('_')[-1]))
                    # remove the bad-case and get available indexes: an epoch
                    # index is usable only if every sub-model saved a file.
                    model_num = len(base.model_list)
                    available_indexes = copy.deepcopy(indexes)
                    for element in indexes:
                        if indexes.count(element) < model_num:
                            available_indexes.remove(element)
                    available_indexes = sorted(list(set(available_indexes)),
                                               reverse=True)
                    unavailable_indexes = list(
                        set(indexes).difference(set(available_indexes)))
                    if len(available_indexes) > 0:
                        # resume model from the latest model
                        base.resume_model(available_indexes[0])
                        start_train_epoch = available_indexes[0] + 1
                        logger.info(
                            'Time: {}, automatically resume training from the latest step (model {})'
                            .format(time_now(), available_indexes[0]))
                    else:
                        # Fix: this branch held only a commented-out log line,
                        # which is a syntax error; keep it as an explicit no-op.
                        # logger.info('Time: {}, there are no available models')
                        pass
            else:
                start_train_epoch = config.resume_epoch_num
        else:
            start_train_epoch = 0

        # main loop: three consecutive warmup phases selected by epoch index.
        for current_epoch in range(
                start_train_epoch, config.warmup_reid_epoches +
                config.warmup_gan_epoches + config.warmup_adaptation_epoches):
            # train
            if current_epoch < config.warmup_reid_epoches:  # warmup reid model
                results = train_an_epoch(config, 0, loaders, base,
                                         current_epoch, train_gan=True,
                                         train_reid=True, self_training=False,
                                         optimize_sl_enc=True,
                                         train_adaptation=False)
            elif current_epoch < config.warmup_reid_epoches + config.warmup_gan_epoches:  # warmup GAN model
                results = train_an_epoch(config, 0, loaders, base,
                                         current_epoch, train_gan=True,
                                         train_reid=False, self_training=False,
                                         optimize_sl_enc=False,
                                         train_adaptation=False)
            # joint train
            elif current_epoch < config.warmup_reid_epoches + config.warmup_gan_epoches + config.warmup_adaptation_epoches:  # warmup adaptation
                results = train_an_epoch(config, 0, loaders, base,
                                         current_epoch, train_gan=True,
                                         train_reid=False, self_training=False,
                                         optimize_sl_enc=False,
                                         train_adaptation=True)
                # NOTE(review): placement ambiguous in original; kept inside
                # the adaptation branch — confirm it wasn't loop-level.
                print("another epoch")
            logger.info('Time: {}; Epoch: {}; {}'.format(
                time_now(), current_epoch, results))
            # save model
            if current_epoch % config.save_model_interval == 0:
                base.save_model(current_epoch, True)
            if current_epoch % config.test_model_interval == 0:
                visualize(config, loaders, base, current_epoch)
                test(config, base, loaders, epoch=0, brief=False)

        total_wp_epoches = config.warmup_reid_epoches + config.warmup_gan_epoches
        # Iterative self-training on pseudo-labeled target data.
        for iter_n in range(config.iteration_number):
            src_dataset, src_dataloader, trg_dataset, trg_dataloader = \
                loaders.get_self_train_loaders()
            trg_labeled_dataloader = generate_labeled_dataset(
                base, iter_n, src_dataset, src_dataloader, trg_dataset,
                trg_dataloader)
            for epoch in range(total_wp_epoches + 1, config.self_train_epoch):
                results = train_an_epoch(
                    config, iter_n, loaders, base, epoch,
                    train_gan=True, train_reid=False, self_training=True,
                    optimize_sl_enc=True,
                    trg_labeled_loader=trg_labeled_dataloader)
                # Fix: originally logged the stale outer-loop `current_epoch`
                # instead of this loop's `epoch`.
                logger.info('Time: {}; Epoch: {}; {}'.format(
                    time_now(), epoch, results))
                if epoch % config.save_model_interval == 0:
                    base.save_model(iter_n * config.self_train_epoch + epoch,
                                    False)

    elif config.mode == 'test':
        # resume from pre-trained model and test
        base.resume_model_from_path(config.pretrained_model_path,
                                    config.pretrained_model_epoch)
        # Renamed from `map` to avoid shadowing the builtin.
        cmc, mean_ap = test(config, base, loaders, epoch=100, brief=False)
from tools import config
from tools.logger import get_logger, setup_logger
from apscheduler.schedulers.blocking import BlockingScheduler
# NOTE(review): 'downalod_manager' is misspelled but must match the actual
# package directory name — verify before renaming.
from downalod_manager.download_job_manager import manage_download_execution

# Configure the shared logger once at startup, then grab the instance.
setup_logger()
logger = get_logger()
logger.debug("")
logger.debug("execution started: ")

# Copy so the scheduled job cannot mutate the module-level config.
# NOTE(review): 'seach_params' spelling mirrors the attribute defined in
# tools.config — confirm before fixing.
search_params = config.seach_params.copy()

# Run the download manager every minute; BlockingScheduler.start() never
# returns, so this must be the script's last statement.
scheduler = BlockingScheduler()
job = scheduler.add_job(manage_download_execution,
                        'interval',
                        args=[search_params],
                        minutes=1,
                        name="scraper_manager")
scheduler.start()
def main(cfg, config_file=None):
    """Train the person-search model: optional resume, optional apex AMP,
    per-epoch checkpoint + benchmark evaluation, TensorBoard logging.

    Args:
        cfg: project config (RESUME, APEX, OUTPUT_DIR, SOLVER.*).
        config_file: optional path; copied into the log dir for provenance.
    """
    # model
    model = build_model.build(cfg)
    # NOTE(review): this local is unused — was `torch.backends.cudnn.benchmark`
    # intended? Kept as-is pending confirmation.
    cudnn_benchmark = False
    torch.cuda.manual_seed(1)
    dataloader = get_data_loader(cfg, mode='train')

    if cfg.RESUME:
        print('resume from:', cfg.RESUME)
        checkpoint = torch.load(cfg.RESUME)
        model.load_state_dict(checkpoint['model'])
        opt = get_optimizer(cfg, model)
        # Fix: load_state_dict() returns None; the original rebound `opt` and
        # `scheduler` to None, crashing every resumed run.
        opt.load_state_dict(checkpoint['optimizer'])
        scheduler = get_lr_scheduler(cfg, opt)
        scheduler.load_state_dict(checkpoint['lr_scheduler'])
        start_epoch = checkpoint['epoch'] + 1
        iter_count = checkpoint['iter_count']
        logger_dir = checkpoint['logger_dir']
    else:
        print('start from beginning')
        opt = get_optimizer(cfg, model)
        scheduler = get_lr_scheduler(cfg, opt)
        start_epoch = 0
        iter_count = 0
        logger_dir = osp.join(cfg.OUTPUT_DIR, time.asctime()[4:])

    # logger
    if not osp.exists(logger_dir):
        os.mkdir(logger_dir)
    if config_file is not None:
        copy(config_file, logger_dir)
    logger = setup_logger("person search", logger_dir, 'log.txt', 0)
    logger.info(cfg)

    model = model.cuda()

    def inplace_relu(m):
        # Switch every ReLU to in-place to cut activation memory.
        classname = m.__class__.__name__
        if classname.find('ReLU') != -1:
            m.inplace = True

    model.apply(inplace_relu)

    if cfg.APEX != '':
        logger.info("current use apex")
        APEX = True
        model, opt = amp.initialize(model, opt, opt_level="O2")
    else:
        APEX = False

    iter_time = int(dataloader.dataset.__len__() / dataloader.batch_size)
    writer = SummaryWriter(logger_dir)
    color = [Color('G'), Color('M'), Color('R'), Color('Y'), Color('B')]

    for epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCHS):
        # Running sum of losses between log intervals (despite the name,
        # this is a scalar accumulator, not a dict).
        loss_dict = 0
        for i, (img, target) in enumerate(dataloader):
            start_time = time.time()
            iterval = 100
            img, target = ship_data_to_cuda(img, target, 'cuda')
            result = model(img, target)
            # Unit weight on the re-id loss (placeholder for tuning).
            result['loss_reid'] = 1 * result['loss_reid']
            losses = sum(loss for loss in result.values())
            keys = list(result.keys())
            if (i + 1) % iterval == 0:
                print('\n{} '.format(Color('G')))
                logger.info(
                    'epoch:{0},iter:[{1}/{2}],losses:{3},lr:{4},time_per_batch:{5:.3f}'
                    .format(epoch, i, iter_time, loss_dict / iterval,
                            opt.param_groups[0]['lr'],
                            time.time() - start_time))
                logger.info(
                    'loss_detection:{0:.2f};loss_box_reg:{1:.2f};'
                    'loss_reid:{2:.2f};loss_objectness:{3:.2f};loss_rpn_box_reg:{4:.2f};'
                    .format(*[result[key].item() for key in keys]))
                for key in keys:
                    writer.add_scalar('{}'.format(key), result[key].item(),
                                      iter_count)
                loss_dict = 0
            else:
                loss_dict += losses
            opt.zero_grad()
            if APEX:
                # amp scales the loss to avoid fp16 underflow.
                with amp.scale_loss(losses, opt) as scaled_loss:
                    scaled_loss.backward()
            else:
                losses.backward()
            opt.step()
            iter_count += 1
        scheduler.step()

        # Checkpoint everything needed for a faithful resume.
        save_path = osp.join(logger_dir, 'ep{}.pth'.format(epoch))
        torch.save(
            {
                'epoch': epoch,
                'model': model.state_dict(),
                'optimizer': opt.state_dict(),
                'lr_scheduler': scheduler.state_dict(),
                'iter_count': iter_count,
                'logger_dir': logger_dir
            }, save_path)
        det_rate, det_ap, mean_ap, rank1 = test_benchmark(save_path, cfg,
                                                          logger)
        writer.add_scalar('det_rate', det_rate, epoch)
        writer.add_scalar('det_ap', det_ap, epoch)
        writer.add_scalar('map', mean_ap, epoch)
        writer.add_scalar('rank1', rank1, epoch)
    writer.close()
import numpy as np
import scipy.io as sio
import os
from tools.logger import setup_logger
import torch
from torchvision.utils import save_image
import logging
from tools import *
from tools.metrics import R1_mAP_eval

# Module-level logger shared by test(); name spelling kept as-is because it
# is a runtime identifier other tooling may match on.
logger = setup_logger('adapatation_test', './out/base', if_train=False)


def test(config, base, loaders, epoch, brief=False):
    """Evaluate the encoder on the target validation set with R1/mAP metrics.

    Args:
        config: project config (target_dataset_name).
        base: holds the encoder, device, and eval-mode switch.
        loaders: provides target_val_loader and num_query_target.
        epoch: epoch number, used only for logging.
        brief: accepted for interface compatibility; unused here.

    Returns:
        (cmc, mAP) from the evaluator.
    """
    base.set_eval()
    evaluator = R1_mAP_eval(num_query=loaders.num_query_target,
                            max_rank=50,
                            feat_norm='yes',
                            eval_dataset_name=config.target_dataset_name)
    evaluator.reset()
    for n_iter, (img, fname, vid, camid) in enumerate(
            loaders.target_val_loader):
        # no_grad: pure inference, keeps memory flat during evaluation.
        with torch.no_grad():
            img = img.to(base.device)
            feature_maps, feat, _ = base.encoder(img, True, False)
            evaluator.update((feat, vid, camid))
    cmc, mAP = evaluator.compute()
    logger.info("Validation Results - Epoch: {}".format(epoch))
    logger.info("mAP: {:.1%}".format(mAP))
    for r in [1, 5, 10]:
        logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1]))
    # Fix: callers unpack `cmc, map = test(...)` (see the adaptation driver),
    # but the original returned None, which would raise on unpacking.
    return cmc, mAP