Example #1
def main():
    parser = argparse.ArgumentParser(description="PyTorch Deep Learning")
    parser.add_argument("--config",
                        default="",
                        help="path to config file",
                        type=str)
    parser.add_argument('--test',
                        action='store_true',
                        help='testing the algorithm')
    parser.add_argument("opts",
                        help="Modify config options using the command-line",
                        default=None,
                        nargs=argparse.REMAINDER)

    args = parser.parse_args()
    if args.config != "":
        cfg.merge_from_file(args.config)
    cfg.merge_from_list(args.opts)
    build_output(cfg, args.config)
    logger = setup_logger(cfg.OUTPUT_DIR)
    deploy_macro(cfg)

    loader = LoaderFactory.produce(cfg)
    graph = SPOS(cfg)
    graph.load(path=cfg.RESUME)
    if args.test:
        test_genetic_search(cfg, graph, loader['val'], loader['train'], logger)
    else:
        genetic_search(cfg, graph, loader['val'], loader['train'], logger)
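
The CLI idiom above (a --config path plus a trailing "opts" list captured with nargs=argparse.REMAINDER and merged via cfg.merge_from_list) recurs throughout these examples. A minimal, self-contained sketch of the idiom, assuming a yacs-style CfgNode (merge_from_file/merge_from_list match the yacs API); the config keys are illustrative:

import argparse
from yacs.config import CfgNode as CN

cfg = CN()
cfg.OUTPUT_DIR = "./outputs"
cfg.RESUME = ""

def parse_with_overrides():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="", type=str)
    # REMAINDER swallows every trailing token,
    # e.g. `python main.py OUTPUT_DIR ./runs RESUME ckpt.pth`
    parser.add_argument("opts", default=None, nargs=argparse.REMAINDER)
    args = parser.parse_args()
    if args.config != "":
        cfg.merge_from_file(args.config)
    cfg.merge_from_list(args.opts)  # KEY VALUE pairs override the defaults
    return cfg

if __name__ == "__main__":
    print(parse_with_overrides())
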
Example #2
def main():
    global cfgs

    parser = ArgumentParser()
    parser.add_argument('--exp-name', type=str, default='default')
    parser.add_argument('--env', type=str)
    parser.add_argument('--controller', type=str, default='mppi')
    parser.add_argument('--train-seed', type=int, default=0)
    parser.add_argument('--snapshot-mode', type=str, default='last')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--iter', type=int, default=None)
    parser.add_argument('--render', default=False, action='store_true')
    parser.add_argument('--num-threads', type=int, default=1)
    parser.add_argument('--debug', default=False, action='store_true')
    args = parser.parse_args()

    cfgs['test_task'] = args.env

    train_log_dir = create_log_dir(first_time=False, exp_prefix=args.exp_name, seed=args.train_seed)
    cfgs.update(load_cfgs(train_log_dir))

    logger = setup_logger(first_time=False, exp_prefix=args.exp_name, seed=args.train_seed,
        cfgs=cfgs, snapshot_mode=args.snapshot_mode, snapshot_gap=None)

    test_task = gym.make(env_dict[cfgs['test_task']])
    sample_train_task = gym.make(env_dict[cfgs['train_tasks'][0]])
    # check if test task has same dimensions of observation and action as training tasks
    ob_shape, ac_shape = check_task([test_task, sample_train_task])
    del sample_train_task

    model = Net(input_shape=ob_shape + ac_shape, output_shape=ob_shape, **cfgs['net'])
    controller = controller_dict[args.controller](num_threads=args.num_threads, **cfgs['controller'])

    algo = MBMRL(None, model, controller, logger, num_threads=1, **cfgs['train'])
    algo.test(test_task, load_iter=args.iter, render=args.render, debug=args.debug, **cfgs['test'])
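
check_task is project code; judging by how its return values feed Net(input_shape=ob_shape + ac_shape, ...), it plausibly asserts that every task exposes the same flat observation/action dimensions and returns that pair. A hedged, hypothetical stand-in:

def check_task(tasks):
    # verify all gym tasks share flat observation/action sizes, return the pair
    shapes = [(t.observation_space.shape[0], t.action_space.shape[0])
              for t in tasks]
    assert len(set(shapes)) == 1, "tasks differ in observation/action shape"
    return shapes[0]
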
Example #3
def main():
    parser = argparse.ArgumentParser(description="PyTorch Deep Learning")
    parser.add_argument("--config",
                        default="",
                        help="path to config file",
                        type=str)
    parser.add_argument('--products',
                        action='store_true',
                        help='list available products in all factories')
    parser.add_argument("opts",
                        help="Modify config options using the command-line",
                        default=None,
                        nargs=argparse.REMAINDER)

    args = parser.parse_args()

    if args.products:
        show_products()

    if args.config != "":
        cfg.merge_from_file(args.config)
    cfg.merge_from_list(args.opts)
    build_output(cfg, args.config)
    logger = setup_logger(cfg.OUTPUT_DIR)
    deploy_macro(cfg)

    loader = LoaderFactory.produce(cfg)
    graph = GraphFactory.produce(cfg)
    solver = Solver(cfg, graph.model.named_parameters())

    graph.model = graph.model.cuda()

    p = next(iter(graph.model.parameters()))
    print(p.is_cuda)

    graph.model, solver.opt = amp.initialize(graph.model,
                                             solver.opt,
                                             opt_level='O3',
                                             keep_batchnorm_fp32=True)

    graph.use_multigpu()
    p = next(iter(graph.model.parameters()))
    print(p.dtype)

    batch = next(iter(loader['val']))
    for key in batch:
        batch[key] = batch[key].cuda()

    solver.zero_grad()
    outputs = graph.model(batch['inp'])

    loss, losses = graph.loss_head(outputs, batch)
    with amp.scale_loss(loss, solver.opt) as scaled_loss:
        scaled_loss.backward()
    solver.step()
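
apex.amp, driven above at opt_level 'O3', is deprecated upstream in favour of PyTorch's built-in torch.cuda.amp. A rough native-API sketch of the same scale-and-step, reusing graph.model, graph.loss_head, and solver from the example (all assumed to exist as above):

import torch

scaler = torch.cuda.amp.GradScaler()

def amp_step(graph, solver, batch):
    solver.zero_grad()
    with torch.cuda.amp.autocast():   # forward pass runs in mixed precision
        outputs = graph.model(batch['inp'])
        loss, losses = graph.loss_head(outputs, batch)
    scaler.scale(loss).backward()     # scale first so fp16 grads don't underflow
    scaler.step(solver.opt)           # unscales, then steps the optimizer
    scaler.update()
    return losses
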
Example #4
def train_with_tune(config, reporter):
    # cfg and args are module-level globals in the source script
    build_output(cfg, args.config)
    logger = setup_logger(cfg.OUTPUT_DIR)
    logger.info(cfg.OUTPUT_DIR)
    cfg.SOLVER.MOMENTUM = float(config['momentum'])
    cfg.SOLVER.BASE_LR = float(config['lr'])
    cfg.SOLVER.WARMRESTART_PERIOD = int(config['restart_period'])
    trainer = get_trainer(cfg.TRAINER)(cfg)
    trainer.train()
    acc = trainer.acc
    reporter(mean_accuracy=acc)
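
The (config, reporter) signature matches Ray Tune's legacy function-trainable API, so a launch would look roughly like the following; the search space is invented for illustration:

from ray import tune

analysis = tune.run(
    train_with_tune,
    config={
        "momentum": tune.uniform(0.85, 0.99),
        "lr": tune.loguniform(1e-4, 1e-1),
        "restart_period": tune.choice([10, 20, 40]),
    },
    num_samples=8,
)
print(analysis.get_best_config(metric="mean_accuracy", mode="max"))
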
Example #5
def main():
    parser = argparse.ArgumentParser(description="PyTorch Deep Learning")
    parser.add_argument("--config",
                        default="",
                        help="path to config file",
                        type=str)
    parser.add_argument('--products',
                        action='store_true',
                        help='list available products in all factories')
    parser.add_argument('--cfg',
                        action='store_true',
                        help='list available configs in YAML')
    parser.add_argument("opts",
                        help="Modify config options using the command-line",
                        default=None,
                        nargs=argparse.REMAINDER)

    args = parser.parse_args()

    if args.products:
        show_products()

    if args.cfg:
        show_configs()

    if args.config != "":
        cfg.merge_from_file(args.config)
    cfg.merge_from_list(args.opts)
    build_output(cfg, args.config)
    logger = setup_logger(cfg.OUTPUT_DIR)
    deploy_macro(cfg)
    trainer = TrainerFactory.produce(cfg)

    logger.info("Running with config")

    if cfg.EVALUATE:
        trainer.test()
        sys.exit()

    try:
        trainer.train()
    except Exception:
        logger.info("Unexpected Error Occurred")
        if cfg.SAVE:
            logger.info("Back up the Checkpoint")
            trainer.graph.save(trainer.graph.save_path, trainer.graph.model,
                               trainer.graph.sub_models, trainer.solvers,
                               trainer.engine.epoch, trainer.engine.accu)
        logger.info(traceback.format_exc())
        sys.exit(1)
Example #6
def main():
    global cfgs

    parser = ArgumentParser()
    parser.add_argument('--exp-name', type=str, default='default')
    parser.add_argument('--env', nargs='+', type=str)
    parser.add_argument('--controller', type=str, default='mppi')
    parser.add_argument('--snapshot-mode', type=str, default='last')
    parser.add_argument('--snapshot-gap', type=int, default=1)
    parser.add_argument('--resume', default=False, action='store_true')
    parser.add_argument('--iter', type=int, default=None)
    parser.add_argument('--num-threads', type=int, default=1)
    args = parser.parse_args()

    cfgs['train_tasks'] = args.env

    logger = setup_logger(first_time=not args.resume,
                          exp_prefix=args.exp_name,
                          seed=train_cfg['seed'],
                          cfgs=cfgs,
                          snapshot_mode=args.snapshot_mode,
                          snapshot_gap=args.snapshot_gap)

    # load configs when resume training
    if args.resume:
        cfgs = load_cfgs(logger.get_log_dir())

    train_tasks = [gym.make(env_dict[t]) for t in cfgs['train_tasks']]

    # get shape of task space
    ob_shape, ac_shape = check_task(train_tasks)

    # build dynamics network
    model = Net(input_shape=ob_shape + ac_shape,
                output_shape=ob_shape,
                **cfgs['net'])

    # build controller for adaptation
    controller = controller_dict[args.controller](**cfgs['controller'])

    # train model-based meta RL
    algo = MBMRL(tasks=train_tasks,
                 model=model,
                 controller=controller,
                 logger=logger,
                 num_threads=args.num_threads,
                 **cfgs['train'])
    algo.debug()
Example #7
def test_benchmark(model_path, cfg, logger=None):
    model = build_model.build(cfg)

    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['model'])
    model = model.cuda()
    model.eval()
    model_name = model_path.split('/')[-1]
    dataset_name = cfg.DATASETS.NAMES
    if logger is None:
        logger = setup_logger("person search", cfg.OUTPUT_DIR,
                              'TEST-{}.txt'.format(model_name), 0)
    logger.info(cfg)
    print(Color('B') + '')
    logger.info("start test")
    logger.info("dataset:{0},model_path:{1}".format(dataset_name, model_path))
    gallery_loader = get_data_loader(cfg, mode='test')
    query_loader = get_data_loader(cfg, mode='probe')
    imgnames_with_boxes, boxes_feats, probe_feats = inference(
        model, gallery_loader, query_loader, 'cuda')
    precision, recall, det_rate, det_ap = detection_performance_calc(
        gallery_loader.dataset,
        imgnames_with_boxes.values(),
        det_thresh=0.01,
        logger=logger)
    ret = gallery_loader.dataset.search_performance_calc(
        gallery_loader.dataset,
        query_loader.dataset,
        imgnames_with_boxes.values(),
        boxes_feats,
        probe_feats,
        det_thresh=0.5,
        gallery_size=gallery_loader.dataset.test_size,
        logger=logger)

    return det_rate, det_ap, ret['mAP'], ret['accs'][0]
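
The torch.load / load_state_dict / eval() sequence at the top of test_benchmark is the standard evaluation bootstrap; a compact, device-agnostic sketch of it:

import torch

def load_for_eval(model, model_path, device="cuda"):
    checkpoint = torch.load(model_path, map_location="cpu")  # load to CPU first
    model.load_state_dict(checkpoint['model'])
    model.to(device)
    model.eval()  # freeze dropout and batch-norm statistics for inference
    return model
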
Example #8
def main():
    parser = argparse.ArgumentParser(description="PyTorch Deep Learning")
    parser.add_argument("--config",
                        default="",
                        help="path to config file",
                        type=str)
    parser.add_argument('--list',
                        action='store_true',
                        help='list available config in factories')
    parser.add_argument("opts",
                        help="Modify config options using the command-line",
                        default=None,
                        nargs=argparse.REMAINDER)

    args = parser.parse_args()
    if args.config != "":
        cfg.merge_from_file(args.config)
    cfg.merge_from_list(args.opts)
    build_output(cfg, args.config)
    logger = setup_logger(cfg.OUTPUT_DIR)
    deploy_macro(cfg)

    loader = LoaderFactory.produce(cfg)

    graph = SPOS(cfg)
    evolution = Evolution(cfg=cfg,
                          graph=graph,
                          num_batches=len(loader['train']),
                          logger=logger)

    graph.use_multigpu()
    solver = Solver(cfg, graph.model.named_parameters())

    manager = multiprocessing.Manager()
    cand_pool = manager.list()
    lock = manager.Lock()

    best_accu = 0.0

    for epoch in range(cfg.SOLVER.START_EPOCH, cfg.SOLVER.MAX_EPOCHS):

        finished = Value(c_bool, False)
        pool_process = multiprocessing.Process(
            target=evolution.maintain,
            args=[
                epoch - cfg.SPOS.EPOCH_TO_CS, cand_pool, lock, finished, logger
            ])
        pool_process.start()
        train_once(logger,
                   epoch,
                   graph,
                   loader['train'],
                   solver,
                   pool=cand_pool,
                   pool_lock=lock,
                   shared_finished_flag=finished)
        pool_process.join()

        test_accu = test_once(logger, epoch, graph, loader['val'],
                              loader['train'])
        if test_accu > best_accu:
            best_accu = test_accu
            logger.info(
                f"Epoch [{epoch:03}]   Best Accuracy [{best_accu:3.3f}]")

        graph.save(graph.save_path,
                   graph.model,
                   solvers={'main': solver},
                   epoch=epoch,
                   metric=test_accu)
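
The epoch loop above interleaves supernet training with a candidate-pool maintainer in a separate process, synchronised through a Manager list, a Manager lock, and a shared boolean flag. A stripped-down, runnable sketch of that handshake with the project-specific work stubbed out:

import multiprocessing
from multiprocessing import Value
from ctypes import c_bool
import time

def maintain(pool, lock, finished):
    # stand-in for evolution.maintain: keep producing candidates until told to stop
    while not finished.value:
        with lock:
            pool.append(len(pool))
        time.sleep(0.1)

if __name__ == "__main__":
    manager = multiprocessing.Manager()
    cand_pool = manager.list()
    lock = manager.Lock()
    finished = Value(c_bool, False)
    worker = multiprocessing.Process(target=maintain,
                                     args=(cand_pool, lock, finished))
    worker.start()
    time.sleep(0.5)        # stand-in for train_once(...)
    finished.value = True  # signal the maintainer that the epoch is over
    worker.join()
    print(list(cand_pool))
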
Example #9
def main():
    parser = argparse.ArgumentParser(description="PyTorch Deep Learning")
    parser.add_argument("--local_rank",
                        type=int,
                        help="Local rank. Necessary for using the "
                             "torch.distributed.launch utility.")
    parser.add_argument("--config",
                        default="",
                        help="path to config file",
                        type=str)
    parser.add_argument('--products',
                        action='store_true',
                        help='list available products in all factories')
    parser.add_argument('--cfg',
                        action='store_true',
                        help='list available configs in YAML')
    parser.add_argument("opts",
                        help="Modify config options using the command-line",
                        default=None,
                        nargs=argparse.REMAINDER)

    args = parser.parse_args()

    if args.products:
        show_products()

    if args.cfg:
        show_configs()

    dist.init_process_group(backend='nccl')
    rank = dist.get_rank()
    assert rank == args.local_rank

    if args.config != "":
        cfg.merge_from_file(args.config)
    cfg.merge_from_list(args.opts)

    if args.local_rank != 0:
        time.sleep(5)
        cfg.IO = False
        cfg.SAVE = False
        build_output(cfg, args.config, find_existing_path=True)
        logger = logging.getLogger("Logger")
    else:
        build_output(cfg, args.config)
        logger = setup_logger(cfg.OUTPUT_DIR)

    deploy_macro(cfg)
    logger.info(f"Rank [{rank}] Start!")
    device = torch.device("cuda:{}".format(rank))
    torch.cuda.set_device(device)
    trainer = TrainerFactory.produce(cfg)

    if cfg.EVALUATE:
        trainer.test()
        sys.exit()

    try:
        trainer.train()
    except Exception:
        logger.info("Unexpected Error Occurred")
        if cfg.SAVE:
            logger.info("Back up the Checkpoint")
            trainer.graph.save(trainer.graph.save_path, trainer.graph.model,
                               trainer.graph.sub_models, trainer.solvers,
                               trainer.engine.epoch, trainer.engine.accu)
        logger.info(traceback.format_exc())
        sys.exit(1)
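
This launcher expects torch.distributed.launch, which passes --local_rank on the command line; the newer torchrun sets the LOCAL_RANK environment variable instead. A bootstrap sketch that accepts either:

import os
import argparse
import torch
import torch.distributed as dist

def init_distributed():
    parser = argparse.ArgumentParser()
    parser.add_argument("--local_rank", type=int,
                        default=int(os.environ.get("LOCAL_RANK", 0)))
    args = parser.parse_args()
    dist.init_process_group(backend='nccl')  # rank/world size come from the env
    torch.cuda.set_device(args.local_rank)
    return dist.get_rank(), args.local_rank
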
Example #10
def main(config):

    # loaders and base
    loaders = Loaders(config)
    base = Base(config, loaders)

    # make dirs
    make_dirs(config.save_images_path)
    make_dirs(config.save_wp_models_path)
    make_dirs(config.save_st_models_path)
    make_dirs(config.save_features_path)

    logger = setup_logger('adaptation_reid', config.output_path, if_train=True)

    if config.mode == 'train':

        if config.resume:
            # automatically resume model from the latest one
            if config.resume_epoch_num == 0:
                start_train_epoch = 0
                root, _, files = os_walk(config.save_models_path)
                if len(files) > 0:
                    # get indexes of saved models
                    indexes = []
                    for file in files:
                        indexes.append(
                            int(file.replace('.pkl', '').split('_')[-1]))

                    # remove the bad-case and get available indexes
                    model_num = len(base.model_list)
                    available_indexes = copy.deepcopy(indexes)
                    for element in indexes:
                        if indexes.count(element) < model_num:
                            available_indexes.remove(element)

                    available_indexes = sorted(list(set(available_indexes)),
                                               reverse=True)
                    unavailable_indexes = list(
                        set(indexes).difference(set(available_indexes)))

                    if len(available_indexes) > 0:  # resume model from the latest model
                        base.resume_model(available_indexes[0])
                        start_train_epoch = available_indexes[0] + 1
                        logger.info(
                            'Time: {}, automatically resume training from the latest step (model {})'
                            .format(time_now(), available_indexes[0]))
                    else:
                        logger.info('Time: {}, there are no available models'.format(time_now()))
            else:
                start_train_epoch = config.resume_epoch_num
        else:
            start_train_epoch = 0

        # main loop
        for current_epoch in range(
                start_train_epoch, config.warmup_reid_epoches +
                config.warmup_gan_epoches + config.warmup_adaptation_epoches):

            # train
            if current_epoch < config.warmup_reid_epoches:  # warmup reid model
                results = train_an_epoch(config,
                                         0,
                                         loaders,
                                         base,
                                         current_epoch,
                                         train_gan=True,
                                         train_reid=True,
                                         self_training=False,
                                         optimize_sl_enc=True,
                                         train_adaptation=False)
            elif current_epoch < config.warmup_reid_epoches + config.warmup_gan_epoches:  # warmup GAN model
                results = train_an_epoch(config,
                                         0,
                                         loaders,
                                         base,
                                         current_epoch,
                                         train_gan=True,
                                         train_reid=False,
                                         self_training=False,
                                         optimize_sl_enc=False,
                                         train_adaptation=False)  # joint train
            elif current_epoch < config.warmup_reid_epoches + config.warmup_gan_epoches + config.warmup_adaptation_epoches:  #warmup adaptation
                results = train_an_epoch(config,
                                         0,
                                         loaders,
                                         base,
                                         current_epoch,
                                         train_gan=True,
                                         train_reid=False,
                                         self_training=False,
                                         optimize_sl_enc=False,
                                         train_adaptation=True)

            print("another epoch")
            logger.info('Time: {};  Epoch: {};  {}'.format(
                time_now(), current_epoch, results))
            # save model
            if current_epoch % config.save_model_interval == 0:
                base.save_model(current_epoch, True)

            if current_epoch % config.test_model_interval == 0:
                visualize(config, loaders, base, current_epoch)
                test(config, base, loaders, epoch=0, brief=False)

        total_wp_epoches = config.warmup_reid_epoches + config.warmup_gan_epoches

        for iter_n in range(config.iteration_number):
            src_dataset, src_dataloader, trg_dataset, trg_dataloader = \
                loaders.get_self_train_loaders()

            trg_labeled_dataloader = generate_labeled_dataset(
                base, iter_n, src_dataset, src_dataloader, trg_dataset,
                trg_dataloader)
            for epoch in range(total_wp_epoches + 1, config.self_train_epoch):
                results = train_an_epoch(
                    config,
                    iter_n,
                    loaders,
                    base,
                    epoch,
                    train_gan=True,
                    train_reid=False,
                    self_training=True,
                    optimize_sl_enc=True,
                    trg_labeled_loader=trg_labeled_dataloader)
                logger.info('Time: {};  Epoch: {};  {}'.format(
                    time_now(), epoch, results))

                if epoch % config.save_model_interval == 0:
                    base.save_model(iter_n * config.self_train_epoch + epoch,
                                    False)

    elif config.mode == 'test':
        # resume from pre-trained model and test
        base.resume_model_from_path(config.pretrained_model_path,
                                    config.pretrained_model_epoch)
        cmc, map = test(config, base, loaders, epoch=100, brief=False)
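
The resume branch above derives the latest usable epoch from the saved file indexes: an index counts only if every model in base.model_list wrote a file for it. The same logic restated compactly (latest_complete_epoch is an illustrative name, not project code):

from collections import Counter

def latest_complete_epoch(indexes, model_num):
    counts = Counter(indexes)
    complete = sorted((i for i, c in counts.items() if c >= model_num),
                      reverse=True)
    return complete[0] if complete else None
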
Example #11
from tools import config
from tools.logger import get_logger, setup_logger
from apscheduler.schedulers.blocking import BlockingScheduler
from downalod_manager.download_job_manager import manage_download_execution

setup_logger()
logger = get_logger()
logger.debug("")
logger.debug("execution started: ")
search_params = config.seach_params.copy()
scheduler = BlockingScheduler()
job = scheduler.add_job(manage_download_execution,
                        'interval',
                        args=[search_params],
                        minutes=1,
                        name="scraper_manager")
scheduler.start()
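
BlockingScheduler.start() blocks the main thread indefinitely, so a common hardening of the call above (the pattern from the APScheduler documentation) wraps it to shut down cleanly on Ctrl-C:

try:
    scheduler.start()
except (KeyboardInterrupt, SystemExit):
    scheduler.shutdown()
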
Example #12
def main(cfg, config_file=None):
    #model

    model = build_model.build(cfg)
    torch.backends.cudnn.benchmark = False  # disable the cuDNN autotuner
    torch.cuda.manual_seed(1)

    dataloader = get_data_loader(cfg, mode='train')
    if cfg.RESUME:
        print('resume from:', cfg.RESUME)
        checkpoint = torch.load(cfg.RESUME)
        model.load_state_dict(checkpoint['model'])
        opt = get_optimizer(cfg, model)
        opt.load_state_dict(checkpoint['optimizer'])  # mutates in place, returns None
        scheduler = get_lr_scheduler(cfg, opt)
        scheduler.load_state_dict(checkpoint['lr_scheduler'])
        start_epoch = checkpoint['epoch'] + 1
        iter_count = checkpoint['iter_count']
        logger_dir = checkpoint['logger_dir']
    else:
        print('start from beginning')
        opt = get_optimizer(cfg, model)
        scheduler = get_lr_scheduler(cfg, opt)

        start_epoch = 0
        iter_count = 0
        logger_dir = osp.join(cfg.OUTPUT_DIR, time.asctime()[4:])

    #logger

    if not osp.exists(logger_dir):
        os.mkdir(logger_dir)
    if config_file is not None:
        copy(config_file, logger_dir)
    logger = setup_logger("person search", logger_dir, 'log.txt', 0)
    logger.info(cfg)

    model = model.cuda()

    def inplace_relu(m):
        classname = m.__class__.__name__
        if classname.find('ReLU') != -1:
            m.inplace = True

    model.apply(inplace_relu)

    if cfg.APEX != '':
        logger.info("current use apex")
        APEX = True
        # model.roi_heads.box_roi_pool.forward = \
        #     amp.half_function(model.roi_heads.box_roi_pool.forward)
        model, opt = amp.initialize(model, opt, opt_level="O2")
        # if cfg.MODEL.BACKBONE == 'DE_FasterRCNN_OIM':
        #     model.roi_heads.de_box_roi_pool.forward= \
        #         amp.half_function(model.roi_heads.de_box_roi_pool.forward)
    else:
        APEX = False
    iter_time = len(dataloader.dataset) // dataloader.batch_size
    writer = SummaryWriter(logger_dir)
    color = [Color('G'), Color('M'), Color('R'), Color('Y'), Color('B')]

    for epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCHS):
        loss_dict = 0

        for i, (img, target) in enumerate(dataloader):
            start_time = time.time()
            iterval = 100

            img, target = ship_data_to_cuda(img, target, 'cuda')
            result = model(img, target)
            result['loss_reid'] = 1 * result['loss_reid']
            losses = sum(loss for loss in result.values())
            keys = list(result.keys())

            if (i + 1) % iterval == 0:
                print('\n{} '.format(Color('G')))
                logger.info(
                    'epoch:{0},iter:[{1}/{2}],losses:{3},lr:{4},time_per_batch:{5:.3f}'
                    .format(epoch, i, iter_time, loss_dict / iterval,
                            opt.param_groups[0]['lr'],
                            time.time() - start_time))
                logger.info(
                    'loss_detection:{0:.2f};loss_box_reg:{1:.2f};'
                    'loss_reid:{2:.2f};loss_objectness:{3:.2f};loss_rpn_box_reg:{4:.2f};'
                    .format(*[result[key].item() for key in keys]))
                for key in keys:
                    writer.add_scalar('{}'.format(key), result[key].item(),
                                      iter_count)
                loss_dict = 0
            else:
                loss_dict += losses
            opt.zero_grad()
            if APEX:
                with amp.scale_loss(losses, opt) as scaled_loss:
                    scaled_loss.backward()
            else:
                losses.backward()
            opt.step()
            iter_count += 1

        scheduler.step()
        save_path = osp.join(logger_dir, 'ep{}.pth'.format(epoch))
        torch.save(
            {
                'epoch': epoch,
                'model': model.state_dict(),
                'optimizer': opt.state_dict(),
                'lr_scheduler': scheduler.state_dict(),
                'iter_count': iter_count,
                'logger_dir': logger_dir
            }, save_path)
        det_rate, det_ap, map, rank1 = test_benchmark(save_path, cfg, logger)
        writer.add_scalar('det_rate', det_rate, epoch)
        writer.add_scalar('det_ap', det_ap, epoch)
        writer.add_scalar('map', map, epoch)
        writer.add_scalar('rank1', rank1, epoch)
    writer.close()
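
The resume branch near the top of this example hinges on the fact that load_state_dict mutates its object in place and returns None, so its result must never be re-assigned. A minimal sketch of the save/resume round trip:

import torch

def save_ckpt(path, epoch, model, opt, scheduler):
    torch.save({
        'epoch': epoch,
        'model': model.state_dict(),
        'optimizer': opt.state_dict(),
        'lr_scheduler': scheduler.state_dict(),
    }, path)

def load_ckpt(path, model, opt, scheduler):
    ckpt = torch.load(path, map_location="cpu")
    model.load_state_dict(ckpt['model'])
    opt.load_state_dict(ckpt['optimizer'])
    scheduler.load_state_dict(ckpt['lr_scheduler'])
    return ckpt['epoch'] + 1  # epoch to resume training from
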
Example #13
import numpy as np
import scipy.io as sio
import os
from tools.logger import setup_logger
import torch
from torchvision.utils import save_image
import logging
from tools import *
from tools.metrics import R1_mAP_eval

logger = setup_logger('adaptation_test', './out/base', if_train=False)

def test(config, base, loaders, epoch, brief=False):
    base.set_eval()
    evaluator = R1_mAP_eval(num_query=loaders.num_query_target,
                            max_rank=50,
                            feat_norm='yes',
                            eval_dataset_name=config.target_dataset_name)
    evaluator.reset()
    for n_iter, (img, fname, vid, camid) in enumerate(loaders.target_val_loader):
        with torch.no_grad():
            img = img.to(base.device)
            feature_maps, feat, _ = base.encoder(img, True, False)
            evaluator.update((feat, vid, camid))
    cmc, mAP = evaluator.compute()
    logger.info("Validation Results - Epoch: {}".format(epoch))
    logger.info("mAP: {:.1%}".format(mAP))
    for r in [1, 5, 10]:
        logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1]))