Exemple #1
0
 def __init__(self, configFn, ctx, outFolder, threshold):
     """Build the inference module for the config file ``configFn``.

     The config path is converted to a dotted module name and imported;
     its ``get_config(is_train=False)`` result supplies the model params,
     the test params and the transform list. The test symbol is passed
     through ``merge_bn``, bound on ``ctx`` for one 800x1200 input,
     loaded with the checkpoint weights and saved into ``outFolder``.
     """
     os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"

     module_name = configFn.replace('.py', '').replace('/', '.')
     config = importlib.import_module(module_name)
     # get_config returns 13 entries; only three of them are used here.
     (_, _, _, _, _, _, model_param, _, test_param, self.transform,
      _, _, _) = config.get_config(is_train=False)
     self.__pModel = patch_config_as_nothrow(model_param)
     self.__pTest = patch_config_as_nothrow(test_param)
     self.resizeParam = (800, 1200)

     # A callable ``nms.type`` is used as the NMS factory; otherwise the
     # py_nms_wrapper from operator_py.nms is used.
     nms_cfg = self.__pTest.nms
     if callable(nms_cfg.type):
         self.__nms = nms_cfg.type(nms_cfg.thr)
     else:
         from operator_py.nms import py_nms_wrapper
         self.__nms = py_nms_wrapper(nms_cfg.thr)

     checkpoint_cfg = self.__pTest.model
     arg_params, aux_params = load_checkpoint(checkpoint_cfg.prefix,
                                              checkpoint_cfg.epoch)
     sym = self.__pModel.test_symbol
     from utils.graph_optimize import merge_bn
     sym, arg_params, aux_params = merge_bn(sym, arg_params, aux_params)

     height, width = self.resizeParam
     input_shapes = [
         ('data', (1, 3, height, width)),
         ('im_info', (1, 3)),
         ('im_id', (1, )),
         ('rec_id', (1, )),
     ]
     self.__mod = DetModule(
         sym,
         data_names=[name for name, _ in input_shapes],
         context=ctx)
     self.__mod.bind(data_shapes=input_shapes, for_training=False)
     self.__mod.set_params(arg_params, aux_params, allow_extra=False)

     # The saved symbol file is named after the last path component of
     # the checkpoint prefix.
     symbol_prefix = self.__pTest.model.prefix.split('/')[-1]
     self.__saveSymbol(sym, outFolder, symbol_prefix)
     self.__threshold = threshold
     self.outFolder = outFolder
Exemple #2
0
    def __init__(self, config, batch_size, gpu_id, thresh):
        """Create a single-GPU inference module from ``config``.

        ``config.get_config(is_train=False)`` supplies the test symbol, the
        checkpoint prefix/epoch, the NMS settings and the input names. The
        test symbol is saved next to the checkpoint, then bound for
        inference on ``mx.gpu(gpu_id)`` and loaded with the checkpoint
        weights.
        """
        self.config, self.batch_size, self.thresh = config, batch_size, thresh

        # Parse the parameter file of model; entries that are not needed
        # here are discarded.
        (_, _, _, _, _, _, model_param, _, test_param, transform,
         data_name, label_name, _) = config.get_config(is_train=False)

        self.data_name, self.label_name = data_name, label_name
        size_cfg = transform[1].p
        self.p_long = size_cfg.long
        self.p_short = size_cfg.short

        # Define NMS type: a callable ``nms.type`` is used as the factory,
        # otherwise py_nms_wrapper from operator_py.nms is used.
        nms_cfg = test_param.nms
        if callable(nms_cfg.type):
            self.do_nms = nms_cfg.type(nms_cfg.thr)
        else:
            from operator_py.nms import py_nms_wrapper

            self.do_nms = py_nms_wrapper(nms_cfg.thr)

        checkpoint_prefix = test_param.model.prefix
        sym = model_param.test_symbol
        sym.save(checkpoint_prefix + "_test.json")

        ctx = mx.gpu(gpu_id)
        input_shapes = [
            ('data', (batch_size, 3, 800, 1200)),
            ("im_info", (1, 3)),
            ("im_id", (1, )),
            ("rec_id", (1, )),
        ]

        # Load network
        arg_params, aux_params = load_checkpoint(checkpoint_prefix,
                                                 test_param.model.epoch)
        self.mod = DetModule(sym, data_names=data_name, context=ctx)
        self.mod.bind(data_shapes=input_shapes, for_training=False)
        self.mod.set_params(arg_params, aux_params, allow_extra=False)
Exemple #3
0
def create_teacher_module(pTeacherModel, worker_data_shape, input_batch_size,
                          ctx, rank, logger):
    """Build an inference-only teacher module from a saved symbol.

    The symbol is loaded from ``<prefix>-symbol.json`` and regrouped so that
    its outputs are the internals named in ``pTeacherModel.endpoint``. The
    module is bound with the teacher's data shapes (batch dimension replaced
    by ``input_batch_size``) and loaded with the checkpoint at
    ``(prefix, epoch)``. Progress is logged only when ``rank == 0``.

    Returns:
        ``(t_mod, t_label_name, t_label_shape)``: the bound module, the
        teacher label names, and a list of ``(label_name, shape)`` pairs
        derived from the inferred endpoint output shapes.
    """
    is_master = rank == 0

    t_prefix = pTeacherModel.prefix
    t_epoch = pTeacherModel.epoch
    t_endpoint = pTeacherModel.endpoint
    t_data_name = pTeacherModel.data_name
    t_label_name = pTeacherModel.label_name
    if is_master:
        logger.info(
            'Building teacher module with endpoint: {}'.format(t_endpoint))

    # Load the full graph, then keep only the requested endpoints as outputs.
    full_sym = mx.sym.load(pTeacherModel.prefix + '-symbol.json')
    internals = [full_sym.get_internals()[out] for out in t_endpoint]
    t_sym = mx.sym.Group(internals)

    t_worker_data_shape = {key: worker_data_shape[key] for key in t_data_name}
    _, t_out_shape, _ = t_sym.infer_shape(**t_worker_data_shape)
    terminal_outputs = zip(t_sym.list_outputs(), t_out_shape)

    # Swap the per-worker batch dimension for the full input batch size.
    batch_dim = (input_batch_size, )
    t_data_shape = [(name, batch_dim + t_worker_data_shape[name][1:])
                    for name in t_data_name]
    # The i-th label takes the shape of the i-th endpoint output.
    t_label_shape = [(name, batch_dim + t_out_shape[idx][1:])
                     for idx, name in enumerate(t_label_name)]

    if is_master:
        logger.info('Teacher data_name: {}'.format(t_data_name))
        logger.info('Teacher data_shape: {}'.format(t_data_shape))
        logger.info('Teacher label_name: {}'.format(t_label_name))
        logger.info('Teacher label_shape: {}'.format(t_label_shape))
        logger.info('Teacher terminal output shape')
        logger.info(pprint.pformat(list(terminal_outputs)))

    t_arg_params, t_aux_params = load_checkpoint(t_prefix, t_epoch)
    t_mod = DetModule(t_sym,
                      data_names=t_data_name,
                      label_names=None,
                      logger=logger,
                      context=ctx)
    t_mod.bind(data_shapes=t_data_shape, for_training=False, grad_req='null')
    t_mod.set_params(t_arg_params, t_aux_params)
    if is_master:
        logger.info('Finish teacher module build')
    return t_mod, t_label_name, t_label_shape
Exemple #4
0
            terminal_out_shape_dict = zip(sym.list_outputs(), out_shape)
            print('parameter shape')
            print(
                pprint.pformat([
                    i for i in out_shape_dict if not i[0].endswith('output')
                ]))
            print('intermediate output shape')
            print(
                pprint.pformat(
                    [i for i in out_shape_dict if i[0].endswith('output')]))
            print('terminal output shape')
            print(pprint.pformat([i for i in terminal_out_shape_dict]))

            for i in pKv.gpus:
                ctx = mx.gpu(i)
                mod = DetModule(sym, data_names=data_names, context=ctx)
                mod.bind(data_shapes=loader.provide_data, for_training=False)
                mod.set_params(arg_params, aux_params, allow_extra=False)
                execs.append(mod)

        all_outputs = []

        if index_split == 0:

            def eval_worker(exe, data_queue, result_queue):
                while True:
                    batch = data_queue.get()
                    exe.forward(batch, is_train=False)
                    out = [x.asnumpy() for x in exe.get_outputs()]
                    result_queue.put(out)
def train_net(config):
    """Train a detection network as described by ``config``.

    ``config.get_config(is_train=True)`` must return the 13-tuple
    ``(pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest,
    transform, data_name, label_name, metric_list)``. The function then:

    * loads the cached roidbs ``data/cache/<image_set>.roidb``, drops
      records without ground-truth boxes and appends a copy of every
      record marked ``flipped``;
    * builds an ``AnchorLoader`` and infers the symbol shapes from it;
    * optionally applies memonger, ``pModel.process_weight`` and quantize
      nodes, and always runs ``merge_bn`` on the training symbol;
    * derives the learning-rate schedule and optimizer parameters; and
    * calls ``fit`` on a ``DetModule`` (or a ``KDDetModule`` when
      ``pModel.teacher_param`` is set).

    Logs, the symbol JSON and checkpoints are written under
    ``experiments/<pGen.name>``.

    Args:
        config: Object exposing ``get_config(is_train=True)``.
    """
    # Imported once at function scope. A function-level ``import time`` makes
    # ``time`` a local name for the whole function, so importing it only
    # inside the ``pModel.random`` branch left the final ``time.sleep`` call
    # unbound (UnboundLocalError) whenever that branch was skipped.
    import time

    pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \
    transform, data_name, label_name, metric_list = config.get_config(is_train=True)
    pGen = patch_config_as_nothrow(pGen)
    pKv = patch_config_as_nothrow(pKv)
    pRpn = patch_config_as_nothrow(pRpn)
    pRoi = patch_config_as_nothrow(pRoi)
    pBbox = patch_config_as_nothrow(pBbox)
    pDataset = patch_config_as_nothrow(pDataset)
    pModel = patch_config_as_nothrow(pModel)
    pOpt = patch_config_as_nothrow(pOpt)
    pTest = patch_config_as_nothrow(pTest)

    ctx = [mx.gpu(int(i)) for i in pKv.gpus]
    pretrain_prefix = pModel.pretrain.prefix
    pretrain_epoch = pModel.pretrain.epoch
    prefix = pGen.name
    save_path = os.path.join("experiments", prefix)
    begin_epoch = pOpt.schedule.begin_epoch
    end_epoch = pOpt.schedule.end_epoch
    lr_iter = pOpt.schedule.lr_iter

    # only rank==0 print all debug infos
    kvstore_type = "dist_sync" if os.environ.get(
        "DMLC_ROLE") == "worker" else pKv.kvstore
    kv = mx.kvstore.create(kvstore_type)
    rank = kv.rank

    # for distributed training using shared file system
    os.makedirs(save_path, exist_ok=True)

    from utils.logger import config_logger
    config_logger(os.path.join(save_path, "log.txt"))

    model_prefix = os.path.join(save_path, "checkpoint")

    # set up logger
    logger = logging.getLogger()

    sym = pModel.train_symbol

    # setup multi-gpu
    input_batch_size = pKv.batch_image * len(ctx)

    # print config
    # if rank == 0:
    #     logger.info(pprint.pformat(config))

    # load dataset and prepare imdb for training
    # NOTE(review): the roidb caches are unpickled as-is; only load cache
    # files from a trusted source.
    image_sets = pDataset.image_set
    roidbs = []
    for image_set in image_sets:
        # ``with`` closes each cache file as soon as it has been read.
        with open("data/cache/{}.roidb".format(image_set), "rb") as roidb_file:
            roidbs.append(pkl.load(roidb_file, encoding="latin1"))
    roidb = reduce(lambda x, y: x + y, roidbs)
    # filter empty image
    roidb = [rec for rec in roidb if rec["gt_bbox"].shape[0] > 0]
    # add flip roi record
    flipped_roidb = []
    for rec in roidb:
        new_rec = rec.copy()
        new_rec["flipped"] = True
        flipped_roidb.append(new_rec)
    roidb = roidb + flipped_roidb

    from core.detection_input import AnchorLoader
    train_data = AnchorLoader(roidb=roidb,
                              transform=transform,
                              data_name=data_name,
                              label_name=label_name,
                              batch_size=input_batch_size,
                              shuffle=True,
                              kv=kv,
                              num_worker=pGen.loader_worker or 12,
                              num_collector=pGen.loader_collector or 1,
                              worker_queue_depth=2,
                              collector_queue_depth=2)

    # infer shape, using the per-device batch size as the leading dimension
    worker_data_shape = dict(train_data.provide_data +
                             train_data.provide_label)
    for key in worker_data_shape:
        worker_data_shape[key] = (
            pKv.batch_image, ) + worker_data_shape[key][1:]
    arg_shape, _, aux_shape = sym.infer_shape(**worker_data_shape)

    _, out_shape, _ = sym.get_internals().infer_shape(**worker_data_shape)
    out_shape_dict = list(zip(sym.get_internals().list_outputs(), out_shape))

    _, out_shape, _ = sym.infer_shape(**worker_data_shape)
    terminal_out_shape_dict = zip(sym.list_outputs(), out_shape)

    if rank == 0:
        logger.info('parameter shape')
        logger.info(
            pprint.pformat(
                [i for i in out_shape_dict if not i[0].endswith('output')]))

        logger.info('intermediate output shape')
        logger.info(
            pprint.pformat(
                [i for i in out_shape_dict if i[0].endswith('output')]))

        logger.info('terminal output shape')
        logger.info(pprint.pformat([i for i in terminal_out_shape_dict]))

    # memonger
    if pModel.memonger:
        last_block = pModel.memonger_until or ""
        if rank == 0:
            logger.info("do memonger up to {}".format(last_block))

        type_dict = {k: np.float32 for k in worker_data_shape}
        sym = search_plan_to_layer(sym,
                                   last_block,
                                   1000,
                                   type_dict=type_dict,
                                   **worker_data_shape)

    # load and initialize params
    if pOpt.schedule.begin_epoch != 0:
        arg_params, aux_params = load_checkpoint(model_prefix, begin_epoch)
    elif pModel.from_scratch:
        arg_params, aux_params = dict(), dict()
    else:
        arg_params, aux_params = load_checkpoint(pretrain_prefix,
                                                 pretrain_epoch)

    if pModel.process_weight is not None:
        pModel.process_weight(sym, arg_params, aux_params)

    # There are some conflicts between `mergebn` and `attach_quantized_node`
    # in graph_optimize.py when mergebn runs ahead of attach_quantized_node
    # (such as `Symbol.ComposeKeyword`), so the quantize nodes are attached
    # first.
    if pModel.QuantizeTrainingParam is not None and pModel.QuantizeTrainingParam.quantize_flag:
        pQuant = pModel.QuantizeTrainingParam
        # Deliberately an explicit ``== False`` comparison: ``None`` does not
        # compare equal to ``False``, so rewriting this as ``not pGen.fp16``
        # would change which configs pass the check.
        assert pGen.fp16 == False, "current quantize training only support fp32 mode."
        from utils.graph_optimize import attach_quantize_node
        _, out_shape, _ = sym.get_internals().infer_shape(**worker_data_shape)
        out_shape_dictionary = dict(
            zip(sym.get_internals().list_outputs(), out_shape))
        sym = attach_quantize_node(sym, out_shape_dictionary,
                                   pQuant.WeightQuantizeParam,
                                   pQuant.ActQuantizeParam,
                                   pQuant.quantized_op)
    # merge batch normalization to save memory in fix bn training
    from utils.graph_optimize import merge_bn
    sym, arg_params, aux_params = merge_bn(sym, arg_params, aux_params)

    if pModel.random:
        # Seed both RNGs from the wall clock.
        mx.random.seed(int(time.time()))
        np.random.seed(int(time.time()))

    init = mx.init.Xavier(factor_type="in", rnd_type='gaussian', magnitude=2)
    init.set_verbosity(verbose=True)

    # create solver
    fixed_param = pModel.pretrain.fixed_param
    excluded_param = pModel.pretrain.excluded_param
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]

    if pModel.teacher_param:
        from models.KD.utils import create_teacher_module
        from models.KD.detection_module import KDDetModule
        t_mod, t_label_name, t_label_shape = create_teacher_module(
            pModel.teacher_param, worker_data_shape, input_batch_size, ctx,
            rank, logger)
        mod = KDDetModule(sym,
                          teacher_module=t_mod,
                          teacher_label_names=t_label_name,
                          teacher_label_shapes=t_label_shape,
                          data_names=data_names,
                          label_names=label_names,
                          logger=logger,
                          context=ctx,
                          fixed_param=fixed_param,
                          excluded_param=excluded_param)
    else:
        mod = DetModule(sym,
                        data_names=data_names,
                        label_names=label_names,
                        logger=logger,
                        context=ctx,
                        fixed_param=fixed_param,
                        excluded_param=excluded_param)

    eval_metrics = mx.metric.CompositeEvalMetric(metric_list)

    # callback
    batch_end_callback = [
        callback.Speedometer(train_data.batch_size,
                             frequent=pGen.log_frequency)
    ]
    batch_end_callback += pModel.batch_end_callbacks or []
    epoch_end_callback = callback.do_checkpoint(model_prefix)
    sym.save(model_prefix + ".json")

    # decide learning rate
    lr_mode = pOpt.optimizer.lr_mode or 'step'
    base_lr = pOpt.optimizer.lr * kv.num_workers
    lr_factor = pOpt.schedule.lr_factor or 0.1

    iter_per_epoch = len(train_data) // input_batch_size
    total_iter = iter_per_epoch * (end_epoch - begin_epoch)
    # Negative entries count back from the end of training.
    lr_iter = [total_iter + it if it < 0 else it for it in lr_iter]
    lr_iter = [it // kv.num_workers for it in lr_iter]
    # Shift the decay points by the iterations already done when resuming.
    lr_iter = [it - iter_per_epoch * begin_epoch for it in lr_iter]
    lr_iter_discount = [it for it in lr_iter if it > 0]
    # Decay points that are already in the past are applied up front.
    current_lr = base_lr * (lr_factor**(len(lr_iter) - len(lr_iter_discount)))
    if rank == 0:
        logging.info('total iter {}'.format(total_iter))
        logging.info('lr {}, lr_iters {}'.format(current_lr, lr_iter_discount))
        logging.info('lr mode: {}'.format(lr_mode))

    if pOpt.warmup and pOpt.schedule.begin_epoch == 0:
        if rank == 0:
            logging.info('warmup lr {}, warmup step {}'.format(
                pOpt.warmup.lr, pOpt.warmup.iter))
        if lr_mode == 'step':
            lr_scheduler = WarmupMultiFactorScheduler(
                step=lr_iter_discount,
                factor=lr_factor,
                warmup=True,
                warmup_type=pOpt.warmup.type,
                warmup_lr=pOpt.warmup.lr,
                warmup_step=pOpt.warmup.iter)
        elif lr_mode == 'cosine':
            warmup_lr_scheduler = AdvancedLRScheduler(mode='linear',
                                                      base_lr=pOpt.warmup.lr,
                                                      target_lr=base_lr,
                                                      niters=pOpt.warmup.iter)
            cosine_lr_scheduler = AdvancedLRScheduler(
                mode='cosine',
                base_lr=base_lr,
                target_lr=0,
                niters=(iter_per_epoch *
                        (end_epoch - begin_epoch)) - pOpt.warmup.iter)
            lr_scheduler = LRSequential(
                [warmup_lr_scheduler, cosine_lr_scheduler])
        else:
            raise NotImplementedError
    else:
        if lr_mode == 'step':
            lr_scheduler = WarmupMultiFactorScheduler(step=lr_iter_discount,
                                                      factor=lr_factor)
        elif lr_mode == 'cosine':
            lr_scheduler = AdvancedLRScheduler(mode='cosine',
                                               base_lr=base_lr,
                                               target_lr=0,
                                               niters=iter_per_epoch *
                                               (end_epoch - begin_epoch))
        else:
            lr_scheduler = None

    # optimizer
    optimizer_params = dict(momentum=pOpt.optimizer.momentum,
                            wd=pOpt.optimizer.wd,
                            learning_rate=current_lr,
                            lr_scheduler=lr_scheduler,
                            rescale_grad=1.0 / (len(ctx) * kv.num_workers),
                            clip_gradient=pOpt.optimizer.clip_gradient)

    if pKv.fp16:
        optimizer_params['multi_precision'] = True
        optimizer_params['rescale_grad'] /= 128.0

    profile = pGen.profile or False
    if profile:
        mx.profiler.set_config(profile_all=True,
                               filename=os.path.join(save_path,
                                                     "profile.json"))

    # train
    mod.fit(train_data=train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=kv,
            optimizer=pOpt.optimizer.type,
            optimizer_params=optimizer_params,
            initializer=init,
            allow_missing=True,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch,
            profile=profile)

    logging.info("Training has done")
    time.sleep(10)
    logging.info("Exiting")
Exemple #6
0
    if pModel.QuantizeTrainingParam is not None and pModel.QuantizeTrainingParam.quantize_flag:
        pQuant = pModel.QuantizeTrainingParam
        assert pGen.fp16 == False, "current quantize training only support fp32 mode."
        from utils.graph_optimize import attach_quantize_node
        worker_data_shape = dict([(name, tuple(shape)) for name, shape in data_shape])
        # print(worker_data_shape)
        # raise NotImplementedError
        _, out_shape, _ = sym.get_internals().infer_shape(**worker_data_shape)
        out_shape_dictoinary = dict(zip(sym.get_internals().list_outputs(), out_shape))
        sym = attach_quantize_node(sym, out_shape_dictoinary, pQuant.WeightQuantizeParam,
                                   pQuant.ActQuantizeParam, pQuant.quantized_op)
    sym.save(pTest.model.prefix + "_infer_speed.json")


    ctx = mx.gpu(gpu)
    mod = DetModule(sym, data_names=data_names, context=ctx)
    mod.bind(data_shapes=data_shape, for_training=False)
    mod.set_params({}, {}, True)

    # let AUTOTUNE run for once
    mod.forward(data_batch, is_train=False)
    for output in mod.get_outputs():
        output.wait_to_read()

    tic = time.time()
    for _ in range(count):
        mod.forward(data_batch, is_train=False)
        for output in mod.get_outputs():
            output.wait_to_read()
    toc = time.time()
if __name__ == "__main__":
    # os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"

    args, config = parse_args()

    pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \
    transform, data_name, label_name, metric_list = config.get_config(is_train=False)

    nms = py_nms_wrapper(pTest.nms.thr)
    sym = pModel.test_symbol
    pshort = 800
    plong = 2000

    arg_params, aux_params = load_checkpoint(pTest.model.prefix, args.epoch)
    mod = DetModule(sym,
                    data_names=["data", "im_info", "im_id", "rec_id"],
                    context=mx.gpu(args.gpu_id))
    provide_data = [("data", (1, 3, pshort, plong)), ("im_info", (1, 3)),
                    ("im_id", (1, )), ("rec_id", (1, ))]
    mod.bind(data_shapes=provide_data, for_training=False)
    mod.set_params(arg_params, aux_params, allow_extra=False)

    image_list = []
    if os.path.isfile(args.path):
        if ".txt" in args.path:
            list_file = open(args.path, 'r')
            list_lines = list_file.readlines()
            list_file.close()
            (fpath, fname) = os.path.split(args.path)
            for aline in list_lines:
                uints = aline.split(' ')
Exemple #8
0
class predictor(object):
    """Single-image detector built on a bound ``DetModule``.

    The constructor reads the model description from
    ``config.get_config(is_train=False)``, binds the test symbol on one GPU
    and loads the checkpoint weights. ``run_image`` then reads an image
    from disk, runs a forward pass and returns thresholded, NMS-filtered
    detections keyed by category name.
    """

    def __init__(self, config, batch_size, gpu_id, thresh):
        """Bind the test symbol on ``mx.gpu(gpu_id)`` and load its weights.

        Args:
            config: Object exposing ``get_config(is_train=False)``.
            batch_size: Batch dimension used for the ``data`` input shape.
            gpu_id: Index passed to ``mx.gpu``.
            thresh: Minimum class score kept by ``run_image``.
        """
        self.config = config
        self.batch_size = batch_size
        self.thresh = thresh

        # Parse the parameter file of model; entries not needed here are
        # discarded.
        _, _, _, _, _, _, pModel, _, pTest, \
        transform, data_name, label_name, _ = config.get_config(is_train=False)

        self.data_name = data_name
        self.label_name = label_name
        self.p_long, self.p_short = transform[1].p.long, transform[1].p.short

        # Define NMS type
        if callable(pTest.nms.type):
            self.do_nms = pTest.nms.type(pTest.nms.thr)
        else:
            from operator_py.nms import py_nms_wrapper

            self.do_nms = py_nms_wrapper(pTest.nms.thr)

        sym = pModel.test_symbol
        sym.save(pTest.model.prefix + "_test.json")

        ctx = mx.gpu(gpu_id)
        data_shape = [
            ('data', (batch_size, 3, 800, 1200)),
            ("im_info", (1, 3)),
            ("im_id", (1, )),
            ("rec_id", (1, )),
        ]

        # Load network
        arg_params, aux_params = load_checkpoint(pTest.model.prefix,
                                                 pTest.model.epoch)
        self.mod = DetModule(sym, data_names=data_name, context=ctx)
        self.mod.bind(data_shapes=data_shape, for_training=False)
        self.mod.set_params(arg_params, aux_params, allow_extra=False)

    def preprocess_image(self, input_img):
        """Convert, rescale and transpose an image for the network.

        Args:
            input_img: HWC image array with the channel order produced by
                ``cv2.imread`` (BGR).

        Returns:
            ``(image, im_info)`` where ``image`` is the resized CHW array
            and ``im_info`` is ``(round(h * scale), round(w * scale),
            scale)``.
        """
        image = input_img[:, :, ::-1]  # BGR -> RGB

        h, w = image.shape[:2]
        short = min(h, w)
        long = max(h, w)
        # Largest scale for which the short side stays within p_short and
        # the long side within p_long.
        scale = min(self.p_short / short, self.p_long / long)

        im_info = (round(h * scale), round(w * scale), scale)

        image = cv2.resize(image,
                           None,
                           None,
                           scale,
                           scale,
                           interpolation=cv2.INTER_LINEAR)
        image = image.transpose((2, 0, 1))  # HWC -> CHW

        return image, im_info

    def run_image(self, img_path):
        """Detect objects in the image stored at ``img_path``.

        Args:
            img_path: Path of the image file to read.

        Returns:
            Dict mapping ``CATEGORIES[cid]`` to a float32 array whose rows
            are four box values followed by the score; the box values are
            divided by the resize scale. Classes without detections are
            absent.

        Raises:
            IOError: If the image cannot be read.
        """
        image = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of
            # raising; fail here with a message naming the file.
            raise IOError("Failed to read image: {}".format(img_path))
        image, im_info = self.preprocess_image(image)
        input_data = {
            'data': [image],
            'im_info': [im_info],
            'im_id': [0],
            'rec_id': [0],
        }

        data = [mx.nd.array(input_data[name]) for name in self.data_name]
        label = []
        provide_data = [(k, v.shape) for k, v in zip(self.data_name, data)]
        provide_label = [(k, v.shape) for k, v in zip(self.label_name, label)]

        data_batch = mx.io.DataBatch(data=data,
                                     label=label,
                                     provide_data=provide_data,
                                     provide_label=provide_label)

        self.mod.forward(data_batch, is_train=False)
        out = [x.asnumpy() for x in self.mod.get_outputs()]

        # Outputs 3 and 4 of the module are used as class scores and boxes.
        cls_score = out[3]
        bboxes = out[4]

        result = {}
        for cid in range(cls_score.shape[1]):
            if cid == 0:  # Ignore the background
                continue
            score = cls_score[:, cid]
            if bboxes.shape[1] != 4:
                # One group of four box columns per class.
                cls_box = bboxes[:, cid * 4:(cid + 1) * 4]
            else:
                cls_box = bboxes
            valid_inds = np.where(score >= self.thresh)[0]
            if valid_inds.size == 0:
                # Nothing above the threshold: no entry would be stored for
                # this class, so skip NMS on an empty array.
                continue
            box = cls_box[valid_inds]
            score = score[valid_inds]
            det = np.concatenate((box, score.reshape(-1, 1)),
                                 axis=1).astype(np.float32)
            det = self.do_nms(det)
            if len(det) > 0:
                det[:, :4] = det[:, :4] / im_info[
                    2]  # Restore to the original size
                result[CATEGORIES[cid]] = det

        return result
Exemple #9
0
class TDNDetector:
    """Run a detection model described by a config file on single images.

    The constructor imports the config module, builds and binds the test
    symbol (after ``merge_bn``) and loads the checkpoint weights. Calling
    the instance with an image filename returns the detections whose score
    exceeds the configured threshold.
    """

    def __init__(self, configFn, ctx, outFolder, threshold):
        """Build, bind and load the inference module.

        Args:
            configFn: Path of the config file; ``.py`` is removed and ``/``
                is replaced by ``.`` to obtain the module name to import.
            ctx: Context passed to ``DetModule``.
            outFolder: Folder in which the test symbol JSON is saved.
            threshold: Score threshold applied to detections.
        """
        os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
        config = importlib.import_module(configFn.replace('.py', '').replace('/', '.'))
        _, _, _, _, _, _, self.__pModel, _, self.__pTest, self.transform, _, _, _ = \
            config.get_config(is_train=False)
        self.__pModel = patch_config_as_nothrow(self.__pModel)
        self.__pTest = patch_config_as_nothrow(self.__pTest)
        self.resizeParam = (800, 1200)
        if callable(self.__pTest.nms.type):
            self.__nms = self.__pTest.nms.type(self.__pTest.nms.thr)
        else:
            from operator_py.nms import py_nms_wrapper
            self.__nms = py_nms_wrapper(self.__pTest.nms.thr)
        arg_params, aux_params = load_checkpoint(self.__pTest.model.prefix, self.__pTest.model.epoch)
        sym = self.__pModel.test_symbol
        from utils.graph_optimize import merge_bn
        sym, arg_params, aux_params = merge_bn(sym, arg_params, aux_params)
        self.__mod = DetModule(sym, data_names=['data', 'im_info', 'im_id', 'rec_id'], context=ctx)
        self.__mod.bind(data_shapes=[('data', (1, 3, self.resizeParam[0], self.resizeParam[1])),
                                     ('im_info', (1, 3)),
                                     ('im_id', (1,)),
                                     ('rec_id', (1,))], for_training=False)
        self.__mod.set_params(arg_params, aux_params, allow_extra=False)
        self.__saveSymbol(sym, outFolder, self.__pTest.model.prefix.split('/')[-1])
        self.__threshold = threshold

    def __call__(self, imgFilename):  # detect onto image
        """Detect objects in ``imgFilename``.

        Returns:
            ``(detections, None)`` where ``detections`` is a list of dicts
            with keys ``'cls'``, ``'box'`` and ``'score'``.

        Raises:
            IOError: If the image cannot be read.
        """
        roi_record, scale = self.__readImg(imgFilename)
        h, w = roi_record['data'][0].shape

        # Stack the first three planes of the record into a (1, 3, h, w) batch.
        im_c1 = roi_record['data'][0].reshape(1, 1, h, w)
        im_c2 = roi_record['data'][1].reshape(1, 1, h, w)
        im_c3 = roi_record['data'][2].reshape(1, 1, h, w)
        im_data = np.concatenate((im_c1, im_c2, im_c3), axis=1)

        im_info, im_id, rec_id = [(h, w, scale)], [1], [1]
        data = mx.io.DataBatch(data=[mx.nd.array(im_data),
                                     mx.nd.array(im_info),
                                     mx.nd.array(im_id),
                                     mx.nd.array(rec_id)])
        self.__mod.forward(data, is_train=False)
        # extract results
        outputs = self.__mod.get_outputs(merge_multi_context=False)
        rid, out_im_id, info, cls, box = [x[0].asnumpy().squeeze() for x in outputs]
        cls = cls[:, 1:]   # remove background
        box = box / scale
        output_record = dict(rec_id=rid, im_id=out_im_id, im_info=info, bbox_xyxy=box, cls_score=cls)
        output_record = self.__pTest.process_output([output_record], None)[0]
        final_result = self.__do_nms(output_record)
        # obtain representable output
        detections = []
        for cid, bbox in final_result.items():
            idx = np.where(bbox[:, -1] > self.__threshold)[0]
            for i in idx:
                final_box = bbox[i][:4]
                score = bbox[i][-1]
                detections.append({'cls': cid, 'box': final_box, 'score': score})
        return detections, None

    def __do_nms(self, all_output):
        """Threshold and NMS-filter the scores of every class.

        Args:
            all_output: Dict with ``'bbox_xyxy'`` and ``'cls_score'`` arrays.

        Returns:
            Dict mapping ``coco[cid]`` to the array returned by the NMS
            callable; classes with no score above the threshold are absent.
        """
        box = all_output['bbox_xyxy']
        score = all_output['cls_score']
        final_dets = {}
        for cid in range(score.shape[1]):
            score_cls = score[:, cid]
            valid_inds = np.where(score_cls > self.__threshold)[0]
            if valid_inds.shape[0] == 0:
                continue
            box_cls = box[valid_inds]
            score_cls = score_cls[valid_inds]
            det = np.concatenate((box_cls, score_cls.reshape(-1, 1)), axis=1).astype(np.float32)
            det = self.__nms(det)
            cls = coco[cid]
            final_dets[cls] = det
        return final_dets

    def __readImg(self, imgFilename):
        """Read an image and run it through ``self.transform``.

        Returns:
            ``(roi_record, scale)``: the record after every transform has
            been applied, and the scale derived from the record's ``h``/``w``
            and ``self.resizeParam``.

        Raises:
            IOError: If the image cannot be read.
        """
        img = cv2.imread(imgFilename, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread signals failure by returning None instead of
            # raising; fail here with a message naming the file.
            raise IOError("Failed to read image: {}".format(imgFilename))
        height, width, _ = img.shape
        roi_record = {'gt_bbox': np.array([[0., 0., 0., 0.]]), 'gt_class': np.array([0])}
        roi_record['image_url'] = imgFilename
        roi_record['h'] = height
        roi_record['w'] = width

        for trans in self.transform:
            trans.apply(roi_record)
        img_shape = [roi_record['h'], roi_record['w']]
        shorts, longs = min(img_shape), max(img_shape)
        scale = min(self.resizeParam[0] / shorts, self.resizeParam[1] / longs)

        return roi_record, scale

    def __saveSymbol(self, sym, outFolder, fnPrefix):
        """Save ``sym`` as ``<outFolder>/<fnPrefix>_symbol_test.json``."""
        # exist_ok avoids the race between checking for the folder and
        # creating it.
        os.makedirs(outFolder, exist_ok=True)
        resFilename = os.path.join(outFolder, fnPrefix + "_symbol_test.json")
        sym.save(resFilename)
Exemple #10
0
def train_net(config):
    """Train the detection network described by ``config``.

    ``config.get_config(is_train=True)`` must return the 13 values unpacked
    below (parameter groups, transforms, data/label names, metric list).
    The function then:

    * loads the cached roidbs named in ``pDataset.image_set`` from
      ``data/cache/<name>.roidb``, drops records without ground-truth boxes
      and appends a copy of every remaining record marked ``flipped``;
    * builds an ``AnchorLoader`` and logs the inferred symbol shapes;
    * optionally applies memonger, loads initial weights, derives the
      learning-rate schedule and calls ``DetModule.fit``.

    Everything is written below ``experiments/<pGen.name>``: the path of
    ``log.txt`` is handed to ``config_logger`` and the ``checkpoint`` prefix
    to ``callback.do_checkpoint``.

    Args:
        config: object exposing ``get_config(is_train=True)``.
    """
    pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \
    transform, data_name, label_name, metric_list = config.get_config(is_train=True)

    ctx = [mx.gpu(int(i)) for i in pKv.gpus]
    pretrain_prefix = pModel.pretrain.prefix
    pretrain_epoch = pModel.pretrain.epoch
    prefix = pGen.name
    save_path = os.path.join("experiments", prefix)
    begin_epoch = pOpt.schedule.begin_epoch
    end_epoch = pOpt.schedule.end_epoch
    lr_iter = pOpt.schedule.lr_iter

    # only rank==0 print all debug infos
    kvstore_type = "dist_sync" if os.environ.get(
        "DMLC_ROLE") == "worker" else pKv.kvstore
    kv = mx.kvstore.create(kvstore_type)
    rank = kv.rank

    # for distributed training using shared file system
    if rank == 0:
        # exist_ok avoids failing when the folder appears between a
        # separate existence check and the creation call.
        os.makedirs(save_path, exist_ok=True)

    from utils.logger import config_logger
    config_logger(os.path.join(save_path, "log.txt"))

    model_prefix = os.path.join(save_path, "checkpoint")

    # set up logger
    logger = logging.getLogger()

    sym = pModel.train_symbol

    # setup multi-gpu
    input_batch_size = pKv.batch_image * len(ctx)

    # print config
    # if rank == 0:
    #     logger.info(pprint.pformat(config))

    # load dataset and prepare imdb for training
    image_sets = pDataset.image_set
    roidb = []
    for image_set in image_sets:
        # NOTE(security): pickle can execute arbitrary code while loading;
        # the cache files must come from a trusted source.
        # The context manager closes each cache file as soon as it is read.
        with open("data/cache/{}.roidb".format(image_set), "rb") as cache_file:
            records = pkl.load(cache_file, encoding="latin1")
        # filter empty image
        roidb.extend(rec for rec in records if rec["gt_bbox"].shape[0] > 0)
    # add flip roi record
    flipped_roidb = []
    for rec in roidb:
        new_rec = rec.copy()
        new_rec["flipped"] = True
        flipped_roidb.append(new_rec)
    roidb = roidb + flipped_roidb

    from core.detection_input import AnchorLoader
    train_data = AnchorLoader(roidb=roidb,
                              transform=transform,
                              data_name=data_name,
                              label_name=label_name,
                              batch_size=input_batch_size,
                              shuffle=True,
                              kv=kv)

    # infer shape (per-device batch: the leading dim is replaced by
    # pKv.batch_image)
    worker_data_shape = dict(train_data.provide_data +
                             train_data.provide_label)
    for key in worker_data_shape:
        worker_data_shape[key] = (
            pKv.batch_image, ) + worker_data_shape[key][1:]

    # One inference pass on the symbol itself is enough for the terminal
    # output shapes.
    _, terminal_out_shape, _ = sym.infer_shape(**worker_data_shape)
    terminal_out_shape_dict = list(zip(sym.list_outputs(), terminal_out_shape))

    internals = sym.get_internals()
    _, internal_out_shape, _ = internals.infer_shape(**worker_data_shape)
    out_shape_dict = list(zip(internals.list_outputs(), internal_out_shape))

    if rank == 0:
        logger.info('parameter shape')
        logger.info(
            pprint.pformat(
                [i for i in out_shape_dict if not i[0].endswith('output')]))

        logger.info('intermediate output shape')
        logger.info(
            pprint.pformat(
                [i for i in out_shape_dict if i[0].endswith('output')]))

        logger.info('terminal output shape')
        logger.info(pprint.pformat(terminal_out_shape_dict))

    # memonger
    if pModel.memonger:
        last_block = pModel.memonger_until or ""
        if rank == 0:
            logger.info("do memonger up to {}".format(last_block))

        type_dict = {k: np.float32 for k in worker_data_shape}
        sym = search_plan_to_layer(sym,
                                   last_block,
                                   1000,
                                   type_dict=type_dict,
                                   **worker_data_shape)

    # load and initialize params
    if pOpt.schedule.begin_epoch != 0:
        # resume from this experiment's own checkpoint
        arg_params, aux_params = load_checkpoint(model_prefix, begin_epoch)
    elif pModel.from_scratch:
        arg_params, aux_params = dict(), dict()
    else:
        arg_params, aux_params = load_checkpoint(pretrain_prefix,
                                                 pretrain_epoch)

    # process_weight is an optional hook on the model params. Look it up
    # explicitly so that an AttributeError raised *inside* the hook is not
    # mistaken for "hook not defined" and silently dropped.
    process_weight = getattr(pModel, "process_weight", None)
    if process_weight is not None:
        process_weight(sym, arg_params, aux_params)

    if pModel.random:
        import time
        seed = int(time.time())
        mx.random.seed(seed)
        np.random.seed(seed)

    init = mx.init.Xavier(factor_type="in", rnd_type='gaussian', magnitude=2)
    init.set_verbosity(verbose=True)

    # create solver
    fixed_param_prefix = pModel.pretrain.fixed_param
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]

    mod = DetModule(sym,
                    data_names=data_names,
                    label_names=label_names,
                    logger=logger,
                    context=ctx,
                    fixed_param_prefix=fixed_param_prefix)

    eval_metrics = mx.metric.CompositeEvalMetric(metric_list)

    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=pGen.log_frequency)
    epoch_end_callback = callback.do_checkpoint(model_prefix)
    sym.save(model_prefix + ".json")

    # decide learning rate
    base_lr = pOpt.optimizer.lr * kv.num_workers
    lr_factor = 0.1

    iter_per_epoch = len(train_data) // input_batch_size
    # schedule steps are divided by the worker count, then shifted by the
    # iterations already covered by the epochs before begin_epoch
    lr_iter = [it // kv.num_workers for it in lr_iter]
    lr_iter = [it - iter_per_epoch * begin_epoch for it in lr_iter]
    lr_iter_discount = [it for it in lr_iter if it > 0]
    # every step that already lies in the past contributes one decay factor
    current_lr = base_lr * (lr_factor**(len(lr_iter) - len(lr_iter_discount)))
    if rank == 0:
        logging.info('total iter {}'.format(iter_per_epoch *
                                            (end_epoch - begin_epoch)))
        logging.info('lr {}, lr_iters {}'.format(current_lr, lr_iter_discount))
    if pOpt.warmup is not None and pOpt.schedule.begin_epoch == 0:
        if rank == 0:
            logging.info('warmup lr {}, warmup step {}'.format(
                pOpt.warmup.lr, pOpt.warmup.iter))

        lr_scheduler = WarmupMultiFactorScheduler(step=lr_iter_discount,
                                                  factor=lr_factor,
                                                  warmup=True,
                                                  warmup_type=pOpt.warmup.type,
                                                  warmup_lr=pOpt.warmup.lr,
                                                  warmup_step=pOpt.warmup.iter)
    elif len(lr_iter_discount) > 0:
        lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(
            lr_iter_discount, lr_factor)
    else:
        lr_scheduler = None

    # optimizer
    optimizer_params = dict(momentum=pOpt.optimizer.momentum,
                            wd=pOpt.optimizer.wd,
                            learning_rate=current_lr,
                            lr_scheduler=lr_scheduler,
                            rescale_grad=1.0 /
                            (len(pKv.gpus) * kv.num_workers),
                            clip_gradient=pOpt.optimizer.clip_gradient)

    if pKv.fp16:
        optimizer_params['multi_precision'] = True
        optimizer_params['rescale_grad'] /= 128.0

    # train
    mod.fit(train_data=train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=kv,
            optimizer=pOpt.optimizer.type,
            optimizer_params=optimizer_params,
            initializer=init,
            allow_missing=True,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)

    logging.info("Training has done")
Exemple #11
0
class TDNDetector:
    """Single-image detector built from a config module and a checkpoint.

    An instance is callable: ``detector(path)`` runs the network on one
    image, writes a visualisation into ``outFolder`` and returns the
    detections scoring above ``threshold``.
    """

    def __init__(self, configFn, ctx, outFolder, threshold):
        """Load the config, bind the test symbol and restore its weights.

        Args:
            configFn: path of the config module, e.g. ``config/foo.py``;
                it is turned into a dotted module name and imported.
            ctx: context passed to ``DetModule``.
            outFolder: folder receiving the saved test symbol and the
                visualised images.
            threshold: score threshold used both before NMS and when
                selecting the final detections.
        """
        os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
        # Strip only a trailing ".py"; a blanket str.replace would also
        # remove ".py" occurring in the middle of the path.
        module_path = configFn[:-3] if configFn.endswith('.py') else configFn
        config = importlib.import_module(module_path.replace('/', '.'))
        (_, _, _, _, _, _, self.__pModel, _, self.__pTest, self.transform,
         _, _, _) = config.get_config(is_train=False)
        self.__pModel = patch_config_as_nothrow(self.__pModel)
        self.__pTest = patch_config_as_nothrow(self.__pTest)
        self.resizeParam = (800, 1200)
        if callable(self.__pTest.nms.type):
            self.__nms = self.__pTest.nms.type(self.__pTest.nms.thr)
        else:
            from operator_py.nms import py_nms_wrapper
            self.__nms = py_nms_wrapper(self.__pTest.nms.thr)
        arg_params, aux_params = load_checkpoint(self.__pTest.model.prefix,
                                                 self.__pTest.model.epoch)
        sym = self.__pModel.test_symbol
        from utils.graph_optimize import merge_bn
        sym, arg_params, aux_params = merge_bn(sym, arg_params, aux_params)
        self.__mod = DetModule(
            sym,
            data_names=['data', 'im_info', 'im_id', 'rec_id'],
            context=ctx)
        self.__mod.bind(data_shapes=[('data', (1, 3, self.resizeParam[0],
                                               self.resizeParam[1])),
                                     ('im_info', (1, 3)), ('im_id', (1, )),
                                     ('rec_id', (1, ))],
                        for_training=False)
        self.__mod.set_params(arg_params, aux_params, allow_extra=False)
        self.__saveSymbol(sym, outFolder,
                          self.__pTest.model.prefix.split('/')[-1])
        self.__threshold = threshold
        self.outFolder = outFolder

    def __call__(self, imgFilename):  # detect onto image
        """Detect objects in ``imgFilename`` and save a visualisation.

        The annotated image is written to ``outFolder`` under the input's
        base name, so an absolute or nested input path can never make the
        write land outside ``outFolder`` (or on top of the input itself).

        Returns:
            ``(detections, None)`` where ``detections`` is a list of dicts
            with keys ``'cls'``, ``'box'`` and ``'score'``.
        """
        roi_record, scale, img = self.__readImg(imgFilename)
        h, w = roi_record['data'][0].shape

        # stack the first three (h, w) planes into a (1, 3, h, w) batch
        planes = [roi_record['data'][c].reshape(1, 1, h, w) for c in range(3)]
        im_data = np.concatenate(planes, axis=1)

        im_info, im_id, rec_id = [(h, w, scale)], [1], [1]
        data = mx.io.DataBatch(data=[
            mx.nd.array(im_data),
            mx.nd.array(im_info),
            mx.nd.array(im_id),
            mx.nd.array(rec_id)
        ])
        self.__mod.forward(data, is_train=False)
        # extract results
        outputs = self.__mod.get_outputs(merge_multi_context=False)
        rid, out_im_id, info, cls, box = [
            x[0].asnumpy().squeeze() for x in outputs
        ]
        cls = cls[:, 1:]  # remove background
        box = box / scale
        output_record = dict(rec_id=rid,
                             im_id=out_im_id,
                             im_info=info,
                             bbox_xyxy=box,
                             cls_score=cls)
        output_record = self.__pTest.process_output([output_record], None)[0]
        final_result = self.__do_nms(output_record)
        # obtain representable output
        detections = []
        for cid, bbox in final_result.items():
            idx = np.where(bbox[:, -1] > self.__threshold)[0]
            for i in idx:
                detections.append({
                    'cls': cid,
                    'box': bbox[i][:4],
                    'score': bbox[i][-1]
                })
        img_vis = self.__vis_detections(detections, img)
        out_path = os.path.join(self.outFolder, os.path.basename(imgFilename))
        cv2.imwrite(out_path, img_vis)
        return detections, None

    def __vis_detections(self, dets, img):
        """Draw every detection's box and ``<cls>: <score>`` label on ``img``.

        Returns the image returned by the last OpenCV drawing call (``img``
        itself when ``dets`` is empty).
        """
        font = cv2.FONT_HERSHEY_SIMPLEX
        for d in dets:
            score = d['score']
            # OpenCV drawing functions need integer pixel coordinates; the
            # boxes coming out of NMS are float32.
            x1, y1, x2, y2 = (int(v) for v in d['box'])
            img = cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 4)
            img = cv2.putText(img,
                              str(d['cls']) + ': ' + str(round(score, 2)),
                              (x1, y1), font, 1, (255, 0, 0), 2,
                              cv2.LINE_AA)
        return img

    def __do_nms(self, all_output):
        """Filter every class column by score and run NMS on the survivors.

        Reads ``all_output['bbox_xyxy']`` and ``all_output['cls_score']``.
        Rows scoring above the threshold get their score appended as a
        trailing column and are passed, as float32, to the NMS callable.

        Returns:
            dict mapping the class column index to the NMS output; columns
            without any row above the threshold are omitted.
        """
        box = all_output['bbox_xyxy']
        score = all_output['cls_score']
        final_dets = {}
        for cid in range(score.shape[1]):
            score_cls = score[:, cid]
            valid_inds = np.where(score_cls > self.__threshold)[0]
            if valid_inds.shape[0] == 0:
                continue
            det = np.concatenate(
                (box[valid_inds], score_cls[valid_inds].reshape(-1, 1)),
                axis=1).astype(np.float32)
            final_dets[cid] = self.__nms(det)
        return final_dets

    def __readImg(self, imgFilename):
        """Read an image and run it through the configured transforms.

        Returns:
            ``(roi_record, scale, img)``: the transformed record, the resize
            scale derived from ``resizeParam`` and the record's ``h``/``w``,
            and the image as loaded by OpenCV.

        Raises:
            IOError: if OpenCV could not read ``imgFilename``.
        """
        img = cv2.imread(imgFilename, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread returns None instead of raising on a missing or
            # undecodable file.
            raise IOError("Unable to read image: {}".format(imgFilename))
        height, width = img.shape[:2]
        roi_record = {
            'gt_bbox': np.array([[0., 0., 0., 0.]]),
            'gt_class': np.array([0]),
            'image_url': imgFilename,
            'h': height,
            'w': width,
        }

        for trans in self.transform:
            trans.apply(roi_record)
        img_shape = [roi_record['h'], roi_record['w']]
        shorts, longs = min(img_shape), max(img_shape)
        scale = min(self.resizeParam[0] / shorts, self.resizeParam[1] / longs)

        return roi_record, scale, img

    def __saveSymbol(self, sym, outFolder, fnPrefix):
        """Save ``sym`` to ``<outFolder>/<fnPrefix>_symbol_test.json``.

        ``outFolder`` is created (with parents) when it does not exist.
        """
        # exist_ok closes the gap between checking for and creating the
        # folder, during which another process could create it.
        os.makedirs(outFolder, exist_ok=True)
        sym.save(os.path.join(outFolder, fnPrefix + "_symbol_test.json"))


#import mxnet as mx
#import argparse
#from infer import TDNDetector

#def parse_args():
#    parser = argparse.ArgumentParser(description='Test Detection')
#    parser.add_argument('--config', type=str, default='config/faster_r101v2c4_c5_256roi_1x.py', help='config file path')
#    parser.add_argument('--ctx',    type=int, default=0,     help='GPU index. Set negative value to use CPU')
#    #parser.add_argument('--inputs', type=str, nargs='+', required=True, default='', help='File(-s) to test')
#    parser.add_argument('--output', type=str, default='results', help='Where to store results')
#    parser.add_argument('--threshold', type=float, default=0.5,  help='Detector threshold')
#    return parser.parse_args()

#if __name__ == "__main__":
#    args = parse_args()
#    ctx = mx.gpu(args.ctx) if args.ctx>=0 else args.cpu()
#    #imgFilenames = args.inputs
#    imgFilenames = ['car.jpg', 'COCO_val2014_000000581929.jpg']
#    detector = TDNDetector(args.config, ctx, args.output, args.threshold)
#    for i, imgFilename in enumerate(imgFilenames):
#            print(imgFilename)
#            dets,_= detector(imgFilename)
#            print(dets)