def main():
    """Entry point: train the ResNet-101 FPN-DCN OHEM model on COCO trainval end-to-end."""
    update_config("configs/coco/resnet_v1_101_coco_trainval_fpn_dcn_end2end_ohem.yaml")
    log_init(filename=config.TRAIN.model_prefix + "train.log")
    logging.info(pprint.pformat(config))
    # Disable cuDNN autotuning and use the round-robin GPU memory pool.
    os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
    os.environ["MXNET_GPU_MEM_POOL_TYPE"] = "Round"
    devices = []
    for gpu_id in config.gpus.split(','):
        devices.append(mx.gpu(int(gpu_id)))
    # Repeat the device list IM_PER_GPU times so each image slot gets a context.
    devices = devices * config.network.IM_PER_GPU
    train_net(devices, config.TRAIN.begin_epoch, config.TRAIN.lr,
              config.TRAIN.lr_step)
def main():
    """Entry point: train the ResNet-50 R-FCN DCN OHEM model on VOC 07+12 (single-GPU config)."""
    update_config(
        "configs/voc/resnet_v1_50_voc0712_rfcn_dcn_end2end_ohem_one_gpu.yaml")
    log_init(filename=config.TRAIN.model_prefix + "train.log")
    logging.info(pprint.pformat(config))
    # Disable cuDNN autotuning and use the round-robin GPU memory pool.
    os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
    os.environ["MXNET_GPU_MEM_POOL_TYPE"] = "Round"
    devices = [mx.gpu(int(gpu_id)) for gpu_id in config.gpus.split(',')]
    train_net(
        devices,
        config.network.pretrained,
        config.network.pretrained_epoch,
        config.TRAIN.model_prefix,
        config.TRAIN.begin_epoch,
        config.TRAIN.end_epoch,
        config.TRAIN.lr,
        config.TRAIN.lr_step,
    )
def main():
    """Entry point for RetinaNet: build the run configuration from CLI args,
    then dispatch to demo_net (with --demo) or train_net."""
    os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
    os.environ["MXNET_GPU_MEM_POOL_TYPE"] = "Round"
    args = parse_args()
    setattr(mobula.config, "NVCC", args.nvcc)

    config = easydict.EasyDict()
    config.gpus = [int(x) for x in str(args.gpus).split(',')]

    config.dataset = easydict.EasyDict()
    config.dataset.NUM_CLASSES = args.num_classes
    config.dataset.dataset_type = args.dataset_type
    config.dataset.dataset_path = args.dataset_root

    config.retinanet = easydict.EasyDict()
    config.retinanet.network = easydict.EasyDict()
    config.retinanet.network.FPN_STRIDES = [8, 16, 32, 64, 128]
    config.retinanet.network.BASE_SIZES = [(32, 32), (64, 64), (128, 128),
                                           (256, 256), (512, 512)]
    # NOTE(review): the sibling RetinaNet config uses {2^0, 2^(1/3), 2^(2/3)};
    # 2**(1 / 2) here looks like a typo, but it is kept to preserve behavior —
    # confirm against the intended anchor scales.
    config.retinanet.network.SCALES = [2**0, 2**(1 / 2), 2**(2 / 3)]
    config.retinanet.network.RATIOS = [1 / 2, 1, 2]
    config.retinanet.network.bbox_norm_coef = [0.1, 0.1, 0.2, 0.2]

    config.TRAIN = easydict.EasyDict()
    config.TRAIN.batch_size = 2 * len(config.gpus)
    # Linear LR scaling: base lr 0.01 for a total batch of 16.
    config.TRAIN.lr = 0.01 * config.TRAIN.batch_size / 16
    config.TRAIN.warmup_lr = config.TRAIN.lr
    config.TRAIN.warmup_step = 1000
    config.TRAIN.wd = 1e-4
    config.TRAIN.momentum = .9
    # BUG FIX: the original format string had one placeholder but two
    # arguments, so the learning rate was silently dropped from the log path.
    config.TRAIN.log_path = "output/{}/RetinaNet-hflip-{}".format(
        config.dataset.dataset_type, config.TRAIN.lr)
    config.TRAIN.log_interval = 100
    config.TRAIN.cls_focal_loss_alpha = .25
    config.TRAIN.cls_focal_loss_gamma = 2
    config.TRAIN.image_short_size = 600
    config.TRAIN.image_max_long_size = 1333
    config.TRAIN.aspect_grouping = True
    # if aspect_grouping is set to False, all images will be pad to (PAD_H, PAD_W)
    config.TRAIN.PAD_H = 768
    config.TRAIN.PAD_W = 768
    config.TRAIN.begin_epoch = 0
    config.TRAIN.end_epoch = 28
    config.TRAIN.lr_step = [6, 8]
    config.TRAIN.FLIP = True
    config.TRAIN.resume = None
    config.TRAIN.trainer_resume = None

    config.network = easydict.EasyDict()
    config.network.FIXED_PARAMS = []
    config.network.sync_bn = True
    # Frozen (global) BN stats only make sense when not syncing BN across devices.
    config.network.use_global_stats = False if config.network.sync_bn else True
    config.network.merge_backbone_bn = False

    config.val = easydict.EasyDict()
    if args.demo:
        config.val.params_file = args.demo_params
        config.val.viz = args.viz
        demo_net(config)
    else:
        os.makedirs(config.TRAIN.log_path, exist_ok=True)
        log_init(filename=os.path.join(config.TRAIN.log_path,
                                       "train_{}.log".format(time.time())))
        logging.info(pprint.pformat(config))
        train_net(config)
def main():
    """Entry point for RetinaNet (ResNet-v1b backbone): build the run
    configuration from CLI args, then dispatch to demo_net (with --demo) or
    train_net."""
    os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
    load_mobula_ops()
    args = parse_args()
    setattr(mobula.config, "NVCC", args.nvcc)
    setattr(mobula.config, "SHOW_BUILDING_COMMAND", True)

    config = easydict.EasyDict()
    config.gpus = [int(x) for x in str(args.gpus).split(',')]

    config.dataset = easydict.EasyDict()
    config.dataset.NUM_CLASSES = args.num_classes
    config.dataset.dataset_type = args.dataset_type
    config.dataset.dataset_path = args.dataset_root
    config.dataset.max_bbox_number = 200

    config.retinanet = easydict.EasyDict()
    config.retinanet.network = easydict.EasyDict()
    config.retinanet.network.FPN_STRIDES = [8, 16, 32, 64, 128]
    config.retinanet.network.BASE_SIZES = [(32, 32), (64, 64), (128, 128),
                                           (256, 256), (512, 512)]
    config.retinanet.network.SCALES = [2**0, 2**(1 / 3), 2**(2 / 3)]
    config.retinanet.network.RATIOS = [1 / 2, 1, 2]
    config.retinanet.network.bbox_norm_coef = [1, 1, 1, 1]

    config.TRAIN = easydict.EasyDict()
    config.TRAIN.batch_size = args.im_per_gpu * len(config.gpus)
    # Linear LR scaling: base lr 0.01 for a total batch of 16; warm up from
    # lr/3 over a step count scaled inversely with batch size.
    config.TRAIN.lr = 0.01 * config.TRAIN.batch_size / 16
    config.TRAIN.warmup_lr = config.TRAIN.lr * 1 / 3
    config.TRAIN.warmup_step = int(1000 * 16 / config.TRAIN.batch_size)
    config.TRAIN.wd = 1e-4
    config.TRAIN.momentum = .9
    config.TRAIN.log_interval = 100
    config.TRAIN.cls_focal_loss_alpha = .25
    config.TRAIN.cls_focal_loss_gamma = 2
    config.TRAIN.image_short_size = 800
    config.TRAIN.image_max_long_size = 1333
    config.TRAIN.aspect_grouping = True
    config.TRAIN.negative_iou_threshold = .4
    config.TRAIN.positive_iou_threshold = .5
    # if aspect_grouping is set to False, all images will be pad to (PAD_H, PAD_W)
    config.TRAIN.PAD_H = 768
    config.TRAIN.PAD_W = 768
    config.TRAIN.begin_epoch = 0
    config.TRAIN.end_epoch = 28
    config.TRAIN.lr_step = [5]
    config.TRAIN.FLIP = True
    config.TRAIN.resume = None
    config.TRAIN.trainer_resume = None

    config.network = easydict.EasyDict()
    config.network.BACKBONE = easydict.EasyDict()
    config.network.BACKBONE.name = "resnetv1b"
    config.network.BACKBONE.kwargs = easydict.EasyDict()
    config.network.BACKBONE.kwargs.num_layers = 50
    config.network.BACKBONE.kwargs.pretrained = True
    config.network.BACKBONE.kwargs.norm_kwargs = {
        "num_devices": len(config.gpus)
    }
    config.network.BACKBONE.kwargs.norm_layer = FrozenBatchNorm2d
    # Freeze the stem and first stage of the backbone.
    config.network.FIXED_PARAMS = [".*layers1.*", ".*resnetv1b_conv0.*"]
    config.network.fpn_neck_feature_dim = 256

    # BUG FIX: the original format string had three placeholders but four
    # arguments, so image_max_long_size was silently dropped from the path.
    config.TRAIN.log_path = "output/{}/RetinaNet-hflip-{}-{}-{}".format(
        config.dataset.dataset_type, config.TRAIN.lr,
        config.TRAIN.image_short_size, config.TRAIN.image_max_long_size)

    config.val = easydict.EasyDict()
    if args.demo:
        config.val.params_file = args.demo_params
        config.val.viz = args.viz
        demo_net(config)
    else:
        os.makedirs(config.TRAIN.log_path, exist_ok=True)
        log_init(filename=os.path.join(config.TRAIN.log_path,
                                       "train_{}.log".format(time.time())))
        logging.info(pprint.pformat(config))
        train_net(config)
def main():
    """Entry point for FCOS: build the run configuration from CLI args,
    optionally overlay a YAML config file, then dispatch to demo_net (with
    --demo) or train_net."""
    os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
    # os.environ["MXNET_GPU_MEM_POOL_TYPE"] = "Round"
    load_mobula_ops()
    args = parse_args()
    setattr(mobula.config, "NVCC", args.nvcc)
    setattr(mobula.config, "SHOW_BUILDING_COMMAND", True)

    config = easydict.EasyDict()
    config.gpus = [int(x) for x in str(args.gpus).split(',')]
    config.use_hvd = args.hvd
    if config.use_hvd:
        # Under Horovod each process drives exactly one local GPU.
        import horovod.mxnet as hvd
        hvd.init()
        config.gpus = [hvd.local_rank()]

    config.dataset = easydict.EasyDict()
    config.dataset.NUM_CLASSES = args.num_classes
    config.dataset.dataset_type = args.dataset_type
    config.dataset.dataset_path = args.dataset_root

    config.FCOS = easydict.EasyDict()
    config.FCOS.network = easydict.EasyDict()
    config.FCOS.network.FPN_SCALES = [8, 16, 32, 64, 128]
    # Regression-distance ranges that assign targets to each FPN level.
    config.FCOS.network.FPN_MINIMUM_DISTANCES = [0, 64, 128, 256, 512]
    config.FCOS.network.FPN_MAXIMUM_DISTANCES = [64, 128, 256, 512, 4096]

    config.TRAIN = easydict.EasyDict()
    config.TRAIN.batch_size = args.im_per_gpu * len(config.gpus)
    # Linear LR scaling: base lr 0.01 for a total batch of 16; warm up from
    # lr/3 over a step count scaled inversely with batch size.
    config.TRAIN.lr = 0.01 * config.TRAIN.batch_size / 16
    config.TRAIN.warmup_lr = config.TRAIN.lr * 1 / 3
    config.TRAIN.warmup_step = int(1000 * 16 / config.TRAIN.batch_size)
    config.TRAIN.wd = 1e-4
    config.TRAIN.momentum = .9
    config.TRAIN.log_interval = 100
    config.TRAIN.cls_focal_loss_alpha = .25
    config.TRAIN.cls_focal_loss_gamma = 2
    config.TRAIN.image_short_size = 800
    config.TRAIN.image_max_long_size = 1333
    config.TRAIN.aspect_grouping = True
    # if aspect_grouping is set to False, all images will be pad to (PAD_H, PAD_W)
    config.TRAIN.PAD_H = 768
    config.TRAIN.PAD_W = 768
    config.TRAIN.begin_epoch = 0
    config.TRAIN.end_epoch = 28
    config.TRAIN.lr_step = [5]
    config.TRAIN.FLIP = True
    config.TRAIN.resume = None
    config.TRAIN.trainer_resume = None
    config.TRAIN.USE_FP16 = False
    if config.TRAIN.USE_FP16:
        os.environ["MXNET_SAFE_ACCUMULATION"] = "1"

    config.network = easydict.EasyDict()
    config.network.BACKBONE = easydict.EasyDict()
    config.network.BACKBONE.name = "resnetv1"
    config.network.BACKBONE.kwargs = easydict.EasyDict()
    config.network.BACKBONE.kwargs.pretrained = True
    # Freeze the stem and first stage of the backbone.
    config.network.FIXED_PARAMS = [".*stage1.*", ".*resnetv10_conv0.*"]
    config.network.use_global_stats = True
    config.network.sync_bn = False
    config.network.fpn_neck_feature_dim = 256
    if config.TRAIN.USE_FP16:
        assert config.network.sync_bn is False, "Sync BatchNorm is not supported by amp."

    config.val = easydict.EasyDict()

    if os.path.exists(args.config):
        with open(args.config) as f:
            # BUG FIX: yaml.CLoader only exists when PyYAML is built with
            # libyaml; fall back to the pure-Python Loader otherwise instead
            # of raising AttributeError. NOTE(review): this full loader can
            # construct arbitrary Python objects — acceptable only because
            # args.config is a local, trusted file.
            config_loaded = yaml.load(
                f, Loader=getattr(yaml, "CLoader", yaml.Loader))

        def update_config(c, v):
            # Recursively overlay the loaded YAML dict onto the EasyDict.
            for k in v.keys():
                if isinstance(v[k], dict):
                    update_config(c[k], v[k])
                else:
                    c[k] = v[k]

        update_config(config, config_loaded)
    else:
        logging.info("Escape loading config since it does not exist.")

    config.TRAIN.log_path = "output/{}/{}-{}-{}-{}/reg_weighted_by_centerness_focal_alpha_gamma_lr_{}_{}_{}".format(
        "FCOS-{}-p5-{}".format(config.network.BACKBONE.name, args.extra_flag),
        "fp16" if config.TRAIN.USE_FP16 else "fp32",
        "sync_bn" if config.network.sync_bn else "normal_bn",
        "hvd" if config.use_hvd else "",
        config.dataset.dataset_type, config.TRAIN.lr,
        config.TRAIN.image_short_size, config.TRAIN.image_max_long_size)

    if args.demo:
        config.val.params_file = args.demo_params
        config.val.viz = args.viz
        demo_net(config)
    else:
        os.makedirs(config.TRAIN.log_path, exist_ok=True)
        log_init(filename=os.path.join(config.TRAIN.log_path,
                                       "train_{}.log".format(time.time())))
        logging.info(pprint.pformat(config))
        train_net(config)