def get_module(ctx, sym, provide_data, provide_label, batch_size=None, is_train=True, use_memonger=False):
    """Build a bound, initialized ``mx.mod.Module`` around ``sym``.

    Parameters
    ----------
    ctx : mx.Context or list of Context
        Device(s) the module executes on.
    sym : mx.sym.Symbol
        Network symbol to wrap.
    provide_data, provide_label : list of (name, shape) pairs
        Input/label descriptions, as produced by a data iterator.
    batch_size : int, optional
        When given, the leading dimension of every data/label shape is
        replaced by this value before binding.
    is_train : bool
        Bind for training (True) or inference (False).
    use_memonger : bool
        Run ``memonger.search_plan`` on ``sym`` (using the first data
        shape) before building the module.

    Returns
    -------
    mx.mod.Module
        Bound module with Xavier-initialized params and a 'ccsgd'
        optimizer (lr=1e-4, no momentum, no weight decay).
    """
    if use_memonger:
        first_name, first_shape = provide_data[0]
        sym = memonger.search_plan(sym, data=first_shape)

    data_names = [entry[0] for entry in provide_data]
    label_names = [entry[0] for entry in provide_label]
    mod = mx.mod.Module(symbol=sym,
                        data_names=data_names,
                        label_names=label_names,
                        context=ctx)

    if batch_size is not None:
        # Swap the leading (batch) dimension of every declared shape.
        def _with_batch(descs):
            return [(n, (batch_size,) + s[1:]) for n, s in descs]
        provide_data = _with_batch(provide_data)
        provide_label = _with_batch(provide_label)

    # Single bind call; for_training follows the is_train flag directly.
    mod.bind(data_shapes=provide_data,
             label_shapes=provide_label,
             for_training=is_train,
             inputs_need_grad=False)

    mod.init_params(initializer=mx.init.Xavier(magnitude=2.))
    mod.init_optimizer(optimizer='ccsgd',
                       optimizer_params={'learning_rate': 0.0001,
                                         'momentum': 0.0,
                                         'wd': 0.0})
    return mod
layers : list of stage configuratrion """ assert(len(layers) == 4) base_filter = 64 data = mx.sym.Variable(name='data') conv1 = ConvModule(data, base_filter, kernel=(7, 7), pad=(3, 3), stride=(2, 2)) mp1 = mx.sym.Pooling(data=conv1, pool_type="max", kernel=(3, 3), stride=(2, 2)) sym = mp1 for j in range(len(layers)): for i in range(layers[j]): sym = ResModule(sym, base_filter, j, i) avg = mx.symbol.Pooling(data=sym, kernel=(7, 7), stride=(1, 1), name="global_pool", pool_type='avg') flatten = mx.symbol.Flatten(data=avg, name='flatten') fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=1000, name='fc1') net = mx.symbol.SoftmaxOutput(data=fc1, name='softmax') return net layers = [3, 24, 36, 3] batch_size = 32 net = get_symbol(layers) dshape = (batch_size, 3, 224, 224) net_mem_planned = memonger.search_plan(net, data=dshape) old_cost = memonger.get_cost(net, data=dshape) new_cost = memonger.get_cost(net_mem_planned, data=dshape) print('Old feature map cost=%d MB' % old_cost) print('New feature map cost=%d MB' % new_cost) # You can savely feed the net to the subsequent mxnet training script.
# Demo: unroll a large LSTM and compare feature-map memory cost before and
# after memonger's recomputation plan.
seq_len = 1000
num_hidden = 1024
num_embed = 1024
input_size = 50
num_lstm_layer = 4
num_label = 5000

# NOTE(review): batch_size, concat_decode and use_loss are not defined in
# this chunk — presumably set earlier in the file; confirm before running
# standalone.
net = lstm_unroll(
    num_lstm_layer=num_lstm_layer,
    seq_len=seq_len,
    input_size=input_size,
    num_hidden=num_hidden,
    num_embed=num_embed,
    num_label=num_label,
    concat_decode=concat_decode,
    use_loss=use_loss)

ishapes = get_input_shapes(net,
                           batch_size=batch_size,
                           num_hidden=num_hidden,
                           input_size=input_size,
                           seq_len=seq_len)

# Plan a memory-optimized schedule, then report the cost of the original
# vs. the planned network (both in MB, per memonger.get_cost).
net_mem_planned = memonger.search_plan(net, **ishapes)
old_cost = memonger.get_cost(net, **ishapes)
new_cost = memonger.get_cost(net_mem_planned, **ishapes)

print('Old feature map cost=%d MB' % old_cost)
print('New feature map cost=%d MB' % new_cost)
# You can safely feed the net to the subsequent mxnet training script.
def main():
    """Train a ResNet on cifar10 or imagenet with ``mx.model.FeedForward``.

    All configuration is read from the module-level ``args`` namespace
    (argparse result). Checkpoints are written under ./model; progress is
    reported through a Speedometer callback.
    """
    if args.data_type == "cifar10":
        args.aug_level = 1
        args.num_classes = 10
        # depth should be one of 110, 164, 1001, ... i.e. it should satisfy
        # (args.depth - 2) % 9 == 0
        if ((args.depth - 2) % 9 == 0 and args.depth >= 164):
            # BUGFIX: floor division — true division yields a float under
            # Python 3, and a float unit count breaks the resnet() builder.
            per_unit = [(args.depth - 2) // 9]
            filter_list = [16, 64, 128, 256]
            bottle_neck = True
        elif ((args.depth - 2) % 6 == 0 and args.depth < 164):
            per_unit = [(args.depth - 2) // 6]
            filter_list = [16, 16, 32, 64]
            bottle_neck = False
        else:
            raise ValueError(
                "no experiments done on depth {}, you can do it yourself".
                format(args.depth))
        units = per_unit * 3
        symbol = resnet(units=units,
                        num_stage=3,
                        filter_list=filter_list,
                        num_class=args.num_classes,
                        data_type="cifar10",
                        bottle_neck=bottle_neck,
                        bn_mom=args.bn_mom,
                        workspace=args.workspace,
                        memonger=args.memonger)
    elif args.data_type == "imagenet":
        args.num_classes = 1000
        if args.depth == 18:
            units = [2, 2, 2, 2]
        elif args.depth == 34:
            units = [3, 4, 6, 3]
        elif args.depth == 50:
            units = [3, 4, 6, 3]
        elif args.depth == 101:
            units = [3, 4, 23, 3]
        elif args.depth == 152:
            units = [3, 8, 36, 3]
        elif args.depth == 200:
            units = [3, 24, 36, 3]
        elif args.depth == 269:
            units = [3, 30, 48, 8]
        else:
            raise ValueError(
                "no experiments done on depth {}, you can do it yourself".
                format(args.depth))
        symbol = resnet(units=units,
                        num_stage=4,
                        filter_list=[64, 256, 512, 1024, 2048]
                        if args.depth >= 50 else [64, 64, 128, 256, 512],
                        num_class=args.num_classes,
                        data_type="imagenet",
                        bottle_neck=True if args.depth >= 50 else False,
                        bn_mom=args.bn_mom,
                        workspace=args.workspace,
                        memonger=args.memonger)
    else:
        raise ValueError("do not support {} yet".format(args.data_type))
    kv = mx.kvstore.create(args.kv_store)
    devs = mx.cpu() if args.gpus is None else [
        mx.gpu(int(i)) for i in args.gpus.split(',')
    ]
    # Per-worker epoch size; at least one batch.
    epoch_size = max(int(args.num_examples / args.batch_size / kv.num_workers),
                     1)
    begin_epoch = args.model_load_epoch if args.model_load_epoch else 0
    if not os.path.exists("./model"):
        os.mkdir("./model")
    model_prefix = "model/resnet-{}-{}-{}".format(args.data_type, args.depth,
                                                  kv.rank)
    checkpoint = mx.callback.do_checkpoint(model_prefix)
    arg_params = None
    aux_params = None
    if args.retrain:
        # Resume from a previously saved checkpoint.
        _, arg_params, aux_params = mx.model.load_checkpoint(
            model_prefix, args.model_load_epoch)
    if args.memonger:
        import memonger
        symbol = memonger.search_plan(
            symbol,
            data=(args.batch_size, 3, 32, 32) if args.data_type == "cifar10"
            else (args.batch_size, 3, 224, 224))
    train = mx.io.ImageRecordIter(
        path_imgrec=os.path.join(args.data_dir, "cifar10_train.rec")
        if args.data_type == 'cifar10' else
        os.path.join(args.data_dir, "train_256_q90.rec")
        if args.aug_level == 1 else
        os.path.join(args.data_dir, "train_480_q90.rec"),
        label_width=1,
        data_name='data',
        label_name='softmax_label',
        data_shape=(3, 32, 32) if args.data_type == "cifar10" else
        (3, 224, 224),
        batch_size=args.batch_size,
        pad=4 if args.data_type == "cifar10" else 0,
        fill_value=127,  # only used when pad is valid
        rand_crop=True,
        max_random_scale=1.0,  # 480 with imagenet, 32 with cifar10
        min_random_scale=1.0 if args.data_type == "cifar10" else 1.0
        if args.aug_level == 1 else 0.533,  # 256.0/480.0
        max_aspect_ratio=0 if args.data_type == "cifar10" else 0
        if args.aug_level == 1 else 0.25,
        random_h=0 if args.data_type == "cifar10" else 0
        if args.aug_level == 1 else 36,  # 0.4*90
        random_s=0 if args.data_type == "cifar10" else 0
        if args.aug_level == 1 else 50,  # 0.4*127
        random_l=0 if args.data_type == "cifar10" else 0
        if args.aug_level == 1 else 50,  # 0.4*127
        max_rotate_angle=0 if args.aug_level <= 2 else 10,
        max_shear_ratio=0 if args.aug_level <= 2 else 0.1,
        rand_mirror=True,
        shuffle=True,
        num_parts=kv.num_workers,
        part_index=kv.rank)
    val = mx.io.ImageRecordIter(
        path_imgrec=os.path.join(args.data_dir, "cifar10_val.rec")
        if args.data_type == 'cifar10' else os.path.join(
            args.data_dir, "val_256_q90.rec"),
        label_width=1,
        data_name='data',
        label_name='softmax_label',
        batch_size=args.batch_size,
        data_shape=(3, 32, 32) if args.data_type == "cifar10" else
        (3, 224, 224),
        rand_crop=False,
        rand_mirror=False,
        num_parts=kv.num_workers,
        part_index=kv.rank)
    model = mx.model.FeedForward(
        ctx=devs,
        symbol=symbol,
        arg_params=arg_params,
        aux_params=aux_params,
        num_epoch=200 if args.data_type == "cifar10" else 120,
        begin_epoch=begin_epoch,
        learning_rate=args.lr,
        momentum=args.mom,
        wd=args.wd,
        optimizer='nag',
        # optimizer = 'sgd',
        initializer=mx.init.Xavier(rnd_type='gaussian',
                                   factor_type="in",
                                   magnitude=2),
        lr_scheduler=multi_factor_scheduler(
            begin_epoch, epoch_size, step=[120, 160], factor=0.1)
        if args.data_type == 'cifar10' else multi_factor_scheduler(
            begin_epoch, epoch_size, step=[30, 60, 90], factor=0.1),
    )
    model.fit(X=train,
              eval_data=val,
              eval_metric=['acc', 'ce'] if args.data_type == 'cifar10' else
              ['acc', mx.metric.create('top_k_accuracy', top_k=5)],
              kvstore=kv,
              batch_end_callback=mx.callback.Speedometer(
                  args.batch_size, args.frequent),
              epoch_end_callback=checkpoint)
def main():
    """Train a ResNet on cifar10/imagenet on a distributed kvstore
    (dist_sync/dist_async/dist_gsync/dist_ssync, i.e. bsp/asp/gsp/ssp).

    Configuration comes from the module-level ``args`` namespace. Logging is
    set up from args.log_file/args.log_dir, and checkpoints are written under
    /home/<user>/mxnet_model/model/<data_type>/resnet<depth>/<kv_store_type>.
    """
    if args.data_type == "cifar10":
        args.aug_level = 1
        args.num_classes = 10
        # depth should be one of 110, 164, 1001, ... i.e. it should satisfy
        # (args.depth - 2) % 9 == 0
        if ((args.depth - 2) % 9 == 0 and args.depth >= 164):
            # NOTE(review): true division — per_unit holds a float under
            # Python 3; this script presumably targets Python 2. Confirm.
            per_unit = [(args.depth - 2) / 9]
            filter_list = [16, 64, 128, 256]
            bottle_neck = True
        elif ((args.depth - 2) % 6 == 0 and args.depth < 164):
            per_unit = [(args.depth - 2) / 6]
            filter_list = [16, 16, 32, 64]
            bottle_neck = False
        else:
            raise ValueError(
                "no experiments done on detph {}, you can do it youself".
                format(args.depth))
        units = per_unit * 3
        symbol = resnet(units=units,
                        num_stage=3,
                        filter_list=filter_list,
                        num_class=args.num_classes,
                        data_type="cifar10",
                        bottle_neck=bottle_neck,
                        bn_mom=args.bn_mom,
                        workspace=args.workspace,
                        memonger=args.memonger)
    elif args.data_type == "imagenet":
        args.num_classes = 1000
        # Standard ResNet unit counts per depth.
        if args.depth == 18:
            units = [2, 2, 2, 2]
        elif args.depth == 34:
            units = [3, 4, 6, 3]
        elif args.depth == 50:
            units = [3, 4, 6, 3]
        elif args.depth == 101:
            units = [3, 4, 23, 3]
        elif args.depth == 152:
            units = [3, 8, 36, 3]
        elif args.depth == 200:
            units = [3, 24, 36, 3]
        elif args.depth == 269:
            units = [3, 30, 48, 8]
        else:
            raise ValueError(
                "no experiments done on detph {}, you can do it youself".
                format(args.depth))
        symbol = resnet(units=units,
                        num_stage=4,
                        filter_list=[64, 256, 512, 1024, 2048]
                        if args.depth >= 50 else [64, 64, 128, 256, 512],
                        num_class=args.num_classes,
                        data_type="imagenet",
                        bottle_neck=True if args.depth >= 50 else False,
                        bn_mom=args.bn_mom,
                        workspace=args.workspace,
                        memonger=args.memonger)
    else:
        raise ValueError("do not support {} yet".format(args.data_type))
    kv = mx.kvstore.create(args.kv_store)
    devs = mx.cpu() if args.gpus is None else [
        mx.gpu(int(i)) for i in args.gpus.split(',')
    ]
    # logging: either to args.log_dir/args.log_file or to stderr.
    head = '%(asctime)-15s Node[' + str(kv.rank) + '] %(message)s'
    if 'log_file' in args and args.log_file is not None:
        log_file = args.log_file
        log_dir = args.log_dir
        log_file_full_name = os.path.join(log_dir, log_file)
        if not os.path.exists(log_dir):
            os.mkdir(log_dir)
        logger = logging.getLogger()
        handler = logging.FileHandler(log_file_full_name)
        formatter = logging.Formatter(head)
        handler.setFormatter(formatter)
        logger.addHandler(handler)
        logger.setLevel(logging.DEBUG)
        logger.info('start with arguments %s', args)
    else:
        logging.basicConfig(level=logging.DEBUG, format=head)
        logging.info('start with arguments %s', args)
    # Map the kvstore name to the synchronization-protocol tag used in paths.
    kv_store_type = ""
    if args.kv_store == "dist_sync":
        kv_store_type = "bsp"
    elif args.kv_store == "dist_async":
        kv_store_type = "asp"
    elif args.kv_store == "dist_gsync":
        kv_store_type = "gsp"
    elif args.kv_store == "dist_ssync":
        kv_store_type = "ssp"
    begin_epoch = args.model_load_epoch if args.model_load_epoch else 0
    user = getpass.getuser()
    if not os.path.exists("/home/{}/mxnet_model/model/{}/resnet{}/{}".format(
            user, args.data_type, args.depth, kv_store_type)):
        os.makedirs("/home/{}/mxnet_model/model/{}/resnet{}/{}".format(
            user, args.data_type, args.depth, kv_store_type))
    model_prefix = "/home/{}/mxnet_model/model/{}/resnet{}/{}/{}-{}-resnet{}-{}".format(
        user, args.data_type, args.depth, kv_store_type, kv_store_type,
        args.data_type, args.depth, kv.rank)
    # Checkpointing is optional (args.savemodel).
    checkpoint = None if not args.savemodel else mx.callback.do_checkpoint(
        model_prefix)
    arg_params = None
    aux_params = None
    if args.retrain:
        # Resume from a previously saved checkpoint.
        _, arg_params, aux_params = mx.model.load_checkpoint(
            model_prefix, args.model_load_epoch)
    if args.memonger:
        import memonger
        symbol = memonger.search_plan(
            symbol,
            data=(args.batch_size, 3, 32, 32) if args.data_type == "cifar10"
            else (args.batch_size, 3, 224, 224))
    # Data-partition bookkeeping: each worker reads shard `part` of `splits`.
    splits = 1
    part = 0
    val_splits = kv.num_workers
    val_part = kv.rank
    '''yegeyan 2016.10.6'''
    if args.kv_store == "dist_sync" or args.kv_store == "dist_async" or args.kv_store == "dist_ssync":
        #if args.kv_store == "dist_sync":
        splits = kv.num_workers
        part = kv.rank
    if args.kv_store == "dist_gsync":
        if args.data_allocator == 1:
            # Manual per-host data allocation: begin/end fractions per
            # cluster node. NOTE(review): `part` gets the *begin* value and
            # `splits` the *end* value — naming is inverted; confirm the
            # consumer (ImageRecordIter num_parts/part_index) expects this.
            if args.hostname == "gpu-cluster-1":
                part = args.cluster1_begin
                splits = args.cluster1_end
            elif args.hostname == "gpu-cluster-2":
                part = args.cluster2_begin
                splits = args.cluster2_end
            elif args.hostname == "gpu-cluster-3":
                part = args.cluster3_begin
                splits = args.cluster3_end
            elif args.hostname == "gpu-cluster-4":
                part = args.cluster4_begin
                splits = args.cluster4_end
            else:
                part = args.cluster5_begin
                splits = args.cluster5_end
            args.data_proportion = splits - part
        else:
            splits = kv.num_workers
            part = kv.rank
    # yegeyan 2017.1.15
    epoch_size = args.num_examples / args.batch_size
    model_args = {}
    if args.kv_store == 'dist_sync' or args.kv_store == 'dist_async' or args.kv_store == 'dist_ssync':
        #if args.kv_store == 'dist_sync':
        epoch_size /= kv.num_workers
        model_args['epoch_size'] = epoch_size
    '''yegeyan 2016.12.13'''
    if args.kv_store == 'dist_gsync':
        if args.data_allocator == 1:
            # Scale epoch size by this host's share of the data.
            epoch_size *= args.data_proportion
            model_args['epoch_size'] = epoch_size
        else:
            epoch_size /= kv.num_workers
            model_args['epoch_size'] = epoch_size
    if 'lr_factor' in args and args.lr_factor < 1:
        # NOTE(review): `batch_num` is not defined anywhere in this function
        # — presumably a module-level global; confirm, otherwise this branch
        # raises NameError when lr_factor < 1.
        model_args['lr_scheduler'] = mx.lr_scheduler.FactorScheduler(
            step=max(int(batch_num * args.lr_factor_epoch), 1),  # yegeyan 2016.12.13
            factor=args.lr_factor)
    if 'clip_gradient' in args and args.clip_gradient is not None:
        model_args['clip_gradient'] = args.clip_gradient
    eval_metrics = ['accuracy']
    ## TopKAccuracy only allows top_k > 1
    for top_k in [5, 10, 20]:
        eval_metrics.append(mx.metric.create('top_k_accuracy', top_k=top_k))
    # yegeyan 2017.1.4
    val_eval_metrics = ['accuracy']
    ## TopKAccuracy only allows top_k > 1
    for top_k in [5, 10, 20]:
        val_eval_metrics.append(mx.metric.create('top_k_accuracy', top_k=top_k))
    train = mx.io.ImageRecordIter(
        # NOTE(review): both aug_level branches resolve to "train_480.rec" —
        # possibly intentional, possibly a leftover; confirm.
        path_imgrec=os.path.join(args.data_dir, "train.rec")
        if args.data_type == 'cifar10' else
        os.path.join(args.data_dir, "train_480.rec")
        if args.aug_level == 1 else
        os.path.join(args.data_dir, "train_480.rec"),
        label_width=1,
        data_name='data',
        label_name='softmax_label',
        data_shape=(3, 32, 32) if args.data_type == "cifar10" else
        (3, 224, 224),
        batch_size=args.batch_size,
        pad=4 if args.data_type == "cifar10" else 0,
        fill_value=127,  # only used when pad is valid
        rand_crop=True,
        max_random_scale=1.0,  # 480 with imagenet, 32 with cifar10
        min_random_scale=1.0 if args.data_type == "cifar10" else 1.0
        if args.aug_level == 1 else 0.533,  # 256.0/480.0
        max_aspect_ratio=0 if args.data_type == "cifar10" else 0
        if args.aug_level == 1 else 0.25,
        random_h=0 if args.data_type == "cifar10" else 0
        if args.aug_level == 1 else 36,  # 0.4*90
        random_s=0 if args.data_type == "cifar10" else 0
        if args.aug_level == 1 else 50,  # 0.4*127
        random_l=0 if args.data_type == "cifar10" else 0
        if args.aug_level == 1 else 50,  # 0.4*127
        max_rotate_angle=0 if args.aug_level <= 2 else 10,
        max_shear_ratio=0 if args.aug_level <= 2 else 0.1,
        rand_mirror=True,
        shuffle=True,
        preprocess_threads=4,
        num_parts=splits,
        part_index=part)
    val = mx.io.ImageRecordIter(
        path_imgrec=os.path.join(args.data_dir, "test.rec")
        if args.data_type == 'cifar10' else
        os.path.join(args.data_dir, "val_480.rec"),
        label_width=1,
        data_name='data',
        label_name='softmax_label',
        batch_size=args.batch_size,
        data_shape=(3, 32, 32) if args.data_type == "cifar10" else
        (3, 224, 224),
        rand_crop=False,
        rand_mirror=False,
        preprocess_threads=4,
        num_parts=val_splits,
        part_index=val_part)
    model = mx.model.FeedForward(
        ctx=devs,
        symbol=symbol,
        arg_params=arg_params,
        aux_params=aux_params,
        num_epoch=args.num_epochs,
        begin_epoch=begin_epoch,
        learning_rate=args.lr,
        momentum=args.mom,
        wd=args.wd,
        #optimizer = 'nag',
        optimizer='sgd',
        initializer=mx.init.Xavier(rnd_type='gaussian',
                                   factor_type="in",
                                   magnitude=2),
        lr_scheduler=multi_factor_scheduler(
            begin_epoch, epoch_size, step=[220, 260, 280], factor=0.1)
        if args.data_type == 'cifar10' else multi_factor_scheduler(
            begin_epoch, epoch_size, step=[30, 60, 90], factor=0.1),
        **model_args)
    # NOTE(review): val_eval_metric/hostname/dataset/staleness/network_name/lr
    # are not stock mx.model.FeedForward.fit kwargs — this relies on a
    # patched MXNet fork; confirm the fork is installed.
    model.fit(X=train,
              eval_data=val,
              eval_metric=eval_metrics,
              val_eval_metric=val_eval_metrics,
              kvstore=kv,
              batch_end_callback=mx.callback.Speedometer(args.batch_size, 50),
              epoch_end_callback=checkpoint,
              hostname=socket.gethostbyname_ex(socket.gethostname())[0],
              dataset=args.data_type,
              staleness=args.staleness,
              network_name="resnet_" + str(args.depth),
              lr=args.lr)  #yegeyan 2017.5.15
def main():
    """Train a bottleneck ResNet on cifar10 using the ``mx.mod.Module`` API.

    Configuration comes from the module-level ``args`` namespace; checkpoints
    are written under ./model.
    """
    if (args.depth - 2) % 9 == 0:  # and args.depth >= 164:
        # BUGFIX: floor division — true division yields a float under
        # Python 3, and a float unit count breaks the resnet() builder.
        per_unit = [(args.depth - 2) // 9]
        filter_list = [16, 64, 128, 256]
        bottle_neck = True
    # elif (args.depth-2) % 6 == 0 and args.depth < 164:
    #     per_unit = [(args.depth-2) / 6]
    #     filter_list = [16, 16, 32, 64]
    #     bottle_neck = False
    else:
        raise ValueError(
            "no experiments done on depth {}, you can do it yourself".format(args.depth))
    units = per_unit * 3
    symbol = resnet(units=units,
                    num_stage=3,
                    filter_list=filter_list,
                    num_class=args.num_classes,
                    bottle_neck=bottle_neck,
                    bn_mom=args.bn_mom,
                    workspace=args.workspace,
                    memonger=args.memonger)
    kv = mx.kvstore.create(args.kv_store)
    devs = mx.cpu() if args.gpus is None else [
        mx.gpu(int(i)) for i in args.gpus.split(',')]
    # Per-worker epoch size; at least one batch.
    epoch_size = max(
        int(args.num_examples / args.batch_size / kv.num_workers), 1)
    begin_epoch = args.model_load_epoch if args.model_load_epoch else 0
    if not os.path.exists("./model"):
        os.mkdir("./model")
    # NOTE(review): ``data_type`` is not defined in this function —
    # presumably a module-level constant (sibling scripts use
    # args.data_type); confirm before running.
    model_prefix = "model/resnet-{}-{}-{}".format(
        data_type, args.depth, kv.rank)
    checkpoint = mx.callback.do_checkpoint(model_prefix)
    arg_params = None
    aux_params = None
    if args.retrain:
        # Resume from a previously saved checkpoint.
        _, arg_params, aux_params = mx.model.load_checkpoint(
            model_prefix, args.model_load_epoch)
    if args.memonger:
        import memonger
        symbol = memonger.search_plan(
            symbol, data=(args.batch_size, 3, 32, 32))
    train = mx.io.ImageRecordIter(
        path_imgrec=os.path.join(args.data_dir, "cifar10_train.rec"),
        label_width=1,
        data_shape=(3, 32, 32),
        num_parts=kv.num_workers,
        part_index=kv.rank,
        shuffle=True,
        batch_size=args.batch_size,
        rand_crop=True,
        fill_value=127,  # only used when pad is valid
        pad=4,
        rand_mirror=True,
    )
    val = mx.io.ImageRecordIter(
        path_imgrec=os.path.join(args.data_dir, "cifar10_val.rec"),
        label_width=1,
        data_shape=(3, 32, 32),
        num_parts=kv.num_workers,
        part_index=kv.rank,
        batch_size=args.batch_size,
    )
    model = mx.mod.Module(
        symbol=symbol,
        data_names=('data', ),
        label_names=('softmax_label', ),
        context=devs,
    )
    model.fit(
        train_data=train,
        eval_data=val,
        eval_metric=['acc'],
        epoch_end_callback=checkpoint,
        batch_end_callback=mx.callback.Speedometer(args.batch_size,
                                                   args.frequent),
        kvstore=kv,
        optimizer='nag',
        optimizer_params=(('learning_rate', args.lr),
                          ('momentum', args.mom),
                          ('wd', args.wd),
                          ('lr_scheduler',
                           multi_factor_scheduler(begin_epoch, epoch_size,
                                                  step=[80], factor=0.1))),
        initializer=mx.init.Xavier(rnd_type='gaussian',
                                   factor_type="in",
                                   magnitude=2),
        arg_params=arg_params,
        aux_params=aux_params,
        allow_missing=True,
        begin_epoch=begin_epoch,
        num_epoch=args.end_epoch,
    )
def main():
    """Train a 4-class plate classifier on a ResNet-50, optionally starting
    from a pre-trained checkpoint and/or fine-tuning only the head.

    All hyper-parameters are hard-coded locals below; data comes from
    ./data/train.lst and ./data/valid.lst image lists.
    """
    # --- hyper-parameters and paths ---
    num_classes = 4
    gpus = None
    lr = 0.001
    step = [30, 60, 90, 120]   # lr-decay epochs
    gamma = 0.5                # lr-decay factor
    mom = 0.9
    wd = 0.00001
    batch_size = 4
    epoch_size = 120
    bn_mom = 0.9
    data_channel = 3
    data_width = 224
    data_height = 224
    max_epoch = 200
    workspace = 512
    frequent = 20              # Speedometer logging interval (batches)
    memonger = False
    # pre_train = ['resnet50/resnet-50', 0]
    pre_train = None
    fine_tuning = False
    arg_params = None
    aux_params = None
    train_data = "./data/train.lst"
    val_data = "./data/valid.lst"
    # train_idx = "../train.idx"
    # val_idx = "../valid.idx"
    model_prefix = "model/plate"
    if pre_train is None:
        # Fresh ResNet-50 symbol for 3x224x224 input.
        symbol = resnet.get_symbol(num_classes, 50, '3,224,224')
        begin_epoch = 0
    else:
        # Resume (or warm-start) from the given checkpoint prefix/epoch.
        symbol, arg_params, aux_params = mx.model.load_checkpoint(
            pre_train[0], pre_train[1])
        begin_epoch = pre_train[1]
    # mx.viz.plot_network(symbol).view()
    if fine_tuning:
        # Replace the head after 'flatten0' with a fresh num_classes output.
        (symbol, arg_params) = get_fine_tune_model(symbol, arg_params,
                                                   num_classes,
                                                   layer_name='flatten0')
        mx.viz.plot_network(symbol).view()
    # data_shape = {'data': (1, data_channel, data_height, data_width)}
    # arg_shape, out_shape, _ = symbol.infer_shape(**data_shape)
    # arg_name = symbol.list_arguments()
    # out_name = symbol.list_outputs()
    # log = zip(arg_name, arg_shape)
    # for message in log:
    #     print message
    # print({'input' : dict(zip(arg_name, arg_shape)), 'output': dict(zip(out_name, out_shape))})
    # mx.viz.plot_network(symbol).view()
    devs = mx.cpu() if gpus is None else [mx.gpu(i) for i in gpus]
    checkpoint = mx.callback.do_checkpoint(model_prefix)
    if memonger:
        # NOTE(review): the local bool `memonger` is shadowed here by the
        # module of the same name — works, but rename one of them.
        import memonger
        symbol = memonger.search_plan(symbol,
                                      data=(batch_size, data_channel,
                                            data_height, data_width))
    train_iter = mx.image.ImageIter(
        path_imglist=train_data,
        path_root='./',
        # path_imgrec = train_data,
        # path_imgidx = train_idx,
        # data_name = 'data',
        # label_name = 'softmax_label',
        data_shape=(data_channel, data_height, data_width),
        batch_size=batch_size,
        resize=240,
        rand_crop=True,
        rand_mirror=True,
        shuffle=True)
    val_iter = mx.image.ImageIter(
        path_imglist=val_data,
        path_root='./',
        # path_imgrec = val_data,
        # path_imgidx = val_idx,
        # data_name = 'data',
        # label_name = 'softmax_label',
        batch_size=batch_size,
        data_shape=(data_channel, data_height, data_width),
        resize=data_height,
        rand_crop=False,
        rand_mirror=False,
        shuffle=True)
    model = mx.mod.Module(symbol=symbol,
                          context=devs,
                          data_names=['data'],
                          label_names=['softmax_label'])
    lr_scheduler = multi_factor_scheduler(0, epoch_size, step=step,
                                          factor=gamma)
    initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in",
                                 magnitude=2)
    optimizer_params = {
        'learning_rate': lr,
        'momentum': mom,
        'wd': wd,
        'lr_scheduler': lr_scheduler,
        # 'multi_precision': True
    }
    model.fit(train_iter,
              begin_epoch=begin_epoch,
              num_epoch=max_epoch,
              eval_data=val_iter,
              eval_metric='acc',
              optimizer='sgd',
              optimizer_params=optimizer_params,
              initializer=initializer,
              arg_params=arg_params,
              aux_params=aux_params,
              allow_missing=True,  # tolerate missing params when fine-tuning
              batch_end_callback=mx.callback.Speedometer(batch_size, frequent),
              epoch_end_callback=checkpoint)
    # Final validation accuracy.
    metric = mx.metric.Accuracy()
    print(model.score(val_iter, metric))
# Demo: compare memonger's planned feature-map cost against the naive plan
# and against MXNet's backward-mirroring heuristic for an RNA model.
mx_workspace = 8000
classes = 19
feat_stride = 8


def get_symbol():
    """Build the fully-convolutional RNA model (a1 variant).

    Configures the shared symbol cfg (alex-style lr, workspace, frozen BN
    stats) before constructing the network for ``classes`` classes at
    ``feat_stride`` output stride.
    """
    from symbol.symbol import cfg as symcfg
    symcfg['lr_type'] = 'alex'
    symcfg['workspace'] = mx_workspace
    symcfg['bn_use_global_stats'] = True
    from symbol.resnet_v2 import fcrna_model_a1, rna_model_a1
    net = fcrna_model_a1(classes, feat_stride, bootstrapping=False)
    # net = rna_model_a1(classes)
    return net


batch_size = 24
net = get_symbol()
dshape = (batch_size, 3, 500, 500)
net_mem_planned = memonger.search_plan(net, data=dshape)
new_cost = memonger.get_cost(net_mem_planned, data=dshape)
old_cost = memonger.get_cost(net, data=dshape)
# Enable gradient mirroring only *after* the first costings so old_cost is
# the truly naive figure. NOTE(review): presumably memonger.get_cost reads
# this env var on each call — confirm.
os.environ['MXNET_BACKWARD_DO_MIRROR'] = '1'
old_cost2 = memonger.get_cost(net, data=dshape)
print('Naive feature map cost=%d MB' % old_cost)
print('Best feature map cost=%d MB' % new_cost)
print('Mirror feature map cost=%d MB' % old_cost2)
# You can safely feed the net to the subsequent mxnet training script.
def get_symbol():
    """Build the ResNet embedding symbol described by ``config``.

    Adapted from
    https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py
    Original author Wei Wu
    """
    emb_size = config.emb_size
    depth = config.num_layers
    # Depth -> per-stage unit counts for every configuration we have run.
    units_by_depth = {
        18: [2, 2, 2, 2],
        34: [3, 4, 6, 3],
        49: [3, 4, 14, 3],
        50: [3, 4, 14, 3],
        74: [3, 6, 24, 3],
        90: [3, 8, 30, 3],
        98: [3, 4, 38, 3],
        99: [3, 8, 35, 3],
        100: [3, 13, 30, 3],
        101: [3, 4, 23, 3],
        124: [3, 13, 40, 5],
        134: [3, 10, 50, 3],
        136: [3, 13, 48, 3],
        140: [3, 15, 48, 3],
        152: [3, 8, 36, 3],
        160: [3, 24, 49, 3],
        200: [3, 24, 36, 3],
        269: [3, 30, 48, 8],
    }
    if depth not in units_by_depth:
        raise ValueError("no experiments done on num_layers {}, you can do it yourself".format(depth))
    units = units_by_depth[depth]
    # Very deep variants use the bottleneck block with wider filters.
    use_bottleneck = depth >= 500
    filter_list = ([64, 256, 512, 1024, 2048] if use_bottleneck
                   else [64, 64, 128, 256, 512])
    net = resnet(units=units,
                 num_stages=4,
                 filter_list=filter_list,
                 num_classes=emb_size,
                 bottle_neck=use_bottleneck)
    if config.memonger:
        # NCHW input shape; image_shape is stored (H, W, C).
        dshape = (config.per_batch_size, config.image_shape[2],
                  config.image_shape[0], config.image_shape[1])
        planned = memonger.search_plan(net, data=dshape)
        old_cost = memonger.get_cost(net, data=dshape)
        new_cost = memonger.get_cost(planned, data=dshape)
        print('Old feature map cost=%d MB' % old_cost)
        print('New feature map cost=%d MB' % new_cost)
        net = planned
    return net
def get_symbol(args, arg_params, aux_params): data_shape = (args.image_channel, args.image_h, args.image_w) image_shape = ",".join([str(x) for x in data_shape]) margin_symbols = [] if args.network[0] == 'd': embedding = fdensenet.get_symbol(args.emb_size, args.num_layers, version_se=args.version_se, version_input=args.version_input, version_output=args.version_output, version_unit=args.version_unit) elif args.network[0] == 'm': print('init mobilenet', args.num_layers) if args.num_layers == 1: embedding = fmobilenet.get_symbol( args.emb_size, version_se=args.version_se, version_input=args.version_input, version_output=args.version_output, version_unit=args.version_unit) else: embedding = fmobilenetv2.get_symbol(args.emb_size) elif args.network[0] == 'i': print('init inception-resnet-v2', args.num_layers) embedding = finception_resnet_v2.get_symbol( args.emb_size, version_se=args.version_se, version_input=args.version_input, version_output=args.version_output, version_unit=args.version_unit) elif args.network[0] == 'x': print('init xception', args.num_layers) embedding = fxception.get_symbol(args.emb_size, version_se=args.version_se, version_input=args.version_input, version_output=args.version_output, version_unit=args.version_unit) elif args.network[0] == 'p': print('init dpn', args.num_layers) embedding = fdpn.get_symbol(args.emb_size, args.num_layers, version_se=args.version_se, version_input=args.version_input, version_output=args.version_output, version_unit=args.version_unit) elif args.network[0] == 'n': print('init nasnet', args.num_layers) embedding = fnasnet.get_symbol(args.emb_size) else: print('init resnet', args.num_layers) embedding = fresnet.get_symbol(args.emb_size, args.num_layers, version_se=args.version_se, version_input=args.version_input, version_output=args.version_output, version_unit=args.version_unit, input_shape=(args.per_batch_size, 3, 112, 112)) if args.memonger: embedding = memonger.search_plan(embedding) gt_label = 
mx.symbol.Variable('softmax_label') assert args.loss_type >= 0 extra_loss = None if args.loss_type == 0: #softmax _weight = mx.symbol.Variable('fc7_weight') _bias = mx.symbol.Variable('fc7_bias', lr_mult=2.0, wd_mult=0.0) fc7 = mx.sym.FullyConnected(data=embedding, weight=_weight, bias=_bias, num_hidden=args.num_classes, name='fc7') elif args.loss_type == 1: #sphere _weight = mx.symbol.Variable("fc7_weight", shape=(args.num_classes, args.emb_size), lr_mult=1.0) _weight = mx.symbol.L2Normalization(_weight, mode='instance') fc7 = mx.sym.LSoftmax(data=embedding, label=gt_label, num_hidden=args.num_classes, weight=_weight, beta=args.beta, margin=args.margin, scale=args.scale, beta_min=args.beta_min, verbose=1000, name='fc7') elif args.loss_type == 8: #centerloss, TODO _weight = mx.symbol.Variable('fc7_weight') _bias = mx.symbol.Variable('fc7_bias', lr_mult=2.0, wd_mult=0.0) fc7 = mx.sym.FullyConnected(data=embedding, weight=_weight, bias=_bias, num_hidden=args.num_classes, name='fc7') print('center-loss', args.center_alpha, args.center_scale) extra_loss = mx.symbol.Custom(data=embedding, label=gt_label, name='center_loss', op_type='centerloss',\ num_class=args.num_classes, alpha=args.center_alpha, scale=args.center_scale, batchsize=args.per_batch_size) elif args.loss_type == 2: s = args.margin_s m = args.margin_m _weight = mx.symbol.Variable("fc7_weight", shape=(args.num_classes, args.emb_size), lr_mult=1.0) _weight = mx.symbol.L2Normalization(_weight, mode='instance') if s > 0.0: nembedding = mx.symbol.L2Normalization( embedding, mode='instance', name='fc1n') * s fc7 = mx.sym.FullyConnected(data=nembedding, weight=_weight, no_bias=True, num_hidden=args.num_classes, name='fc7') if m > 0.0: if args.margin_verbose > 0: zy = mx.sym.pick(fc7, gt_label, axis=1) cos_t = zy / s margin_symbols.append(mx.symbol.mean(cos_t)) s_m = s * m gt_one_hot = mx.sym.one_hot(gt_label, depth=args.num_classes, on_value=s_m, off_value=0.0) fc7 = fc7 - gt_one_hot if args.margin_verbose > 0: 
new_zy = mx.sym.pick(fc7, gt_label, axis=1) new_cos_t = new_zy / s margin_symbols.append(mx.symbol.mean(new_cos_t)) else: fc7 = mx.sym.FullyConnected(data=embedding, weight=_weight, no_bias=True, num_hidden=args.num_classes, name='fc7') if m > 0.0: body = embedding * embedding body = mx.sym.sum_axis(body, axis=1, keepdims=True) body = mx.sym.sqrt(body) body = body * m gt_one_hot = mx.sym.one_hot(gt_label, depth=args.num_classes, on_value=1.0, off_value=0.0) body = mx.sym.broadcast_mul(gt_one_hot, body) fc7 = fc7 - body elif args.loss_type == 3: s = args.margin_s m = args.margin_m assert args.margin == 2 or args.margin == 4 _weight = mx.symbol.Variable("fc7_weight", shape=(args.num_classes, args.emb_size), lr_mult=1.0) _weight = mx.symbol.L2Normalization(_weight, mode='instance') nembedding = mx.symbol.L2Normalization( embedding, mode='instance', name='fc1n') * s fc7 = mx.sym.FullyConnected(data=nembedding, weight=_weight, no_bias=True, num_hidden=args.num_classes, name='fc7') zy = mx.sym.pick(fc7, gt_label, axis=1) cos_t = zy / s if args.margin_verbose > 0: margin_symbols.append(mx.symbol.mean(cos_t)) #threshold = math.cos(args.margin_m) #cond_v = cos_t - threshold #cond = mx.symbol.Activation(data=cond_v, act_type='relu') #body = cos_t #for i in xrange(args.margin//2): # body = body*body # body = body*2-1 #new_zy = body*s #zy_keep = zy #new_zy = mx.sym.where(cond, new_zy, zy_keep) #if args.margin_verbose>0: # new_cos_t = new_zy/s # margin_symbols.append(mx.symbol.mean(new_cos_t)) #diff = new_zy - zy #diff = mx.sym.expand_dims(diff, 1) #gt_one_hot = mx.sym.one_hot(gt_label, depth = args.num_classes, on_value = 1.0, off_value = 0.0) #body = mx.sym.broadcast_mul(gt_one_hot, diff) #fc7 = fc7+body elif args.loss_type == 4: s = args.margin_s m = args.margin_m assert s > 0.0 assert m > 0.0 assert m < (math.pi / 2) cos_m = math.cos(m) sin_m = math.sin(m) mm = math.sin(math.pi - m) * m _weight = mx.symbol.Variable("fc7_weight", shape=(args.num_classes, args.emb_size), 
lr_mult=1.0) _weight = mx.symbol.L2Normalization(_weight, mode='instance') #threshold = 0.0 threshold = math.cos(math.pi - m) nembedding = mx.symbol.L2Normalization( embedding, mode='instance', name='fc1n') * s fc7 = mx.sym.FullyConnected(data=nembedding, weight=_weight, no_bias=True, num_hidden=args.num_classes, name='fc7') zy = mx.sym.pick(fc7, gt_label, axis=1) cos_t = zy / s if args.margin_verbose > 0: margin_symbols.append(mx.symbol.mean(cos_t)) if args.easy_margin: cond = mx.symbol.Activation(data=cos_t, act_type='relu') #cond_v = cos_t - 0.4 #cond = mx.symbol.Activation(data=cond_v, act_type='relu') else: cond_v = cos_t - threshold cond = mx.symbol.Activation(data=cond_v, act_type='relu') #theta = mx.sym.arccos(costheta) #sintheta = mx.sym.sin(theta) body = cos_t * cos_t body = 1.0 - body sin_t = mx.sym.sqrt(body) new_zy = cos_t * cos_m b = sin_t * sin_m new_zy = new_zy - b new_zy = new_zy * s if args.easy_margin: zy_keep = zy else: zy_keep = zy - s * mm new_zy = mx.sym.where(cond, new_zy, zy_keep) if args.margin_verbose > 0: new_cos_t = new_zy / s margin_symbols.append(mx.symbol.mean(new_cos_t)) diff = new_zy - zy diff = mx.sym.expand_dims(diff, 1) gt_one_hot = mx.sym.one_hot(gt_label, depth=args.num_classes, on_value=1.0, off_value=0.0) body = mx.sym.broadcast_mul(gt_one_hot, diff) fc7 = fc7 + body elif args.loss_type == 10: #marginal loss nembedding = mx.symbol.L2Normalization(embedding, mode='instance', name='fc1n') params = [1.2, 0.3, 1.0] n1 = mx.sym.expand_dims(nembedding, axis=1) #N,1,C n2 = mx.sym.expand_dims(nembedding, axis=0) #1,N,C body = mx.sym.broadcast_sub(n1, n2) #N,N,C body = body * body body = mx.sym.sum(body, axis=2) # N,N #body = mx.sym.sqrt(body) body = body - params[0] mask = mx.sym.Variable('extra') body = body * mask body = body + params[1] #body = mx.sym.maximum(body, 0.0) body = mx.symbol.Activation(data=body, act_type='relu') body = mx.sym.sum(body) body = body / (args.per_batch_size * args.per_batch_size - args.per_batch_size) 
extra_loss = mx.symbol.MakeLoss(body, grad_scale=params[2]) elif args.loss_type == 11: #npair loss params = [0.9, 0.2] nembedding = mx.symbol.L2Normalization(embedding, mode='instance', name='fc1n') nembedding = mx.sym.transpose(nembedding) nembedding = mx.symbol.reshape( nembedding, (args.emb_size, args.per_identities, args.images_per_identity)) nembedding = mx.sym.transpose(nembedding, axes=(2, 1, 0)) #2*id*512 #nembedding = mx.symbol.reshape(nembedding, (args.emb_size, args.images_per_identity, args.per_identities)) #nembedding = mx.sym.transpose(nembedding, axes=(1,2,0)) #2*id*512 n1 = mx.symbol.slice_axis(nembedding, axis=0, begin=0, end=1) n2 = mx.symbol.slice_axis(nembedding, axis=0, begin=1, end=2) #n1 = [] #n2 = [] #for i in xrange(args.per_identities): # _n1 = mx.symbol.slice_axis(nembedding, axis=0, begin=2*i, end=2*i+1) # _n2 = mx.symbol.slice_axis(nembedding, axis=0, begin=2*i+1, end=2*i+2) # n1.append(_n1) # n2.append(_n2) #n1 = mx.sym.concat(*n1, dim=0) #n2 = mx.sym.concat(*n2, dim=0) #rembeddings = mx.symbol.reshape(nembedding, (args.images_per_identity, args.per_identities, 512)) #n1 = mx.symbol.slice_axis(rembeddings, axis=0, begin=0, end=1) #n2 = mx.symbol.slice_axis(rembeddings, axis=0, begin=1, end=2) n1 = mx.symbol.reshape(n1, (args.per_identities, args.emb_size)) n2 = mx.symbol.reshape(n2, (args.per_identities, args.emb_size)) cosine_matrix = mx.symbol.dot(lhs=n1, rhs=n2, transpose_b=True) #id*id, id=N of N-pair data_extra = mx.sym.Variable('extra') data_extra = mx.sym.slice_axis(data_extra, axis=0, begin=0, end=args.per_identities) mask = cosine_matrix * data_extra #body = mx.sym.mean(mask) fii = mx.sym.sum_axis(mask, axis=1) fij_fii = mx.sym.broadcast_sub(cosine_matrix, fii) fij_fii = mx.sym.exp(fij_fii) row = mx.sym.sum_axis(fij_fii, axis=1) row = mx.sym.log(row) body = mx.sym.mean(row) extra_loss = mx.sym.MakeLoss(body) elif args.loss_type == 12: #triplet loss nembedding = mx.symbol.L2Normalization(embedding, mode='instance', name='fc1n') 
anchor = mx.symbol.slice_axis(nembedding, axis=0, begin=0, end=args.per_batch_size // 3) positive = mx.symbol.slice_axis(nembedding, axis=0, begin=args.per_batch_size // 3, end=2 * args.per_batch_size // 3) negative = mx.symbol.slice_axis(nembedding, axis=0, begin=2 * args.per_batch_size // 3, end=args.per_batch_size) ap = anchor - positive an = anchor - negative ap = ap * ap an = an * an ap = mx.symbol.sum(ap, axis=1, keepdims=1) #(T,1) an = mx.symbol.sum(an, axis=1, keepdims=1) #(T,1) triplet_loss = mx.symbol.Activation(data=(ap - an + args.triplet_alpha), act_type='relu') triplet_loss = mx.symbol.mean(triplet_loss) #triplet_loss = mx.symbol.sum(triplet_loss)/(args.per_batch_size//3) extra_loss = mx.symbol.MakeLoss(triplet_loss) elif args.loss_type == 9: #coco loss centroids = [] for i in range(args.per_identities): xs = mx.symbol.slice_axis(embedding, axis=0, begin=i * args.images_per_identity, end=(i + 1) * args.images_per_identity) mean = mx.symbol.mean(xs, axis=0, keepdims=True) mean = mx.symbol.L2Normalization(mean, mode='instance') centroids.append(mean) centroids = mx.symbol.concat(*centroids, dim=0) nembedding = mx.symbol.L2Normalization( embedding, mode='instance', name='fc1n') * args.coco_scale fc7 = mx.symbol.dot(nembedding, centroids, transpose_b=True) #(batchsize, per_identities) #extra_loss = mx.symbol.softmax_cross_entropy(fc7, gt_label, name='softmax_ce')/args.per_batch_size #extra_loss = mx.symbol.BlockGrad(extra_loss) else: #embedding = mx.symbol.L2Normalization(embedding, mode='instance', name='fc1n')*float(args.loss_type) embedding = embedding * 5 _weight = mx.symbol.Variable("fc7_weight", shape=(args.num_classes, args.emb_size), lr_mult=1.0) _weight = mx.symbol.L2Normalization(_weight, mode='instance') * 2 fc7 = mx.sym.LSoftmax(data=embedding, label=gt_label, num_hidden=args.num_classes, weight=_weight, beta=args.beta, margin=args.margin, scale=args.scale, beta_min=args.beta_min, verbose=100, name='fc7') #fc7 = mx.sym.Custom(data=embedding, 
label=gt_label, weight=_weight, num_hidden=args.num_classes, # beta=args.beta, margin=args.margin, scale=args.scale, # op_type='ASoftmax', name='fc7') if args.loss_type <= 1 and args.incay > 0.0: params = [1.e-10] sel = mx.symbol.argmax(data=fc7, axis=1) sel = (sel == gt_label) norm = embedding * embedding norm = mx.symbol.sum(norm, axis=1) norm = norm + params[0] feature_incay = sel / norm feature_incay = mx.symbol.mean(feature_incay) * args.incay extra_loss = mx.symbol.MakeLoss(feature_incay) #out = softmax #l2_embedding = mx.symbol.L2Normalization(embedding) #ce = mx.symbol.softmax_cross_entropy(fc7, gt_label, name='softmax_ce')/args.per_batch_size #out = mx.symbol.Group([mx.symbol.BlockGrad(embedding), softmax, mx.symbol.BlockGrad(ce)]) out_list = [mx.symbol.BlockGrad(embedding)] softmax = None if args.loss_type < 10: softmax = mx.symbol.SoftmaxOutput(data=fc7, label=gt_label, name='softmax', normalization='valid') out_list.append(softmax) if softmax is None: out_list.append(mx.sym.BlockGrad(gt_label)) if extra_loss is not None: out_list.append(extra_loss) for _sym in margin_symbols: _sym = mx.sym.BlockGrad(_sym) out_list.append(_sym) out = mx.symbol.Group(out_list) return (out, arg_params, aux_params)
def main():
    """Build the Xception symbol for the selected dataset and train it.

    All configuration is read from the module-level ``args`` namespace.
    Supports ``args.data_type`` in {"cifar10", "imagenet"}; anything else
    raises ValueError.  Checkpoints are written under ./model/.
    """
    # Dataset-specific class count and symbol construction.
    if args.data_type == "cifar10":
        # CIFAR-10 always runs with the lightest augmentation level.
        args.aug_level = 1
        args.num_classes = 10
        symbol = get_xception_symbol(args.num_classes)
    elif args.data_type == "imagenet":
        args.num_classes = 1000
        symbol = get_xception_symbol(args.num_classes)
    else:
        raise ValueError("do not support {} yet".format(args.data_type))
    kv = mx.kvstore.create(args.kv_store)
    # One context per requested GPU id; CPU when no GPUs were given.
    devs = mx.cpu() if args.gpus is None else [
        mx.gpu(int(i)) for i in args.gpus.split(',')
    ]
    # Batches per epoch per worker (at least 1).
    epoch_size = max(int(args.num_examples / args.batch_size / kv.num_workers), 1)
    begin_epoch = args.model_load_epoch if args.model_load_epoch else 0
    if not os.path.exists("./model"):
        os.mkdir("./model")
    # NOTE(review): the last prefix field is a hard-coded 0 (not a depth or
    # config id) — confirm this matches the checkpoint naming other tooling expects.
    model_prefix = "model/xception-{}-{}-{}".format(args.data_type, kv.rank, 0)
    checkpoint = mx.callback.do_checkpoint(model_prefix)
    arg_params = None
    aux_params = None
    if args.retrain:
        # Resume weights from an earlier checkpoint with the same prefix.
        _, arg_params, aux_params = mx.model.load_checkpoint(
            model_prefix, args.model_load_epoch)
    if args.memonger:
        import memonger
        # Re-plan the graph to trade recomputation for feature-map memory.
        symbol = memonger.search_plan(
            symbol,
            data=(args.batch_size, 3, 32, 32) if args.data_type == "cifar10"
            else (args.batch_size, 3, 224, 224))
    # Training iterator: record file and augmentation strength depend on
    # data_type and aug_level (1 = mild, 3 = strongest).
    train = mx.io.ImageRecordIter(
        path_imgrec=os.path.join(args.data_dir, "train.rec")
        if args.data_type == 'cifar10' else
        os.path.join(args.data_dir, "train_256_q90.rec")
        if args.aug_level == 1 else
        os.path.join(args.data_dir, "train_480_q90.rec"),
        label_width=1,
        data_name='data',
        label_name='softmax_label',
        data_shape=(3, 32, 32) if args.data_type == "cifar10" else (3, 224, 224),
        batch_size=args.batch_size,
        pad=4 if args.data_type == "cifar10" else 0,
        fill_value=127,  # only used when pad is non-zero
        rand_crop=True,
        max_random_scale=1.0,  # source records: 480px for imagenet, 32px for cifar10
        # 256.0/480.0 = 0.533, 256.0/384.0 = 0.667, 256.0/256.0 = 1.0
        min_random_scale=1.0 if args.data_type == "cifar10" else
        1.0 if args.aug_level == 1 else 0.533,
        max_aspect_ratio=0 if args.data_type == "cifar10" else
        0 if args.aug_level == 1 else 0.25,
        random_h=0 if args.data_type == "cifar10" else
        0 if args.aug_level == 1 else 36,  # 0.4*90
        random_s=0 if args.data_type == "cifar10" else
        0 if args.aug_level == 1 else 50,  # 0.4*127
        random_l=0 if args.data_type == "cifar10" else
        0 if args.aug_level == 1 else 50,  # 0.4*127
        # Rotation/shear only at the strongest augmentation level (3).
        max_rotate_angle=0 if args.aug_level <= 2 else 10,
        max_shear_ratio=0 if args.aug_level <= 2 else 0.1,
        rand_mirror=True,
        shuffle=True,
        num_parts=kv.num_workers,
        part_index=kv.rank)
    # Validation iterator: deterministic (no crop/mirror augmentation).
    val = mx.io.ImageRecordIter(
        path_imgrec=os.path.join(args.data_dir, "val.rec")
        if args.data_type == 'cifar10' else
        os.path.join(args.data_dir, "val_256_q90.rec"),
        label_width=1,
        data_name='data',
        label_name='softmax_label',
        batch_size=args.batch_size,
        data_shape=(3, 32, 32) if args.data_type == "cifar10" else (3, 224, 224),
        rand_crop=False,
        rand_mirror=False,
        num_parts=kv.num_workers,
        part_index=kv.rank)
    # NOTE(review): for cifar10 the LR steps [220, 260, 280] exceed
    # num_epoch=200, so the learning rate never decays — confirm intended.
    model = mx.model.FeedForward(
        ctx=devs,
        symbol=symbol,
        arg_params=arg_params,
        aux_params=aux_params,
        num_epoch=200 if args.data_type == "cifar10" else 125,
        begin_epoch=begin_epoch,
        learning_rate=args.lr,
        momentum=args.mom,
        wd=args.wd,
        optimizer='nag',
        # optimizer = 'sgd',
        initializer=mx.init.Xavier(rnd_type='gaussian',
                                   factor_type="in",
                                   magnitude=2),
        lr_scheduler=multi_factor_scheduler(
            begin_epoch, epoch_size, step=[220, 260, 280], factor=0.1)
        if args.data_type == 'cifar10' else multi_factor_scheduler(
            begin_epoch, epoch_size, step=[20, 40, 60, 80, 90, 110],
            factor=0.1),
    )
    # Track top-5 accuracy as well for imagenet.
    model.fit(X=train,
              eval_data=val,
              eval_metric=['acc'] if args.data_type == 'cifar10' else
              ['acc', mx.metric.create('top_k_accuracy', top_k=5)],
              kvstore=kv,
              batch_end_callback=mx.callback.Speedometer(
                  args.batch_size, args.frequent),
              epoch_end_callback=checkpoint)
# Per-stage dense-block unit counts for the published DenseNet depths.
_DENSENET_UNITS = {
    121: [6, 12, 24, 16],
    169: [6, 12, 32, 32],
    201: [6, 12, 48, 32],
    161: [6, 12, 36, 24],
}

# Checkpoint output directory (previously duplicated as a literal).
_MODEL_DIR = "/data/deeplearning/lane_detect/kd.senet.assist/output/desenet_models"


def _units_for_depth(depth, extra=None):
    """Return the per-stage unit list for ``depth``.

    extra: optional ``{depth: units}`` mapping merged over the standard
    table (the "kd" dataset adds a custom depth-80 configuration).
    Raises ValueError for unsupported depths.
    """
    table = dict(_DENSENET_UNITS)
    if extra:
        table.update(extra)
    if depth not in table:
        raise ValueError(
            "no experiments done on detph {}, you can do it youself".format(depth))
    # Copy so callers cannot mutate the shared table.
    return list(table[depth])


def main():
    """Build the DenseNet symbol for ``args.data_type`` and train it.

    Configuration comes from the module-level ``args`` namespace.
    Supported data types: imagenet, vggface, msface, kd; anything else
    raises ValueError.  Checkpoints are written under ``_MODEL_DIR``.
    """
    # Dataset-specific class count; "kd" additionally supports depth 80.
    if args.data_type == "imagenet":
        args.num_classes = 1000
        units = _units_for_depth(args.depth)
    elif args.data_type == "vggface":
        args.num_classes = 2613
        units = _units_for_depth(args.depth)
    elif args.data_type == "msface":
        args.num_classes = 79051
        units = _units_for_depth(args.depth)
    elif args.data_type == "kd":
        args.num_classes = 15
        units = _units_for_depth(args.depth, extra={80: [4, 8, 16, 12]})
    else:
        raise ValueError("do not support {} yet".format(args.data_type))
    # DenseNet-161 is defined with growth rate 48; other depths use the CLI value.
    symbol = DenseNet(units=units, num_stage=4,
                      growth_rate=48 if args.depth == 161 else args.growth_rate,
                      num_class=args.num_classes, data_type=args.data_type,
                      reduction=args.reduction, drop_out=args.drop_out,
                      bottle_neck=True, bn_mom=args.bn_mom,
                      workspace=args.workspace)
    kv = mx.kvstore.create(args.kv_store)
    # One context per requested GPU id; CPU when no GPUs were given.
    devs = mx.cpu() if args.gpus is None else [
        mx.gpu(int(i)) for i in args.gpus.split(',')]
    # Batches per epoch per worker (at least 1).
    epoch_size = max(int(args.num_examples / args.batch_size / kv.num_workers), 1)
    begin_epoch = args.model_load_epoch if args.model_load_epoch else 0
    if not os.path.exists(_MODEL_DIR):
        os.makedirs(_MODEL_DIR)
    model_prefix = _MODEL_DIR + "/densenet-{}-{}-{}".format(
        args.data_type, args.depth, kv.rank)
    checkpoint = mx.callback.do_checkpoint(model_prefix)
    arg_params = None
    aux_params = None
    if args.retrain:
        # Resume weights from an earlier checkpoint with the same prefix.
        _, arg_params, aux_params = mx.model.load_checkpoint(
            model_prefix, args.model_load_epoch)
    if args.memonger:
        import memonger
        # Re-plan the graph to trade recomputation for feature-map memory.
        # NOTE(review): the cifar10 branch is unreachable here (data_type is
        # one of imagenet/vggface/msface/kd), so the 224x224 shape is always used.
        symbol = memonger.search_plan(
            symbol,
            data=(args.batch_size, 3, 32, 32) if args.data_type == "cifar10"
            else (args.batch_size, 3, 224, 224))
    # Training iterator: mild geometric augmentation, no color jitter.
    train = mx.io.ImageRecordIter(
        path_imgrec=os.path.join(args.data_dir, "main_assist_train.rec"),
        preprocess_threads=20,
        label_width=1,
        data_name='data',
        label_name='softmax_label',
        data_shape=(3, 224, 224),
        resize=256,
        batch_size=args.batch_size,
        pad=0,
        fill_value=127,  # only used when pad is non-zero
        max_img_size=256,
        min_img_size=256,
        rand_crop=True,
        max_random_scale=1.1,
        min_random_scale=0.9,
        max_aspect_ratio=0.1,
        random_h=0,
        random_s=0,
        random_l=0,
        max_rotate_angle=5,
        max_shear_ratio=0,
        rand_mirror=False,
        shuffle=True,
        num_parts=kv.num_workers,
        part_index=kv.rank
    )
    # Validation iterator: deterministic (no crop/mirror augmentation).
    val = mx.io.ImageRecordIter(
        path_imgrec=os.path.join(args.data_dir, "main_assist_val.rec"),
        preprocess_threads=20,
        label_width=1,
        resize=256,
        max_img_size=256,
        min_img_size=256,
        data_name='data',
        label_name='softmax_label',
        batch_size=args.batch_size,
        data_shape=(3, 224, 224),
        rand_crop=False,
        rand_mirror=False,
        num_parts=kv.num_workers,
        part_index=kv.rank)
    model = mx.model.FeedForward(
        ctx=devs,
        symbol=symbol,
        arg_params=arg_params,
        aux_params=aux_params,
        num_epoch=1000,
        begin_epoch=begin_epoch,
        learning_rate=args.lr,
        momentum=args.mom,
        wd=args.wd,
        optimizer='nag',
        initializer=mx.init.Xavier(rnd_type='gaussian',
                                   factor_type="in", magnitude=2),
        lr_scheduler=multi_factor_scheduler(
            begin_epoch, epoch_size,
            step=[10, 18, 25, 30, 40, 50], factor=0.1),
    )
    model.fit(
        X=train,
        eval_data=val,
        eval_metric=['acc'],
        kvstore=kv,
        batch_end_callback=mx.callback.Speedometer(args.batch_size,
                                                   args.frequent),
        epoch_end_callback=checkpoint)
# Dense-block unit counts for each published DenseNet depth.
supported_units = {
    121: [6, 12, 24, 16],
    169: [6, 12, 32, 32],
    201: [6, 12, 48, 32],
    161: [6, 12, 36, 24],
}
if args.num_layers not in supported_units:
    raise ValueError("no experiments done on detph {}, you can do it youself".format(args.num_layers))
units = supported_units[args.num_layers]

# Build the network, then let memonger re-plan it for a smaller
# feature-map footprint and report the before/after cost.
sym = densenet.get_symbol(args.num_classes, args.num_block, units, args.growth_rate)
dshape = (args.batch_size, 3, 224, 224)
net_mem_planned, cost, threshold = search_plan(sym, data=dshape)
old_cost = get_cost(sym, data=dshape)
new_cost = get_cost(net_mem_planned, data=dshape)
print('Old feature map cost=%d MB' % old_cost)
print('New feature map cost=%d MB' % new_cost)

# Swap in a fresh classifier head for fine-tuning, then train.
(new_sym, new_args) = get_fine_tune_model(net_mem_planned, arg_params,
                                          args.num_classes,
                                          args.layer_before_fullc, args.dtype)
fit.fit(args=args,
        network=new_sym,
        data_loader=data.get_rec_iter,
        arg_params=new_args,
        aux_params=aux_params)
# Unroll hyper-parameters for the LSTM memory-planning demo.
batch_size = 64
seq_len = 1000
num_hidden = 1024
num_embed = 1024
input_size = 50
num_lstm_layer = 4
num_label = 5000

# Unroll the LSTM and derive the input shapes memonger needs.
net = lstm_unroll(num_lstm_layer=num_lstm_layer, seq_len=seq_len,
                  input_size=input_size, num_hidden=num_hidden,
                  num_embed=num_embed, num_label=num_label,
                  concat_decode=concat_decode, use_loss=use_loss)
ishapes = get_input_shapes(net, batch_size=batch_size,
                           num_hidden=num_hidden,
                           input_size=input_size, seq_len=seq_len)

# Re-plan the graph and compare feature-map memory before and after.
net_mem_planned = memonger.search_plan(net, **ishapes)
old_cost = memonger.get_cost(net, **ishapes)
new_cost = memonger.get_cost(net_mem_planned, **ishapes)
for label, cost in (('Old', old_cost), ('New', new_cost)):
    print('%s feature map cost=%d MB' % (label, cost))
# The planned net can safely be fed to the subsequent MXNet training script.