Example #1
def get_model(dshape, checkpoint, name):
    if name == "res50":
        net = getResNet50Model()
        net = net.symbol
    elif name == "vgg":
        net, arg_params, aux_params = mx.model.load_checkpoint("vgg16", 0)
    elif name == "res152":
        net, arg_params, aux_params = mx.model.load_checkpoint("resnet-152", 0)
    else:
        print("Unsupport network type ", name)
        raise NotImplementedError

    old_cost = memonger.get_cost(net, data=dshape)
    print('Old feature map cost=%d MB' % old_cost)
    if checkpoint > 0:
        #  net = memonger.search_plan(net, data=dshape)
        plan_info = {}
        net = memonger.make_mirror_plan(net,
                                        checkpoint,
                                        plan_info,
                                        data=dshape)
        print(plan_info)
        new_cost = memonger.get_cost(net, data=dshape)
        print('New feature map cost=%d MB' % new_cost)
    mod = mx.mod.Module(symbol=net,
                        context=mx.cpu(),
                        data_names=['data'],
                        label_names=['softmax_label'])

    return mod
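get_model returns an unbound Module. A minimal usage sketch, assuming the referenced checkpoint files exist locally; the batch size and checkpoint granularity below are illustrative, not from the original script:

import mxnet as mx

dshape = (32, 3, 224, 224)  # assumed NCHW input shape
mod = get_model(dshape, checkpoint=2, name='res152')

# Bind memory and initialize parameters before benchmarking or training.
mod.bind(data_shapes=[('data', dshape)],
         label_shapes=[('softmax_label', (dshape[0],))])
mod.init_params(initializer=mx.init.Xavier())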
Example #2
def get_symbol():  # called as: embedding = eval(config.net_name).get_symbol()
    """
    Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py
    Original author Wei Wu
    """

    num_classes = config.emb_size
    num_layers = config.num_layers
    if num_layers >= 500:
        filter_list = [64, 256, 512, 1024, 2048]
        bottle_neck = True
    else:
        filter_list = [64, 64, 128, 256, 512]  # per-stage block output channels
        bottle_neck = False
    num_stages = 4
    if num_layers == 18:
        units = [2, 2, 2, 2]
    elif num_layers == 34:
        units = [3, 4, 6, 3]
    elif num_layers == 49:
        units = [3, 4, 14, 3]
    elif num_layers == 50:
        units = [3, 4, 14, 3]
    elif num_layers == 74:
        units = [3, 6, 24, 3]
    elif num_layers == 90:
        units = [3, 8, 30, 3]
    elif num_layers == 98:
        units = [3, 4, 38, 3]
    elif num_layers == 99:
        units = [3, 8, 35, 3]
    elif num_layers == 100:
        units = [3, 13, 30, 3]
    elif num_layers == 124:
        units = [3, 13, 40, 5]
    elif num_layers == 160:
        units = [3, 24, 49, 3]
    elif num_layers == 101:
        units = [3, 4, 23, 3]
    elif num_layers == 152:
        units = [3, 8, 36, 3]
    elif num_layers == 200:
        units = [3, 24, 36, 3]
    elif num_layers == 269:
        units = [3, 30, 48, 8]
    else:
        raise ValueError(
            "no experiments done on num_layers {}, you can do it yourself".
            format(num_layers))

    net = resnet(
        units=units,
        num_stages=num_stages,  # 4
        filter_list=filter_list,
        num_classes=num_classes,  # config.emb_size, e.g. 512
        bottle_neck=bottle_neck)

    if config.memonger:
        dshape = (config.per_batch_size, config.image_shape[2],
                  config.image_shape[0], config.image_shape[1])
        net_mem_planned = memonger.search_plan(net, data=dshape)
        old_cost = memonger.get_cost(net, data=dshape)
        new_cost = memonger.get_cost(net_mem_planned, data=dshape)

        print('Old feature map cost=%d MB' % old_cost)
        print('New feature map cost=%d MB' % new_cost)
        net = net_mem_planned
    return net
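get_symbol reads everything from a global config module. A minimal stand-in with just the fields used above; the values are illustrative, not from the original project:

from types import SimpleNamespace

# Hypothetical stand-in for the config module this function assumes.
config = SimpleNamespace(
    emb_size=512,               # embedding dimension (num_classes above)
    num_layers=100,             # selects the per-stage unit counts
    memonger=False,             # enable memonger planning
    per_batch_size=64,          # batch size per device
    image_shape=(112, 112, 3),  # (height, width, channels); channels at index 2
)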
Example #3
def main():
    if args.data_type == "imagenet":
        args.num_classes = 1000
        if args.depth == 121:
            units = [6, 12, 24, 16]
        elif args.depth == 169:
            units = [6, 12, 32, 32]
        elif args.depth == 201:
            units = [6, 12, 48, 32]
        elif args.depth == 161:
            units = [6, 12, 36, 24]
        else:
            raise ValueError(
                "no experiments done on detph {}, you can do it youself".
                format(args.depth))
        symbol = DenseNet(
            units=units,
            num_stage=4,
            growth_rate=48 if args.depth == 161 else args.growth_rate,
            num_class=args.num_classes,
            data_type="imagenet",
            reduction=args.reduction,
            drop_out=args.drop_out,
            bottle_neck=True,
            bn_mom=args.bn_mom,
            workspace=args.workspace)
    elif args.data_type == "vggface":
        args.num_classes = 2613
        if args.depth == 121:
            units = [6, 12, 24, 16]
        elif args.depth == 169:
            units = [6, 12, 32, 32]
        elif args.depth == 201:
            units = [6, 12, 48, 32]
        elif args.depth == 161:
            units = [6, 12, 36, 24]
        else:
            raise ValueError(
                "no experiments done on detph {}, you can do it youself".
                format(args.depth))
        symbol = DenseNet(
            units=units,
            num_stage=4,
            growth_rate=48 if args.depth == 161 else args.growth_rate,
            num_class=args.num_classes,
            data_type="vggface",
            reduction=args.reduction,
            drop_out=args.drop_out,
            bottle_neck=True,
            bn_mom=args.bn_mom,
            workspace=args.workspace)
    elif args.data_type == "msface":
        args.num_classes = 79051
        if args.depth == 121:
            units = [6, 12, 24, 16]
        elif args.depth == 169:
            units = [6, 12, 32, 32]
        elif args.depth == 201:
            units = [6, 12, 48, 32]
        elif args.depth == 161:
            units = [6, 12, 36, 24]
        else:
            raise ValueError(
                "no experiments done on detph {}, you can do it youself".
                format(args.depth))
        symbol = DenseNet(
            units=units,
            num_stage=4,
            growth_rate=48 if args.depth == 161 else args.growth_rate,
            num_class=args.num_classes,
            data_type="msface",
            reduction=args.reduction,
            drop_out=args.drop_out,
            bottle_neck=True,
            bn_mom=args.bn_mom,
            workspace=args.workspace)
    elif args.data_type == 'cifar10':
        args.num_classes = 10
        N = (args.depth - 4) // 6  # note: computed but unused below
        units = [16, 16, 16]
        symbol = DenseNet(units, 3, 12, 10, 'cifar10')
    else:
        raise ValueError("do not support {} yet".format(args.data_type))
    kv = mx.kvstore.create(args.kv_store)
    devs = mx.cpu() if args.gpus is None else [
        mx.gpu(int(i)) for i in args.gpus.split(',')
    ]
    epoch_size = max(int(args.num_examples / args.batch_size / kv.num_workers),
                     1)
    begin_epoch = args.model_load_epoch if args.model_load_epoch else 0
    if not os.path.exists("./model"):
        os.mkdir("./model")
    model_prefix = "model/densenet-{}-{}-{}".format(args.data_type, args.depth,
                                                    kv.rank)
    checkpoint = mx.callback.do_checkpoint(model_prefix)
    arg_params = None
    aux_params = None
    if args.retrain:
        _, arg_params, aux_params = mx.model.load_checkpoint(
            model_prefix, args.model_load_epoch)

    train = mx.io.ImageRecordIter(
        path_imgrec=os.path.join(args.data_dir, "train.rec")
        if args.data_type == 'cifar10' else
        os.path.join(args.data_dir, "train_256_q90.rec") if args.aug_level == 1
        else os.path.join(args.data_dir, "train_480_q90.rec"),
        label_width=1,
        data_name='data',
        label_name='softmax_label',
        data_shape=(3, 32, 32) if args.data_type == "cifar10" else
        (3, 224, 224),
        batch_size=args.batch_size,
        pad=4 if args.data_type == "cifar10" else 0,
        fill_value=127,  # only used when pad is valid
        rand_crop=True,
        max_random_scale=1.0,  # 480 with imagenet and vggface, 384 with msface, 32 with cifar10
        min_random_scale=(1.0 if args.data_type == "cifar10" or args.aug_level == 1
                          else 0.667),  # 256.0/480.0=0.533, 256.0/384.0=0.667
        max_aspect_ratio=(0 if args.data_type == "cifar10" or args.aug_level == 1
                          else 0.25),
        random_h=(0 if args.data_type == "cifar10" or args.aug_level == 1
                  else 36),  # 0.4*90
        random_s=(0 if args.data_type == "cifar10" or args.aug_level == 1
                  else 50),  # 0.4*127
        random_l=(0 if args.data_type == "cifar10" or args.aug_level == 1
                  else 50),  # 0.4*127
        max_rotate_angle=0 if args.aug_level <= 2 else 10,
        max_shear_ratio=0 if args.aug_level <= 2 else 0.1,
        rand_mirror=True,
        shuffle=True,
        num_parts=kv.num_workers,
        part_index=kv.rank)
    val = mx.io.ImageRecordIter(
        path_imgrec=os.path.join(args.data_dir, "val.rec") if args.data_type
        == 'cifar10' else os.path.join(args.data_dir, "val_256_q90.rec"),
        label_width=1,
        data_name='data',
        label_name='softmax_label',
        batch_size=args.batch_size,
        data_shape=(3, 32, 32) if args.data_type == "cifar10" else
        (3, 224, 224),
        rand_crop=False,
        rand_mirror=False,
        num_parts=kv.num_workers,
        part_index=kv.rank)

    dshape = (64, 3, 32, 32)  # note: hardcoded to the cifar10 input shape
    net_planned = memonger.search_plan(symbol, 1, data=dshape)

    old_cost = memonger.get_cost(symbol, data=dshape)
    new_cost = memonger.get_cost(net_planned, data=dshape)

    print('Old feature map cost=%d MB' % old_cost)
    print('New feature map cost=%d MB' % new_cost)

    model = mx.model.FeedForward(
        ctx=devs,
        symbol=net_planned,
        arg_params=arg_params,
        aux_params=aux_params,
        num_epoch=300 if args.data_type == "cifar10" else 125,
        begin_epoch=begin_epoch,
        learning_rate=args.lr,
        momentum=args.mom,
        wd=args.wd,
        optimizer='sgd',
        initializer=mx.init.Xavier(rnd_type='gaussian',
                                   factor_type="in",
                                   magnitude=2),
        lr_scheduler=multi_factor_scheduler(
            begin_epoch, epoch_size, step=[220, 260, 280], factor=0.1)
        if args.data_type == 'cifar10' else multi_factor_scheduler(
            begin_epoch,
            epoch_size,
            step=[30, 60, 90, 95, 115, 120],
            factor=0.1),
    )

    model.fit(X=train,
              eval_data=val,
              eval_metric=['acc'] if args.data_type == 'cifar10' else
              ['acc', mx.metric.create('top_k_accuracy', top_k=5)],
              kvstore=kv,
              batch_end_callback=mx.callback.Speedometer(
                  args.batch_size, args.frequent),
              epoch_end_callback=checkpoint)
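The script also relies on a multi_factor_scheduler helper that is not shown here. A sketch of the usual implementation (as in tornadomeet's ResNet training script, from which several of these examples derive): it drops the step epochs already passed when resuming and converts the rest from epochs to iteration counts.

import mxnet as mx

def multi_factor_scheduler(begin_epoch, epoch_size, step=[60, 75, 90], factor=0.1):
    # Keep only the steps still ahead of the resume epoch, expressed in iterations.
    step_ = [epoch_size * (x - begin_epoch) for x in step if x - begin_epoch > 0]
    return mx.lr_scheduler.MultiFactorScheduler(step=step_, factor=factor) if len(step_) else None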
Example #4
def get_symbol(layers):
    """
    layers : list of stage configuration
    """
    assert(len(layers) == 4)
    base_filter = 64
    data = mx.sym.Variable(name='data')
    conv1 = ConvModule(data, base_filter, kernel=(7, 7), pad=(3, 3), stride=(2, 2))
    mp1 = mx.sym.Pooling(data=conv1, pool_type="max", kernel=(3, 3), stride=(2, 2))
    sym = mp1
    for j in range(len(layers)):
        for i in range(layers[j]):
            sym = ResModule(sym, base_filter, j, i)

    avg = mx.symbol.Pooling(data=sym, kernel=(7, 7), stride=(1, 1), name="global_pool", pool_type='avg')
    flatten = mx.symbol.Flatten(data=avg, name='flatten')
    fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=1000, name='fc1')
    net = mx.symbol.SoftmaxOutput(data=fc1, name='softmax')
    return net


layers = [3, 24, 36, 3]
batch_size = 32
net = get_symbol(layers)
dshape = (batch_size, 3, 224, 224)
net_mem_planned = memonger.search_plan(net, data=dshape)
old_cost = memonger.get_cost(net, data=dshape)
new_cost = memonger.get_cost(net_mem_planned, data=dshape)

print('Old feature map cost=%d MB' % old_cost)
print('New feature map cost=%d MB' % new_cost)
# You can safely feed the net to the subsequent MXNet training script.
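This example assumes two helpers, ConvModule and ResModule, that the memonger example script defines elsewhere. A minimal sketch of ConvModule under the usual Convolution → BatchNorm → ReLU pattern; the mirror_stage attribute is the hook that lets memonger.search_plan consider recomputing an activation instead of keeping it resident (names and defaults here are assumptions, not the original code):

import mxnet as mx

def ConvModule(sym, num_filter, kernel=(1, 1), pad=(0, 0), stride=(1, 1)):
    # Convolution -> BatchNorm -> ReLU building block (assumed layout).
    conv = mx.sym.Convolution(data=sym, num_filter=num_filter,
                              kernel=kernel, pad=pad, stride=stride)
    bn = mx.sym.BatchNorm(data=conv)
    act = mx.sym.Activation(data=bn, act_type='relu')
    # Annotate the activation as a candidate split point for memonger.
    act._set_attr(mirror_stage='True')
    return act

ResModule would then be a residual block built from ConvModule plus an elementwise sum with its input.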
Example #5
def get_symbol():
    """
    Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py
    Original author Wei Wu
    """
    num_classes = config.emb_size
    num_layers = config.num_layers
    if num_layers >= 500:
        filter_list = [64, 256, 512, 1024, 2048]
        bottle_neck = True
    else:
        filter_list = [64, 64, 128, 256, 512]
        bottle_neck = False
    num_stages = 4
    if num_layers == 18:
        units = [2, 2, 2, 2]
    elif num_layers == 34:
        units = [3, 4, 6, 3]
    elif num_layers == 49:
        units = [3, 4, 14, 3]
    elif num_layers == 50:
        units = [3, 4, 14, 3]
    elif num_layers == 74:
        units = [3, 6, 24, 3]
    elif num_layers == 90:
        units = [3, 8, 30, 3]
    elif num_layers == 98:
        units = [3, 4, 38, 3]
    elif num_layers == 99:
        units = [3, 8, 35, 3]
    elif num_layers == 100:
        units = [3, 13, 30, 3]
    elif num_layers == 124:
        units = [3, 13, 40, 5]
    elif num_layers == 160:
        units = [3, 24, 49, 3]
    elif num_layers == 101:
        units = [3, 4, 23, 3]
    elif num_layers == 152:
        units = [3, 8, 36, 3]
    elif num_layers == 200:
        units = [3, 24, 36, 3]
    elif num_layers == 269:
        units = [3, 30, 48, 8]
    else:
        raise ValueError(
            "no experiments done on num_layers {}, you can do it yourself".
            format(num_layers))

    net = resnet(units=units,
                 num_stages=num_stages,
                 filter_list=filter_list,
                 num_classes=num_classes,
                 bottle_neck=bottle_neck)

    # plot network architecture
    digraph = mx.viz.plot_network(net,
                                  shape={'data': (1, 3, 224, 224)},
                                  save_format='pdf',
                                  node_attrs={
                                      "shape": "oval",
                                      "fixedsize": "false"
                                  })
    digraph.render(filename='fresnet{}_unit{}_se{}'.format(
        num_layers, config.net_unit, config.net_se))

    if config.memonger:
        dshape = (config.per_batch_size, config.image_shape[2],
                  config.image_shape[0], config.image_shape[1])
        net_mem_planned = memonger.search_plan(net, data=dshape)
        old_cost = memonger.get_cost(net, data=dshape)
        new_cost = memonger.get_cost(net_mem_planned, data=dshape)

        print('Old feature map cost=%d MB' % old_cost)
        print('New feature map cost=%d MB' % new_cost)
        net = net_mem_planned
    return net
Example #6
def predictNetMem(mod):
    # memonger.get_cost returns the feature-map memory of a symbol in MB;
    # `dshape` and `C` are globals assumed to be defined elsewhere.
    act_mem = memonger.get_cost(mod.symbol, data=dshape)
    return act_mem * 1.0 / 1024 + C  # MB -> GB, plus a fixed overhead C
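A sketch of the surrounding context this helper assumes; the shape, the constant C, and the model file below are all hypothetical:

import mxnet as mx
import memonger

dshape = (32, 3, 224, 224)  # hypothetical input shape passed to get_cost
C = 1.5                     # hypothetical fixed overhead (weights, workspace), in GB

sym = mx.sym.load('model-symbol.json')  # hypothetical saved symbol
mod = mx.mod.Module(symbol=sym, context=mx.cpu(),
                    data_names=['data'], label_names=['softmax_label'])
print('predicted memory: %.2f GB' % predictNetMem(mod))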
Example #7
    
    if args.num_layers == 121:
        units = [6, 12, 24, 16]
    elif args.num_layers == 169:
        units = [6, 12, 32, 32]
    elif args.num_layers == 201:
        units = [6, 12, 48, 32]
    elif args.num_layers == 161:
        units = [6, 12, 36, 24]
    else:
        raise ValueError("no experiments done on detph {}, you can do it youself".format(args.num_layers))
    

    sym = densenet.get_symbol(args.num_classes, args.num_block, units, args.growth_rate)
    dshape = (args.batch_size, 3, 224, 224)
    net_mem_planned, cost, threshold = search_plan(sym, data=dshape)  # this search_plan variant also returns the planned cost and threshold
    old_cost = get_cost(sym, data=dshape)
    new_cost = get_cost(net_mem_planned, data=dshape)
    print('Old feature map cost=%d MB' % old_cost)
    print('New feature map cost=%d MB' % new_cost)
    
    (new_sym, new_args) = get_fine_tune_model(net_mem_planned, arg_params, args.num_classes,
                                              args.layer_before_fullc, args.dtype)

    # train
    fit.fit(args        = args,
            network     = new_sym,
            data_loader = data.get_rec_iter,
            arg_params  = new_args,
            aux_params  = aux_params)
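get_fine_tune_model is also not shown. A sketch modeled on MXNet's image-classification fine-tune example, which cuts the pretrained symbol at layer_before_fullc and attaches a fresh classifier; treat the details as assumptions about the original:

import mxnet as mx

def get_fine_tune_model(symbol, arg_params, num_classes, layer_name, dtype='float32'):
    all_layers = symbol.get_internals()
    net = all_layers[layer_name + '_output']  # cut below the old classifier
    net = mx.symbol.FullyConnected(data=net, num_hidden=num_classes, name='fc1')
    if dtype == 'float16':
        net = mx.sym.Cast(data=net, dtype='float32')  # run softmax in fp32
    net = mx.symbol.SoftmaxOutput(data=net, name='softmax')
    # Drop the old fc weights so the new head is freshly initialized.
    new_args = {k: v for k, v in arg_params.items() if 'fc1' not in k}
    return (net, new_args)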
Example #8
batch_size = 64
seq_len = 1000
num_hidden = 1024
num_embed = 1024
input_size = 50
num_lstm_layer = 4
num_label = 5000

net = lstm_unroll(num_lstm_layer=num_lstm_layer,
                  seq_len=seq_len,
                  input_size=input_size,
                  num_hidden=num_hidden,
                  num_embed=num_embed,
                  num_label=num_label,
                  concat_decode=concat_decode,
                  use_loss=use_loss)

ishapes = get_input_shapes(net,
                           batch_size=batch_size,
                           num_hidden=num_hidden,
                           input_size=input_size,
                           seq_len=seq_len)

net_mem_planned = memonger.search_plan(net, **ishapes)
old_cost = memonger.get_cost(net, **ishapes)
new_cost = memonger.get_cost(net_mem_planned, **ishapes)

print('Old feature map cost=%d MB' % old_cost)
print('New feature map cost=%d MB' % new_cost)
# You can safely feed the net to the subsequent MXNet training script.
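get_input_shapes builds the keyword dictionary of input-name → shape that search_plan and get_cost expect for a multi-input unrolled LSTM. A plausible sketch; the argument-name patterns below are guesses based on common unrolled-LSTM conventions, not the original helper:

def get_input_shapes(net, batch_size, num_hidden, input_size, seq_len):
    ishapes = {}
    for name in net.list_arguments():
        if name.endswith('_data'):
            ishapes[name] = (batch_size, input_size)  # one time step of input
        elif name.endswith('_init_c') or name.endswith('_init_h'):
            ishapes[name] = (batch_size, num_hidden)  # initial cell/hidden state
    return ishapes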