Example #1
	def run(self):
		data_validate = mx.io.CSVIter(data_csv="../validate-64x64-data.csv", data_shape=(30, 64, 64), batch_size=1)
		network = get_lenet()
		batch_size = 32
		devs = [mx.cpu(0), mx.cpu(0), mx.cpu(0), mx.cpu(0)]  # distribute the batch across multiple cores
		data_train = mx.io.CSVIter(data_csv=self.input()['data'].path, data_shape=(30, 64, 64),
				label_csv=self.input()['label'].path, label_shape=(600,), batch_size=batch_size)


		print "\n%d epochs\n" % self.tune_epoch()
		model = mx.model.FeedForward(ctx=devs,
				symbol             = network,
				num_epoch          = self.tune_epoch(),
				learning_rate      = 0.001,
				wd                 = 0.00001,
				momentum           = 0.9)

		model.fit(X=data_train, eval_metric = mx.metric.np(CRPS))
		prob = model.predict(data_validate)
		prob_fname = "%s_prob" % self.name
		try:
			np.save(prob_fname, prob)
		except Exception:
			pickle.dump(prob, open(prob_fname + '.p', 'wb'))

		pickle.dump(model, open(self.output().path, 'wb'))
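For reference, a minimal sketch of loading these artifacts back (assuming the same prob_fname naming as above; np.save appends a ".npy" suffix, and the pickle file is the fallback written on failure):

import numpy as np
import pickle

def load_prob(prob_fname):
    # prefer the .npy file written by np.save; fall back to the pickle dump
    try:
        return np.load(prob_fname + ".npy")
    except IOError:
        with open(prob_fname + ".p", "rb") as f:
            return pickle.load(f)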
def test_convolution_with_type():
    np.random.seed(1234)
    sym1 = mx.sym.Convolution(num_filter=3, kernel=(3,3), name='conv')

    data = mx.sym.Variable('conv_data')
    w = mx.sym.Variable('conv_weight')
    b = mx.sym.Variable('conv_bias')
    w = mx.sym.transpose(w, axes=(0,2,3,1))
    sym2 = mx.sym.transpose(data, axes=(0,2,3,1))
    sym2 = mx.sym.Convolution(sym2, w, b, layout='NHWC', num_filter=3, kernel=(3,3))
    sym2 = mx.sym.transpose(sym2, axes=(0,3,1,2), name='conv')

    sym = [sym1, sym1, sym1, sym1, sym1, sym2, sym2]
    ctx_list = [{'ctx': mx.gpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float64}},
                {'ctx': mx.gpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float32}},
                {'ctx': mx.gpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float16}},
                {'ctx': mx.cpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float64}},
                {'ctx': mx.cpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float32}},
                # NHWC
                {'ctx': mx.gpu(0), 'conv_data': (2, 2, 10, 10), 'conv_weight': (3, 2, 3, 3),
                 'type_dict': {'conv_data': np.float32, 'conv_weight': np.float32}},
                {'ctx': mx.gpu(0), 'conv_data': (2, 2, 10, 10), 'conv_weight': (3, 2, 3, 3),
                 'type_dict': {'conv_data': np.float16, 'conv_weight': np.float16}}
                ]
    # wider tolerance needed for true-fp16 NCHW test above
    tol = {np.dtype(np.float16): 0.5,
           np.dtype(np.float32): 1e-3,
           np.dtype(np.float64): 1e-5,
           np.dtype(np.uint8): 0,
           np.dtype(np.int32): 0}
    check_consistency(sym, ctx_list, tol=tol)
    # test ability to turn off training on bias
    check_consistency(sym, ctx_list, grad_req={'conv_data': 'write', 'conv_weight': 'write', 'conv_bias': 'null'}, tol=tol)
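For orientation, check_consistency comes from mxnet.test_utils: it binds the symbol under every spec in ctx_list with identical inputs and asserts that outputs (and gradients) agree within dtype-dependent tolerances. A rough forward-only sketch of the idea (not the library implementation, which also checks gradients and scales tolerances per dtype as in tol above):

def rough_consistency(sym, ctx_list, rtol=1e-3):
    # run one output of `sym` under every context/dtype spec and compare
    outs = []
    for spec in ctx_list:
        shapes = {k: v for k, v in spec.items() if k not in ('ctx', 'type_dict')}
        exe = sym.simple_bind(spec['ctx'], type_dict=spec.get('type_dict', {}), **shapes)
        np.random.seed(0)  # identical inputs for every spec
        for name in sym.list_arguments():
            exe.arg_dict[name][:] = np.random.normal(size=exe.arg_dict[name].shape)
        exe.forward(is_train=False)
        outs.append(exe.outputs[0].asnumpy().astype(np.float64))
    for out in outs[1:]:
        np.testing.assert_allclose(outs[0], out, rtol=rtol)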
def test_elementwisesum_with_type():
    sym = mx.sym.ElementWiseSum(name="ews", num_args=2)
    ctx_list = [
        {
            "ctx": mx.gpu(0),
            "ews_arg1": (2, 10),
            "ews_arg0": (2, 10),
            "type_dict": {"ews_arg0": np.float64, "ews_arg1": np.float64},
        },
        {
            "ctx": mx.gpu(0),
            "ews_arg1": (2, 10),
            "ews_arg0": (2, 10),
            "type_dict": {"ews_arg0": np.float32, "ews_arg1": np.float32},
        },
        {
            "ctx": mx.gpu(0),
            "ews_arg1": (2, 10),
            "ews_arg0": (2, 10),
            "type_dict": {"ews_arg0": np.float16, "ews_arg1": np.float16},
        },
        {
            "ctx": mx.cpu(0),
            "ews_arg1": (2, 10),
            "ews_arg0": (2, 10),
            "type_dict": {"ews_arg0": np.float64, "ews_arg1": np.float64},
        },
        {
            "ctx": mx.cpu(0),
            "ews_arg1": (2, 10),
            "ews_arg0": (2, 10),
            "type_dict": {"ews_arg0": np.float32, "ews_arg1": np.float32},
        },
    ]
    check_consistency(sym, ctx_list)
Example #4
def test_load_000800():
    with mx.AttrScope(ctx_group='stage1'):
        data = mx.symbol.Variable('data', lr_mult=0.2)
        weight = mx.sym.Variable(name='fc1_weight', lr_mult=1.2)
        fc1  = mx.symbol.FullyConnected(data = data, weight=weight, name='fc1', num_hidden=128, wd_mult=0.3)
        act1 = mx.symbol.Activation(data = fc1, name='relu1', act_type="relu")

    set_stage1 = set(act1.list_arguments())
    with mx.AttrScope(ctx_group='stage2'):
        fc2  = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64, lr_mult=0.01)
        act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu")
        fc3  = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=10)
        fc3 = mx.symbol.BatchNorm(fc3, name='batchnorm0')
        sym1  = mx.symbol.SoftmaxOutput(data = fc3, name = 'softmax')

    curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    sym2 = mx.sym.load(os.path.join(curr_path, 'save_000800.json'))

    attr1 = sym1.attr_dict()
    attr2 = sym2.attr_dict()
    for k, v1 in attr1.items():
        assert k in attr2, k
        v2 = attr2[k]
        for kk, vv1 in v1.items():
            if kk.startswith('__') and kk.endswith('__'):
                assert kk in v2 and v2[kk] == vv1, k + str(v1) + str(v2)

    check_symbol_consistency(sym1, sym2,
        {'ctx': mx.cpu(0), 'group2ctx': {'stage1' : mx.cpu(1), 'stage2' : mx.cpu(2)}, 'data': (1,200)})
def test_activation_with_type():
    sym = mx.sym.Activation(name='act', act_type='sigmoid')
    ctx_list = [{'ctx': mx.gpu(0), 'act_data': (2, 2, 10, 10), 'type_dict': {'act_data': np.float64}},
                {'ctx': mx.gpu(0), 'act_data': (2, 2, 10, 10), 'type_dict': {'act_data': np.float32}},
                {'ctx': mx.cpu(0), 'act_data': (2, 2, 10, 10), 'type_dict': {'act_data': np.float64}},
                {'ctx': mx.cpu(0), 'act_data': (2, 2, 10, 10), 'type_dict': {'act_data': np.float32}}]
    check_consistency(sym, ctx_list)
def test_convolution_with_type():
    sym = mx.sym.Convolution(num_filter=3, kernel=(3,3), name='conv')
    ctx_list = [{'ctx': mx.gpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float64}},
                {'ctx': mx.gpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float32}},
                {'ctx': mx.cpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float64}},
                {'ctx': mx.cpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float32}}]
    check_consistency(sym, ctx_list)
def test_bucket_module_ctx_group():
    num_hidden = 10
    batch_size = 5
    def sym_gen(seq_len):
        with mx.AttrScope(ctx_group='dev1'):
            data = mx.symbol.Variable('data')
            weight = mx.symbol.Variable('dev1_weight')
            bias = mx.symbol.Variable('dev1_bias')
            fc = data
            for i in range(seq_len):
                fc  = mx.symbol.FullyConnected(data=fc, weight=weight, bias=bias,
                                               name='dev1_fc_%d' % i, num_hidden=num_hidden)
        with mx.AttrScope(ctx_group='dev2'):
            label = mx.symbol.Variable('label')
            weight = mx.symbol.Variable('dev2_weight')
            bias = mx.symbol.Variable('dev2_bias')
            for i in range(seq_len):
                fc  = mx.symbol.FullyConnected(data=fc, weight=weight, bias=bias,
                                               name='dev2_fc_%d' % i, num_hidden=num_hidden)
            sym = mx.symbol.SoftmaxOutput(fc, label, name='softmax')

        return sym, ('data',), ('label',)

    mod = mx.mod.BucketingModule(sym_gen=sym_gen, default_bucket_key=10, context=[mx.cpu(0)],
                                 group2ctxs=[{'dev1': mx.cpu(1), 'dev2': mx.cpu(2)}])
    mod.bind(data_shapes=[['data', (batch_size, num_hidden)]],
             label_shapes=[['label', (batch_size,)]],
             for_training=True, inputs_need_grad=True)
    assert(mod.binded)
def test_save_load():
    net = mx.gluon.model_zoo.vision.get_resnet(1, 18, pretrained=True)
    net.save_parameters('test_save_load.params')

    net = mx.gluon.model_zoo.vision.get_resnet(1, 18)
    net.output = mx.gluon.nn.Dense(1000)

    net.load_parameters('test_save_load.params')

    class Network(gluon.Block):
        def __init__(self, **kwargs):
            super(Network, self).__init__(**kwargs)
            with self.name_scope():
                self.encoders = gluon.nn.Sequential()
                with self.encoders.name_scope():
                    for _ in range(2):
                        lstm = mx.gluon.rnn.LSTM(200, 1, bidirectional=True)
                        self.encoders.add(lstm)

        def forward(self, x):
            for i in range(2):
                x = self.encoders[i](x)
            return x
    net = Network()
    net.initialize(mx.init.Xavier(), ctx=mx.cpu())
    net.hybridize()
    x = np.random.rand(32, 10, 10)
    x = mx.nd.array(x).as_in_context(mx.cpu())
    net(x)
    net.save_parameters('tmp.params')
    net2 = Network()
    net2.load_parameters('tmp.params')
def test_module_states():
    stack = mx.rnn.SequentialRNNCell()
    for i in range(2):
        stack.add(mx.rnn.LSTMCell(num_hidden=20, prefix='lstm_l%d_'%i))
    begin_state = stack.begin_state(func=mx.sym.Variable)
    _, states = stack.unroll(10, begin_state=begin_state, inputs=mx.sym.Variable('data'))

    state_names = [i.name for i in begin_state]
    mod = mx.mod.Module(mx.sym.Group(states), context=[mx.cpu(0), mx.cpu(1)],
                        label_names=None, state_names=state_names)
    mod.bind(data_shapes=[('data', (5, 10))], label_shapes=None, for_training=False)
    mod.init_params()
    batch = mx.io.DataBatch(data=[mx.nd.zeros((5, 10))], label=[])

    mod.set_states(value=1)
    mod.forward(batch)
    out = mod.get_outputs(merge_multi_context=False)
    out1 = mod.get_outputs(merge_multi_context=True)

    mod.set_states(states=out)
    mod.forward(batch)
    out2 = mod.get_outputs(merge_multi_context=True)

    for x1, x2 in zip(out1, out2):
        assert not mx.test_utils.almost_equal(x1.asnumpy(), x2.asnumpy(), rtol=1e-3)
def test_module_reshape():
    data = mx.sym.Variable('data')
    sym = mx.sym.FullyConnected(data, num_hidden=20, name='fc')

    dshape = (7, 20)
    mod = mx.mod.Module(sym, ('data',), None, context=[mx.cpu(0), mx.cpu(1)])
    mod.bind(data_shapes=[('data', dshape)])
    mod.init_params()
    mod.init_optimizer(optimizer_params={'learning_rate': 1})

    mod.forward(mx.io.DataBatch(data=[mx.nd.ones(dshape)],
                                label=None))
    mod.backward([mx.nd.ones(dshape)])
    mod.update()
    assert mod.get_outputs()[0].shape == dshape
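    # Module.init_optimizer defaults rescale_grad to 1/batch_size (1/7 here),
    # so with lr=1 one all-ones backward pass moves the zero-initialized bias to -1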
    assert (mod.get_params()[0]['fc_bias'].asnumpy() == -1).all()

    dshape = (14, 20)
    mod.reshape(data_shapes=[('data', dshape)])
    mod.forward(mx.io.DataBatch(data=[mx.nd.ones(dshape)],
                                label=None))
    mod.backward([mx.nd.ones(dshape)])
    mod.update()
    assert mod.get_outputs()[0].shape == dshape
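    # the optimizer still rescales by 1/7 from the original batch size, so the
    # (14, 20) all-ones batch steps the bias by -2, from -1 to -3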
    assert (mod.get_params()[0]['fc_bias'].asnumpy() == -3).all()
Example #11
def test_convolution_grouping():
    num_filter = 4
    num_group = 2
    kernel = (3, 3)
    shape = (1, 4, 9, 9)

    x = mx.sym.Variable('x')
    w = mx.sym.Variable('w')
    b = mx.sym.Variable('b')
    y1 = mx.sym.Convolution(data=x, weight=w, bias=b, num_filter=num_filter, num_group=num_group, kernel=kernel)
    xslice = mx.sym.SliceChannel(data=x, num_outputs=num_group, axis=1)
    wslice = mx.sym.SliceChannel(data=w, num_outputs=num_group, axis=0)
    bslice = mx.sym.SliceChannel(data=b, num_outputs=num_group, axis=0)
    y2 = mx.sym.Concat(*[mx.sym.Convolution(data=xslice[i], weight=wslice[i], bias=bslice[i],
                                            num_filter=num_filter//num_group, kernel=kernel)
                       for i in range(num_group)])

    exe1 = y1.simple_bind(mx.cpu(), x=shape)
    exe2 = y2.simple_bind(mx.cpu(), x=shape, w=(num_filter, shape[1]//num_group, kernel[0], kernel[1]), b=(num_filter,))
    for arr1, arr2 in zip(exe1.arg_arrays, exe2.arg_arrays):
        arr1[:] = np.random.normal(size=arr1.shape)
        arr2[:] = arr1
    exe1.forward(is_train=True)
    exe1.backward(exe1.outputs[0])
    exe2.forward(is_train=True)
    exe2.backward(exe2.outputs[0])

    for arr1, arr2 in zip(exe1.outputs + exe1.grad_arrays, exe2.outputs + exe2.grad_arrays):
        np.testing.assert_allclose(arr1.asnumpy(), arr2.asnumpy(), rtol=1e-3)
Example #12
    def __init__(self,
                 seq_len,
                 input_size,
                 num_hidden,
                 num_embed,
                 num_label,
                 arg_params,
                 ctx=mx.cpu(),
                 dropout=0.):
        self.sym = bi_lstm_inference_symbol(input_size, seq_len,
                                            num_hidden,
                                            num_embed,
                                            num_label,
                                            dropout)
        batch_size = 1
        init_c = [('l%d_init_c'%l, (batch_size, num_hidden)) for l in range(2)]
        init_h = [('l%d_init_h'%l, (batch_size, num_hidden)) for l in range(2)]
        
        data_shape = [("data", (batch_size, seq_len, ))]

        input_shapes = dict(init_c + init_h + data_shape)
        self.executor = self.sym.simple_bind(ctx=ctx, **input_shapes)

        for key in self.executor.arg_dict.keys():
            if key in arg_params:
                arg_params[key].copyto(self.executor.arg_dict[key])

        state_name = []
        for i in range(2):
            state_name.append("l%d_init_c" % i)
            state_name.append("l%d_init_h" % i)

        self.states_dict = dict(zip(state_name, self.executor.outputs[1:]))
        self.input_arr = mx.nd.zeros(data_shape[0][1])
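A plausible companion forward method for this inference wrapper (hypothetical; it assumes outputs[0] is the prediction, the remaining outputs are the new states as wired into states_dict above, and the bound arrays are reused between calls):

    def forward(self, input_data, new_seq=False):
        # optionally reset the recurrent state at sequence boundaries
        if new_seq:
            for key in self.states_dict:
                self.executor.arg_dict[key][:] = 0.
        input_data.copyto(self.executor.arg_dict["data"])
        self.executor.forward()
        # carry the output states over to the next call
        for key in self.states_dict:
            self.states_dict[key].copyto(self.executor.arg_dict[key])
        return self.executor.outputs[0].asnumpy()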
Example #13
def test_paramdict():
    params = gluon.ParameterDict('net_')
    params.get('weight', shape=(10, 10))
    assert list(params.keys()) == ['net_weight']
    params.initialize(ctx=mx.cpu())
    params.save('test.params')
    params.load('test.params', mx.cpu())
Example #14
def validate(val_data, val_dataset, net, ctx):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]

    val_metric.reset()

    from tqdm import tqdm
    for batch in tqdm(val_data):
        data, scale, center, score, imgid = val_batch_fn(batch, ctx)

        outputs = [net(X) for X in data]
        if opt.flip_test:
            data_flip = [nd.flip(X, axis=3) for X in data]
            outputs_flip = [net(X) for X in data_flip]
            outputs_flipback = [flip_heatmap(o, val_dataset.joint_pairs, shift=True) for o in outputs_flip]
            outputs = [(o + o_flip)/2 for o, o_flip in zip(outputs, outputs_flipback)]

        if len(outputs) > 1:
            outputs_stack = nd.concat(*[o.as_in_context(mx.cpu()) for o in outputs], dim=0)
        else:
            outputs_stack = outputs[0].as_in_context(mx.cpu())

        preds, maxvals = get_final_preds(outputs_stack, center.asnumpy(), scale.asnumpy())
        val_metric.update(preds, maxvals, score, imgid)

    res = val_metric.get()
    return res
def run_synthetic_SGLD():
    theta1 = 0
    theta2 = 1
    sigma1 = numpy.sqrt(10)
    sigma2 = 1
    sigmax = numpy.sqrt(2)
    X = load_synthetic(theta1=theta1, theta2=theta2, sigmax=sigmax, num=100)
    minibatch_size = 1
    total_iter_num = 1000000
    lr_scheduler = SGLDScheduler(begin_rate=0.01, end_rate=0.0001, total_iter_num=total_iter_num,
                                 factor=0.55)
    optimizer = mx.optimizer.create('sgld',
                                    learning_rate=None,
                                    rescale_grad=1.0,
                                    lr_scheduler=lr_scheduler,
                                    wd=0)
    updater = mx.optimizer.get_updater(optimizer)
    theta = mx.random.normal(0, 1, (2,), mx.cpu())
    grad = nd.empty((2,), mx.cpu())
    samples = numpy.zeros((2, total_iter_num))
    start = time.time()
    for i in range(total_iter_num):
        if (i + 1) % 100000 == 0:
            end = time.time()
            print("Iter:%d, Time spent: %f" % (i + 1, end - start))
            start = time.time()
        ind = numpy.random.randint(0, X.shape[0])
        synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax,
                       rescale_grad=X.shape[0] / float(minibatch_size), grad=grad)
        updater('theta', grad, theta)
        samples[:, i] = theta.asnumpy()
    plt.hist2d(samples[0, :], samples[1, :], (200, 200), cmap=plt.cm.jet)
    plt.colorbar()
    plt.show()
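SGLDScheduler is defined elsewhere in the example; one plausible definition, following the Welling & Teh (2011) step-size schedule a(b + t)^(-gamma) with a and b solved so the rate decays from begin_rate to end_rate over total_iter_num updates, is:

class SGLDScheduler(mx.lr_scheduler.LRScheduler):
    def __init__(self, begin_rate, end_rate, total_iter_num, factor):
        super(SGLDScheduler, self).__init__()
        if factor >= 1.0:
            raise ValueError("factor must be less than 1 for the rate to decay")
        # choose a, b so that rate(0) == begin_rate and rate(total_iter_num) == end_rate
        self.b = total_iter_num / ((begin_rate / end_rate) ** (1.0 / factor) - 1.0)
        self.a = begin_rate * (self.b ** factor)
        self.factor = factor

    def __call__(self, num_update):
        return self.a * ((self.b + num_update) ** (-self.factor))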
def test_parameter_sharing():
    class Net(gluon.Block):
        def __init__(self, in_units=0, **kwargs):
            super(Net, self).__init__(**kwargs)
            with self.name_scope():
                self.dense0 = nn.Dense(5, in_units=in_units)
                self.dense1 = nn.Dense(5, in_units=in_units)

        def forward(self, x):
            return self.dense1(self.dense0(x))

    net1 = Net(prefix='net1_', in_units=5)
    net2 = Net(prefix='net2_', params=net1.collect_params())
    net1.collect_params().initialize()
    net2(mx.nd.zeros((3, 5)))

    net1.save_parameters('net1.params')

    net3 = Net(prefix='net3_')
    net3.load_parameters('net1.params', mx.cpu())

    net4 = Net(prefix='net4_')
    net5 = Net(prefix='net5_', in_units=5, params=net4.collect_params())
    net4.collect_params().initialize()
    net5(mx.nd.zeros((3, 5)))

    net4.save_parameters('net4.params')

    net6 = Net(prefix='net6_')
    net6.load_parameters('net4.params', mx.cpu())
Example #17
def check_trainer_reset_kv(kv):
    params = gluon.ParameterDict()
    x = params.get('x', shape=(10,), lr_mult=1.0)
    params.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
    trainer = gluon.Trainer(params, 'sgd', {'learning_rate': 0.1}, kvstore=kv)
    params.save('test_trainer_reset_kv.params')
    with mx.autograd.record():
        for w in x.list_data():
            y = w + 1
            y.backward()
    trainer.step(1)
    assert trainer._kvstore.type == kv
    # load would reset kvstore
    mx.nd.waitall()
    params.load('test_trainer_reset_kv.params')
    if trainer._update_on_kvstore:
        # drop kvstore state if new parameters are loaded
        assert trainer._kvstore is None
        assert trainer._kv_initialized is False
    with mx.autograd.record():
        for w in x.list_data():
            y = w + 1
            y.backward()
    trainer.step(1)
    # the updated parameter should be based on the loaded checkpoint
    assert (x.data(mx.cpu()) == -0.2).asnumpy().all()
Example #18
def test_module_ctx_group():
    with mx.AttrScope(ctx_group='dev1'):
        a = mx.symbol.Variable('a')
        a = a * 2
    with mx.AttrScope(ctx_group='dev2'):
        b = mx.symbol.Variable('b')
        c = a + b
    shape = (2, 5)
    mod1 = mx.mod.Module(c, context=[mx.cpu(0)], data_names=['a', 'b'], label_names=None,
                         group2ctxs=[{'dev1':mx.cpu(1),'dev2':mx.cpu(2)}])
    mod1.bind(data_shapes=[['a', shape], ['b', shape]], inputs_need_grad=True)
    mod1.init_params()
    mod1.forward(data_batch=mx.io.DataBatch(data=[mx.nd.ones(shape), mx.nd.ones(shape)]), is_train=True)
    mod1.backward([mx.nd.ones(shape)])
    mod1_input_grads = mod1.get_input_grads()

    mod2 = mx.mod.Module(c, data_names=['a', 'b'], label_names=None)
    mod2.bind(data_shapes=[['a', shape], ['b', shape]], inputs_need_grad=True)
    mod2.init_params()
    mod2.forward(data_batch=mx.io.DataBatch(data=[mx.nd.ones(shape), mx.nd.ones(shape)]), is_train=True)
    mod2.backward([mx.nd.ones(shape)])
    mod2_input_grads = mod2.get_input_grads()

    assert np.all(mod1_input_grads[0].asnumpy() == mod2_input_grads[0].asnumpy())
    assert np.all(mod1_input_grads[1].asnumpy() == mod2_input_grads[1].asnumpy())
def test_concat_with_type():
    sym = mx.sym.Concat(name="concat", num_args=2)
    ctx_list = [
        {
            "ctx": mx.gpu(0),
            "concat_arg1": (2, 10),
            "concat_arg0": (2, 10),
            "type_dict": {"concat_arg0": np.float64, "concat_arg1": np.float64},
        },
        {
            "ctx": mx.gpu(0),
            "concat_arg1": (2, 10),
            "concat_arg0": (2, 10),
            "type_dict": {"concat_arg0": np.float32, "concat_arg1": np.float32},
        },
        {
            "ctx": mx.gpu(0),
            "concat_arg1": (2, 10),
            "concat_arg0": (2, 10),
            "type_dict": {"concat_arg0": np.float16, "concat_arg1": np.float16},
        },
        {
            "ctx": mx.cpu(0),
            "concat_arg1": (2, 10),
            "concat_arg0": (2, 10),
            "type_dict": {"concat_arg0": np.float64, "concat_arg1": np.float64},
        },
        {
            "ctx": mx.cpu(0),
            "concat_arg1": (2, 10),
            "concat_arg0": (2, 10),
            "type_dict": {"concat_arg0": np.float32, "concat_arg1": np.float32},
        },
    ]
    check_consistency(sym, ctx_list)
def test_fullyconnected_with_type():
    sym = mx.sym.FullyConnected(num_hidden=3, name='inner')
    ctx_list = [{'ctx': mx.gpu(0), 'inner_data': (2, 10), 'type_dict': {'inner_data': np.float64}},
                {'ctx': mx.gpu(0), 'inner_data': (2, 10), 'type_dict': {'inner_data': np.float32}},
                {'ctx': mx.cpu(0), 'inner_data': (2, 10), 'type_dict': {'inner_data': np.float64}},
                {'ctx': mx.cpu(0), 'inner_data': (2, 10), 'type_dict': {'inner_data': np.float32}}]
    check_consistency(sym, ctx_list)
def test_ctx_group():
    with mx.AttrScope(ctx_group='stage1'):
        data = mx.symbol.Variable('data')
        fc1  = mx.symbol.FullyConnected(data = data, name='fc1', num_hidden=128)
        act1 = mx.symbol.Activation(data = fc1, name='relu1', act_type="relu")

    set_stage1 = set(act1.list_arguments())
    with mx.AttrScope(ctx_group='stage2'):
        fc2  = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64)
        act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu")
        fc3  = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=10)
        fc3 = mx.symbol.BatchNorm(fc3)
        mlp  = mx.symbol.SoftmaxOutput(data = fc3, name = 'softmax')

    set_stage2 = set(mlp.list_arguments()) - set_stage1

    group2ctx = {
        'stage1' : mx.cpu(1),
        'stage2' : mx.cpu(2)
    }

    texec = mlp.simple_bind(mx.cpu(0),
                            group2ctx=group2ctx,
                            data=(1,200))

    for arr, name in zip(texec.arg_arrays, mlp.list_arguments()):
        if name in set_stage1:
            assert arr.context == group2ctx['stage1']
        else:
            assert arr.context == group2ctx['stage2']
Example #22
def test_parameter():
    p = gluon.Parameter('weight', shape=(10, 10))
    p.initialize(init='xavier', ctx=[mx.cpu(0), mx.cpu(1)])
    assert len(p.list_data()) == 2
    assert len(p.list_grad()) == 2
    assert p.data(mx.cpu(1)).context == mx.cpu(1)
    assert p.data(mx.cpu(0)).shape == (10, 10)
    assert p.var().name == 'weight'
Example #23
def test_upsampling_with_type():
    sym = mx.sym.UpSampling(scale=2, num_filter=2, name='up', sample_type = 'nearest', num_args=1)
    ctx_list = [{'ctx': mx.gpu(0), 'up_arg0': (2, 2, 2, 10), 'type_dict': {'up_arg0': np.float64}},
                {'ctx': mx.gpu(0), 'up_arg0': (2, 2, 2, 10), 'type_dict': {'up_arg0': np.float32}},
                {'ctx': mx.gpu(0), 'up_arg0': (2, 2, 2, 10), 'type_dict': {'up_arg0': np.float16}},
                {'ctx': mx.cpu(0), 'up_arg0': (2, 2, 2, 10), 'type_dict': {'up_arg0': np.float64}},
                {'ctx': mx.cpu(0), 'up_arg0': (2, 2, 2, 10), 'type_dict': {'up_arg0': np.float32}}]
    check_consistency(sym, ctx_list)
Example #24
def test_reshape_with_type():
    sym = mx.sym.Reshape(name='reshape', shape=(-1,1,1,0))
    ctx_list = [{'ctx': mx.gpu(0), 'reshape_data': (2, 2, 2, 10), 'type_dict': {'reshape_data': np.float64}},
                {'ctx': mx.gpu(0), 'reshape_data': (2, 2, 2, 10), 'type_dict': {'reshape_data': np.float32}},
                {'ctx': mx.gpu(0), 'reshape_data': (2, 2, 2, 10), 'type_dict': {'reshape_data': np.float16}},
                {'ctx': mx.cpu(0), 'reshape_data': (2, 2, 2, 10), 'type_dict': {'reshape_data': np.float64}},
                {'ctx': mx.cpu(0), 'reshape_data': (2, 2, 2, 10), 'type_dict': {'reshape_data': np.float32}}]
    check_consistency(sym, ctx_list)
Example #25
def test_blockgrad_with_type():
    sym = mx.sym.BlockGrad(name='bg')
    ctx_list = [{'ctx': mx.gpu(0), 'bg_data': (2, 2, 2, 10), 'type_dict': {'bg_data': np.float64}},
                {'ctx': mx.gpu(0), 'bg_data': (2, 2, 2, 10), 'type_dict': {'bg_data': np.float32}},
                {'ctx': mx.gpu(0), 'bg_data': (2, 2, 2, 10), 'type_dict': {'bg_data': np.float16}},
                {'ctx': mx.cpu(0), 'bg_data': (2, 2, 2, 10), 'type_dict': {'bg_data': np.float64}},
                {'ctx': mx.cpu(0), 'bg_data': (2, 2, 2, 10), 'type_dict': {'bg_data': np.float32}}]
    check_consistency(sym, ctx_list)
Example #26
def test_swapaxis_with_type():
    sym = mx.sym.SwapAxis(name='swap', dim1=1)
    ctx_list = [{'ctx': mx.gpu(0), 'swap_data': (2, 2, 2, 10), 'type_dict': {'swap_data': np.float64}},
                {'ctx': mx.gpu(0), 'swap_data': (2, 2, 2, 10), 'type_dict': {'swap_data': np.float32}},
                {'ctx': mx.gpu(0), 'swap_data': (2, 2, 2, 10), 'type_dict': {'swap_data': np.float16}},
                {'ctx': mx.cpu(0), 'swap_data': (2, 2, 2, 10), 'type_dict': {'swap_data': np.float64}},
                {'ctx': mx.cpu(0), 'swap_data': (2, 2, 2, 10), 'type_dict': {'swap_data': np.float32}}]
    check_consistency(sym, ctx_list)
Example #27
def test_wrapper(*args, **kwargs):
    try:
        a = mx.nd.zeros((1,), ctx=mx.cpu(cpu_id))
        ctx = mx.cpu(cpu_id)
    except Exception:
        ctx = mx.cpu(0)
    with ctx:
        orig_test(*args, **kwargs)
def get_extractor():
    model = mx.model.FeedForward.load('./resnet-50', 0, ctx=mx.cpu(), numpy_batch_size=1)
    fea_symbol = model.symbol.get_internals()["flatten0_output"]
    feature_extractor = mx.model.FeedForward(ctx=mx.cpu(), symbol=fea_symbol, numpy_batch_size=64,
                                             arg_params=model.arg_params, aux_params=model.aux_params,
                                             allow_extra_params=True)

    return feature_extractor
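A brief usage sketch (hypothetical; `images` stands for an NCHW float batch preprocessed the way the resnet-50 checkpoint expects):

extractor = get_extractor()
# predict on the internal "flatten0_output" symbol: one feature vector per image
features = extractor.predict(images)
print(features.shape)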
Example #29
def test_deconvolution_with_type():
    sym = mx.sym.Deconvolution(num_filter=2, kernel=(3,3), name='deconv')
    ctx_list = [{'ctx': mx.gpu(0), 'deconv_data': (2, 2, 10, 10), 'type_dict': {'deconv_data': np.float64}},
                {'ctx': mx.gpu(0), 'deconv_data': (2, 2, 10, 10), 'type_dict': {'deconv_data': np.float32}},
                {'ctx': mx.gpu(0), 'deconv_data': (2, 2, 10, 10), 'type_dict': {'deconv_data': np.float16}},
                {'ctx': mx.cpu(0), 'deconv_data': (2, 2, 10, 10), 'type_dict': {'deconv_data': np.float64}},
                {'ctx': mx.cpu(0), 'deconv_data': (2, 2, 10, 10), 'type_dict': {'deconv_data': np.float32}}]
    check_consistency(sym, ctx_list)
    check_consistency(sym, ctx_list, grad_req="add")
Example #30
def test_svmoutput_with_type():
    sym = mx.sym.SVMOutput(name='svmoutput', use_linear=True)
    ctx_list = [{'ctx': mx.gpu(0), 'svmoutput_data': (20, 10), 'type_dict': {'svmoutput_data': np.float64}},
                {'ctx': mx.gpu(0), 'svmoutput_data': (20, 10), 'type_dict': {'svmoutput_data': np.float32}},
                {'ctx': mx.gpu(0), 'svmoutput_data': (20, 10), 'type_dict': {'svmoutput_data': np.float16}},
                {'ctx': mx.cpu(0), 'svmoutput_data': (20, 10), 'type_dict': {'svmoutput_data': np.float64}},
                {'ctx': mx.cpu(0), 'svmoutput_data': (20, 10), 'type_dict': {'svmoutput_data': np.float32}},
                {'ctx': mx.cpu(0), 'svmoutput_data': (20, 10), 'type_dict': {'svmoutput_data': np.float16}}]
    check_consistency(sym, ctx_list)
def evaluate(data_eval, model, nsp_loss, mlm_loss, vocab_size, ctx,
             log_interval, dtype):
    """Evaluation function."""
    logging.info('Running evaluation ... ')
    mlm_metric = MaskedAccuracy()
    nsp_metric = MaskedAccuracy()
    mlm_metric.reset()
    nsp_metric.reset()

    eval_begin_time = time.time()
    begin_time = time.time()
    step_num = 0
    running_mlm_loss = running_nsp_loss = 0
    total_mlm_loss = total_nsp_loss = 0
    running_num_tks = 0
    for _, dataloader in enumerate(data_eval):
        for _, data_batch in enumerate(dataloader):
            step_num += 1

            data_list = split_and_load(data_batch, ctx)
            loss_list = []
            ns_label_list, ns_pred_list = [], []
            mask_label_list, mask_pred_list, mask_weight_list = [], [], []
            for data in data_list:
                out = forward(data, model, mlm_loss, nsp_loss, vocab_size,
                              dtype)
                (ls, next_sentence_label, classified, masked_id, decoded,
                 masked_weight, ls1, ls2, valid_length) = out
                loss_list.append(ls)
                ns_label_list.append(next_sentence_label)
                ns_pred_list.append(classified)
                mask_label_list.append(masked_id)
                mask_pred_list.append(decoded)
                mask_weight_list.append(masked_weight)

                running_mlm_loss += ls1.as_in_context(mx.cpu())
                running_nsp_loss += ls2.as_in_context(mx.cpu())
                running_num_tks += valid_length.sum().as_in_context(mx.cpu())
            nsp_metric.update(ns_label_list, ns_pred_list)
            mlm_metric.update(mask_label_list, mask_pred_list,
                              mask_weight_list)

            # logging
            if (step_num + 1) % (log_interval) == 0:
                total_mlm_loss += running_mlm_loss
                total_nsp_loss += running_nsp_loss
                log(begin_time, running_num_tks, running_mlm_loss,
                    running_nsp_loss, step_num, mlm_metric, nsp_metric, None,
                    log_interval)
                begin_time = time.time()
                running_mlm_loss = running_nsp_loss = running_num_tks = 0
                mlm_metric.reset_local()
                nsp_metric.reset_local()

    mx.nd.waitall()
    eval_end_time = time.time()
    # accumulate losses from last few batches, too
    if running_mlm_loss != 0:
        total_mlm_loss += running_mlm_loss
        total_nsp_loss += running_nsp_loss
    total_mlm_loss /= step_num
    total_nsp_loss /= step_num
    logging.info(
        'Eval mlm_loss={:.3f}\tmlm_acc={:.1f}\tnsp_loss={:.3f}\tnsp_acc={:.1f}\t'
        .format(total_mlm_loss.asscalar(),
                mlm_metric.get_global()[1] * 100, total_nsp_loss.asscalar(),
                nsp_metric.get_global()[1] * 100))
    logging.info('Eval cost={:.1f}s'.format(eval_end_time - eval_begin_time))
Example #32
# We use CSV so that the data does not all have to sit in memory
# You can also use an in-memory numpy array if your machine is large enough
encode_csv("./train-label.csv", "./train-stytole.csv", "./train-diastole.csv")

num_epoch = 35
learning_rate = 0.01
wd = 0.00001
momentum = 0.95

# Training the stytole net
MXNET_CPU_WORKER_NTHREADS = 32

network = get_lenet()
batch_size = 32
devs = [mx.cpu(8)]
data_train = mx.io.CSVIter(data_csv="./train-64x64-data.csv",
                           data_shape=(30, 64, 64),
                           label_csv="./train-stytole.csv",
                           label_shape=(600, ),
                           batch_size=batch_size)

data_validate = mx.io.CSVIter(data_csv="./validate-64x64-data.csv",
                              data_shape=(30, 64, 64),
                              batch_size=1)

stytole_model = mx.model.FeedForward(ctx=devs,
                                     symbol=network,
                                     num_epoch=num_epoch,
                                     learning_rate=learning_rate,
                                     wd=wd,
                                     momentum=momentum)
# define mlp

data = mx.symbol.Variable('data')
fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=128)
act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")
fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=64)
act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu")
fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=10)
#mlp = mx.symbol.Softmax(data = fc3, name = 'softmax')
mlp = mx.symbol.Custom(data=fc3, name='softmax', op_type='softmax')

# data

train, val = MNISTIterator(batch_size=100, input_shape=(784, ))

# train

logging.basicConfig(level=logging.DEBUG)

# MXNET_CPU_WORKER_NTHREADS must be greater than 1 for custom op to work on CPU
model = mx.model.FeedForward(ctx=mx.cpu(0),
                             symbol=mlp,
                             num_epoch=20,
                             learning_rate=0.1,
                             momentum=0.9,
                             wd=0.00001)

model.fit(X=train,
          eval_data=val,
          batch_end_callback=mx.callback.Speedometer(100, 100))
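The op_type='softmax' custom operator used above is registered elsewhere; a minimal sketch of how such an operator can be registered with mx.operator.CustomOp / CustomOpProp (numpy forward/backward, label as the second input) could look like:

class Softmax(mx.operator.CustomOp):
    def forward(self, is_train, req, in_data, out_data, aux):
        x = in_data[0].asnumpy()
        # numerically stable row-wise softmax
        y = np.exp(x - x.max(axis=1).reshape((x.shape[0], 1)))
        y /= y.sum(axis=1).reshape((x.shape[0], 1))
        self.assign(out_data[0], req[0], mx.nd.array(y))

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        label = in_data[1].asnumpy().ravel().astype(np.int64)
        y = out_data[0].asnumpy()
        y[np.arange(label.shape[0]), label] -= 1.0  # softmax cross-entropy gradient
        self.assign(in_grad[0], req[0], mx.nd.array(y))

@mx.operator.register("softmax")
class SoftmaxProp(mx.operator.CustomOpProp):
    def __init__(self):
        # loss layer: no gradient flows in from above
        super(SoftmaxProp, self).__init__(need_top_grad=False)

    def list_arguments(self):
        return ['data', 'label']

    def list_outputs(self):
        return ['output']

    def infer_shape(self, in_shape):
        data_shape = in_shape[0]
        label_shape = (in_shape[0][0],)
        return [data_shape, label_shape], [data_shape], []

    def create_operator(self, ctx, shapes, dtypes):
        return Softmax()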
Example #34
import mxnet as mx
import mxnet.ndarray as nd
import mxnet.autograd as autograd
from matplotlib import pyplot as plt
from data_preprocessing import data_preprocessing
from tqdm import *
import urllib
import os

def artistic_Image(noise_image, image_size):
    image = noise_image.reshape((-1, ) + image_size)
    r, g, b = nd.split(image, axis=0, num_outputs=3)
    #Denormalization by JG
    r = nd.multiply(r, 0.229) + 0.485
    g = nd.multiply(g, 0.224) + 0.456
    b = nd.multiply(b, 0.225) + 0.406
    image = nd.concat(r, g, b, dim=0)
    '''
    matplotlib supports float32 and uint8 data types. For grayscale, matplotlib supports only float32. 
    If your array data does not meet one of these descriptions, you need to rescale it.
    '''
    image = nd.transpose(image, axes=(1, 2, 0))
    image = nd.clip(image, a_min=0, a_max=1)
    image = nd.multiply(image, 255)
    image = nd.clip(image, a_min=0, a_max=255).astype('uint8')
    plt.imshow(image.asnumpy())
    plt.savefig("Artistic Image.png", dpi=200)

def neuralstyle(epoch=1000,
                show_period=100,
Example #35
def get_res2net(blocks,
                width,
                scale,
                model_name=None,
                pretrained=False,
                ctx=cpu(),
                root=os.path.join("~", ".mxnet", "models"),
                **kwargs):
    """
    Create Res2Net model with specific parameters.

    Parameters
    ----------
    blocks : int
        Number of blocks.
    width : int
        Width of filters.
    scale : int
        Number of scales.
    model_name : str or None, default None
        Model name for loading pretrained model.
    pretrained : bool, default False
        Whether to load the pretrained weights for model.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.
    """
    bottleneck = True

    if blocks == 50:
        layers = [3, 4, 6, 3]
    elif blocks == 101:
        layers = [3, 4, 23, 3]
    elif blocks == 152:
        layers = [3, 8, 36, 3]
    else:
        raise ValueError(
            "Unsupported Res2Net with number of blocks: {}".format(blocks))

    assert (sum(layers) * 3 + 2 == blocks)

    init_block_channels = 64
    channels_per_layers = [64, 128, 256, 512]

    if bottleneck:
        bottleneck_factor = 4
        channels_per_layers = [
            ci * bottleneck_factor for ci in channels_per_layers
        ]

    channels = [[ci] * li for (ci, li) in zip(channels_per_layers, layers)]

    net = Res2Net(channels=channels,
                  init_block_channels=init_block_channels,
                  width=width,
                  scale=scale,
                  **kwargs)

    if pretrained:
        if (model_name is None) or (not model_name):
            raise ValueError(
                "Parameter `model_name` should be properly initialized for loading pretrained model."
            )
        from .model_store import get_model_file
        net.load_parameters(filename=get_model_file(
            model_name=model_name, local_model_store_dir_path=root),
                            ctx=ctx)

    return net
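A short usage sketch (the width/scale values and model_name string below are illustrative; loading pretrained weights requires a name known to get_model_file):

net = get_res2net(blocks=50, width=26, scale=4, model_name="res2net50_w26_s4")
net.initialize(ctx=mx.cpu())
y = net(mx.nd.zeros((1, 3, 224, 224)))  # forward one dummy ImageNet-sized image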
Example #36
        self.critic_network.save_parameters(
            'A2C_CartPole_critic_network.params')

    def load(self):
        self.actor_network.load_parameters('A2C_CartPole_actor_network.params')
        self.critic_network.load_parameters(
            'A2C_CartPole_critic_network.params')


if __name__ == '__main__':
    seed = 77777777
    np.random.seed(seed)
    mx.random.seed(seed)
    env = gym.make('CartPole-v0').unwrapped
    env.seed(seed)
    ctx = mx.cpu()
    render = False

    agent = A2C(gamma=0.99,
                action_dim=env.action_space.n,
                observation_dim=env.observation_space.shape[0],
                ctx=ctx)

    episode_reward_list = []
    max_episodes = 400
    max_episode_steps = 500
    for episode in range(max_episodes):
        state = env.reset()
        episode_reward = 0
        for episode_step in range(max_episode_steps):
            if render:
# The first fully-connected layer and the corresponding activation function
fc1  = mx.sym.FullyConnected(data=data, num_hidden=128)
act1 = mx.sym.Activation(data=fc1, act_type="relu")

# The second fully-connected layer and the corresponding activation function
fc2  = mx.sym.FullyConnected(data=act1, num_hidden = 64)
act2 = mx.sym.Activation(data=fc2, act_type="relu")

# MNIST has 10 classes
fc3  = mx.sym.FullyConnected(data=act2, num_hidden=10)

# Softmax with cross entropy loss
mlp  = mx.sym.SoftmaxOutput(data=fc3, name='softmax')

# create a trainable module on CPU
mlp_model = mx.mod.Module(symbol=mlp, context=mx.cpu())

# training & logging time
times = []
for _ in range(10):
    batch_size = 100
    train_iter = mx.io.NDArrayIter(mnist['train_data'], mnist['train_label'], batch_size, shuffle=True)
    val_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], batch_size)
    ts = time.time()
    mlp_model.fit(train_iter,  # train data
                  eval_data=val_iter,  # validation data
                  optimizer='sgd',  # use SGD to train
                  optimizer_params={'learning_rate': 0.1},  # use fixed learning rate
                  eval_metric='acc',  # report accuracy during training
                  batch_end_callback=mx.callback.Speedometer(batch_size, 100),  # output progress for each 100 data batches
                  num_epoch=10)  # train for at most 10 dataset passes
    times.append(time.time() - ts)
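A tiny follow-up sketch to summarize the timings collected above (np assumed imported, as elsewhere in these examples):

print("mean time per 10-epoch run: %.1fs (+/- %.1fs)"
      % (np.mean(times), np.std(times)))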
Example #38
def fit(args, network, data_loader, **kwargs):
    """
    train a model
    args : argparse returns
    network : the symbol definition of the neural network
    data_loader : function that returns the train and val data iterators
    """
    # kvstore
    kv = mx.kvstore.create(args.kv_store)

    # logging
    head = '%(asctime)-15s Node[' + str(kv.rank) + '] %(message)s'
    logging.basicConfig(level=logging.DEBUG, format=head)
    logging.info('start with arguments %s', args)

    # data iterators
    (train, val) = data_loader(args, kv)
    if args.test_io:
        tic = time.time()
        for i, batch in enumerate(train):
            for j in batch.data:
                j.wait_to_read()
            if (i+1) % args.disp_batches == 0:
                logging.info('Batch [%d]\tSpeed: %.2f samples/sec' % (
                    i, args.disp_batches*args.batch_size/(time.time()-tic)))
                tic = time.time()

        return


    # load model
    if 'arg_params' in kwargs and 'aux_params' in kwargs:
        arg_params = kwargs['arg_params']
        aux_params = kwargs['aux_params']
    else:
        sym, arg_params, aux_params = _load_model(args, kv.rank)
        if sym is not None:
            assert sym.tojson() == network.tojson()

    # save model
    checkpoint = _save_model(args, kv.rank)

    # devices for training
    devs = mx.cpu() if args.gpus is None or args.gpus == '' else [
        mx.gpu(int(i)) for i in args.gpus.split(',')]

    # learning rate
    lr, lr_scheduler = _get_lr_scheduler(args, kv)

    # create model
    model = mx.mod.Module(
        context       = devs,
        symbol        = network
    )

    optimizer_params = {
            'learning_rate': lr,
            'momentum' : args.mom,
            'wd' : args.wd,
            'lr_scheduler': lr_scheduler}

    monitor = mx.mon.Monitor(args.monitor, pattern=".*") if args.monitor > 0 else None

    if args.network == 'alexnet':
        # AlexNet will not converge using Xavier
        initializer = mx.init.Normal()
    else:
        initializer = mx.init.Xavier(
            rnd_type='gaussian', factor_type="in", magnitude=2)
    # initializer   = mx.init.Xavier(factor_type="in", magnitude=2.34),

    # evaluation metrics
    eval_metrics = ['accuracy']
    if args.top_k > 0:
        eval_metrics.append(mx.metric.create('top_k_accuracy', top_k=args.top_k))

    # callbacks that run after each batch
    batch_end_callbacks = [mx.callback.Speedometer(args.batch_size, args.disp_batches)]
    if 'batch_end_callback' in kwargs:
        cbs = kwargs['batch_end_callback']
        batch_end_callbacks += cbs if isinstance(cbs, list) else [cbs]

    # run
    model.fit(train,
        begin_epoch        = args.load_epoch if args.load_epoch else 0,
        num_epoch          = args.num_epochs,
        eval_data          = val,
        eval_metric        = eval_metrics,
        kvstore            = kv,
        optimizer          = args.optimizer,
        optimizer_params   = optimizer_params,
        initializer        = initializer,
        arg_params         = arg_params,
        aux_params         = aux_params,
        batch_end_callback = batch_end_callbacks,
        epoch_end_callback = checkpoint,
        allow_missing      = True,
        monitor            = monitor)
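The _load_model, _save_model and _get_lr_scheduler helpers live alongside this fit wrapper; a plausible sketch of _get_lr_scheduler, assuming args carries lr, lr_factor, lr_step_epochs, num_examples, batch_size and load_epoch (all names mirroring the calls above), is:

def _get_lr_scheduler(args, kv):
    # no scheduler needed when the learning rate never decays
    if args.lr_factor is None or args.lr_factor >= 1:
        return (args.lr, None)
    epoch_size = args.num_examples // args.batch_size
    begin_epoch = args.load_epoch if args.load_epoch else 0
    step_epochs = [int(s) for s in args.lr_step_epochs.split(',')]
    # when resuming, apply the decays that already happened
    lr = args.lr
    for s in step_epochs:
        if begin_epoch >= s:
            lr *= args.lr_factor
    steps = [epoch_size * (s - begin_epoch) for s in step_epochs if s > begin_epoch]
    return (lr, mx.lr_scheduler.MultiFactorScheduler(step=steps, factor=args.lr_factor))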
Example #39
    def fit(self,
            train_data,
            eval_data=None,
            eval_metric='acc',
            grad_req='write',
            epoch_end_callback=None,
            batch_end_callback=None,
            kvstore='local',
            logger=None):
        global outimgiter
        if logger is None:
            logger = logging
        logger.info('Start training with %s', str(self.ctx))
        logger.info(str(self.kwargs))
        batch_size = train_data.provide_data[0][1][0]
        arg_shapes, out_shapes, aux_shapes = self.symbol.infer_shape( \
            data=tuple(train_data.provide_data[0][1]), label_det=(batch_size,200,6))
        arg_names = self.symbol.list_arguments()
        out_names = self.symbol.list_outputs()
        aux_names = self.symbol.list_auxiliary_states()

        # pprint([(n,s) for n,s in zip(arg_names,arg_shapes)])
        # pprint([(n,s) for n,s in zip(out_names,out_shapes)])
        # pprint([(n,s) for n,s in zip(aux_names,aux_shapes)])

        if grad_req != 'null':
            self.grad_params = {}
            for name, shape in zip(arg_names, arg_shapes):
                if not (name.endswith('data') or name.endswith('label')):
                    self.grad_params[name] = mx.nd.zeros(shape, self.ctx)
        else:
            self.grad_params = None
        self.aux_params = {
            k: mx.nd.zeros(s, self.ctx)
            for k, s in zip(aux_names, aux_shapes)
        }
        data_name = train_data.provide_data[0][0]
        label_name_det = train_data.provide_label[0][0]
        label_name_seg = train_data.provide_label[1][0]
        input_names = [data_name, label_name_det, label_name_seg]

        print(train_data.provide_label)
        # print(os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"])

        self.optimizer = opt.create(self.optimizer,
                                    rescale_grad=(1.0 / train_data.batch_size),
                                    **(self.kwargs))
        self.updater = get_updater(self.optimizer)
        eval_metric = CustomAccuracyMetric()  # metric.create(eval_metric)
        multibox_metric = MultiBoxMetric()

        eval_metrics = metric.CompositeEvalMetric()
        eval_metrics.add(multibox_metric)
        eval_metrics.add(eval_metric)

        # begin training
        for epoch in range(self.begin_epoch, self.num_epoch):
            nbatch = 0
            train_data.reset()
            eval_metrics.reset()
            logger.info('learning rate: ' + str(self.optimizer.learning_rate))
            for data, _ in train_data:
                if self.evaluation_only:
                    break
                nbatch += 1
                label_shape_det = data.label[0].shape
                label_shape_seg = data.label[1].shape
                self.arg_params[data_name] = mx.nd.array(
                    data.data[0], self.ctx)
                self.arg_params[label_name_det] = mx.nd.array(
                    data.label[0], self.ctx)
                self.arg_params[label_name_seg] = mx.nd.array(
                    data.label[1], self.ctx)
                output_names = self.symbol.list_outputs()

                ###################### analyze shapes ####################
                # pprint([(k,v.shape) for k,v in self.arg_params.items()])

                self.executor = self.symbol.bind(self.ctx,
                                                 self.arg_params,
                                                 args_grad=self.grad_params,
                                                 grad_req=grad_req,
                                                 aux_states=self.aux_params)
                assert len(self.symbol.list_arguments()) == len(
                    self.executor.grad_arrays)
                update_dict = {name: nd for name, nd in zip(self.symbol.list_arguments(), \
                    self.executor.grad_arrays) if nd is not None}
                output_dict = {}
                output_buff = {}
                for key, arr in zip(self.symbol.list_outputs(),
                                    self.executor.outputs):
                    output_dict[key] = arr
                    output_buff[key] = mx.nd.empty(arr.shape, ctx=mx.cpu())
                    # output_buff[key] = mx.nd.empty(arr.shape, ctx=self.ctx)

                def stat_helper(name, array):
                    """wrapper for executor callback"""
                    import ctypes
                    from mxnet.ndarray import NDArray
                    from mxnet.base import NDArrayHandle, py_str
                    array = ctypes.cast(array, NDArrayHandle)
                    if 0:
                        array = NDArray(array, writable=False).asnumpy()
                        print(name, array.shape, np.mean(array), np.std(array),
                              ('%.1fms' %
                               (float(time.time() - stat_helper.start_time) *
                                1000)))
                    else:
                        array = NDArray(array, writable=False)
                        array.wait_to_read()
                        elapsed = float(time.time() -
                                        stat_helper.start_time) * 1000.
                        if elapsed > 0:
                            print(name, array.shape, ('%.1fms' % (elapsed, )))
                    stat_helper.start_time = time.time()

                stat_helper.start_time = float(time.time())
                # self.executor.set_monitor_callback(stat_helper)

                tic = time.time()

                self.executor.forward(is_train=True)
                for key in output_dict:
                    output_dict[key].copyto(output_buff[key])

                # exit(0) # for debugging forward pass only

                self.executor.backward()
                for key, arr in update_dict.items():
                    if key != "bigscore_weight":
                        self.updater(key, arr, self.arg_params[key])

                for output in self.executor.outputs:
                    output.wait_to_read()
                if TIMING:
                    print("%.0fms" % ((time.time() - tic) * 1000., ))

                output_dict = dict(zip(output_names, self.executor.outputs))
                pred_det_shape = output_dict["det_out_output"].shape
                pred_seg_shape = output_dict["seg_out_output"].shape
                label_det = mx.nd.array(data.label[0].reshape(
                    (label_shape_det[0],
                     label_shape_det[1] * label_shape_det[2])))
                label_seg = mx.nd.array(data.label[1].reshape(
                    (label_shape_seg[0],
                     label_shape_seg[1] * label_shape_seg[2])))
                pred_det = mx.nd.array(output_buff["det_out_output"].reshape(
                    (pred_det_shape[0], pred_det_shape[1], pred_det_shape[2])))
                pred_seg = mx.nd.array(output_buff["seg_out_output"].reshape(
                    (pred_seg_shape[0], pred_seg_shape[1],
                     pred_seg_shape[2] * pred_seg_shape[3])))
                if DEBUG:
                    print(data.label[0].asnumpy()[0, :2, :])

                if TIMING:
                    print("%.0fms" % ((time.time() - tic) * 1000., ))

                eval_metrics.get_metric(0).update([
                    mx.nd.zeros(output_buff["cls_prob_output"].shape),
                    mx.nd.zeros(output_buff["loc_loss_output"].shape),
                    label_det
                ], [
                    output_buff["cls_prob_output"],
                    output_buff["loc_loss_output"],
                    output_buff["cls_label_output"]
                ])
                eval_metrics.get_metric(1).update(
                    [label_seg.as_in_context(self.ctx)],
                    [pred_seg.as_in_context(self.ctx)])

                self.executor.outputs[0].wait_to_read()

                ##################### display results ##############################
                # out_img = output_dict["seg_out_output"].asnumpy()
                # out_det = output_dict["det_out_output"].asnumpy()
                # for imgidx in range(out_img.shape[0]):
                #     res_img = np.squeeze(out_img[imgidx,:,:].argmax(axis=0).astype(np.uint8))
                #     label_img = data.label[1].asnumpy()[imgidx,:,:].astype(np.uint8)
                #     img = np.squeeze(data.data[0].asnumpy()[imgidx,:,:,:])
                #     det = out_det[imgidx,:,:]
                #     gt = label_det.asnumpy()[imgidx,:].reshape((-1,6))
                #     display_results(res_img,np.expand_dims(label_img,axis=0),img, det, gt, self.class_names)
                #     [exit(0) if (cv2.waitKey()&0xff)==27 else None]
                # outimgiter += 1

                batch_end_params = BatchEndParam(epoch=epoch,
                                                 nbatch=nbatch,
                                                 eval_metric=eval_metrics)
                batch_end_callback(batch_end_params)

                if TIMING:
                    print("%.0fms" % ((time.time() - tic) * 1000., ))

                # exit(0) # for debugging only

            ##### save snapshot
            if (not self.evaluation_only) and (epoch_end_callback is not None):
                epoch_end_callback(epoch, self.symbol, self.arg_params,
                                   self.aux_params)

            names, values = eval_metrics.get()
            for name, value in zip(names, values):
                logger.info("                     --->Epoch[%d] Train-%s=%f",
                            epoch, name, value)

            # evaluation
            if eval_data:
                logger.info(" in eval process...")
                nbatch = 0
                depth_metric = DistanceAccuracyMetric(
                    class_names=self.class_names)
                eval_data.reset()
                eval_metrics.reset()
                self.valid_metric.reset()
                depth_metric.reset()
                timing_results = []
                for data, fnames in eval_data:
                    nbatch += 1
                    label_shape_det = data.label[0].shape
                    label_shape_seg = data.label[1].shape
                    self.arg_params[data_name] = mx.nd.array(
                        data.data[0], self.ctx)
                    self.arg_params[label_name_det] = mx.nd.array(
                        data.label[0], self.ctx)
                    self.arg_params[label_name_seg] = mx.nd.array(
                        data.label[1], self.ctx)
                    self.executor = self.symbol.bind(
                        self.ctx,
                        self.arg_params,
                        args_grad=self.grad_params,
                        grad_req=grad_req,
                        aux_states=self.aux_params)

                    output_names = self.symbol.list_outputs()
                    output_dict = dict(zip(output_names,
                                           self.executor.outputs))

                    cpu_output_array = mx.nd.zeros(
                        output_dict["seg_out_output"].shape)

                    ############## monitor status
                    # def stat_helper(name, array):
                    #     """wrapper for executor callback"""
                    #     import ctypes
                    #     from mxnet.ndarray import NDArray
                    #     from mxnet.base import NDArrayHandle, py_str
                    #     array = ctypes.cast(array, NDArrayHandle)
                    #     if 1:
                    #         array = NDArray(array, writable=False).asnumpy()
                    #         print (name, array.shape, np.mean(array), np.std(array),
                    #                ('%.1fms' % (float(time.time()-stat_helper.start_time)*1000)))
                    #     else:
                    #         array = NDArray(array, writable=False)
                    #         array.wait_to_read()
                    #         elapsed = float(time.time()-stat_helper.start_time)*1000.
                    #         if elapsed>5:
                    #             print (name, array.shape, ('%.1fms' % (elapsed,)))
                    #     stat_helper.start_time=time.time()
                    # stat_helper.start_time=float(time.time())
                    # self.executor.set_monitor_callback(stat_helper)

                    ############## forward
                    tic = time.time()
                    self.executor.forward(is_train=True)
                    output_dict["seg_out_output"].wait_to_read()
                    timing_results.append((time.time() - tic) * 1000.)

                    output_dict["seg_out_output"].copyto(cpu_output_array)
                    pred_shape = output_dict["seg_out_output"].shape
                    label = mx.nd.array(data.label[1].reshape(
                        (label_shape_seg[0],
                         label_shape_seg[1] * label_shape_seg[2])))
                    output_dict["seg_out_output"].wait_to_read()
                    seg_out_output = output_dict["seg_out_output"].asnumpy()

                    pred_det_shape = output_dict["det_out_output"].shape
                    pred_seg_shape = output_dict["seg_out_output"].shape
                    label_det = mx.nd.array(data.label[0].reshape(
                        (label_shape_det[0],
                         label_shape_det[1] * label_shape_det[2])))
                    label_seg = mx.nd.array(data.label[1].reshape(
                        (label_shape_seg[0],
                         label_shape_seg[1] * label_shape_seg[2])),
                                            ctx=self.ctx)
                    pred_det = mx.nd.array(
                        output_dict["det_out_output"].reshape(
                            (pred_det_shape[0], pred_det_shape[1],
                             pred_det_shape[2])))
                    pred_seg = mx.nd.array(
                        output_dict["seg_out_output"].reshape(
                            (pred_seg_shape[0], pred_seg_shape[1],
                             pred_seg_shape[2] * pred_seg_shape[3])),
                        ctx=self.ctx)

                    #### remove invalid boxes
                    out_dets = output_dict["det_out_output"].asnumpy()
                    assert len(out_dets.shape) == 3
                    pred_det = np.zeros((batch_size, 200, 7), np.float32) - 1.
                    for idx, out_det in enumerate(out_dets):
                        assert len(out_det.shape) == 2
                        out_det = np.expand_dims(out_det, axis=0)
                        indices = np.where(
                            out_det[:, :, 0] >= 0)  # drop boxes labeled -1 (invalid)
                        out_det = np.expand_dims(out_det[indices[0],
                                                         indices[1], :],
                                                 axis=0)
                        indices = np.where(
                            out_det[:, :, 1] > .25)  # keep detections with confidence > 0.25
                        out_det = np.expand_dims(out_det[indices[0],
                                                         indices[1], :],
                                                 axis=0)
                        pred_det[idx, :out_det.shape[1], :] = out_det
                        del out_det
                    pred_det = mx.nd.array(pred_det)

                    ##### display results
                    if self.evaluation_only:
                        out_img = output_dict["seg_out_output"]
                        out_img = mx.nd.split(out_img,
                                              axis=0,
                                              num_outputs=out_img.shape[0],
                                              squeeze_axis=0)
                        if not isinstance(out_img, list):
                            out_img = [out_img]
                        for imgidx in range(eval_data.batch_size):
                            ### segmentation
                            seg_prob = out_img[imgidx]
                            seg_prob = mx.nd.array(np.squeeze(
                                seg_prob.asnumpy(), axis=(0, )),
                                                   ctx=self.ctx)
                            res_img = np.squeeze(seg_prob.asnumpy().argmax(
                                axis=0).astype(np.uint8))
                            # res_img = np.squeeze(out_img[imgidx,:,:].argmax(axis=0).astype(np.uint8))
                            label_img = data.label[1].asnumpy()[
                                imgidx, :, :].astype(np.uint8)
                            img = np.squeeze(
                                data.data[0].asnumpy()[imgidx, :, :, :])
                            det = pred_det.asnumpy()[imgidx, :, :]
                            ### ground-truth
                            gt = label_det.asnumpy()[imgidx, :].reshape(
                                (-1, 6))
                            # save to the Results folder for evaluation
                            res_fname = fnames[imgidx].replace(
                                "SegmentationClass",
                                "Results").replace("gtFine_labelTrainIds",
                                                   "results")
                            lut = np.zeros(256)
                            lut[:19] = np.array([
                                7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24,
                                25, 26, 27, 28, 31, 32, 33
                            ])
                            # lut[:20]=np.array([7,8,11,12,13,17,19,20,21,22,23,24,25,26,27,28,31,32,33,34])
                            seg_resized = prob_upsampling(seg_prob,
                                                          target_shape=(1024,
                                                                        2048))
                            seg_resized2 = cv2.LUT(seg_resized, lut)
                            if cv2.imwrite(res_fname, seg_resized2):
                                print(res_fname, 'saved.')
                            # display result
                            display_img = display_results(
                                res_img, np.expand_dims(label_img, axis=0),
                                img, det, gt, self.class_names)
                            res_fname = fnames[imgidx].replace(
                                "SegmentationClass",
                                "Results").replace("gtFine_labelTrainIds",
                                                   "compare")
                            if cv2.imwrite(res_fname, display_img):
                                print(res_fname, 'saved.')
                            # [exit(0) if (cv2.waitKey()&0xff)==27 else None]
                        outimgiter += 1

                    if self.evaluation_only:
                        continue

                    eval_metrics.get_metric(0).update(None, [
                        output_dict["cls_prob_output"],
                        output_dict["loc_loss_output"],
                        output_dict["cls_label_output"]
                    ])
                    eval_metrics.get_metric(1).update([label_seg], [pred_seg])
                    self.valid_metric.update([mx.nd.slice_axis(data.label[0],axis=2,begin=0,end=5)], \
                                             [mx.nd.slice_axis(pred_det,axis=2,begin=0,end=6)])
                    disparities = []
                    for imgidx in range(batch_size):
                        dispname = fnames[imgidx].replace(
                            "SegmentationClass",
                            "Disparity").replace("gtFine_labelTrainIds",
                                                 "disparity")
                        disparities.append(cv2.imread(dispname, -1))
                        assert disparities[
                            0] is not None, dispname + " not found."
                    depth_metric.update(mx.nd.array(disparities), [pred_det])

                    det_metric = self.valid_metric
                    seg_metric = eval_metrics.get_metric(1)
                    det_names, det_values = det_metric.get()
                    seg_name, seg_value = seg_metric.get()
                    depth_names, depth_values = depth_metric.get()
                    print("\r %d/%d speed=%.1fms %.1f%% %s=%.1f %s=%.1f %s=%.1f" % \
                          (nbatch*eval_data.batch_size,eval_data.num_samples,
                           math.fsum(timing_results)/float(nbatch),
                           float(nbatch*eval_data.batch_size)*100./float(eval_data.num_samples),
                           det_names[-1],det_values[-1]*100.,
                           seg_name,seg_value*100.,
                           depth_names[-1],depth_values[-1]*100.,),end='\r')

                names, values = eval_metrics.get()
                for name, value in zip(names, values):
                    logger.info(' epoch[%d] Validation-%s=%f', epoch, name,
                                value)
                logger.info('----------------------------------------------')
                logger.info(' & '.join(names))
                logger.info(' & '.join(
                    map(lambda v: '%.1f' % (v * 100., ), values)))
                logger.info('----------------------------------------------')
                names, values = self.valid_metric.get()
                for name, value in zip(names, values):
                    logger.info(' epoch[%d] Validation-%s=%f', epoch, name,
                                value)
                logger.info('----------------------------------------------')
                logger.info(' & '.join(names))
                logger.info(' & '.join(
                    map(lambda v: '%.1f' % (v * 100., ), values)))
                logger.info('----------------------------------------------')
                names, values = depth_metric.get()
                for name, value in zip(names, values):
                    logger.info(' epoch[%d] Validation-%s=%f', epoch, name,
                                value)
                logger.info('----------------------------------------------')
                logger.info(' & '.join(names))
                logger.info(' & '.join(
                    map(lambda v: '%.1f' % (v * 100., ), values)))
                logger.info('----------------------------------------------')

                if self.evaluation_only:
                    exit(0)  ## for debugging only
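
The helper prob_upsampling used above is not defined in this snippet. A minimal sketch of what it plausibly does, assuming it takes a (num_classes, H, W) probability NDArray and returns an upsampled uint8 label map suitable for cv2.LUT (name and behavior are inferred from the call site, not confirmed by the source):

import cv2
import numpy as np

def prob_upsampling(seg_prob, target_shape):
    # Hypothetical: resize per-class probabilities, then take the argmax
    # to produce a label image of shape target_shape = (height, width).
    prob = seg_prob.asnumpy().transpose((1, 2, 0))  # (H, W, num_classes) for cv2
    prob = cv2.resize(prob, (target_shape[1], target_shape[0]),
                      interpolation=cv2.INTER_LINEAR)  # cv2.resize takes (w, h)
    return prob.argmax(axis=2).astype(np.uint8)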
Exemple #40
0
        exclude_blocks.extend(
            [net.features[2][0].body[0], net.features[2][0].body[1]])
    print('*' * 25 + ' Exclude blocks ' + '*' * 25)
    for b in exclude_blocks:
        print(b.name)
    print('*' * (25 * 2 + len(' Exclude blocks ')))
    print()
    convert.convert_model(
        net,
        exclude=exclude_blocks,
        convert_fn=convert_fn,
    )

    # initialize for quantization parameters and reset context
    qparams_init(net)
    ctx = gpu(opt.use_gpu) if opt.use_gpu != -1 else cpu()
    net.collect_params().reset_ctx(ctx)

    # construct transformer
    if opt.dataset == 'imagenet':
        eval_transformer = T.Compose([
            T.Resize(256, keep_ratio=True),
            T.CenterCrop(224),
            T.ToTensor(),
            T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    else:
        eval_transformer = T.Compose([
            T.ToTensor(),
            T.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
        ])
            out = net(data)
            out = nd.SoftmaxActivation(out).mean(axis=0)  # softmax process
            out = out.asnumpy().tolist()  # array to list
            # apply the threshold and delete matching images
            if (out[2] > threshold) or (out[3] > threshold):
                os.remove(os.path.join(input_dir, movie, _image))
            # alternatively, just write the result to a file without deleting anything
            out = [str(number) for number in out]
            string = '%s:%s' % (image_file, ','.join(out))
            writeResult(string + '\n')
            # you can also move these images to another directory

        print('Movie %s finished.' % movie)


if __name__ == "__main__":
    # parse command line arguments
    args = parseArgs()
    task = 'face_classification'
    model_name = args.model
    task_num_class = args.class_number
    task_param = '../data/%s_%s.params' % (model_name, task)
    use_gpu = args.use_gpu
    ctx = mx.gpu() if use_gpu else mx.cpu()
    num_workers = args.worker_number
    input_dir = args.input_dir
    threshold = args.threshold

    net = loadModel(model_name, task_num_class, task_param, ctx)
    predict(net, ctx, input_dir, threshold)
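
loadModel, parseArgs and writeResult are helpers defined elsewhere in this project. A plausible gluon-style loadModel, shown only as a sketch (the model-zoo lookup and the signature are assumptions inferred from the call site):

from mxnet.gluon.model_zoo import vision

def loadModel(model_name, num_class, param_file, ctx):
    # Hypothetical: build the zoo network with the task's class count
    # and load the fine-tuned weights saved under task_param.
    net = vision.get_model(model_name, classes=num_class)
    net.load_parameters(param_file, ctx=ctx)
    return net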
def train_net(args):
    ctx = []
    cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
    if len(cvd)>0:
      for i in range(len(cvd.split(','))):
        ctx.append(mx.gpu(i))
    if len(ctx)==0:
      ctx = [mx.cpu()]
      print('use cpu')
    else:
      print('gpu num:', len(ctx))
    prefix = args.prefix
    prefix_dir = os.path.dirname(prefix)
    if not os.path.exists(prefix_dir):
      os.makedirs(prefix_dir)
    end_epoch = args.end_epoch
    args.ctx_num = len(ctx)
    args.num_layers = int(args.network[1:])
    print('num_layers', args.num_layers)
    if args.per_batch_size==0:
      args.per_batch_size = 128
    args.batch_size = args.per_batch_size*args.ctx_num
    args.rescale_threshold = 0
    args.image_channel = 3

    os.environ['BETA'] = str(args.beta)
    data_dir_list = args.data_dir.split(',')
    assert len(data_dir_list)==1
    data_dir = data_dir_list[0]
    path_imgrec = None
    path_imglist = None
    prop = face_image.load_property(data_dir)
    args.num_classes = prop.num_classes
    image_size = prop.image_size
    args.image_h = image_size[0]
    args.image_w = image_size[1]
    print('image_size', image_size)
    assert(args.num_classes>0)
    print('num_classes', args.num_classes)
    path_imgrec = os.path.join(data_dir, "train.rec")

    if args.loss_type==1 and args.num_classes>20000:
      args.beta_freeze = 5000
      args.gamma = 0.06

    print('Called with argument:', args)
    data_shape = (args.image_channel,image_size[0],image_size[1])
    mean = None
    begin_epoch = 0
    base_lr = args.lr
    base_wd = args.wd
    base_mom = args.mom
    if len(args.pretrained)==0:
      arg_params = None
      aux_params = None
      sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params)
    else:
      vec = args.pretrained.split(',')
      print('loading', vec)
      _, arg_params, aux_params = mx.model.load_checkpoint(vec[0], int(vec[1]))
      #if 'fc7_weight' in arg_params.keys():
      #    del arg_params['fc7_weight']
      #if 'fc7_bias' in arg_params.keys():
      #    del arg_params['fc7_bias']
      sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params)
    #if args.network[0]=='s':
    #  data_shape_dict = {'data' : (args.per_batch_size,)+data_shape}
    #  spherenet.init_weights(sym, data_shape_dict, args.num_layers)

    #label_name = 'softmax_label'
    #label_shape = (args.batch_size,)
    model = mx.mod.Module(
        context       = ctx,
        symbol        = sym,
        work_load_list = None,
    )
    val_dataiter = None

    train_dataiter = FaceImageIter(
        batch_size           = args.batch_size,
        data_shape           = data_shape,
        path_imgrec          = path_imgrec,
        shuffle              = True,
        rand_mirror          = args.rand_mirror,
        mean                 = mean,
        cutoff               = args.cutoff,
    )

    if args.loss_type<10:
      _metric = AccMetric()
    else:
      _metric = LossValueMetric()
    eval_metrics = [mx.metric.create(_metric)]


    if args.network[0]=='r' or args.network[0]=='y':
      initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style
    elif args.network[0]=='i' or args.network[0]=='x':
      initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) #inception
    else:
      initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2)
    _rescale = 1.0/args.ctx_num
    opt = optimizer.SGD(learning_rate=base_lr, momentum=base_mom, wd=base_wd, rescale_grad=_rescale)
    som = 20
    _cb = mx.callback.Speedometer(args.batch_size, som)

    ver_list = []
    ver_name_list = []
    for name in args.target.split(','):
      path = os.path.join(data_dir, name + ".bin")
      #path = os.path.join("/ssd/MegaFace/MF2_aligned_pic9/",name+".bin")
      if os.path.exists(path):
        data_set = verification.load_bin(path, image_size)
        ver_list.append(data_set)
        ver_name_list.append(name)
        print('ver', name)



    def ver_test(nbatch):
      results = []
      for i in range(len(ver_list)):
        acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(ver_list[i], model, args.batch_size, 10, None, None)
        print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm))
        #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1))
        print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2))
        results.append(acc2)
      return results



    highest_acc = [0.0, 0.0]  #lfw and target
    #for i in xrange(len(ver_list)):
    #  highest_acc.append(0.0)
    global_step = [0]
    save_step = [0]
    if len(args.lr_steps)==0:
      lr_steps = [16000, 24000]
      if args.loss_type>=1 and args.loss_type<=7:
        #lr_steps = [16000, 24000, 28000]
        lr_steps = [32000, 48000, 60000]
        #lr_steps = [100000, 140000, 160000]
      p = 512.0/args.batch_size
      for l in range(len(lr_steps)):
        lr_steps[l] = int(lr_steps[l]*p)
    else:
      lr_steps = [int(x) for x in args.lr_steps.split(',')]
    print('lr_steps', lr_steps)
    def _batch_callback(param):
      #global global_step
      global_step[0]+=1
      mbatch = global_step[0]
      for _lr in lr_steps:
        if mbatch==args.beta_freeze+_lr:
          opt.lr *= 0.1
          print('lr change to', opt.lr)
          break

      _cb(param)
      if mbatch%1000==0:
        print('lr-batch-epoch:',opt.lr,param.nbatch,param.epoch)

      if mbatch>=0 and mbatch%args.verbose==0:
        arg, aux = model.get_params()
        mx.model.save_checkpoint(prefix, 0, model.symbol, arg, aux)
        acc_list = ver_test(mbatch)
        save_step[0]+=1
        msave = save_step[0]
        do_save = False
        if len(acc_list)>0:
          lfw_score = acc_list[0]
          if lfw_score>highest_acc[0]:
            highest_acc[0] = lfw_score
            if lfw_score>=0.975:
              do_save = True
          if acc_list[-1]>=highest_acc[-1]:
            highest_acc[-1] = acc_list[-1]
            if lfw_score>=0.985:
              do_save = True
          if acc_list[-1]>=0.985:
              do_save = True
        if args.ckpt==0:
          do_save = False
        elif args.ckpt>1:
          do_save = True
        if do_save:
          print('saving', msave)
          arg, aux = model.get_params()
          mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux)
        print('[%d]Accuracy-Highest: %1.5f'%(mbatch, highest_acc[-1]))
      if mbatch<=args.beta_freeze:
        _beta = args.beta
      else:
        move = max(0, mbatch-args.beta_freeze)
        _beta = max(args.beta_min, args.beta*math.pow(1+args.gamma*move, -1.0*args.power))
      #print('beta', _beta)
      os.environ['BETA'] = str(_beta)
      if args.max_steps>0 and mbatch>args.max_steps:
        sys.exit(0)

    epoch_cb = None

    model.fit(train_dataiter,
        begin_epoch        = begin_epoch,
        num_epoch          = end_epoch,
        eval_data          = val_dataiter,
        eval_metric        = eval_metrics,
        kvstore            = 'device',
        optimizer          = opt,
        #optimizer_params   = optimizer_params,
        initializer        = initializer,
        arg_params         = arg_params,
        aux_params         = aux_params,
        allow_missing      = True,
        batch_end_callback = _batch_callback,
        epoch_end_callback = epoch_cb )
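
The _batch_callback above anneals the BETA environment variable with an inverse power-law schedule once beta_freeze is passed. A standalone sketch of that schedule (the default values here are illustrative, not the script's defaults):

import math

def beta_at(mbatch, beta=1000.0, beta_min=5.0, beta_freeze=0, gamma=0.12, power=1.0):
    if mbatch <= beta_freeze:
        return beta
    move = max(0, mbatch - beta_freeze)
    return max(beta_min, beta * math.pow(1 + gamma * move, -1.0 * power))

# e.g. beta_at(0) returns beta unchanged, and beta_at(m) decays smoothly toward beta_min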
def test_coverage_attention(attention_coverage_type,
                            attention_coverage_num_hidden,
                            batch_size=3,
                            encoder_num_hidden=2,
                            decoder_num_hidden=2):
    # source: (batch_size, seq_len, encoder_num_hidden)
    source = mx.sym.Variable("source")
    # source_length: (batch_size, )
    source_length = mx.sym.Variable("source_length")
    source_seq_len = 10

    config_coverage = sockeye.coverage.CoverageConfig(
        type=attention_coverage_type,
        num_hidden=attention_coverage_num_hidden,
        layer_normalization=False)
    config_attention = sockeye.rnn_attention.AttentionConfig(
        type="coverage",
        num_hidden=5,
        input_previous_word=False,
        source_num_hidden=encoder_num_hidden,
        query_num_hidden=decoder_num_hidden,
        layer_normalization=False,
        config_coverage=config_coverage)
    attention = sockeye.rnn_attention.get_attention(config_attention,
                                                    max_seq_len=source_seq_len)

    attention_state = attention.get_initial_state(source_length,
                                                  source_seq_len)
    attention_func = attention.on(source, source_length, source_seq_len)
    attention_input = attention.make_input(0, mx.sym.Variable("word_vec_prev"),
                                           mx.sym.Variable("decoder_state"))
    attention_state = attention_func(attention_input, attention_state)
    sym = mx.sym.Group([
        attention_state.context, attention_state.probs,
        attention_state.dynamic_source
    ])

    source_shape = (batch_size, source_seq_len, encoder_num_hidden)
    source_length_shape = (batch_size, )
    decoder_state_shape = (batch_size, decoder_num_hidden)

    executor = sym.simple_bind(ctx=mx.cpu(),
                               source=source_shape,
                               source_length=source_length_shape,
                               decoder_state=decoder_state_shape)

    source_length_vector = integer_vector(shape=source_length_shape,
                                          max_value=source_seq_len)
    executor.arg_dict["source"][:] = gaussian_vector(shape=source_shape)
    executor.arg_dict["source_length"][:] = source_length_vector
    executor.arg_dict["decoder_state"][:] = gaussian_vector(
        shape=decoder_state_shape)
    exec_output = executor.forward()
    context_result = exec_output[0].asnumpy()
    attention_prob_result = exec_output[1].asnumpy()
    dynamic_source_result = exec_output[2].asnumpy()

    expected_probs = (1. / source_length_vector).reshape((batch_size, 1))

    assert context_result.shape == (batch_size, encoder_num_hidden)
    assert attention_prob_result.shape == (batch_size, source_seq_len)
    assert dynamic_source_result.shape == (batch_size, source_seq_len,
                                           attention_coverage_num_hidden)
    assert (np.sum(np.isclose(attention_prob_result, expected_probs),
                   axis=1) == source_length_vector).all()
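
A usage sketch for the test above; the coverage type strings are assumptions (check sockeye.coverage for the exact supported set):

test_coverage_attention("tanh", attention_coverage_num_hidden=5)
test_coverage_attention("gru", attention_coverage_num_hidden=2)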
Exemple #44
0
def train_net(args):
    ctx = []
    cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
    if len(cvd) > 0:
        for i in range(len(cvd.split(','))):
            ctx.append(mx.gpu(i))
    if len(ctx) == 0:
        ctx = [mx.cpu()]
        print('use cpu')
    else:
        print('gpu num:', len(ctx))
    prefix = args.prefix
    prefix_dir = os.path.dirname(prefix)
    if not os.path.exists(prefix_dir):
        os.makedirs(prefix_dir)
    end_epoch = args.end_epoch
    args.ctx_num = len(ctx)
    args.num_layers = int(args.network[1:])
    print('num_layers', args.num_layers)
    if args.per_batch_size == 0:
        args.per_batch_size = 128
        if args.loss_type == 10:
            args.per_batch_size = 256
    args.batch_size = args.per_batch_size * args.ctx_num
    args.rescale_threshold = 0
    args.image_channel = 3
    ppatch = [int(x) for x in args.patch.split('_')]
    assert len(ppatch) == 5

    os.environ['BETA'] = str(args.beta)
    data_dir_list = args.data_dir.split(',')
    if args.loss_type != 12 and args.loss_type != 13:
        assert len(data_dir_list) == 1
    data_dir = data_dir_list[0]
    args.use_val = False
    path_imgrec = None
    path_imglist = None
    val_rec = None
    prop = face_image.load_property(data_dir)
    args.num_classes = prop.num_classes
    image_size = prop.image_size
    args.image_h = image_size[0]
    args.image_w = image_size[1]
    print('image_size', image_size)

    assert (args.num_classes > 0)
    print('num_classes', args.num_classes)
    args.coco_scale = 0.5 * math.log(float(args.num_classes - 1)) + 3

    # path_imglist = "/raid5data/dplearn/MS-Celeb-Aligned/lst2"
    path_imgrec = os.path.join(data_dir, "train.rec")
    val_rec = os.path.join(data_dir, "val.rec")
    if os.path.exists(val_rec) and args.loss_type < 10:
        args.use_val = True
    else:
        val_rec = None
    # args.use_val = False

    if args.loss_type == 1 and args.num_classes > 20000:
        args.beta_freeze = 5000
        args.gamma = 0.06

    if args.loss_type < 9:
        assert args.images_per_identity == 0
    else:
        if args.images_per_identity == 0:
            if args.loss_type == 11:
                args.images_per_identity = 2
            elif args.loss_type == 10 or args.loss_type == 9:
                args.images_per_identity = 16
            elif args.loss_type == 12 or args.loss_type == 13:
                args.images_per_identity = 5
                assert args.per_batch_size % 3 == 0
        assert args.images_per_identity >= 2
        args.per_identities = int(args.per_batch_size / args.images_per_identity)

    print('Called with argument:', args)

    data_shape = (args.image_channel, image_size[0], image_size[1])
    mean = None

    begin_epoch = 0
    base_lr = args.lr
    base_wd = args.wd
    base_mom = args.mom
    if len(args.pretrained) == 0:
        arg_params = None
        aux_params = None
        sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params)
    else:
        vec = args.pretrained.split(',')
        print('loading', vec)
        _, arg_params, aux_params = mx.model.load_checkpoint(vec[0], int(vec[1]))
        sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params)
    if args.network[0] == 's':
        data_shape_dict = {'data': (args.per_batch_size,) + data_shape}
        spherenet.init_weights(sym, data_shape_dict, args.num_layers)

    data_extra = None
    hard_mining = False
    triplet_params = None
    coco_mode = False
    if args.loss_type == 10:
        hard_mining = True
        _shape = (args.batch_size, args.per_batch_size)
        data_extra = np.full(_shape, -1.0, dtype=np.float32)
        c = 0
        while c < args.batch_size:
            a = 0
            while a < args.per_batch_size:
                b = a + args.images_per_identity
                data_extra[(c + a):(c + b), a:b] = 1.0
                # print(c+a, c+b, a, b)
                a = b
            c += args.per_batch_size
    elif args.loss_type == 11:
        data_extra = np.zeros((args.batch_size, args.per_identities), dtype=np.float32)
        c = 0
        while c < args.batch_size:
            for i in range(args.per_identities):
                data_extra[c + i][i] = 1.0
            c += args.per_batch_size
    elif args.loss_type == 12 or args.loss_type == 13:
        triplet_params = [args.triplet_bag_size, args.triplet_alpha, args.triplet_max_ap]
    elif args.loss_type == 9:
        coco_mode = True

    label_name = 'softmax_label'
    label_shape = (args.batch_size,)
    if args.output_c2c:
        label_shape = (args.batch_size, 2)
    if data_extra is None:
        model = mx.mod.Module(
            context=ctx,
            symbol=sym,
        )
    else:
        data_names = ('data', 'extra')
        # label_name = ''
        model = mx.mod.Module(
            context=ctx,
            symbol=sym,
            data_names=data_names,
            label_names=(label_name,),
        )

    if args.use_val:
        val_dataiter = FaceImageIter(
            batch_size=args.batch_size,
            data_shape=data_shape,
            path_imgrec=val_rec,
            # path_imglist         = val_path,
            shuffle=False,
            rand_mirror=False,
            mean=mean,
            ctx_num=args.ctx_num,
            data_extra=data_extra,
        )
    else:
        val_dataiter = None

    if len(data_dir_list) == 1 and args.loss_type != 12 and args.loss_type != 13:
        train_dataiter = FaceImageIter(
            batch_size=args.batch_size,
            data_shape=data_shape,
            path_imgrec=path_imgrec,
            shuffle=True,
            rand_mirror=args.rand_mirror,
            mean=mean,
            cutoff=args.cutoff,
            c2c_threshold=args.c2c_threshold,
            output_c2c=args.output_c2c,
            c2c_mode=args.c2c_mode,
            limit=args.train_limit,
            ctx_num=args.ctx_num,
            images_per_identity=args.images_per_identity,
            data_extra=data_extra,
            hard_mining=hard_mining,
            triplet_params=triplet_params,
            coco_mode=coco_mode,
            mx_model=model,
            label_name=label_name,
        )
    else:
        iter_list = []
        for _data_dir in data_dir_list:
            _path_imgrec = os.path.join(_data_dir, "train.rec")
            _dataiter = FaceImageIter(
                batch_size=args.batch_size,
                data_shape=data_shape,
                path_imgrec=_path_imgrec,
                shuffle=True,
                rand_mirror=args.rand_mirror,
                mean=mean,
                cutoff=args.cutoff,
                c2c_threshold=args.c2c_threshold,
                output_c2c=args.output_c2c,
                c2c_mode=args.c2c_mode,
                limit=args.train_limit,
                ctx_num=args.ctx_num,
                images_per_identity=args.images_per_identity,
                data_extra=data_extra,
                hard_mining=hard_mining,
                triplet_params=triplet_params,
                coco_mode=coco_mode,
                mx_model=model,
                label_name=label_name,
            )
            iter_list.append(_dataiter)
        train_dataiter = FaceImageIterList(iter_list)

    if args.loss_type < 10:
        _metric = AccMetric()
    else:
        _metric = LossValueMetric()
    eval_metrics = [mx.metric.create(_metric)]

    if args.network[0] == 'r':
        initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2)  # resnet style
    elif args.network[0] == 'i' or args.network[0] == 'x':
        initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2)  # inception
    else:
        initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2)
    _rescale = 1.0 / args.ctx_num
    if args.noise_sgd > 0.0:
        print('use noise sgd')
        opt = NoiseSGD(scale=args.noise_sgd, learning_rate=base_lr, momentum=base_mom, wd=base_wd,
                       rescale_grad=_rescale)
    else:
        opt = optimizer.SGD(learning_rate=base_lr, momentum=base_mom, wd=base_wd, rescale_grad=_rescale)
    som = 20
    if args.loss_type == 12 or args.loss_type == 13:
        som = 2
    _cb = mx.callback.Speedometer(args.batch_size, som)

    ver_list = []
    ver_name_list = []
    for name in args.target.split(','):
        path = os.path.join(data_dir, name + ".bin")
        if os.path.exists(path):
            data_set = verification.load_bin(path, image_size)
            ver_list.append(data_set)
            ver_name_list.append(name)
            print('ver', name)

    def ver_test(nbatch):
        results = []
        for i in range(len(ver_list)):
            acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(ver_list[i], model, args.batch_size, 10,
                                                                               data_extra, label_shape)
            print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm))
            # print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1))
            print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2))
            results.append(acc2)
        return results

    def val_test():
        acc = AccMetric()
        val_metric = mx.metric.create(acc)
        val_metric.reset()
        val_dataiter.reset()
        for i, eval_batch in enumerate(val_dataiter):
            model.forward(eval_batch, is_train=False)
            model.update_metric(val_metric, eval_batch.label)
        acc_value = val_metric.get_name_value()[0][1]
        print('VACC: %f' % (acc_value))

    highest_acc = [0.0, 0.0]  # lfw and target
    # for i in range(len(ver_list)):
    #  highest_acc.append(0.0)
    global_step = [0]
    save_step = [0]
    if len(args.lr_steps) == 0:
        lr_steps = [40000, 60000, 80000]
        if args.loss_type >= 1 and args.loss_type <= 7:
            lr_steps = [100000, 140000, 160000]
        p = 512.0 / args.batch_size
        for l in range(len(lr_steps)):
            lr_steps[l] = int(lr_steps[l] * p)
    else:
        lr_steps = [int(x) for x in args.lr_steps.split(',')]
    print('lr_steps', lr_steps)

    def _batch_callback(param):
        # global global_step
        global_step[0] += 1
        mbatch = global_step[0]
        for _lr in lr_steps:
            if mbatch == args.beta_freeze + _lr:
                opt.lr *= 0.1
                print('lr change to', opt.lr)
                break

        _cb(param)
        if mbatch % 1000 == 0:
            print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch)

        if mbatch >= 0 and mbatch % args.verbose == 0:
            acc_list = ver_test(mbatch)
            save_step[0] += 1
            msave = save_step[0]
            do_save = False
            if len(acc_list) > 0:
                lfw_score = acc_list[0]
                if lfw_score > highest_acc[0]:
                    highest_acc[0] = lfw_score
                    if lfw_score >= 0.998:
                        do_save = True
                if acc_list[-1] >= highest_acc[-1]:
                    highest_acc[-1] = acc_list[-1]
                    if lfw_score >= 0.99:
                        do_save = True
            if args.ckpt == 0:
                do_save = False
            elif args.ckpt > 1:
                do_save = True
            # for i in range(len(acc_list)):
            #  acc = acc_list[i]
            #  if acc>=highest_acc[i]:
            #    highest_acc[i] = acc
            #    if lfw_score>=0.99:
            #      do_save = True
            # if args.loss_type==1 and mbatch>lr_steps[-1] and mbatch%10000==0:
            #  do_save = True
            if do_save:
                print('saving', msave)
                if val_dataiter is not None:
                    val_test()
                arg, aux = model.get_params()
                mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux)
                # if acc>=highest_acc[0]:
                #  lfw_npy = "%s-lfw-%04d" % (prefix, msave)
                #  X = np.concatenate(embeddings_list, axis=0)
                #  print('saving lfw npy', X.shape)
                #  np.save(lfw_npy, X)
            print('[%d]Accuracy-Highest: %1.5f' % (mbatch, highest_acc[-1]))
        if mbatch <= args.beta_freeze:
            _beta = args.beta
        else:
            move = max(0, mbatch - args.beta_freeze)
            _beta = max(args.beta_min, args.beta * math.pow(1 + args.gamma * move, -1.0 * args.power))
        # print('beta', _beta)
        os.environ['BETA'] = str(_beta)
        if args.max_steps > 0 and mbatch > args.max_steps:
            sys.exit(0)

    # epoch_cb = mx.callback.do_checkpoint(prefix, 1)
    epoch_cb = None

    # def _epoch_callback(epoch, sym, arg_params, aux_params):
    #  print('epoch-end', epoch)

    model.fit(train_dataiter,
              begin_epoch=begin_epoch,
              num_epoch=end_epoch,
              eval_data=val_dataiter,
              eval_metric=eval_metrics,
              kvstore='device',
              optimizer=opt,
              # optimizer_params   = optimizer_params,
              initializer=initializer,
              arg_params=arg_params,
              aux_params=aux_params,
              allow_missing=True,
              batch_end_callback=_batch_callback,
              epoch_end_callback=epoch_cb)
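
For loss_type 10 above, data_extra is a (batch_size, per_batch_size) mask of +1 for same-identity pairs inside each device batch and -1 elsewhere. A tiny numpy check of that layout, with sizes shrunk for readability:

import numpy as np

batch_size, per_batch_size, images_per_identity = 8, 4, 2
mask = np.full((batch_size, per_batch_size), -1.0, dtype=np.float32)
c = 0
while c < batch_size:
    a = 0
    while a < per_batch_size:
        b = a + images_per_identity
        mask[(c + a):(c + b), a:b] = 1.0
        a = b
    c += per_batch_size
print(mask)  # 2x2 blocks of +1 along each device batch, -1 elsewhere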
Exemple #45
0
    # sub mean
    normed_img = sample - 128
    normed_img /= 128.
    return np.reshape(normed_img, (1, 3, 299, 299))


start_time = datetime.datetime.fromtimestamp(
    time.time()).strftime('%Y-%m-%d %H:%M:%S')

prefix = "model/inception-7/Inception-7"

num_round = 1

network = model.FeedForward.load(prefix,
                                 num_round,
                                 ctx=mx.cpu(),
                                 numpy_batch_size=bs)

inner = network.symbol.get_internals()

inner_feature = inner['flatten_output']

fea_ext = model.FeedForward(ctx=mx.cpu(),
                            symbol=inner_feature,
                            numpy_batch_size=bs,
                            arg_params=network.arg_params,
                            aux_params=network.aux_params,
                            allow_extra_params=True)

# biz_ph = pd.read_csv('../data/train_id.csv')
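
Because fea_ext is bound to the internal 'flatten_output' symbol, predict returns penultimate-layer features rather than class scores. A usage sketch, assuming inputs follow the (1, 3, 299, 299) preprocessing above and bs is the batch size defined earlier:

import numpy as np

batch = np.zeros((1, 3, 299, 299), dtype=np.float32)  # a preprocessed image
features = fea_ext.predict(batch)
print(features.shape)  # (1, feature_dim) taken from the flatten layer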
def main():
    opt = parse_args()

    makedirs(opt.save_dir)

    filehandler = logging.FileHandler(os.path.join(opt.save_dir, opt.logging_file))
    streamhandler = logging.StreamHandler()
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)
    logger.info(opt)

    sw = SummaryWriter(logdir=opt.save_dir, flush_secs=5)

    batch_size = opt.batch_size
    classes = opt.num_classes

    num_gpus = opt.num_gpus
    batch_size *= max(1, num_gpus)
    logger.info('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers

    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]

    optimizer = 'sgd'
    if opt.clip_grad > 0:
        optimizer_params = {'learning_rate': opt.lr, 'wd': opt.wd, 'momentum': opt.momentum, 'clip_gradient': opt.clip_grad}
    else:
        optimizer_params = {'learning_rate': opt.lr, 'wd': opt.wd, 'momentum': opt.momentum}

    model_name = opt.model
    net = get_model(name=model_name, nclass=classes, pretrained=opt.use_pretrained,
                    tsn=opt.use_tsn, num_segments=opt.num_segments, partial_bn=opt.partial_bn)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    logger.info(net)

    if opt.resume_params != '':
        net.load_parameters(opt.resume_params, ctx=context)

    train_data, val_data, batch_fn = get_data_loader(opt, batch_size, num_workers, logger)

    train_metric = mx.metric.Accuracy()
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    def test(ctx, val_data):
        acc_top1.reset()
        acc_top5.reset()
        for i, batch in enumerate(val_data):
            data, label = batch_fn(batch, ctx)
            outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        return (top1, top5)

    def train(ctx):
        if isinstance(ctx, mx.Context):
            ctx = [ctx]

        if opt.no_wd:
            for k, v in net.collect_params('.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        if opt.partial_bn:
            train_patterns = None
            if 'inceptionv3' in opt.model:
                train_patterns = '.*weight|.*bias|inception30_batchnorm0_gamma|inception30_batchnorm0_beta|inception30_batchnorm0_running_mean|inception30_batchnorm0_running_var'
            else:
                logger.info('Current model does not support partial batch normalization.')
            trainer = gluon.Trainer(net.collect_params(train_patterns), optimizer, optimizer_params)
        else:
            trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)

        if opt.resume_states != '':
            trainer.load_states(opt.resume_states)

        L = gluon.loss.SoftmaxCrossEntropyLoss()

        best_val_score = 0
        lr_decay_count = 0

        for epoch in range(opt.resume_epoch, opt.num_epochs):
            tic = time.time()
            train_metric.reset()
            btic = time.time()

            if lr_decay_count < len(lr_decay_epoch) and epoch == lr_decay_epoch[lr_decay_count]:
                trainer.set_learning_rate(trainer.learning_rate * lr_decay)
                lr_decay_count += 1

            for i, batch in enumerate(train_data):
                data, label = batch_fn(batch, ctx)

                with ag.record():
                    outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                    loss = [L(yhat, y.astype(opt.dtype, copy=False)) for yhat, y in zip(outputs, label)]

                for l in loss:
                    l.backward()

                trainer.step(batch_size)
                train_metric.update(label, outputs)

                if opt.log_interval and not (i+1) % opt.log_interval:
                    train_metric_name, train_metric_score = train_metric.get()
                    logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f\tlr=%f' % (
                                epoch, i, batch_size*opt.log_interval/(time.time()-btic),
                                train_metric_name, train_metric_score*100, trainer.learning_rate))
                    btic = time.time()

            train_metric_name, train_metric_score = train_metric.get()
            throughput = int(batch_size * i /(time.time() - tic))

            acc_top1_val, acc_top5_val = test(ctx, val_data)

            logger.info('[Epoch %d] training: %s=%f'%(epoch, train_metric_name, train_metric_score*100))
            logger.info('[Epoch %d] speed: %d samples/sec\ttime cost: %f'%(epoch, throughput, time.time()-tic))
            logger.info('[Epoch %d] validation: acc-top1=%f acc-top5=%f'%(epoch, acc_top1_val*100, acc_top5_val*100))

            sw.add_scalar(tag='train_acc', value=train_metric_score*100, global_step=epoch)
            sw.add_scalar(tag='valid_acc', value=acc_top1_val*100, global_step=epoch)

            if acc_top1_val > best_val_score:
                best_val_score = acc_top1_val
                if opt.use_tsn:
                    net.basenet.save_parameters('%s/%.4f-ucf101-%s-%03d-best.params'%(opt.save_dir, best_val_score, model_name, epoch))
                else:
                    net.save_parameters('%s/%.4f-ucf101-%s-%03d-best.params'%(opt.save_dir, best_val_score, model_name, epoch))
                trainer.save_states('%s/%.4f-ucf101-%s-%03d-best.states'%(opt.save_dir, best_val_score, model_name, epoch))

            if opt.save_frequency and opt.save_dir and (epoch + 1) % opt.save_frequency == 0:
                if opt.use_tsn:
                    net.basenet.save_parameters('%s/ucf101-%s-%03d.params'%(opt.save_dir, model_name, epoch))
                else:
                    net.save_parameters('%s/ucf101-%s-%03d.params'%(opt.save_dir, model_name, epoch))
                trainer.save_states('%s/ucf101-%s-%03d.states'%(opt.save_dir, model_name, epoch))

        # save the last model
        if opt.use_tsn:
            net.basenet.save_parameters('%s/ucf101-%s-%03d.params'%(opt.save_dir, model_name, opt.num_epochs-1))
        else:
            net.save_parameters('%s/ucf101-%s-%03d.params'%(opt.save_dir, model_name, opt.num_epochs-1))
        trainer.save_states('%s/ucf101-%s-%03d.states'%(opt.save_dir, model_name, opt.num_epochs-1))

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)

    train(context)
    sw.close()
#### Use the softmax cross-entropy loss
# Softmax and the cross-entropy loss function
# softmax regression: exp(Xi)/(sum(exp(Xi))) normalizes scores so the 10 class probabilities sum to 1
# cross-entropy loss: takes the negative cross entropy of two distributions as the objective;
# minimizing it is equivalent to maximizing the similarity of the two distributions,
# which measures the model's predictive ability
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

### Optimize the model
# train with stochastic gradient descent (sgd)
# with the learning-rate hyperparameter set to .1
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})

epochs = 10  ## number of training epochs
for e in range(epochs):  # one full pass over the training set per epoch
    train_loss = 0.  # cumulative loss
    train_acc = 0.  # cumulative accuracy
    for i, (data, label) in enumerate(train_data):  ## each batch of samples and labels
        data = data.as_in_context(mx.cpu()).reshape((-1, 784))  ## flatten 28*28 into 1*784
        label = label.as_in_context(mx.cpu())
        with autograd.record():  # record for automatic differentiation
            output = net(data)  # forward pass
            loss = softmax_cross_entropy(output, label)  ## compute the loss
        loss.backward()  # backward pass
        trainer.step(data.shape[0])  # update parameters; data.shape[0] = batch_size
        # Provide stats on the improvement of the model over each epoch
        train_loss += ndarray.mean(loss).asscalar()  ## mean loss of the current batch
        train_acc += utils.accuracy(output, label)  # batch accuracy
    test_acc = utils.evaluate_accuracy(test_data, net)  # accuracy on the test set
    print("Epoch {}. Train loss: {}. Train acc: {}. Test acc: {}.".format(
        e, train_loss/len(train_data), train_acc/len(train_data), test_acc))
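
utils.accuracy and utils.evaluate_accuracy come from a course helper module that is not shown. A minimal sketch of what they compute, assuming the same flattened-MNIST setup as the loop above:

import mxnet as mx
from mxnet import ndarray as nd

def accuracy(output, label):
    # fraction of rows whose argmax matches the label
    return nd.mean(output.argmax(axis=1) == label.astype('float32')).asscalar()

def evaluate_accuracy(data_iterator, net):
    acc = 0.
    for i, (data, label) in enumerate(data_iterator):
        data = data.as_in_context(mx.cpu()).reshape((-1, 784))
        output = net(data)
        acc += accuracy(output, label.as_in_context(mx.cpu()))
    return acc / (i + 1)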
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
])

image, label = cifar_train[0]
plt.figure(class_names[int(label.item())])
plt.imshow(image.asnumpy())
plt.show()

#
# Get GPU num and split the data to each gpu
#
num_gpus = mx.context.num_gpus()
if num_gpus != 0:
    ctx = [mx.gpu(i) for i in range(num_gpus)]
else:
    ctx = [mx.cpu()]

per_device_batch_size = 128
num_workers = 2
batch_size = per_device_batch_size * max(num_gpus, 1)  # load batches that split_and_load will split across devices

train_data = gluon.data.DataLoader(
    cifar_train.transform_first(transform_train),
    batch_size=batch_size,
    shuffle=True,
    last_batch="rollover",
    num_workers=num_workers)

val_data = gluon.data.DataLoader(cifar_test.transform_first(transform_test),
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=num_workers)
if __name__ == "__main__":
    from data_processing import PreprocessContentImage, PreprocessStyleImage
    from data_processing import PostprocessImage, SaveImage
    vgg_params = mx.nd.load("./model/vgg19.params")
    style_weight = 2
    content_weight = 10
    long_edge = 384
    content_np = PreprocessContentImage("./input/IMG_4343.jpg", long_edge)
    style_np = PreprocessStyleImage("./input/starry_night.jpg", shape=content_np.shape)
    dshape = content_np.shape
    ctx = mx.gpu()
    # style
    style_mod = get_style_module("style", dshape, ctx, vgg_params)
    style_mod.forward(mx.io.DataBatch([mx.nd.array(style_np)], [0]), is_train=False)
    style_array = [arr.copyto(mx.cpu()) for arr in style_mod.get_outputs()]
    del style_mod
    # content
    content_mod = get_content_module("content", dshape, ctx, vgg_params)
    content_mod.forward(mx.io.DataBatch([mx.nd.array(content_np)], [0]), is_train=False)
    content_array = content_mod.get_outputs()[0].copyto(mx.cpu())
    del content_mod
    # loss
    mod, gscale = get_loss_module("loss", dshape, ctx, vgg_params)
    extra_args = {"target_gram_%d" % i : style_array[i] for i in range(len(style_array))}
    extra_args["target_content"] = content_array
    mod.set_params(extra_args, {}, True, True)
    grad_array = []
    for i in range(len(style_array)):
        grad_array.append(mx.nd.ones((1,), ctx) * (float(style_weight) / gscale[i]))
    grad_array.append(mx.nd.ones((1,), ctx) * (float(content_weight)))
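
    # -- a rough sketch of the missing optimization loop (not in the source):
    # the classic MXNet neural-style example feeds grad_array into backward
    # while optimizing the input image itself, assuming mod exposes the usual
    # Module API and was bound with inputs_need_grad, roughly:
    img = mx.nd.random_uniform(-0.1, 0.1, dshape, ctx=ctx)  # start from noise
    for it in range(100):
        mod.forward(mx.io.DataBatch([img], [0]), is_train=True)
        mod.backward(grad_array)              # weighted style + content gradients
        data_grad = mod.get_input_grads()[0]
        img -= 0.1 * data_grad                # naive gradient-descent step on the image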
Exemple #50
0
def train_net(args):
    ctx = []
    cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
    if len(cvd) > 0:
        for i in range(len(cvd.split(','))):
            ctx.append(mx.gpu(i))
    if len(ctx) == 0:
        ctx = [mx.cpu()]
        print('use cpu')
    else:
        print('gpu num:', len(ctx))
    prefix = os.path.join(args.models_root,
                          '%s-%s-%s' % (args.network, args.loss, args.dataset),
                          'model')
    prefix_dir = os.path.dirname(prefix)
    print('prefix', prefix)
    if not os.path.exists(prefix_dir):
        os.makedirs(prefix_dir)
    end_epoch = args.end_epoch
    args.ctx_num = len(ctx)
    args.batch_size = args.per_batch_size * args.ctx_num
    args.rescale_threshold = 0
    args.image_channel = config.image_shape[2]

    data_dir = config.dataset_path
    path_imgrec = None
    path_imglist = None
    image_size = config.image_shape[0:2]
    assert len(image_size) == 2
    assert image_size[0] == image_size[1]
    print('image_size', image_size)
    print('num_classes', config.num_classes)
    path_imgrec = os.path.join(data_dir, "train.rec")

    print('Called with argument:', args, config)
    data_shape = (args.image_channel, image_size[0], image_size[1])
    mean = None

    begin_epoch = 0
    if len(args.pretrained) == 0:
        arg_params = None
        aux_params = None
        sym = get_symbol(args)
        if config.net_name == 'spherenet':
            data_shape_dict = {'data': (args.per_batch_size, ) + data_shape}
            spherenet.init_weights(sym, data_shape_dict, args.num_layers)
    else:
        vec = args.pretrained.split(',')
        print('loading', vec)
        _, arg_params, aux_params = mx.model.load_checkpoint(
            vec[0], int(vec[1]))
        sym = get_symbol(args)

    #label_name = 'softmax_label'
    #label_shape = (args.batch_size,)
    model = mx.mod.Module(
        context=ctx,
        symbol=sym,
    )
    val_dataiter = None

    if config.loss_name.find('triplet') >= 0:
        from triplet_image_iter import FaceImageIter
        triplet_params = [
            config.triplet_bag_size, config.triplet_alpha,
            config.triplet_max_ap
        ]
        train_dataiter = FaceImageIter(
            batch_size=args.batch_size,
            data_shape=data_shape,
            path_imgrec=path_imgrec,
            shuffle=True,
            rand_mirror=args.rand_mirror,
            mean=mean,
            cutoff=args.cutoff,
            ctx_num=args.ctx_num,
            images_per_identity=config.images_per_identity,
            triplet_params=triplet_params,
            mx_model=model,
        )
        _metric = LossValueMetric()
        eval_metrics = [mx.metric.create(_metric)]
    else:
        from image_iter import FaceImageIter
        train_dataiter = FaceImageIter(
            batch_size=args.batch_size,
            data_shape=data_shape,
            path_imgrec=path_imgrec,
            shuffle=True,
            rand_mirror=args.rand_mirror,
            mean=mean,
            cutoff=args.cutoff,
            color_jittering=args.color,
            images_filter=args.images_filter,
        )
        metric1 = AccMetric()
        eval_metrics = [mx.metric.create(metric1)]
        if args.ce_loss:
            metric2 = LossValueMetric()
            eval_metrics.append(mx.metric.create(metric2))

    if config.net_name == 'fresnet' or config.net_name == 'fmobilefacenet':
        initializer = mx.init.Xavier(rnd_type='gaussian',
                                     factor_type="out",
                                     magnitude=2)  #resnet style
    else:
        initializer = mx.init.Xavier(rnd_type='uniform',
                                     factor_type="in",
                                     magnitude=2)
    #initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style
    _rescale = 1.0 / args.ctx_num
    opt = optimizer.SGD(learning_rate=args.lr,
                        momentum=args.mom,
                        wd=args.wd,
                        rescale_grad=_rescale)
    _cb = mx.callback.Speedometer(args.batch_size, args.frequent)

    ver_list = []
    ver_name_list = []
    for name in config.val_targets:
        path = os.path.join(data_dir, name + ".bin")
        if os.path.exists(path):
            data_set = verification.load_bin(path, image_size)
            ver_list.append(data_set)
            ver_name_list.append(name)
            print('ver', name)

    def ver_test(nbatch):
        results = []
        for i in range(len(ver_list)):
            acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(
                ver_list[i], model, args.batch_size, 10, None, None)
            print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm))
            #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1))
            print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' %
                  (ver_name_list[i], nbatch, acc2, std2))
            results.append(acc2)
        return results

    highest_acc = [0.0, 0.0]  #lfw and target
    #for i in xrange(len(ver_list)):
    #  highest_acc.append(0.0)
    global_step = [0]
    save_step = [0]
    lr_steps = [int(x) for x in args.lr_steps.split(',')]
    print('lr_steps', lr_steps)

    def _batch_callback(param):
        #global global_step
        global_step[0] += 1
        mbatch = global_step[0]
        for step in lr_steps:
            if mbatch == step:
                opt.lr *= 0.1
                print('lr change to', opt.lr)
                break

        _cb(param)
        if mbatch % 1000 == 0:
            print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch)

        if mbatch >= 0 and mbatch % args.verbose == 0:
            acc_list = ver_test(mbatch)
            save_step[0] += 1
            msave = save_step[0]
            do_save = False
            is_highest = False
            if len(acc_list) > 0:
                #lfw_score = acc_list[0]
                #if lfw_score>highest_acc[0]:
                #  highest_acc[0] = lfw_score
                #  if lfw_score>=0.998:
                #    do_save = True
                score = sum(acc_list)
                if acc_list[-1] >= highest_acc[-1]:
                    if acc_list[-1] > highest_acc[-1]:
                        is_highest = True
                    else:
                        if score >= highest_acc[0]:
                            is_highest = True
                            highest_acc[0] = score
                    highest_acc[-1] = acc_list[-1]
                    #if lfw_score>=0.99:
                    #  do_save = True
            if is_highest:
                do_save = True
            if args.ckpt == 0:
                do_save = False
            elif args.ckpt == 2:
                do_save = True
            elif args.ckpt == 3:
                msave = 1

            if do_save:
                print('saving', msave)
                arg, aux = model.get_params()
                mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux)
            print('[%d]Accuracy-Highest: %1.5f' % (mbatch, highest_acc[-1]))
        if args.max_steps > 0 and mbatch > args.max_steps:
            sys.exit(0)

    epoch_cb = None
    #train_dataiter = mx.io.PrefetchingIter(train_dataiter)

    model.fit(
        train_dataiter,
        begin_epoch=begin_epoch,
        num_epoch=end_epoch,
        eval_data=val_dataiter,
        eval_metric=eval_metrics,
        kvstore='device',
        optimizer=opt,
        #optimizer_params   = optimizer_params,
        initializer=initializer,
        arg_params=arg_params,
        aux_params=aux_params,
        allow_missing=True,
        batch_end_callback=_batch_callback,
        epoch_end_callback=epoch_cb)
Exemple #51
0
def get_train_context(num_cpus, num_gpus):
    if num_gpus > 0:
        return mx.gpu()
    return mx.cpu()
def _set_ctx(self):
    try:
        a = mx.nd.zeros((1,), ctx=mx.gpu(0))
        a.asnumpy()  # force a sync so a missing GPU raises here
        self.ctx = [mx.gpu(0)]
    except mx.MXNetError:
        self.ctx = [mx.cpu()]
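
An equivalent context probe can use mx.context.num_gpus(), available in newer MXNet releases; a minimal sketch:

import mxnet as mx

def pick_ctx():
    # prefer the first GPU when one is present, otherwise fall back to CPU
    return [mx.gpu(0)] if mx.context.num_gpus() > 0 else [mx.cpu()]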
Exemple #53
0
# shuffle data
X, y = shuffle(mnist.data, mnist.target)
# split dataset
train_data = X[:50000, :].astype('float32')
train_label = y[:50000]
val_data = X[50000: 60000, :].astype('float32')
val_label = y[50000:60000]
# Normalize data
train_data[:] /= 256.0
val_data[:] /= 256.0


batch_size = 100
# or you can use a NumPy iterator, which makes using the model easier
train_iter = mx.io.NDArrayIter(data=train_data, label=train_label, batch_size=batch_size, shuffle=True)
val_iter = mx.io.NDArrayIter(data=val_data, label=val_label, batch_size=batch_size)


logging.basicConfig(level=logging.DEBUG)

model = mx.model.FeedForward(
    ctx = mx.cpu(), symbol = mlp, num_round = 20,
    learning_rate = 0.1, momentum = 0.9, wd = 0.00001)

# train by using NumPy ndarrays directly
model.fit(X=train_data, y=train_label)

# train by using Numpy Iterator
# model.fit(X=train_iter, eval_data=val_iter)
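
After fitting, the legacy FeedForward API can also score a held-out iterator or return raw class probabilities; a short usage sketch (score defaulting to accuracy is my recollection of this old API, so treat it as an assumption):

val_acc = model.score(val_iter)   # mean accuracy over the validation iterator
probs = model.predict(val_data)   # per-class probabilities, shape (N, 10)
print('validation accuracy: %f' % val_acc)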

Exemple #54
0
train_data = data[:2000]
dev_data = data[-2000:]
train_y = y[:2000]
dev_y = y[-2000:]
model_name = 'bert_12_768_12'
dataset = 'book_corpus_wiki_en_uncased'
batch_size = 32
seq_len = 64
pad = True
tr_ds = ArrayDataset(train_data, train_y)
dev_ds = ArrayDataset(dev_data, dev_y)

vectorizer = TMNTVectorizer(vocab_size=2000)
vectorizer.fit_transform(train_data)

ctx = mx.cpu()  ## or mx.gpu(N) to use GPU device N

#num_classes = int(np.max(y) + 1)
num_classes = 0

tr_dataset, dev_dataset, num_examples, bert_base, bert_vocab, _ = get_bert_datasets(
    None,
    vectorizer,
    tr_ds,
    dev_ds,
    batch_size,
    seq_len,
    bert_model_name=model_name,
    bert_dataset=dataset,
    num_classes=num_classes,
    ctx=ctx)
Exemple #55
0
def create(style_dataset, content_dataset, style_feature=None,
        content_feature=None, max_iterations=None, model='resnet-16',
        verbose=True, batch_size = 6, **kwargs):
    """
    Create a :class:`StyleTransfer` model.

    Parameters
    ----------
    style_dataset: SFrame
        Input style images. The columns named by the ``style_feature`` parameters will
        be extracted for training the model.

    content_dataset : SFrame
        Input content images. The columns named by the ``content_feature`` parameters will
        be extracted for training the model.

    style_feature: string
        Name of the column containing the input images in style SFrame.
        'None' (the default) indicates the only image column in the style SFrame
        should be used as the feature.

    content_feature: string
        Name of the column containing the input images in content SFrame.
        'None' (the default) indicates the only image column in the content
        SFrame should be used as the feature.

    max_iterations : int
        The number of training iterations. If 'None' (the default), then it will
        be automatically determined based on the amount of data you provide.

    model : string, optional
        Style transfer model to use:

            - "resnet-16" : Fast and small-sized residual network that uses
                            VGG-16 as reference network during training.

    batch_size : int, optional
        If you are getting memory errors, try decreasing this value. If you
        have a powerful computer, increasing this value may improve training
        throughput.

    verbose : bool, optional
        If True, print progress updates and model details.


    Returns
    -------
    out : StyleTransfer
        A trained :class:`StyleTransfer` model.

    See Also
    --------
    StyleTransfer

    Examples
    --------
    .. sourcecode:: python

        # Create datasets
        >>> content_dataset = turicreate.image_analysis.load_images('content_images/')
        >>> style_dataset = turicreate.image_analysis.load_images('style_images/')

        # Train a style transfer model
        >>> model = turicreate.style_transfer.create(content_dataset, style_dataset)

        # Stylize an image on all styles
        >>> stylized_images = model.stylize(data)

        # Visualize the stylized images
        >>> stylized_images.explore()

    """
    if len(style_dataset) == 0:
        raise _ToolkitError("style_dataset SFrame cannot be empty")
    if len(content_dataset) == 0:
        raise _ToolkitError("content_dataset SFrame cannot be empty")
    if batch_size < 1:
        raise _ToolkitError("'batch_size' must be greater than or equal to 1")
    if max_iterations is not None and (not isinstance(max_iterations, int) or max_iterations < 0):
        raise _ToolkitError("'max_iterations' must be an integer greater than or equal to 0")

    from ._sframe_loader import SFrameSTIter as _SFrameSTIter
    import mxnet as _mx
    from .._mxnet import _mxnet_utils

    if style_feature is None:
        style_feature = _tkutl._find_only_image_column(style_dataset)
    
    if content_feature is None:
        content_feature = _tkutl._find_only_image_column(content_dataset)
    if verbose:
        print("Using '{}' in style_dataset as feature column and using "
              "'{}' in content_dataset as feature column".format(style_feature, content_feature))

    _raise_error_if_not_training_sframe(style_dataset, style_feature)
    _raise_error_if_not_training_sframe(content_dataset, content_feature)
    _tkutl._handle_missing_values(style_dataset, style_feature, 'style_dataset')
    _tkutl._handle_missing_values(content_dataset, content_feature, 'content_dataset')
        
    params = {
        'batch_size': batch_size,
        'vgg16_content_loss_layer': 2,  # conv3_3 layer
        'lr': 0.001,
        'content_loss_mult': 1.0,
        'style_loss_mult': [1e-4, 1e-4, 1e-4, 1e-4],  # conv 1-4 layers
        'finetune_all_params': True,
        'pretrained_weights': False,
        'print_loss_breakdown': False,
        'input_shape': (256, 256),
        'training_content_loader_type': 'stretch',
        'use_augmentation': False,
        'sequential_image_processing': False,
        # Only used if use_augmentation is True
        'aug_resize': 0,
        'aug_min_object_covered': 0,
        'aug_rand_crop': 0.9,
        'aug_rand_pad': 0.9,
        'aug_rand_gray': 0.0,
        'aug_aspect_ratio': 1.25,
        'aug_hue': 0.05,
        'aug_brightness': 0.05,
        'aug_saturation': 0.05,
        'aug_contrast': 0.05,
        'aug_horizontal_flip': True,
        'aug_area_range': (0.05, 1.5),
        'aug_pca_noise': 0.0,
        'aug_max_attempts': 20,
        'aug_inter_method': 2,
        'checkpoint': False,
        'checkpoint_prefix': 'style_transfer',
        'checkpoint_increment': 1000
    }

    if '_advanced_parameters' in kwargs:
        # Make sure no additional parameters are provided
        new_keys = set(kwargs['_advanced_parameters'].keys())
        set_keys = set(params.keys())
        unsupported = new_keys - set_keys
        if unsupported:
            raise _ToolkitError('Unknown advanced parameters: {}'.format(unsupported))

        params.update(kwargs['_advanced_parameters'])

    _content_loss_mult = params['content_loss_mult']
    _style_loss_mult = params['style_loss_mult']

    num_gpus = _mxnet_utils.get_num_gpus_in_use(max_devices=params['batch_size'])
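    # round the global batch size down to a multiple of the device count,
    # so every device receives an equal share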
    batch_size_each = params['batch_size'] // max(num_gpus, 1)
    batch_size = max(num_gpus, 1) * batch_size_each
    input_shape = params['input_shape']

    iterations = 0
    if max_iterations is None or max_iterations == 0:
        max_iterations = len(style_dataset) * 10000
        if verbose:
            print('Setting max_iterations to be {}'.format(max_iterations))

    # data loader
    if params['use_augmentation']:
        content_loader_type = '%s-with-augmentation' % params['training_content_loader_type']
    else:
        content_loader_type = params['training_content_loader_type']

    content_images_loader = _SFrameSTIter(content_dataset, batch_size, shuffle=True,
                                  feature_column=content_feature, input_shape=input_shape,
                                  loader_type=content_loader_type, aug_params=params,
                                  sequential=params['sequential_image_processing'])
    ctx = _mxnet_utils.get_mxnet_context(max_devices=params['batch_size'])

    num_styles = len(style_dataset)

    # TRANSFORMER MODEL
    from ._model import Transformer as _Transformer
    transformer_model_path = _pre_trained_models.STYLE_TRANSFER_BASE_MODELS[model]().get_model_path()
    transformer = _Transformer(num_styles, batch_size_each)
    transformer.collect_params().initialize(ctx=ctx)

    if params['pretrained_weights']:
        transformer.load_params(transformer_model_path, ctx, allow_missing=True)

    # For some reason, the transformer fails to hybridize for training, so we
    # avoid this until resolved
    # transformer.hybridize()

    # VGG MODEL
    from ._model import Vgg16 as _Vgg16
    vgg_model_path = _pre_trained_models.STYLE_TRANSFER_BASE_MODELS['Vgg16']().get_model_path()
    vgg_model = _Vgg16()
    vgg_model.collect_params().initialize(ctx=ctx)
    vgg_model.load_params(vgg_model_path, ctx=ctx, ignore_extra=True)
    vgg_model.hybridize()

    # TRAINER
    from mxnet import gluon as _gluon
    from ._model import gram_matrix as _gram_matrix

    if params['finetune_all_params']:
        trainable_params = transformer.collect_params()
    else:
        trainable_params = transformer.collect_params('.*gamma|.*beta')

    trainer = _gluon.Trainer(trainable_params, 'adam', {'learning_rate': params['lr']})
    mse_loss = _gluon.loss.L2Loss()
    start_time = _time.time()
    smoothed_loss = None
    last_time = 0

    cuda_gpus = _mxnet_utils.get_gpus_in_use(max_devices=params['batch_size'])
    num_mxnet_gpus = len(cuda_gpus)

    if verbose:
        # Estimate memory usage (based on experiments)
        cuda_mem_req = 260 + batch_size_each * 880 + num_styles * 1.4

        _tkutl._print_neural_compute_device(cuda_gpus=cuda_gpus, use_mps=False,
                                            cuda_mem_req=cuda_mem_req, has_mps_impl=False)
    #
    # Pre-compute gram matrices for style images
    #
    if verbose:
        print('Analyzing visual features of the style images')

    style_images_loader = _SFrameSTIter(style_dataset, batch_size, shuffle=False, num_epochs=1,
                                        feature_column=style_feature, input_shape=input_shape,
                                        loader_type='stretch',
                                        sequential=params['sequential_image_processing'])
    num_layers = len(params['style_loss_mult'])
    gram_chunks = [[] for _ in range(num_layers)]
    for s_batch in style_images_loader:
        s_data = _gluon.utils.split_and_load(s_batch.data[0], ctx_list=ctx, batch_axis=0)
        for s in s_data:
            vgg16_s = _vgg16_data_prep(s)
            ret = vgg_model(vgg16_s)
            grams = [_gram_matrix(x) for x in ret]
            for i, gram in enumerate(grams):
                if gram.context != _mx.cpu(0):
                    gram = gram.as_in_context(_mx.cpu(0))
                gram_chunks[i].append(gram)
    del style_images_loader

    grams = [
        # The concatenated styles may be padded, so we slice overflow
        _mx.nd.concat(*chunks, dim=0)[:num_styles]
        for chunks in gram_chunks
    ]

    # A context->grams look-up table, where all the gram matrices have been
    # distributed
    ctx_grams = {}
    if ctx[0] == _mx.cpu(0):
        ctx_grams[_mx.cpu(0)] = grams
    else:
        for ctx0 in ctx:
            ctx_grams[ctx0] = [gram.as_in_context(ctx0) for gram in grams]

    style_sa = style_dataset[style_feature]
    idx_column = _tc.SArray(range(0, style_sa.shape[0]))
    style_sframe = _tc.SFrame({"style": idx_column, style_feature: style_sa})

    #
    # Training loop
    #

    vgg_content_loss_layer = params['vgg16_content_loss_layer']
    rs = _np.random.RandomState(1234)
    while iterations < max_iterations:
        content_images_loader.reset()
        for c_batch in content_images_loader:
            c_data = _gluon.utils.split_and_load(c_batch.data[0], ctx_list=ctx, batch_axis=0)

            Ls = []
            curr_content_loss = []
            curr_style_loss = []
            with _mx.autograd.record():
                for c in c_data:
                    # Randomize styles to train
                    indices = _mx.nd.array(rs.randint(num_styles, size=batch_size_each),
                                           dtype=_np.int64, ctx=c.context)

                    # Generate pastiche
                    p = transformer(c, indices)

                    # mean subtraction
                    vgg16_p = _vgg16_data_prep(p)
                    vgg16_c = _vgg16_data_prep(c)

                    # vgg forward
                    p_vgg_outputs = vgg_model(vgg16_p)

                    c_vgg_outputs = vgg_model(vgg16_c)
                    c_content_layer = c_vgg_outputs[vgg_content_loss_layer]
                    p_content_layer = p_vgg_outputs[vgg_content_loss_layer]

                    # Calculate Loss
                    # Style Loss between style image and stylized image
                    # Ls = sum of L2 norm of gram matrix of vgg16's conv layers
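                    # For a VGG feature map reshaped to F with shape
                    # (C, H*W), the gram matrix is G = F F^T (up to a
                    # normalization that varies by implementation); matching
                    # G between style image and pastiche matches feature
                    # correlations rather than spatial layout.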
                    style_losses = []
                    for gram, p_vgg_output, style_loss_mult in zip(ctx_grams[c.context], p_vgg_outputs, _style_loss_mult):
                        gram_s_vgg = gram[indices]
                        gram_p_vgg = _gram_matrix(p_vgg_output)

                        style_losses.append(style_loss_mult * mse_loss(gram_s_vgg, gram_p_vgg))

                    style_loss = _mx.nd.add_n(*style_losses)

                    # Content Loss between content image and stylized image
                    # Lc = L2 norm at a single layer in vgg16
                    content_loss = _content_loss_mult * mse_loss(c_content_layer,
                                                                 p_content_layer)

                    curr_content_loss.append(content_loss)
                    curr_style_loss.append(style_loss)
                    # Divide loss by large number to get into a more legible
                    # range
                    total_loss = (content_loss + style_loss) / 10000.0
                    Ls.append(total_loss)
                for L in Ls:
                    L.backward()

            cur_loss = _np.mean([L.asnumpy()[0] for L in Ls])
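            # cur_loss averages the loss across device shards; below it is
            # folded into an exponential moving average for smoother logging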

            if smoothed_loss is None:
                smoothed_loss = cur_loss
            else:
                smoothed_loss = 0.9 * smoothed_loss + 0.1 * cur_loss
            iterations += 1

            if params['checkpoint'] and iterations % params['checkpoint_increment'] == 0:
                checkpoint_filename = params['checkpoint_prefix'] + "-" + str(iterations) + ".model"
                training_time = _time.time() - start_time
                state = {
                    '_model': transformer,
                    '_training_time_as_string': _seconds_as_string(training_time),
                    'batch_size': batch_size,
                    'num_styles': num_styles,
                    'model': model,
                    'input_image_shape': input_shape,
                    'styles': style_sframe,
                    'num_content_images': len(content_dataset),
                    'training_time': training_time,
                    'max_iterations': max_iterations,
                    'training_iterations': iterations,
                    'training_epochs': content_images_loader.cur_epoch,
                    'style_feature': style_feature,
                    'content_feature': content_feature,
                    "_index_column": "style",
                    'training_loss': smoothed_loss,
                }
                st_model = StyleTransfer(state)
                st_model.save(checkpoint_filename)

            trainer.step(batch_size)

            if verbose and iterations == 1:
                # Print progress table header
                column_names = ['Iteration', 'Loss', 'Elapsed Time']
                num_columns = len(column_names)
                column_width = max(map(len, column_names)) + 2
                hr = '+' + '+'.join(['-' * column_width] * num_columns) + '+'
                print(hr)
                print(('| {:<{width}}' * num_columns + '|').format(*column_names, width=column_width-1))
                print(hr)

            cur_time = _time.time()
            if verbose and (cur_time > last_time + 10 or iterations == max_iterations):
                # Print progress table row
                elapsed_time = cur_time - start_time
                print("| {cur_iter:<{width}}| {loss:<{width}.3f}| {time:<{width}.1f}|".format(
                    cur_iter = iterations, loss = smoothed_loss,
                    time = elapsed_time, width = column_width-1))
                if params['print_loss_breakdown']:
                    print_content_loss = _np.mean([L.asnumpy()[0] for L in curr_content_loss])
                    print_style_loss = _np.mean([L.asnumpy()[0] for L in curr_style_loss])
                    print('Total Loss: {:6.3f} | Content Loss: {:6.3f} | Style Loss: {:6.3f}'.format(cur_loss, print_content_loss, print_style_loss))
                last_time = cur_time
            if iterations == max_iterations:
                if verbose:
                    print(hr)
                break

    training_time = _time.time() - start_time

    # Save the model state
    state = {
        '_model': transformer,
        '_training_time_as_string': _seconds_as_string(training_time),
        'batch_size': batch_size,
        'num_styles': num_styles,
        'model': model,
        'input_image_shape': input_shape,
        'styles': style_sframe,
        'num_content_images': len(content_dataset),
        'training_time': training_time,
        'max_iterations': max_iterations,
        'training_iterations': iterations,
        'training_epochs': content_images_loader.cur_epoch,
        'style_feature': style_feature,
        'content_feature': content_feature,
        "_index_column": "style",
        'training_loss': smoothed_loss,
    }

    return StyleTransfer(state)
Exemple #56
0
def run_train_translate(
        train_params: str,
        translate_params: str,
        translate_params_equiv: Optional[str],
        train_source_path: str,
        train_target_path: str,
        dev_source_path: str,
        dev_target_path: str,
        test_source_path: str,
        test_target_path: str,
        train_source_factor_paths: Optional[List[str]] = None,
        dev_source_factor_paths: Optional[List[str]] = None,
        test_source_factor_paths: Optional[List[str]] = None,
        use_prepared_data: bool = False,
        max_seq_len: int = 10,
        restrict_lexicon: bool = False,
        work_dir: Optional[str] = None,
        seed: int = 13,
        quiet: bool = False) -> Tuple[float, float, float, float]:
    """
    Train a model and translate a test set.  Report validation perplexity and BLEU.

    :param train_params: Command line args for model training.
    :param translate_params: First command line args for translation.
    :param translate_params_equiv: Optional second set of command line args for translation; should produce the same outputs.
    :param train_source_path: Path to the source file.
    :param train_target_path: Path to the target file.
    :param dev_source_path: Path to the development source file.
    :param dev_target_path: Path to the development target file.
    :param test_source_path: Path to the test source file.
    :param test_target_path: Path to the test target file.
    :param train_source_factor_paths: Optional list of paths to training source factor files.
    :param dev_source_factor_paths: Optional list of paths to dev source factor files.
    :param test_source_factor_paths: Optional list of paths to test source factor files.
    :param use_prepared_data: Whether to use the prepared data functionality.
    :param max_seq_len: The maximum sequence length.
    :param restrict_lexicon: Additional translation run with top-k lexicon-based vocabulary restriction.
    :param work_dir: The directory to store the model and other outputs in.
    :param seed: The seed used for training.
    :param quiet: Suppress the console output of training and decoding.
    :return: A tuple containing perplexity, bleu scores for standard and reduced vocab decoding, chrf score.
    """
    if quiet:
        quiet_arg = "--quiet"
    else:
        quiet_arg = ""
    with TemporaryDirectory(dir=work_dir,
                            prefix="test_train_translate.") as work_dir:
        # Optionally create prepared data directory
        if use_prepared_data:
            prepared_data_path = os.path.join(work_dir, "prepared_data")
            params = "{} {}".format(
                sockeye.prepare_data.__file__,
                _PREPARE_DATA_COMMON.format(train_source=train_source_path,
                                            train_target=train_target_path,
                                            output=prepared_data_path,
                                            max_len=max_seq_len,
                                            quiet=quiet_arg))
            if train_source_factor_paths is not None:
                params += _TRAIN_WITH_FACTORS_COMMON.format(
                    source_factors=" ".join(train_source_factor_paths))

            logger.info("Creating prepared data folder.")
            with patch.object(sys, "argv", params.split()):
                sockeye.prepare_data.main()
            # Train model
            model_path = os.path.join(work_dir, "model")
            params = "{} {} {}".format(
                sockeye.train.__file__,
                _TRAIN_PARAMS_PREPARED_DATA_COMMON.format(
                    prepared_data=prepared_data_path,
                    dev_source=dev_source_path,
                    dev_target=dev_target_path,
                    model=model_path,
                    max_len=max_seq_len,
                    quiet=quiet_arg), train_params)

            if dev_source_factor_paths is not None:
                params += _DEV_WITH_FACTORS_COMMON.format(
                    dev_source_factors=" ".join(dev_source_factor_paths))

            logger.info("Starting training with parameters %s.", train_params)
            with patch.object(sys, "argv", params.split()):
                sockeye.train.main()
        else:
            # Train model
            model_path = os.path.join(work_dir, "model")
            params = "{} {} {}".format(
                sockeye.train.__file__,
                _TRAIN_PARAMS_COMMON.format(train_source=train_source_path,
                                            train_target=train_target_path,
                                            dev_source=dev_source_path,
                                            dev_target=dev_target_path,
                                            model=model_path,
                                            max_len=max_seq_len,
                                            seed=seed,
                                            quiet=quiet_arg), train_params)

            if train_source_factor_paths is not None:
                params += _TRAIN_WITH_FACTORS_COMMON.format(
                    source_factors=" ".join(train_source_factor_paths))
            if dev_source_factor_paths is not None:
                params += _DEV_WITH_FACTORS_COMMON.format(
                    dev_source_factors=" ".join(dev_source_factor_paths))

            logger.info("Starting training with parameters %s.", train_params)
            with patch.object(sys, "argv", params.split()):
                sockeye.train.main()

        # run checkpoint decoder on 1% of dev data
        with open(dev_source_path) as dev_fd:
            num_dev_sent = sum(1 for _ in dev_fd)
        sample_size = max(1, int(num_dev_sent * 0.01))  # at least one sentence
        cp_decoder = sockeye.checkpoint_decoder.CheckpointDecoder(
            context=mx.cpu(),
            inputs=[dev_source_path],
            references=dev_target_path,
            model=model_path,
            sample_size=sample_size,
            batch_size=2,
            beam_size=2)
        cp_metrics = cp_decoder.decode_and_evaluate()
        logger.info("Checkpoint decoder metrics: %s", cp_metrics)

        logger.info("Translating with parameters %s.", translate_params)
        # Translate corpus with the 1st params
        out_path = os.path.join(work_dir, "out.txt")
        params = "{} {} {}".format(
            sockeye.translate.__file__,
            _TRANSLATE_PARAMS_COMMON.format(model=model_path,
                                            input=test_source_path,
                                            output=out_path,
                                            quiet=quiet_arg), translate_params)

        if test_source_factor_paths is not None:
            params += _TRANSLATE_WITH_FACTORS_COMMON.format(
                input_factors=" ".join(test_source_factor_paths))

        with patch.object(sys, "argv", params.split()):
            sockeye.translate.main()

        # Translate corpus with the 2nd params
        if translate_params_equiv is not None:
            out_path_equiv = os.path.join(work_dir, "out_equiv.txt")
            params = "{} {} {}".format(
                sockeye.translate.__file__,
                _TRANSLATE_PARAMS_COMMON.format(model=model_path,
                                                input=test_source_path,
                                                output=out_path_equiv,
                                                quiet=quiet_arg),
                translate_params_equiv)

            if test_source_factor_paths is not None:
                params += _TRANSLATE_WITH_FACTORS_COMMON.format(
                    input_factors=" ".join(test_source_factor_paths))

            with patch.object(sys, "argv", params.split()):
                sockeye.translate.main()

            # read-in both outputs, ensure they are the same
            with open(out_path, 'rt') as f:
                lines = f.readlines()
            with open(out_path_equiv, 'rt') as f:
                lines_equiv = f.readlines()
            assert all(a == b for a, b in zip(lines, lines_equiv))

        # Test restrict-lexicon
        out_restrict_path = os.path.join(work_dir, "out-restrict.txt")
        if restrict_lexicon:
            # fast_align lex table
            ttable_path = os.path.join(work_dir, "ttable")
            generate_fast_align_lex(ttable_path)
            # Top-K lexicon
            lexicon_path = os.path.join(work_dir, "lexicon")
            params = "{} {}".format(
                sockeye.lexicon.__file__,
                _LEXICON_CREATE_PARAMS_COMMON.format(input=ttable_path,
                                                     model=model_path,
                                                     topk=20,
                                                     lexicon=lexicon_path,
                                                     quiet=quiet_arg))
            with patch.object(sys, "argv", params.split()):
                sockeye.lexicon.main()
            # Translate corpus with restrict-lexicon
            params = "{} {} {} {}".format(
                sockeye.translate.__file__,
                _TRANSLATE_PARAMS_COMMON.format(model=model_path,
                                                input=test_source_path,
                                                output=out_restrict_path,
                                                quiet=quiet_arg),
                translate_params,
                _TRANSLATE_PARAMS_RESTRICT.format(lexicon=lexicon_path,
                                                  topk=1))

            if test_source_factor_paths is not None:
                params += _TRANSLATE_WITH_FACTORS_COMMON.format(
                    input_factors=" ".join(test_source_factor_paths))

            with patch.object(sys, "argv", params.split()):
                sockeye.translate.main()

        # test averaging
        points = sockeye.average.find_checkpoints(model_path=model_path,
                                                  size=1,
                                                  strategy='best',
                                                  metric=C.PERPLEXITY)
        assert len(points) > 0
        averaged_params = sockeye.average.average(points)
        assert averaged_params

        # get best validation perplexity
        metrics = sockeye.utils.read_metrics_file(
            path=os.path.join(model_path, C.METRICS_NAME))
        perplexity = min(m[C.PERPLEXITY + '-val'] for m in metrics)

        with open(out_path, "r") as out:
            hypotheses = out.readlines()
        with open(test_target_path, "r") as ref:
            references = ref.readlines()
        assert len(hypotheses) == len(references)

        # compute metrics
        bleu = raw_corpus_bleu(hypotheses=hypotheses,
                               references=references,
                               offset=0.01)
        chrf = raw_corpus_chrf(hypotheses=hypotheses, references=references)

        bleu_restrict = None
        if restrict_lexicon:
            # score the restricted-lexicon decode against the same references
            with open(out_restrict_path, "r") as out:
                hypotheses_restrict = out.readlines()
            bleu_restrict = raw_corpus_bleu(hypotheses=hypotheses_restrict,
                                            references=references,
                                            offset=0.01)

        # Run BLEU cli
        eval_params = "{} {} ".format(
            sockeye.evaluate.__file__,
            _EVAL_PARAMS_COMMON.format(hypotheses=out_path,
                                       references=test_target_path,
                                       metrics="bleu chrf",
                                       quiet=quiet_arg),
        )
        with patch.object(sys, "argv", eval_params.split()):
            sockeye.evaluate.main()

        return perplexity, bleu, bleu_restrict, chrf
Exemple #57
0
                    help='directory of saved models')
parser.add_argument('--resume-from',
                    type=str,
                    help='resume training from the model')
parser.add_argument('--save-plot-dir',
                    type=str,
                    default='.',
                    help='the path to save the history plot')
opt = parser.parse_args()

batch_size = opt.batch_size
classes = 10

num_gpus = opt.num_gpus
batch_size *= max(1, num_gpus)
context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
num_workers = opt.num_workers

lr_decay = opt.lr_decay
lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')] + [np.inf]

model_name = opt.model
if model_name.startswith('cifar_wideresnet'):
    kwargs = {'classes': classes, 'drop_rate': opt.drop_rate}
else:
    kwargs = {'classes': classes}
net = get_model(model_name, **kwargs)
if opt.resume_from:
    net.load_params(opt.resume_from, ctx=context)
optimizer = 'nag'
Exemple #58
0
def compare(reqObj, para, root, img_to_compare, step, image_64_decode,
            actual_img_id):
    _, model_args, model_auxs = para
    ctx = mx.cpu(0)
    symbol = lightened_cnn_b_feature()
    sub_folders = os.listdir(root)
    # print("root",root)
    # print("sub_folders",sub_folders)
    is_match_found = False
    if_class_found = False
    if len(sub_folders) > 0:
        for folder in sub_folders:  # loop through all the files and folders
            if os.path.isdir(
                    os.path.join(root, folder)
            ):  # check whether the current object is a folder or not
                sub_folder = os.path.join(root, folder)
                # print("subfolder", sub_folder)
                classId = folder
                # print("folder",folder)

                for img in os.listdir(sub_folder):
                    imgpath = os.path.join(sub_folder, img)
                    pathB = imgpath
                    model_args['data'] = mx.nd.array(
                        read2img(root, img_to_compare, pathB, 128, ctx), ctx)
                    exector = symbol.bind(ctx,
                                          model_args,
                                          args_grad=None,
                                          grad_req="null",
                                          aux_states=model_auxs)
                    exector.forward(is_train=False)
                    exector.outputs[0].wait_to_read()
                    output = exector.outputs[0].asnumpy()
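                    # cosine similarity between the two face embeddings:
                    # values near 1.0 indicate the same identity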
                    dis = np.dot(output[0], output[1]) / np.linalg.norm(
                        output[0]) / np.linalg.norm(output[1])
                    # print("--------Score------", dis)

                    if dis > 0.60:
                        if step == 2:
                            # s3url = uploadImageToS3(image_64_decode,actual_img_id)
                            stoteMainImageOnDisk(
                                os.environ.get("MAIN_IMAGES_STORE_PATH"),
                                folder, image_64_decode)
                            col.generateCollage(folderPath=os.environ.get(
                                "MAIN_IMAGES_STORE_PATH") + '/' + classId,
                                                width=800,
                                                height=250,
                                                shuffle=True,
                                                classid=classId)
                            # classId = insertInImageByName(reqObj,folder,s3url)
                        is_match_found = True
                        if_class_found = True
                        # print("matched Class",classId)
                        break
                if is_match_found:
                    break
        if not is_match_found and step == 1:
            classId = storeImageOnDisk(root, img_to_compare, step)
            if_class_found = True

    elif step == 1:
        classId = storeImageOnDisk(root, img_to_compare, step)
        if_class_found = True
    if if_class_found:
        # print("step",step)
        resp = {}
        resp['status'] = 'success'
        if step == 1:
            resp['classId'] = classId
            # b64 = base64.b64encode(img_to_compare)
            # b64decodestring = base64.decodestring(b64)
            # q = np.frombuffer(b64decodestring, dtype=np.float64)
            # resp['thumbnail'] = q

        if step == 2:
            resp['classId'] = classId
            # resp['image_url'] = s3url
        if step == 3:
            resp['classId'] = classId
            resp['folderPath'] = os.environ.get(
                "MAIN_IMAGES_STORE_PATH") + '/' + classId
    else:
        resp = {}
        resp['status'] = 'error'
        resp['message'] = "No Class Found"
    return resp
    parser.add_argument(
        '--pretrained',
        type=str,
        default='True',
        help=
        'Load weights from previously saved parameters. You can specify parameter file name.'
    )
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()
    # context list
    ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()]
    ctx = [mx.cpu()] if not ctx else ctx

    # grab some image if not specified
    if not args.images.strip():
        gcv.utils.download(
            'https://github.com/dmlc/web-data/blob/master/' +
            'gluoncv/detection/biking.jpg?raw=true', 'biking.jpg')
        image_list = ['biking.jpg']
    else:
        image_list = [x.strip() for x in args.images.split(',') if x.strip()]

    if args.pretrained.lower() in ['true', '1', 'yes', 't']:
        net = gcv.model_zoo.get_model(args.network, pretrained=True)
    else:
        net = gcv.model_zoo.get_model(args.network,
                                      pretrained=False,
Exemple #60
0
def main():
    opt = parse_args()

    filehandler = logging.FileHandler(opt.logging_file)
    streamhandler = logging.StreamHandler()

    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)

    logger.info(opt)

    batch_size = opt.batch_size
    classes = 1000
    num_training_samples = 1281167

    num_gpus = opt.num_gpus
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers

    # epoch_start_cs: before this epoch, all channels are used; from this epoch on, channel selection is applied.
    if opt.epoch_start_cs != -1:
        opt.use_all_channels = True

    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(
            range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
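    # decay milestones are shifted to be relative to the end of warmup,
    # since the post-warmup scheduler below counts its epochs from zero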
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]
    num_batches = num_training_samples // batch_size

    lr_scheduler = LRSequential([
        LRScheduler('linear',
                    base_lr=0,
                    target_lr=opt.lr,
                    nepochs=opt.warmup_epochs,
                    iters_per_epoch=num_batches),
        LRScheduler(opt.lr_mode,
                    base_lr=opt.lr,
                    target_lr=0,
                    nepochs=opt.num_epochs - opt.warmup_epochs,
                    iters_per_epoch=num_batches,
                    step_epoch=lr_decay_epoch,
                    step_factor=lr_decay,
                    power=2)
    ])

    model_name = opt.model

    kwargs = {
        'ctx': context,
        'pretrained': opt.use_pretrained,
        'classes': classes
    }
    if opt.use_gn:
        from gluoncv.nn import GroupNorm
        kwargs['norm_layer'] = GroupNorm
    if model_name.startswith('vgg'):
        kwargs['batch_norm'] = opt.batch_norm
    elif model_name.startswith('resnext'):
        kwargs['use_se'] = opt.use_se

    if opt.last_gamma:
        kwargs['last_gamma'] = True

    optimizer = 'nag'
    optimizer_params = {
        'wd': opt.wd,
        'momentum': opt.momentum,
        'lr_scheduler': lr_scheduler
    }
    if opt.dtype != 'float32':
        optimizer_params['multi_precision'] = True

    if model_name == 'ShuffleNas_fixArch':
        architecture = [
            0, 0, 3, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 0, 2, 1, 3, 2
        ]
        scale_ids = [
            6, 5, 3, 5, 2, 6, 3, 4, 2, 5, 7, 5, 4, 6, 7, 4, 4, 5, 4, 3
        ]
        net = get_shufflenas_oneshot(
            architecture=architecture,
            n_class=classes,
            scale_ids=scale_ids,
            use_se=opt.use_se,
            last_conv_after_pooling=opt.last_conv_after_pooling)
    elif model_name == 'ShuffleNas':
        net = get_shufflenas_oneshot(
            n_class=classes,
            use_all_blocks=opt.use_all_blocks,
            use_se=opt.use_se,
            last_conv_after_pooling=opt.last_conv_after_pooling)
    else:
        net = get_model(model_name, **kwargs)

    net.cast(opt.dtype)
    if opt.resume_params != '':
        net.load_parameters(opt.resume_params, ctx=context)

    # teacher model for distillation training
    if opt.teacher is not None and opt.hard_weight < 1.0:
        teacher_name = opt.teacher
        teacher = get_model(teacher_name,
                            pretrained=True,
                            classes=classes,
                            ctx=context)
        teacher.cast(opt.dtype)
        distillation = True
    else:
        distillation = False

    # Two functions for reading data from record file or raw images
    def get_data_rec(rec_train, rec_train_idx, rec_val, rec_val_idx,
                     batch_size, num_workers):
        rec_train = os.path.expanduser(rec_train)
        rec_train_idx = os.path.expanduser(rec_train_idx)
        rec_val = os.path.expanduser(rec_val)
        rec_val_idx = os.path.expanduser(rec_val_idx)
        jitter_param = 0.4
        lighting_param = 0.1
        input_size = opt.input_size
        crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
        resize = int(math.ceil(input_size / crop_ratio))
        mean_rgb = [123.68, 116.779, 103.939]
        std_rgb = [58.393, 57.12, 57.375]

        def batch_fn(batch, ctx):
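            # split the batch along axis 0 and load one shard onto each device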
            data = gluon.utils.split_and_load(batch.data[0],
                                              ctx_list=ctx,
                                              batch_axis=0)
            label = gluon.utils.split_and_load(batch.label[0],
                                               ctx_list=ctx,
                                               batch_axis=0)
            return data, label

        train_data = mx.io.ImageRecordIter(
            path_imgrec=rec_train,
            path_imgidx=rec_train_idx,
            preprocess_threads=num_workers,
            shuffle=True,
            batch_size=batch_size,
            data_shape=(3, input_size, input_size),
            mean_r=mean_rgb[0],
            mean_g=mean_rgb[1],
            mean_b=mean_rgb[2],
            std_r=std_rgb[0],
            std_g=std_rgb[1],
            std_b=std_rgb[2],
            rand_mirror=True,
            random_resized_crop=True,
            max_aspect_ratio=4. / 3.,
            min_aspect_ratio=3. / 4.,
            max_random_area=1,
            min_random_area=0.08,
            brightness=jitter_param,
            saturation=jitter_param,
            contrast=jitter_param,
            pca_noise=lighting_param,
        )
        val_data = mx.io.ImageRecordIter(
            path_imgrec=rec_val,
            path_imgidx=rec_val_idx,
            preprocess_threads=num_workers,
            shuffle=False,
            batch_size=batch_size,
            resize=resize,
            data_shape=(3, input_size, input_size),
            mean_r=mean_rgb[0],
            mean_g=mean_rgb[1],
            mean_b=mean_rgb[2],
            std_r=std_rgb[0],
            std_g=std_rgb[1],
            std_b=std_rgb[2],
        )
        return train_data, val_data, batch_fn

    def get_data_loader(data_dir, batch_size, num_workers):
        normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                         [0.229, 0.224, 0.225])
        jitter_param = 0.4
        lighting_param = 0.1
        input_size = opt.input_size
        crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
        resize = int(math.ceil(input_size / crop_ratio))

        def batch_fn(batch, ctx):
            data = gluon.utils.split_and_load(batch[0],
                                              ctx_list=ctx,
                                              batch_axis=0)
            label = gluon.utils.split_and_load(batch[1],
                                               ctx_list=ctx,
                                               batch_axis=0)
            return data, label

        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=jitter_param,
                                         contrast=jitter_param,
                                         saturation=jitter_param),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(), normalize
        ])
        transform_test = transforms.Compose([
            transforms.Resize(resize, keep_ratio=True),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(), normalize
        ])

        train_data = gluon.data.DataLoader(imagenet.classification.ImageNet(
            data_dir, train=True).transform_first(transform_train),
                                           batch_size=batch_size,
                                           shuffle=True,
                                           last_batch='discard',
                                           num_workers=num_workers)
        val_data = gluon.data.DataLoader(imagenet.classification.ImageNet(
            data_dir, train=False).transform_first(transform_test),
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=num_workers)

        return train_data, val_data, batch_fn

    if opt.use_rec:
        train_data, val_data, batch_fn = get_data_rec(opt.rec_train,
                                                      opt.rec_train_idx,
                                                      opt.rec_val,
                                                      opt.rec_val_idx,
                                                      batch_size, num_workers)
    else:
        train_data, val_data, batch_fn = get_data_loader(
            opt.data_dir, batch_size, num_workers)

    if opt.mixup:
        train_metric = mx.metric.RMSE()
    else:
        train_metric = mx.metric.Accuracy()
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    save_frequency = opt.save_frequency
    if opt.save_dir and save_frequency:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_frequency = 0

    def mixup_transform(label, classes, lam=1, eta=0.0):
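        # mixup: apply to the one-hot labels the same convex combination
        # (lam * y + (1 - lam) * y_reversed) that was applied to the inputs,
        # with optional label smoothing controlled by eta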
        if isinstance(label, nd.NDArray):
            label = [label]
        res = []
        for l in label:
            y1 = l.one_hot(classes,
                           on_value=1 - eta + eta / classes,
                           off_value=eta / classes)
            y2 = l[::-1].one_hot(classes,
                                 on_value=1 - eta + eta / classes,
                                 off_value=eta / classes)
            res.append(lam * y1 + (1 - lam) * y2)
        return res

    def smooth(label, classes, eta=0.1):
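        # label smoothing: put 1 - eta + eta/K mass on the true class and
        # eta/K on every other class (K = classes)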
        if isinstance(label, nd.NDArray):
            label = [label]
        smoothed = []
        for l in label:
            res = l.one_hot(classes,
                            on_value=1 - eta + eta / classes,
                            off_value=eta / classes)
            smoothed.append(res)
        return smoothed

    def test(ctx, val_data, epoch):
        if opt.use_rec:
            val_data.reset()
        acc_top1.reset()
        acc_top5.reset()
        for i, batch in enumerate(val_data):
            data, label = batch_fn(batch, ctx)
            if model_name == 'ShuffleNas':
                # For evaluating validation accuracy, randomly select blocks and channels.
                block_choices = net.random_block_choices(
                    select_predefined_block=False, dtype=opt.dtype)
                if opt.cs_warm_up:
                    full_channel_mask, _ = net.random_channel_mask(
                        select_all_channels=opt.use_all_channels,
                        epoch_after_cs=epoch - opt.epoch_start_cs,
                        dtype=opt.dtype)
                else:
                    full_channel_mask, _ = net.random_channel_mask(
                        select_all_channels=opt.use_all_channels,
                        dtype=opt.dtype)
                outputs = [
                    net(X.astype(opt.dtype, copy=False), block_choices,
                        full_channel_mask) for X in data
                ]
            else:
                outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        return 1 - top1, 1 - top5

    def train(ctx):
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        if opt.resume_params == '':
            if 'ShuffleNas' in model_name:
                net._initialize(ctx=ctx)
            else:
                net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

        if opt.no_wd:
            for k, v in net.collect_params('.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        trainer = gluon.Trainer(net.collect_params(), optimizer,
                                optimizer_params)
        if opt.resume_states != '':
            trainer.load_states(opt.resume_states)

        if opt.label_smoothing or opt.mixup:
            sparse_label_loss = False
        else:
            sparse_label_loss = True
        if distillation:
            L = gcv.loss.DistillationSoftmaxCrossEntropyLoss(
                temperature=opt.temperature,
                hard_weight=opt.hard_weight,
                sparse_label=sparse_label_loss)
        else:
            L = gluon.loss.SoftmaxCrossEntropyLoss(
                sparse_label=sparse_label_loss)

        best_val_score = 1

        for epoch in range(opt.resume_epoch, opt.num_epochs):
            if epoch == opt.epoch_start_cs:
                opt.use_all_channels = False
            tic = time.time()
            if opt.use_rec:
                train_data.reset()
            train_metric.reset()
            btic = time.time()

            for i, batch in enumerate(train_data):
                data, label = batch_fn(batch, ctx)

                if opt.mixup:
                    lam = np.random.beta(opt.mixup_alpha, opt.mixup_alpha)
                    if epoch >= opt.num_epochs - opt.mixup_off_epoch:
                        lam = 1
                    data = [lam * X + (1 - lam) * X[::-1] for X in data]

                    if opt.label_smoothing:
                        eta = 0.1
                    else:
                        eta = 0.0
                    label = mixup_transform(label, classes, lam, eta)

                elif opt.label_smoothing:
                    hard_label = label
                    label = smooth(label, classes)

                if distillation:
                    teacher_prob = [nd.softmax(teacher(X.astype(opt.dtype, copy=False)) / opt.temperature) \
                                    for X in data]

                with ag.record():
                    if model_name == 'ShuffleNas':
                        block_choices = net.random_block_choices(
                            select_predefined_block=False, dtype=opt.dtype)
                        if opt.cs_warm_up:
                            full_channel_mask, _ = net.random_channel_mask(
                                select_all_channels=opt.use_all_channels,
                                epoch_after_cs=epoch - opt.epoch_start_cs,
                                dtype=opt.dtype)
                        else:
                            full_channel_mask, _ = net.random_channel_mask(
                                select_all_channels=opt.use_all_channels,
                                dtype=opt.dtype)
                        outputs = [
                            net(X.astype(opt.dtype, copy=False), block_choices,
                                full_channel_mask) for X in data
                        ]
                    else:
                        outputs = [
                            net(X.astype(opt.dtype, copy=False)) for X in data
                        ]
                    if distillation:
                        loss = [
                            L(yhat.astype('float32', copy=False),
                              y.astype('float32', copy=False),
                              p.astype('float32', copy=False))
                            for yhat, y, p in zip(outputs, label, teacher_prob)
                        ]
                    else:
                        loss = [
                            L(yhat, y.astype(opt.dtype, copy=False))
                            for yhat, y in zip(outputs, label)
                        ]
                for l in loss:
                    l.backward()
                trainer.step(batch_size, ignore_stale_grad=True)

                if opt.mixup:
                    output_softmax = [nd.SoftmaxActivation(out.astype('float32', copy=False)) \
                                    for out in outputs]
                    train_metric.update(label, output_softmax)
                else:
                    if opt.label_smoothing:
                        train_metric.update(hard_label, outputs)
                    else:
                        train_metric.update(label, outputs)

                if opt.log_interval and not (i + 1) % opt.log_interval:
                    train_metric_name, train_metric_score = train_metric.get()
                    logger.info(
                        'Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f\tlr=%f'
                        % (epoch, i, batch_size * opt.log_interval /
                           (time.time() - btic), train_metric_name,
                           train_metric_score, trainer.learning_rate))
                    btic = time.time()

            train_metric_name, train_metric_score = train_metric.get()
            throughput = int(batch_size * i / (time.time() - tic))

            err_top1_val, err_top5_val = test(ctx, val_data, epoch)

            logger.info('[Epoch %d] training: %s=%f' %
                        (epoch, train_metric_name, train_metric_score))
            logger.info('[Epoch %d] speed: %d samples/sec\ttime cost: %f' %
                        (epoch, throughput, time.time() - tic))
            logger.info('[Epoch %d] validation: err-top1=%f err-top5=%f' %
                        (epoch, err_top1_val, err_top5_val))

            if err_top1_val < best_val_score:
                best_val_score = err_top1_val
                net.save_parameters(
                    '%s/%.4f-imagenet-%s-%d-best.params' %
                    (save_dir, best_val_score, model_name, epoch))
                trainer.save_states(
                    '%s/%.4f-imagenet-%s-%d-best.states' %
                    (save_dir, best_val_score, model_name, epoch))

            if save_frequency and save_dir and (epoch +
                                                1) % save_frequency == 0:
                net.save_parameters('%s/imagenet-%s-%d.params' %
                                    (save_dir, model_name, epoch))
                trainer.save_states('%s/imagenet-%s-%d.states' %
                                    (save_dir, model_name, epoch))

        if save_frequency and save_dir:
            net.save_parameters('%s/imagenet-%s-%d.params' %
                                (save_dir, model_name, opt.num_epochs - 1))
            trainer.save_states('%s/imagenet-%s-%d.states' %
                                (save_dir, model_name, opt.num_epochs - 1))

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
        if distillation:
            teacher.hybridize(static_alloc=True, static_shape=True)
    train(context)