Example #1
    def __init__(self, ntokens, rescale_loss, bptt, emsize,
                 nhid, nlayers, dropout, num_proj, batch_size, k):
        out = rnn(bptt, ntokens, emsize, nhid, nlayers,
                  dropout, num_proj, batch_size)
        rnn_out, self.last_states, self.lstm_args, self.state_names = out
        # decoder weight and bias
        decoder_w = S.var("decoder_weight", stype='row_sparse')
        decoder_b = S.var("decoder_bias", shape=(ntokens, 1), stype='row_sparse')

        # sampled softmax for training
        sample = S.var('sample', shape=(k,))
        prob_sample = S.var("prob_sample", shape=(k,))
        prob_target = S.var("prob_target")
        self.sample_names = ['sample', 'prob_sample', 'prob_target']
        logits, new_targets = sampled_softmax(ntokens, k, num_proj,
                                              rnn_out, decoder_w, decoder_b,
                                              [sample, prob_sample, prob_target])
        self.train_loss = cross_entropy_loss(logits, new_targets, rescale_loss=rescale_loss)

        # full softmax for testing
        eval_logits = S.FullyConnected(data=rnn_out, weight=decoder_w,
                                       num_hidden=ntokens, name='decode_fc', bias=decoder_b)
        label = S.var('label')
        label = S.reshape(label, shape=(-1,))
        self.eval_loss = cross_entropy_loss(eval_logits, label)
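The fragment leaves the training wiring implicit. Below is a minimal sketch of how the loss might be bound, assuming a hypothetical `Model` class wrapping this `__init__`; the hyper-parameter values and the data/label/state split are assumptions, not part of the example:

import mxnet as mx

# Hypothetical wrapper class and hyper-parameters, for illustration only.
model = Model(ntokens=10000, rescale_loss=256, bptt=20, emsize=512,
              nhid=2048, nlayers=1, dropout=0.1, num_proj=512,
              batch_size=32, k=8192)
# Feeding the RNN init states and the sampled-softmax inputs as module
# states rather than as data is an assumption about the training script.
module = mx.mod.Module(symbol=model.train_loss, context=mx.cpu(),
                       state_names=model.state_names + model.sample_names,
                       data_names=['data', 'mask'], label_names=['label'])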
Example #2
def rnn(bptt, vocab_size, num_embed, nhid, num_layers, dropout, num_proj, batch_size):
    """ word embedding + LSTM Projected """
    state_names = []
    data = S.var('data')
    weight = S.var("encoder_weight", stype='row_sparse')
    embed = S.sparse.Embedding(data=data, weight=weight, input_dim=vocab_size,
                               output_dim=num_embed, name='embed', sparse_grad=True)
    states = []
    outputs = S.Dropout(embed, p=dropout)
    for i in range(num_layers):
        prefix = 'lstmp%d_' % i
        init_h = S.var(prefix + 'init_h', shape=(batch_size, num_proj), init=mx.init.Zero())
        init_c = S.var(prefix + 'init_c', shape=(batch_size, nhid), init=mx.init.Zero())
        state_names += [prefix + 'init_h', prefix + 'init_c']
        lstmp = mx.gluon.contrib.rnn.LSTMPCell(nhid, num_proj)
        outputs, next_states = lstmp.unroll(bptt, outputs, begin_state=[init_h, init_c],
                                            layout='NTC', merge_outputs=True)
        outputs = S.Dropout(outputs, p=dropout)
        states += [S.stop_gradient(s) for s in next_states]
    outputs = S.reshape(outputs, shape=(-1, num_proj))

    trainable_lstm_args = []
    for arg in outputs.list_arguments():
        if 'lstmp' in arg and 'init' not in arg:
            trainable_lstm_args.append(arg)
    return outputs, states, trainable_lstm_args, state_names
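A quick shape check on the returned symbol; the hyper-parameter values below are placeholders chosen only for illustration:

import mxnet as mx
import mxnet.symbol as S

out, states, lstm_args, state_names = rnn(bptt=35, vocab_size=10000,
                                          num_embed=200, nhid=600,
                                          num_layers=2, dropout=0.2,
                                          num_proj=300, batch_size=32)
# the final reshape flattens batch and time: (batch_size * bptt, num_proj)
print(out.infer_shape(data=(32, 35))[1])  # [(1120, 300)]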
Example #3
def sampled_softmax(num_classes, num_samples, in_dim, inputs, weight, bias,
                    sampled_values, remove_accidental_hits=True):
        """ Sampled softmax via importance sampling.
            This under-estimates the full softmax and is only used for training.
        """
        # inputs = (n, in_dim)
        sample, prob_sample, prob_target = sampled_values

        # (num_samples, )
        sample = S.var('sample', shape=(num_samples,), dtype='float32')
        # (n, )
        label = S.var('label')
        label = S.reshape(label, shape=(-1,), name="label_reshape")
        # (num_samples+n, )
        sample_label = S.concat(sample, label, dim=0)
        # lookup weights and biases
        # (num_samples+n, dim)
        sample_target_w = S.sparse.Embedding(data=sample_label, weight=weight,
                                             input_dim=num_classes, output_dim=in_dim,
                                             sparse_grad=True)
        # (num_samples+n, 1)
        sample_target_b = S.sparse.Embedding(data=sample_label, weight=bias,
                                             input_dim=num_classes, output_dim=1,
                                             sparse_grad=True)
        # (num_samples, dim)
        sample_w = S.slice(sample_target_w, begin=(0, 0), end=(num_samples, None))
        target_w = S.slice(sample_target_w, begin=(num_samples, 0), end=(None, None))
        sample_b = S.slice(sample_target_b, begin=(0, 0), end=(num_samples, None))
        target_b = S.slice(sample_target_b, begin=(num_samples, 0), end=(None, None))

        # target
        # (n, 1)
        true_pred = S.sum(target_w * inputs, axis=1, keepdims=True) + target_b
        # samples
        # (n, num_samples)
        sample_b = S.reshape(sample_b, (-1,))
        sample_pred = S.FullyConnected(inputs, weight=sample_w, bias=sample_b,
                                       num_hidden=num_samples)

        # remove accidental hits
        if remove_accidental_hits:
            label_v = S.reshape(label, (-1, 1))
            sample_v = S.reshape(sample, (1, -1))
            neg = S.broadcast_equal(label_v, sample_v) * -1e37
            sample_pred = sample_pred + neg

        prob_sample = S.reshape(prob_sample, shape=(1, num_samples))
        p_target = true_pred - S.log(prob_target)
        p_sample = S.broadcast_sub(sample_pred, S.log(prob_sample))

        # return logits and new_labels
        # (n, 1+num_samples)
        logits = S.concat(p_target, p_sample, dim=1)
        new_targets = S.zeros_like(label)
        return logits, new_targets
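The function leaves it to the caller to draw `sampled_values`. One way to produce the triple (an assumption, not shown in the example) is MXNet's log-uniform candidate sampler:

import mxnet as mx

true_classes = mx.nd.array([1, 7, 42])  # target word ids for a batch
sample, exp_cnt_true, exp_cnt_sample = mx.nd.contrib.rand_zipfian(
    true_classes, num_sampled=8192, range_max=10000)
# `sample` feeds the 'sample' variable; the two expected counts play the
# role of prob_target and prob_sample in the S.log(...) corrections above.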
Example #4
def cross_entropy_loss(inputs, labels, rescale_loss=1):
    """ cross entropy loss with a mask """
    criterion = mx.gluon.loss.SoftmaxCrossEntropyLoss(weight=rescale_loss)
    loss = criterion(inputs, labels)
    mask = S.var('mask')
    loss = loss * S.reshape(mask, shape=(-1,))
    return S.make_loss(loss.mean())
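A small smoke test that binds the masked loss and evaluates it; the names and shapes here are made up for illustration:

import mxnet as mx
import mxnet.symbol as S

loss = cross_entropy_loss(S.var('logits'), S.var('labels'))
ex = loss.bind(mx.cpu(), {'logits': mx.nd.random.uniform(shape=(4, 10)),
                          'labels': mx.nd.array([1, 0, 3, 2]),
                          'mask': mx.nd.array([1, 1, 1, 0])})  # last position masked out
print(ex.forward()[0])  # mean masked cross entropy, a scalar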
Example #5
def get_symbol():
    data = sym.var('data')
    out1 = get_conv(data, 'conv1-1', 64, (2, 2))
    out = get_conv(out1, 'conv1-2', 64)
    out = get_conv(out, 'conv1-3', 64) + out1

    out1 = get_conv(out, 'conv2-1', 128, (2, 2))
    out = get_conv(out1, 'conv2-2', 128)
    out1 = get_conv(out, 'conv2-3', 128) + out1

    out = get_conv(out1, 'conv2-4', 128)
    out1 = get_conv(out, 'conv2-5', 128) + out1

    out1 = get_conv(out1, 'conv3-1', 256, (2, 2))
    out = get_conv(out1, 'conv3-2', 256)
    out1 = get_conv(out, 'conv3-3', 256) + out1

    out = get_conv(out1, 'conv3-4', 256)
    out1 = get_conv(out, 'conv3-5', 256) + out1

    out = get_conv(out1, 'conv3-6', 256)
    out1 = get_conv(out, 'conv3-7', 256) + out1

    out = get_conv(out1, 'conv3-8', 256)
    out1 = get_conv(out, 'conv3-9', 256) + out1

    out1 = get_conv(out1, 'conv4-1', 512, (2, 2))
    out = get_conv(out1, 'conv4-2', 512)
    out = get_conv(out, 'conv4-3', 512) + out1

    return sym.FullyConnected(out, name='fc5', num_hidden=512)
Example #6
def get_conv(inpt, name, num_filter, stride=(1, 1), act_type='prelu'):
    act_name = name.replace('conv', act_type)
    return sym.LeakyReLU(sym.Convolution(inpt,
                                         name=name,
                                         num_filter=num_filter,
                                         stride=stride,
                                         kernel=(3, 3),
                                         pad=(1, 1)),
                         name=act_name,
                         act_type=act_type,
                         gamma=sym.var(act_name))
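Combining the two previous examples, a shape walk-through for a hypothetical 32x32 RGB input: each stride-(2, 2) stage halves the spatial size (32 -> 16 -> 8 -> 4 -> 2) before fc5 flattens the result:

import mxnet.symbol as sym

net = get_symbol()
_, out_shapes, _ = net.infer_shape(data=(1, 3, 32, 32))
print(out_shapes)  # [(1, 512)]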
Example #7
def debug_net(net):
    data = symbol.var('flow')
    internals = net(data).get_internals()
    hooks = [internals[i] for i in params.debug_nodes]
    new = gluon.SymbolBlock(hooks, data, params=net.collect_params())
    return new
Example #8
        "alexnet0_conv2_fwd_output", "alexnet0_conv3_fwd_output",
        "alexnet0_conv4_fwd_output", "alexnet0_dense0_fwd_output",
        "alexnet0_dense1_fwd_output", "alexnet0_dense2_fwd_output",
        "alexnet0_dropout0_fwd_output", "alexnet0_dropout1_fwd_output",
        "alexnet0_flatten0_flatten0_output", "alexnet0_pool0_fwd_output",
        "alexnet0_pool1_fwd_output", "alexnet0_pool2_fwd_output"
    ])

# Load the pretrained model and preprocess the image
net = get_model(params.model, pretrained=True)
img = image.imread(params.input_pic)
img = transform_eval(img, resize_short=224, crop_size=224)
wxf.export(img.asnumpy(), 'input.wxf', target_format='wxf')

# List the selectable output nodes
nodes = net(symbol.var('flow')).get_internals().list_outputs()
wxf.export(nodes, 'nodes.wxf', target_format='wxf')


def debug_net(net):
    data = symbol.var('flow')
    internals = net(data).get_internals()
    hooks = [internals[i] for i in params.debug_nodes]
    new = gluon.SymbolBlock(hooks, data, params=net.collect_params())
    return new


debug = debug_net(net)
ndarray = [i.asnumpy() for i in debug(img)]
wxf.export(ndarray, 'debug.wxf', target_format='wxf')
Example #9
                                          kernel=(1, 1), stride=(1, 1), no_bias=True)
    conv2_relu = mx.symbol.Activation(name='res2a_branch2a_relu' + suffix, data=conv2, act_type='relu')
    emb = mx.symbol.flatten(conv2_relu)
    return emb


im_1 = cv2.imread('/home/alex/image1.png')
im_tr_1 = transform(im_1)
im_tr_1 = mx.nd.array(im_tr_1, mx.cpu(0))

im_2 = cv2.imread('/home/alex/image1.png')
im_tr_2 = transform(im_2)
im_tr_2 = mx.nd.array(im_tr_2, mx.cpu(0))


d1 = mxs.var('data_a')
emb1 = embedder(d1, '_a')
infer_shape(emb1, data_a=(1, 3, 32, 32), data=None)
emb1_arguments = emb1.list_arguments()
emb1_arguments.pop(0)
emb1_auxiliary = emb1.list_auxiliary_states()

mod = mx.module.Module(emb1, ['data_a'], context=[mx.cpu(0)])
mod.bind([('data_a', (1, 3, 32, 32))])
mod.init_params()
arg_params, aux_params = mod.get_params()

d2 = mxs.var('data_b')
emb2 = embedder(d2, '_b')
infer_shape(emb2, data_b=(1, 3, 32, 32), data=None)
emb2_arguments = emb2.list_arguments()
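A hedged continuation of the fragment: once the module is bound and initialised, an embedding comes out of a single forward pass (the input shape must match the bind call above; `is_train=False` is an assumption):

mod.forward(mx.io.DataBatch(data=[im_tr_1]), is_train=False)
vec = mod.get_outputs()[0].asnumpy()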
Example #10
def _infer_weight_shape(op_name, data_shape, kwargs):
    op = getattr(symbol, op_name)
    sym = op(symbol.var('data', shape=data_shape), **kwargs)
    return sym.infer_shape_partial()[0]
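For example (operator and shapes chosen only for illustration), partial inference fills in the weight a 3x3 Convolution would create:

from mxnet import symbol

shapes = _infer_weight_shape('Convolution', (1, 3, 224, 224),
                             {'num_filter': 64, 'kernel': (3, 3), 'no_bias': True})
print(shapes)  # [(1, 3, 224, 224), (64, 3, 3, 3)]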