def __init__(self, ntokens, rescale_loss, bptt, emsize, nhid, nlayers,
             dropout, num_proj, batch_size, k):
    out = rnn(bptt, ntokens, emsize, nhid, nlayers, dropout,
              num_proj, batch_size)
    rnn_out, self.last_states, self.lstm_args, self.state_names = out
    # decoder weight and bias
    decoder_w = S.var("decoder_weight", stype='row_sparse')
    decoder_b = S.var("decoder_bias", shape=(ntokens, 1), stype='row_sparse')

    # sampled softmax for training
    sample = S.var('sample', shape=(k,))
    prob_sample = S.var("prob_sample", shape=(k,))
    prob_target = S.var("prob_target")
    self.sample_names = ['sample', 'prob_sample', 'prob_target']
    logits, new_targets = sampled_softmax(ntokens, k, num_proj,
                                          rnn_out, decoder_w, decoder_b,
                                          [sample, prob_sample, prob_target])
    self.train_loss = cross_entropy_loss(logits, new_targets,
                                         rescale_loss=rescale_loss)

    # full softmax for testing
    eval_logits = S.FullyConnected(data=rnn_out, weight=decoder_w,
                                   num_hidden=ntokens, name='decode_fc',
                                   bias=decoder_b)
    label = S.var('label')
    label = S.reshape(label, shape=(-1,))
    self.eval_loss = cross_entropy_loss(eval_logits, label)
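# Usage sketch for the constructor above (hedged: the enclosing class name
# `Model` and all hyper-parameter values below are illustrative assumptions,
# not taken from the original script). Both losses are plain symbols, so
# their inputs can be inspected before anything is bound:
def _inspect_model():
    model = Model(ntokens=10000, rescale_loss=1.0, bptt=20, emsize=200,
                  nhid=200, nlayers=1, dropout=0.5, num_proj=100,
                  batch_size=32, k=512)
    # expect data, encoder/decoder weights, the lstmp parameters, the three
    # sampling variables, and the loss mask among the arguments
    print(model.train_loss.list_arguments())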
def rnn(bptt, vocab_size, num_embed, nhid, num_layers, dropout, num_proj,
        batch_size):
    """ word embedding + LSTM Projected """
    state_names = []
    data = S.var('data')
    weight = S.var("encoder_weight", stype='row_sparse')
    embed = S.sparse.Embedding(data=data, weight=weight, input_dim=vocab_size,
                               output_dim=num_embed, name='embed',
                               sparse_grad=True)
    states = []
    outputs = S.Dropout(embed, p=dropout)
    for i in range(num_layers):
        prefix = 'lstmp%d_' % i
        init_h = S.var(prefix + 'init_h', shape=(batch_size, num_proj),
                       init=mx.init.Zero())
        init_c = S.var(prefix + 'init_c', shape=(batch_size, nhid),
                       init=mx.init.Zero())
        state_names += [prefix + 'init_h', prefix + 'init_c']
        lstmp = mx.gluon.contrib.rnn.LSTMPCell(nhid, num_proj)
        outputs, next_states = lstmp.unroll(bptt, outputs,
                                            begin_state=[init_h, init_c],
                                            layout='NTC', merge_outputs=True)
        outputs = S.Dropout(outputs, p=dropout)
        states += [S.stop_gradient(s) for s in next_states]
    outputs = S.reshape(outputs, shape=(-1, num_proj))
    trainable_lstm_args = []
    for arg in outputs.list_arguments():
        if 'lstmp' in arg and 'init' not in arg:
            trainable_lstm_args.append(arg)
    return outputs, states, trainable_lstm_args, state_names
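# Quick shape check for rnn() (a sketch; the sizes are illustrative). With
# layout 'NTC' the data batch is (batch_size, bptt); the final reshape
# collapses the output to (batch_size * bptt, num_proj):
def _check_rnn_shapes():
    out, _, _, _ = rnn(bptt=20, vocab_size=10000, num_embed=200, nhid=200,
                       num_layers=1, dropout=0.5, num_proj=100, batch_size=32)
    _, out_shapes, _ = out.infer_shape_partial(data=(32, 20))
    print(out_shapes)  # [(640, 100)]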
def sampled_softmax(num_classes, num_samples, in_dim, inputs, weight, bias,
                    sampled_values, remove_accidental_hits=True):
    """ Sampled softmax via importance sampling.
    This under-estimates the full softmax and is only used for training. """
    # inputs = (n, in_dim)
    sample, prob_sample, prob_target = sampled_values

    # (num_samples, )
    sample = S.var('sample', shape=(num_samples,), dtype='float32')
    # (n, )
    label = S.var('label')
    label = S.reshape(label, shape=(-1,), name="label_reshape")
    # (num_samples+n, )
    sample_label = S.concat(sample, label, dim=0)
    # lookup weights and biases
    # (num_samples+n, dim)
    sample_target_w = S.sparse.Embedding(data=sample_label, weight=weight,
                                         input_dim=num_classes,
                                         output_dim=in_dim, sparse_grad=True)
    # (num_samples+n, 1)
    sample_target_b = S.sparse.Embedding(data=sample_label, weight=bias,
                                         input_dim=num_classes,
                                         output_dim=1, sparse_grad=True)
    # (num_samples, dim)
    sample_w = S.slice(sample_target_w, begin=(0, 0), end=(num_samples, None))
    target_w = S.slice(sample_target_w, begin=(num_samples, 0), end=(None, None))
    sample_b = S.slice(sample_target_b, begin=(0, 0), end=(num_samples, None))
    target_b = S.slice(sample_target_b, begin=(num_samples, 0), end=(None, None))

    # target
    # (n, 1)
    true_pred = S.sum(target_w * inputs, axis=1, keepdims=True) + target_b
    # samples
    # (n, num_samples)
    sample_b = S.reshape(sample_b, (-1,))
    sample_pred = S.FullyConnected(inputs, weight=sample_w, bias=sample_b,
                                   num_hidden=num_samples)

    # remove accidental hits
    if remove_accidental_hits:
        label_v = S.reshape(label, (-1, 1))
        sample_v = S.reshape(sample, (1, -1))
        neg = S.broadcast_equal(label_v, sample_v) * -1e37
        sample_pred = sample_pred + neg

    prob_sample = S.reshape(prob_sample, shape=(1, num_samples))
    p_target = true_pred - S.log(prob_target)
    p_sample = S.broadcast_sub(sample_pred, S.log(prob_sample))

    # return logits and new_labels
    # (n, 1+num_samples)
    logits = S.concat(p_target, p_sample, dim=1)
    new_targets = S.zeros_like(label)
    return logits, new_targets
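# Where the `sampled_values` triple might come from: a minimal log-uniform
# (Zipfian) candidate sampler in NumPy, mirroring the sampler commonly
# paired with sampled softmax. This is a sketch under the assumption that
# class ids are sorted by decreasing frequency; the original script's
# sampler may differ.
import numpy as np

def log_uniform_sample(num_classes, num_samples, labels):
    log_range = np.log(num_classes + 1.0)
    # inverse-CDF draw: P(c) = (log(c + 2) - log(c + 1)) / log(num_classes + 1)
    u = np.random.uniform(size=num_samples)
    sample = (np.exp(u * log_range) - 1.0).astype('int64')

    def expected_count(classes):
        p = (np.log(classes + 2.0) - np.log(classes + 1.0)) / log_range
        return p * num_samples  # expectation under sampling with replacement

    # matches the (sample, prob_sample, prob_target) triple unpacked above
    return sample, expected_count(sample), expected_count(np.asarray(labels))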
def cross_entropy_loss(inputs, labels, rescale_loss=1):
    """ cross entropy loss with a mask """
    criterion = mx.gluon.loss.SoftmaxCrossEntropyLoss(weight=rescale_loss)
    loss = criterion(inputs, labels)
    mask = S.var('mask')
    loss = loss * S.reshape(mask, shape=(-1,))
    return S.make_loss(loss.mean())
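# Smoke test for the masked loss (a sketch; the variable names 'logits' and
# 'labels' are arbitrary). With an all-ones mask this reduces to plain
# softmax cross entropy averaged over the batch:
def _loss_smoke_test():
    loss_sym = cross_entropy_loss(S.var('logits'), S.var('labels'))
    ex = loss_sym.bind(mx.cpu(), {'logits': mx.nd.random.uniform(shape=(4, 10)),
                                  'labels': mx.nd.array([1, 2, 3, 4]),
                                  'mask': mx.nd.ones((4,))})
    print(ex.forward()[0].asscalar())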
def get_symbol():
    """Residual conv stack: each stride-2 conv halves the spatial size,
    and `out1` carries the shortcut branch for the additive skips."""
    data = sym.var('data')
    out1 = get_conv(data, 'conv1-1', 64, (2, 2))
    out = get_conv(out1, 'conv1-2', 64)
    out = get_conv(out, 'conv1-3', 64) + out1
    out1 = get_conv(out, 'conv2-1', 128, (2, 2))
    out = get_conv(out1, 'conv2-2', 128)
    out1 = get_conv(out, 'conv2-3', 128) + out1
    out = get_conv(out1, 'conv2-4', 128)
    out1 = get_conv(out, 'conv2-5', 128) + out1
    out1 = get_conv(out1, 'conv3-1', 256, (2, 2))
    out = get_conv(out1, 'conv3-2', 256)
    out1 = get_conv(out, 'conv3-3', 256) + out1
    out = get_conv(out1, 'conv3-4', 256)
    out1 = get_conv(out, 'conv3-5', 256) + out1
    out = get_conv(out1, 'conv3-6', 256)
    out1 = get_conv(out, 'conv3-7', 256) + out1
    out = get_conv(out1, 'conv3-8', 256)
    out1 = get_conv(out, 'conv3-9', 256) + out1
    out1 = get_conv(out1, 'conv4-1', 512, (2, 2))
    out = get_conv(out1, 'conv4-2', 512)
    out = get_conv(out, 'conv4-3', 512) + out1
    return sym.FullyConnected(out, name='fc5', num_hidden=512)
def get_conv(inpt, name, num_filter, stride=(1, 1), act_type='prelu'):
    """3x3 convolution followed by PReLU (LeakyReLU with a learned gamma)."""
    act_name = name.replace('conv', act_type)
    return sym.LeakyReLU(sym.Convolution(inpt, name=name, num_filter=num_filter,
                                         stride=stride, kernel=(3, 3),
                                         pad=(1, 1)),
                         name=act_name, act_type=act_type,
                         gamma=sym.var(act_name))
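# Sanity check for the stack above (a sketch; assumes `sym` is mx.symbol and
# an illustrative 32x32 RGB input). Each stride-2 conv halves the spatial
# size (32 -> 16 -> 8 -> 4 -> 2) and fc5 yields a 512-d embedding:
def _check_symbol_shape():
    net = get_symbol()
    _, out_shapes, _ = net.infer_shape(data=(1, 3, 32, 32))
    print(out_shapes)  # [(1, 512)]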
"alexnet0_conv2_fwd_output", "alexnet0_conv3_fwd_output", "alexnet0_conv4_fwd_output", "alexnet0_dense0_fwd_output", "alexnet0_dense1_fwd_output", "alexnet0_dense2_fwd_output", "alexnet0_dropout0_fwd_output", "alexnet0_dropout1_fwd_output", "alexnet0_flatten0_flatten0_output", "alexnet0_pool0_fwd_output", "alexnet0_pool1_fwd_output", "alexnet0_pool2_fwd_output" ]) # 预处理模型以及图片 net = get_model(params.model, pretrained=True) img = image.imread(params.input_pic) img = transform_eval(img, resize_short=224, crop_size=224) wxf.export(img.asnumpy(), 'input.wxf', target_format='wxf') # 列出可选的输出节点 nodes = net(symbol.var('flow')).get_internals().list_outputs() wxf.export(nodes, 'nodes.wxf', target_format='wxf') def debug_net(net): data = symbol.var('flow') internals = net(data).get_internals() hooks = [internals[i] for i in params.debug_nodes] new = gluon.SymbolBlock(hooks, data, params=net.collect_params()) return new debug = debug_net(net) ndarray = [i.asnumpy() for i in debug(img)] wxf.export(ndarray, 'debug.wxf', target_format='wxf')
                                  kernel=(1, 1), stride=(1, 1), no_bias=True)
    conv2_relu = mx.symbol.Activation(name='res2a_branch2a_relu' + suffix,
                                      data=conv2, act_type='relu')
    emb = mx.symbol.flatten(conv2_relu)
    return emb

im_1 = cv2.imread('/home/alex/image1.png')
im_tr_1 = transform(im_1)
im_tr_1 = mx.nd.array(im_tr_1, mx.cpu(0))

im_2 = cv2.imread('/home/alex/image1.png')
im_tr_2 = transform(im_2)
im_tr_2 = mx.nd.array(im_tr_2, mx.cpu(0))

d1 = mxs.var('data_a')
emb1 = embedder(d1, '_a')
infer_shape(emb1, data_a=(1, 3, 32, 32), data=None)
emb1_arguments = emb1.list_arguments()
emb1_arguments.pop(0)
emb1_auxiliary = emb1.list_auxiliary_states()

mod = mx.module.Module(emb1, ['data_a'], context=[mx.cpu(0)])
mod.bind([('data_a', (1, 3, 32, 32))])
mod.init_params()
arg_params, aux_params = mod.get_params()

d2 = mxs.var('data_b')
emb2 = embedder(d2, '_b')
infer_shape(emb2, data_b=(1, 3, 32, 32), data=None)
emb2_arguments = emb2.list_arguments()
def _infer_weight_shape(op_name, data_shape, kwargs):
    """Return the partially inferred shapes of all arguments (data plus the
    operator's auto-created weight/bias) for the given input shape."""
    op = getattr(symbol, op_name)
    sym = op(symbol.var('data', shape=data_shape), **kwargs)
    return sym.infer_shape_partial()[0]
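# Example call (a sketch; assumes `symbol` is mxnet.symbol, as the helper
# itself does). With no_bias=True the inferred argument shapes come back
# as [data, weight]:
shapes = _infer_weight_shape('Convolution', (1, 3, 224, 224),
                             dict(num_filter=64, kernel=(3, 3), no_bias=True))
print(shapes)  # [(1, 3, 224, 224), (64, 3, 3, 3)]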