def __call__(self, inputs, states):
    """Build the symbols for one time step of this activation-free RNN cell.

    Parameters
    ----------
    inputs : Symbol
        Fed to the input-to-hidden fully connected layer.
    states : sequence of Symbol
        Only ``states[0]`` is read; it feeds the hidden-to-hidden layer.

    Returns
    -------
    output : Symbol
        The new state passed through a fully connected layer with
        ``self._num_output`` units.
    states : list of Symbol
        One-element list holding the new (pre-read-out) state.
    """
    self._counter += 1
    step_prefix = '%st%d_' % (self._prefix, self._counter)

    input_proj = symbol.FullyConnected(data=inputs, weight=self._iW,
                                       bias=self._iB,
                                       num_hidden=self._num_hidden,
                                       name=step_prefix + 'i2h')
    hidden_proj = symbol.FullyConnected(data=states[0], weight=self._hW,
                                        bias=self._hB,
                                        num_hidden=self._num_hidden,
                                        name=step_prefix + 'h2h')

    # customized by JG: the call to self._get_activation that used to wrap
    # this sum is disabled, so the recurrent state stays a plain linear sum.
    next_state = input_proj + hidden_proj

    # Read-out layer that resizes the state to self._num_output units
    # (per the original note, this feeds the "motion network").
    read_out = symbol.FullyConnected(data=next_state, weight=self._oW,
                                     bias=self._oB,
                                     num_hidden=self._num_output)
    return read_out, [next_state]
def __call__(self, inputs, reset_gate, update_gate, states):  # pylint: disable=too-many-locals
    """Build one GRU-like step whose gates are supplied by the caller.

    Parameters
    ----------
    inputs : Symbol
        Fed to the input-to-hidden fully connected layer.
    reset_gate, update_gate
        Gate values used in the candidate and blending expressions. Both
        are replaced by the scalar ``0`` when ``self._use_memory`` is falsy.
    states : sequence of Symbol
        Only ``states[0]`` (the previous hidden state) is read.

    Returns
    -------
    output : Symbol
        The new hidden state.
    states : list of Symbol
        One-element list holding that same hidden state.
    """
    self._counter += 1
    step = self._counter
    step_prefix = '%st%d_' % (self._prefix, step)
    previous_h = states[0]

    input_proj = symbol.FullyConnected(data=inputs, weight=self._iW,
                                       bias=self._iB,
                                       num_hidden=self._num_hidden,
                                       name=step_prefix + "_i2h")
    hidden_proj = symbol.FullyConnected(data=previous_h, weight=self._hW,
                                        bias=self._hB,
                                        num_hidden=self._num_hidden,
                                        name=step_prefix + "_h2h")

    # Without memory both gates collapse to zero: the candidate ignores the
    # hidden projection and the previous state gets zero weight in the blend.
    if not self._use_memory:
        update_gate = 0
        reset_gate = 0

    gated_hidden = reset_gate * hidden_proj
    candidate = symbol.Activation(input_proj + gated_hidden, act_type="tanh",
                                  name=step_prefix + "_h_act")

    fresh_part = (1. - update_gate) * candidate
    carried_part = update_gate * previous_h
    new_h = symbol._internal._plus(fresh_part, carried_part,
                                   name=step_prefix + 'out')
    return new_h, [new_h]
def __call__(self, inputs, states):
    """Build one step of the customized LSTM cell with a linear read-out.

    Parameters
    ----------
    inputs : Symbol
        Fed to the input-to-hidden fully connected layer.
    states : sequence of Symbol
        ``states[0]`` is the previous hidden state and ``states[1]`` the
        previous cell state.

    Returns
    -------
    output : Symbol
        The new hidden state passed through a fully connected layer with
        ``self._num_output`` units.
    states : list of Symbol
        ``[next_h, next_c]``.
    """
    self._counter += 1
    step_prefix = '%st%d_' % (self._prefix, self._counter)

    input_proj = symbol.FullyConnected(data=inputs, weight=self._iW,
                                       bias=self._iB,
                                       num_hidden=self._num_hidden * 4,
                                       name=step_prefix + 'i2h')
    hidden_proj = symbol.FullyConnected(data=states[0], weight=self._hW,
                                        bias=self._hB,
                                        num_hidden=self._num_hidden * 4,
                                        name=step_prefix + 'h2h')

    # One fused pre-activation, split into four equal slices.
    fused = input_proj + hidden_proj
    parts = symbol.SliceChannel(fused, num_outputs=4,
                                name=step_prefix + "slice")

    input_gate = symbol.Activation(parts[0], act_type="sigmoid",
                                   name=step_prefix + 'i')
    forget_gate = symbol.Activation(parts[1], act_type="sigmoid",
                                    name=step_prefix + 'f')
    candidate = symbol.Activation(parts[2], act_type="tanh",
                                  name=step_prefix + 'c')
    output_gate = symbol.Activation(parts[3], act_type="sigmoid",
                                    name=step_prefix + 'o')

    kept_memory = forget_gate * states[1]
    new_memory = input_gate * candidate
    next_c = symbol._internal._plus(kept_memory, new_memory,
                                    name=step_prefix + 'state')

    # customized by JG: the tanh normally applied to the cell state before
    # output gating is disabled here; the gate multiplies next_c directly.
    next_h = output_gate * next_c

    # Linear read-out resizing the hidden state to self._num_output units
    # (per the original note, this feeds the "motion network").
    read_out = symbol.FullyConnected(data=next_h, weight=self._oW,
                                     bias=self._oB,
                                     num_hidden=self._num_output)
    return read_out, [next_h, next_c]
def getsymbol(num_classes=136):
    """Build a stacked-3x3-convolution regression network.

    The graph consists of five convolution groups (each closed by 2x2 max
    pooling), two 4096-unit fully connected layers with dropout, and a final
    fully connected layer of ``num_classes`` units trained with
    ``LinearRegressionOutput`` against the ``label`` variable.

    NOTE(review): the original comment called this "alexnet", but the layer
    layout (groups of 3x3 convolutions) looks VGG-style -- confirm.

    Parameters
    ----------
    num_classes : int
        Number of regression outputs produced by ``fc8``.

    Returns
    -------
    Symbol
        The ``loc_loss`` regression output symbol.
    """
    data = mxy.Variable(name="data")
    label = mxy.Variable(name="label")

    # group 1
    conv1_1 = mxy.Convolution(data=data, kernel=(3, 3), pad=(1, 1),
                              num_filter=64, name="conv1_1")
    relu1_1 = mxy.Activation(data=conv1_1, act_type="relu", name="relu1_1")
    pool1 = mxy.Pooling(data=relu1_1, pool_type="max", kernel=(2, 2),
                        stride=(2, 2), name="pool1")

    # group 2
    conv2_1 = mxy.Convolution(data=pool1, kernel=(3, 3), pad=(1, 1),
                              num_filter=128, name="conv2_1")
    relu2_1 = mxy.Activation(data=conv2_1, act_type="relu", name="relu2_1")
    pool2 = mxy.Pooling(data=relu2_1, pool_type="max", kernel=(2, 2),
                        stride=(2, 2), name="pool2")

    # group 3
    conv3_1 = mxy.Convolution(data=pool2, kernel=(3, 3), pad=(1, 1),
                              num_filter=256, name="conv3_1")
    relu3_1 = mxy.Activation(data=conv3_1, act_type="relu", name="relu3_1")
    conv3_2 = mxy.Convolution(data=relu3_1, kernel=(3, 3), pad=(1, 1),
                              num_filter=256, name="conv3_2")
    relu3_2 = mxy.Activation(data=conv3_2, act_type="relu", name="relu3_2")
    pool3 = mxy.Pooling(data=relu3_2, pool_type="max", kernel=(2, 2),
                        stride=(2, 2), name="pool3")

    # group 4
    conv4_1 = mxy.Convolution(data=pool3, kernel=(3, 3), pad=(1, 1),
                              num_filter=512, name="conv4_1")
    relu4_1 = mxy.Activation(data=conv4_1, act_type="relu", name="relu4_1")
    conv4_2 = mxy.Convolution(data=relu4_1, kernel=(3, 3), pad=(1, 1),
                              num_filter=512, name="conv4_2")
    relu4_2 = mxy.Activation(data=conv4_2, act_type="relu", name="relu4_2")
    pool4 = mxy.Pooling(data=relu4_2, pool_type="max", kernel=(2, 2),
                        stride=(2, 2), name="pool4")

    # group 5
    conv5_1 = mxy.Convolution(data=pool4, kernel=(3, 3), pad=(1, 1),
                              num_filter=512, name="conv5_1")
    relu5_1 = mxy.Activation(data=conv5_1, act_type="relu", name="relu5_1")
    conv5_2 = mxy.Convolution(data=relu5_1, kernel=(3, 3), pad=(1, 1),
                              num_filter=512, name="conv5_2")
    # Fix: this activation was registered as "conv1_2" (copy-paste slip),
    # which mislabeled the node; it now follows the relu<group>_<index>
    # scheme used by every other activation in this network.
    relu5_2 = mxy.Activation(data=conv5_2, act_type="relu", name="relu5_2")
    pool5 = mxy.Pooling(data=relu5_2, pool_type="max", kernel=(2, 2),
                        stride=(2, 2), name="pool5")

    # group 6
    flatten = mxy.Flatten(data=pool5, name="flatten")
    fc6 = mxy.FullyConnected(data=flatten, num_hidden=4096, name="fc6")
    relu6 = mxy.Activation(data=fc6, act_type="relu", name="relu6")
    drop6 = mxy.Dropout(data=relu6, p=0.5, name="drop6")

    # group 7
    fc7 = mxy.FullyConnected(data=drop6, num_hidden=4096, name="fc7")
    relu7 = mxy.Activation(data=fc7, act_type="relu", name="relu7")
    drop7 = mxy.Dropout(data=relu7, p=0.5, name="drop7")

    # output: one linear unit per regression target, trained with the
    # built-in linear-regression output (a smooth-L1 / MakeLoss variant was
    # tried by the original author and left disabled).
    fc8 = mxy.FullyConnected(data=drop7, num_hidden=num_classes, name="fc8")
    loc_loss = mxy.LinearRegressionOutput(data=fc8, label=label,
                                          name="loc_loss")
    return loc_loss
def __call__(self, inputs, states):  # pylint: disable=too-many-locals
    """Build one step of the customized GRU cell with a linear read-out.

    Parameters
    ----------
    inputs : Symbol
        Fed to the input-to-hidden fully connected layer.
    states : sequence of Symbol
        Only ``states[0]`` (the previous hidden state) is read.

    Returns
    -------
    output : Symbol
        The new hidden state passed through a fully connected layer with
        ``self._num_output`` units.
    states : list of Symbol
        One-element list holding the new hidden state.
    """
    self._counter += 1
    step = self._counter
    step_prefix = '%st%d_' % (self._prefix, step)
    previous_h = states[0]

    input_proj = symbol.FullyConnected(data=inputs, weight=self._iW,
                                       bias=self._iB,
                                       num_hidden=self._num_hidden * 3,
                                       name=step_prefix + "_i2h")
    hidden_proj = symbol.FullyConnected(data=previous_h, weight=self._hW,
                                        bias=self._hB,
                                        num_hidden=self._num_hidden * 3,
                                        name=step_prefix + "_h2h")

    # Each projection is split three ways: reset part, update part, candidate.
    in_reset, in_update, in_cand = symbol.SliceChannel(
        input_proj, num_outputs=3, name=step_prefix + "_i2h_slice")
    hid_reset, hid_update, hid_cand = symbol.SliceChannel(
        hidden_proj, num_outputs=3, name=step_prefix + "_h2h_slice")

    reset_gate = symbol.Activation(in_reset + hid_reset, act_type="sigmoid",
                                   name=step_prefix + "_r_act")
    update_gate = symbol.Activation(in_update + hid_update,
                                    act_type="sigmoid",
                                    name=step_prefix + "_z_act")

    # The tanh is kept on the candidate. The original author also tried an
    # activation-free candidate (noted as faster) but left it disabled.
    gated_hidden = reset_gate * hid_cand
    candidate = symbol.Activation(in_cand + gated_hidden, act_type="tanh",
                                  name=step_prefix + "_h_act")

    fresh_part = (1. - update_gate) * candidate
    carried_part = update_gate * previous_h
    new_h = symbol._internal._plus(fresh_part, carried_part,
                                   name=step_prefix + 'out')

    # customized by JG: linear read-out resizing the hidden state to
    # self._num_output units (per the original note, for the "motion network").
    read_out = symbol.FullyConnected(data=new_h, weight=self._oW,
                                     bias=self._oB,
                                     num_hidden=self._num_output)
    return read_out, [new_h]
def __init__(self, ntokens, rescale_loss, bptt, emsize, nhid, nlayers, dropout, num_proj, batch_size, k):
    """Assemble the training (sampled softmax) and evaluation (full softmax) losses.

    The recurrent body comes from ``rnn(...)``; its remaining return values
    are stored on ``self`` as ``last_states``, ``lstm_args`` and
    ``state_names``. Both losses share the ``decoder_weight`` /
    ``decoder_bias`` variables.

    Sets
    ----
    self.sample_names : names of the three sampler input variables.
    self.train_loss   : cross-entropy over the sampled-softmax logits,
                        rescaled by ``rescale_loss``.
    self.eval_loss    : cross-entropy over the full-vocabulary logits.
    """
    (hidden_out,
     self.last_states,
     self.lstm_args,
     self.state_names) = rnn(bptt, ntokens, emsize, nhid, nlayers, dropout,
                             num_proj, batch_size)

    # Decoder parameters, declared row-sparse and shared by both heads.
    dec_weight = S.var("decoder_weight", stype='row_sparse')
    dec_bias = S.var("decoder_bias", shape=(ntokens, 1), stype='row_sparse')

    # Training head: sampled softmax over k sampled classes.
    sampled_ids = S.var('sample', shape=(k,))
    sampled_prob = S.var("prob_sample", shape=(k,))
    target_prob = S.var("prob_target")
    self.sample_names = ['sample', 'prob_sample', 'prob_target']
    sampler_inputs = [sampled_ids, sampled_prob, target_prob]
    train_logits, train_targets = sampled_softmax(ntokens, k, num_proj,
                                                  hidden_out, dec_weight,
                                                  dec_bias, sampler_inputs)
    self.train_loss = cross_entropy_loss(train_logits, train_targets,
                                         rescale_loss=rescale_loss)

    # Evaluation head: full softmax over all ntokens classes.
    full_logits = S.FullyConnected(data=hidden_out, weight=dec_weight,
                                   num_hidden=ntokens, name='decode_fc',
                                   bias=dec_bias)
    flat_label = S.reshape(S.Variable('label'), shape=(-1,))
    self.eval_loss = cross_entropy_loss(full_logits, flat_label)
def get_symbol():
    """Build the four-stage residual convolution stack ending in ``fc5``.

    Each stage opens with a ``get_conv`` call given ``(2, 2)`` as its fourth
    argument, followed by a number of residual units. A unit is two
    ``get_conv`` calls whose result is added to the unit's input. Layers are
    named ``conv<stage>-<index>``.

    Returns
    -------
    Symbol
        A 512-unit fully connected layer named ``fc5`` on top of the stack.
    """
    # (stage number, filter count, residual units in the stage)
    stage_plan = ((1, 64, 1), (2, 128, 2), (3, 256, 4), (4, 512, 1))

    net = sym.var('data')
    for stage, filters, units in stage_plan:
        # Stage entry layer, always index 1.
        net = get_conv(net, 'conv%d-1' % stage, filters, (2, 2))
        for unit in range(units):
            first = 2 * unit + 2  # units occupy indices (2,3), (4,5), ...
            branch = get_conv(net, 'conv%d-%d' % (stage, first), filters)
            branch = get_conv(branch, 'conv%d-%d' % (stage, first + 1),
                              filters)
            net = branch + net
    return sym.FullyConnected(net, name='fc5', num_hidden=512)
def __call__(self, inputs, states):
    """Build the symbols for one time step of the plain RNN cell.

    Parameters
    ----------
    inputs : Symbol
        Fed to the input-to-hidden fully connected layer.
    states : sequence of Symbol
        Only ``states[0]`` is read; it feeds the hidden-to-hidden layer.

    Returns
    -------
    output : Symbol
        ``self._get_activation`` applied to the sum of both projections.
    states : list of Symbol
        One-element list holding that same output.
    """
    self._counter += 1
    step_prefix = '%st%d_' % (self._prefix, self._counter)

    input_proj = symbol.FullyConnected(data=inputs, weight=self._iW,
                                       bias=self._iB,
                                       num_hidden=self._num_hidden,
                                       name=step_prefix + 'i2h')
    hidden_proj = symbol.FullyConnected(data=states[0], weight=self._hW,
                                        bias=self._hB,
                                        num_hidden=self._num_hidden,
                                        name=step_prefix + 'h2h')

    pre_activation = input_proj + hidden_proj
    activated = self._get_activation(pre_activation, self._activation,
                                     name=step_prefix + 'out')
    # The activated value is both the step output and the next state.
    return activated, [activated]
def sampled_softmax(num_classes, num_samples, in_dim, inputs, weight, bias,
                    sampled_values, remove_accidental_hits=True):
    """Sampled softmax via importance sampling.

    This under-estimates the full softmax and is only used for training.

    Parameters
    ----------
    num_classes : int
        ``input_dim`` of the weight/bias embedding lookups.
    num_samples : int
        Number of sampled classes.
    in_dim : int
        ``output_dim`` of the weight lookup.
    inputs : Symbol
        Activations to score -- noted as ``(n, in_dim)`` by the original
        author.
    weight, bias : Symbol
        Decoder parameters looked up through ``S.sparse.Embedding``.
    sampled_values : sequence of three Symbols
        ``(sample, prob_sample, prob_target)``. The first item is unpacked
        but not used: a fresh ``'sample'`` variable is declared instead.
    remove_accidental_hits : bool
        When true, adds ``-1e37`` to sampled logits whose class equals the
        row's label.

    Returns
    -------
    logits : Symbol
        Target logit concatenated with the sampled logits along ``dim=1``.
    new_targets : Symbol
        ``zeros_like`` of the flattened label.
    """
    # The caller's sample symbol is discarded; see the redeclaration below.
    _, prob_sample, prob_target = sampled_values

    # (num_samples, )
    sample = S.var('sample', shape=(num_samples,), dtype='float32')
    # (n, )
    label = S.reshape(S.var('label'), shape=(-1,), name="label_reshape")
    # (num_samples + n, ): sampled ids first, true labels after.
    lookup_ids = S.concat(sample, label, dim=0)

    # One lookup each for weights and biases covers samples and targets.
    # (num_samples + n, in_dim)
    gathered_w = S.sparse.Embedding(data=lookup_ids, weight=weight,
                                    input_dim=num_classes, output_dim=in_dim,
                                    sparse_grad=True)
    # (num_samples + n, 1)
    gathered_b = S.sparse.Embedding(data=lookup_ids, weight=bias,
                                    input_dim=num_classes, output_dim=1,
                                    sparse_grad=True)

    # Split the lookups back into their sampled and target parts.
    head_end = (num_samples, None)
    tail_begin = (num_samples, 0)
    sample_w = S.slice(gathered_w, begin=(0, 0), end=head_end)
    target_w = S.slice(gathered_w, begin=tail_begin, end=(None, None))
    sample_b = S.slice(gathered_b, begin=(0, 0), end=head_end)
    target_b = S.slice(gathered_b, begin=tail_begin, end=(None, None))

    # Target logit, (n, 1): row-wise dot product plus bias.
    true_pred = S.sum(target_w * inputs, axis=1, keepdims=True) + target_b

    # Sampled logits, (n, num_samples).
    sample_b = S.reshape(sample_b, (-1,))
    sample_pred = S.FullyConnected(inputs, weight=sample_w, bias=sample_b,
                                   num_hidden=num_samples)

    if remove_accidental_hits:
        # Push down any sampled class that coincides with the true label.
        label_col = S.reshape(label, (-1, 1))
        sample_row = S.reshape(sample, (1, -1))
        penalty = S.broadcast_equal(label_col, sample_row) * -1e37
        sample_pred = sample_pred + penalty

    # Subtract the log proposal probabilities from both kinds of logits.
    prob_sample = S.reshape(prob_sample, shape=(1, num_samples))
    p_target = true_pred - S.log(prob_target)
    p_sample = S.broadcast_sub(sample_pred, S.log(prob_sample))

    # (n, 1 + num_samples); the target sits in column 0, so labels are zeros.
    logits = S.concat(p_target, p_sample, dim=1)
    new_targets = S.zeros_like(label)
    return logits, new_targets
def __call__(self, inputs, states):  # pylint: disable=too-many-locals
    """Build the symbols for one time step of the GRU cell.

    Parameters
    ----------
    inputs : Symbol
        Fed to the input-to-hidden fully connected layer.
    states : sequence of Symbol
        Only ``states[0]`` (the previous hidden state) is read.

    Returns
    -------
    output : Symbol
        The new hidden state.
    states : list of Symbol
        One-element list holding that same hidden state.
    """
    self._counter += 1
    step = self._counter
    step_prefix = '%st%d_' % (self._prefix, step)
    previous_h = states[0]

    input_proj = symbol.FullyConnected(data=inputs, weight=self._iW,
                                       bias=self._iB,
                                       num_hidden=self._num_hidden * 3,
                                       name=step_prefix + "_i2h")
    hidden_proj = symbol.FullyConnected(data=previous_h, weight=self._hW,
                                        bias=self._hB,
                                        num_hidden=self._num_hidden * 3,
                                        name=step_prefix + "_h2h")

    # Each projection is split three ways: reset part, update part, candidate.
    in_reset, in_update, in_cand = symbol.SliceChannel(
        input_proj, num_outputs=3, name=step_prefix + "_i2h_slice")
    hid_reset, hid_update, hid_cand = symbol.SliceChannel(
        hidden_proj, num_outputs=3, name=step_prefix + "_h2h_slice")

    reset_gate = symbol.Activation(in_reset + hid_reset, act_type="sigmoid",
                                   name=step_prefix + "_r_act")
    update_gate = symbol.Activation(in_update + hid_update,
                                    act_type="sigmoid",
                                    name=step_prefix + "_z_act")

    gated_hidden = reset_gate * hid_cand
    candidate = symbol.Activation(in_cand + gated_hidden, act_type="tanh",
                                  name=step_prefix + "_h_act")

    # Blend the candidate with the previous state using the update gate.
    fresh_part = (1. - update_gate) * candidate
    carried_part = update_gate * previous_h
    new_h = symbol._internal._plus(fresh_part, carried_part,
                                   name=step_prefix + 'out')
    return new_h, [new_h]
def __call__(self, inputs, states):
    """Build the symbols for one time step of the LSTM cell.

    Parameters
    ----------
    inputs : Symbol
        Fed to the input-to-hidden fully connected layer.
    states : sequence of Symbol
        ``states[0]`` is the previous hidden state and ``states[1]`` the
        previous cell state.

    Returns
    -------
    output : Symbol
        The new hidden state.
    states : list of Symbol
        ``[next_h, next_c]``.
    """
    self._counter += 1
    step_prefix = '%st%d_' % (self._prefix, self._counter)

    input_proj = symbol.FullyConnected(data=inputs, weight=self._iW,
                                       bias=self._iB,
                                       num_hidden=self._num_hidden * 4,
                                       name=step_prefix + 'i2h')
    hidden_proj = symbol.FullyConnected(data=states[0], weight=self._hW,
                                        bias=self._hB,
                                        num_hidden=self._num_hidden * 4,
                                        name=step_prefix + 'h2h')

    # One fused pre-activation, split into four equal slices.
    fused = input_proj + hidden_proj
    parts = symbol.SliceChannel(fused, num_outputs=4,
                                name=step_prefix + "slice")

    input_gate = symbol.Activation(parts[0], act_type="sigmoid",
                                   name=step_prefix + 'i')
    forget_gate = symbol.Activation(parts[1], act_type="sigmoid",
                                    name=step_prefix + 'f')
    candidate = symbol.Activation(parts[2], act_type="tanh",
                                  name=step_prefix + 'c')
    output_gate = symbol.Activation(parts[3], act_type="sigmoid",
                                    name=step_prefix + 'o')

    kept_memory = forget_gate * states[1]
    new_memory = input_gate * candidate
    next_c = symbol._internal._plus(kept_memory, new_memory,
                                    name=step_prefix + 'state')

    # The hidden state is the output gate times tanh of the new cell state.
    squashed_c = symbol.Activation(next_c, act_type="tanh")
    next_h = symbol._internal._mul(output_gate, squashed_c,
                                   name=step_prefix + 'out')
    return next_h, [next_h, next_c]
def get_symbol(num_classes=136, image_shape=(3, 224, 224), **kwargs):
    """Build an Inception-style regression network.

    The architecture is chosen by the image height: heights of 28 or less
    use the compact ``SimpleFactory`` / ``DownsampleFactory`` stack, anything
    taller uses the ``InceptionFactoryA`` / ``InceptionFactoryB`` stack. Both
    end in a 7x7 average pool, a flatten, a fully connected layer of
    ``num_classes`` units and a ``LinearRegressionOutput`` against ``label``.

    Parameters
    ----------
    num_classes : int
        Number of regression outputs.
    image_shape : 3-item sequence
        Only the middle item (height) is used.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    Symbol
        The ``loc_loss`` regression output symbol.
    """
    # Only the height matters, but the unpack still demands three items.
    _, height, _ = image_shape

    # No extra attributes are attached (force_mirroring stays disabled).
    attr = {}

    data = mxy.Variable(name="data")
    label = mxy.Variable(name="label")

    if height <= 28:
        # Compact variant for small images.
        net = ConvFactory(data=data, kernel=(3, 3), pad=(1, 1), name="1",
                          num_filter=96, attr=attr)
        net = SimpleFactory(net, 32, 32, 'in3a', attr)
        net = SimpleFactory(net, 32, 48, 'in3b', attr)
        net = DownsampleFactory(net, 80, 'in3c', attr)
        net = SimpleFactory(net, 112, 48, 'in4a', attr)
        net = SimpleFactory(net, 96, 64, 'in4b', attr)
        net = SimpleFactory(net, 80, 80, 'in4c', attr)
        net = SimpleFactory(net, 48, 96, 'in4d', attr)
        net = DownsampleFactory(net, 96, 'in4e', attr)
        net = SimpleFactory(net, 176, 160, 'in5a', attr)
        net = SimpleFactory(net, 176, 160, 'in5b', attr)
        pool = mxy.Pooling(data=net, pool_type="avg", kernel=(7, 7),
                           name="global_pool", attr=attr)
    else:
        # Stem: 7x7 strided convolution followed by max pooling.
        net = ConvFactory(data=data, num_filter=64, kernel=(7, 7),
                          stride=(2, 2), pad=(3, 3), name='1')
        net = mxy.Pooling(data=net, kernel=(3, 3), stride=(2, 2),
                          name='pool_1', pool_type='max')
        # 1x1 reduction, 3x3 convolution, max pooling.
        net = ConvFactory(data=net, num_filter=64, kernel=(1, 1),
                          stride=(1, 1), name='2_red')
        net = ConvFactory(data=net, num_filter=192, kernel=(3, 3),
                          stride=(1, 1), pad=(1, 1), name='2')
        net = mxy.Pooling(data=net, kernel=(3, 3), stride=(2, 2),
                          name='pool_2', pool_type='max')
        # Inception blocks 3a-3c.
        net = InceptionFactoryA(net, 64, 64, 64, 64, 96, "avg", 32, '3a')
        net = InceptionFactoryA(net, 64, 64, 96, 64, 96, "avg", 64, '3b')
        net = InceptionFactoryB(net, 128, 160, 64, 96, '3c')
        # Inception blocks 4a-4e.
        net = InceptionFactoryA(net, 224, 64, 96, 96, 128, "avg", 128, '4a')
        net = InceptionFactoryA(net, 192, 96, 128, 96, 128, "avg", 128, '4b')
        net = InceptionFactoryA(net, 160, 128, 160, 128, 160, "avg", 128,
                                '4c')
        net = InceptionFactoryA(net, 96, 128, 192, 160, 192, "avg", 128,
                                '4d')
        net = InceptionFactoryB(net, 128, 192, 192, 256, '4e')
        # Inception blocks 5a-5b.
        net = InceptionFactoryA(net, 352, 192, 320, 160, 224, "avg", 128,
                                '5a')
        net = InceptionFactoryA(net, 352, 192, 320, 192, 224, "max", 128,
                                '5b')
        # Final 7x7 average pooling.
        pool = mxy.Pooling(data=net, kernel=(7, 7), stride=(1, 1),
                           name="global_pool", pool_type='avg')

    # Regression head shared by both variants.
    flat = mxy.Flatten(data=pool)
    regression = mxy.FullyConnected(data=flat, num_hidden=num_classes)
    return mxy.LinearRegressionOutput(data=regression, label=label,
                                      name="loc_loss")
def symbol_mlp():
    """Build a two-layer fully connected network on a ``data`` variable.

    Returns
    -------
    tuple
        ``(data, output)``: the input variable and the symbol of the second
        fully connected layer (20 hidden units followed by 3).
    """
    net_input = symbol.Variable("data")
    hidden = symbol.FullyConnected(data=net_input, num_hidden=20)
    # No activation sits between the two layers.
    output = symbol.FullyConnected(data=hidden, num_hidden=3)
    return net_input, output
# Scratch script: builds a tiny fully connected graph through two different
# import paths (`mx.sym` and `mxnet.symbol`), prints both module objects, and
# plots the first graph.
import mxnet as mx
from mxnet import sym
from mxnet import symbol

# Start from the input variable, then rebind `net` to a 128-unit layer.
net = mx.sym.Variable('data')
# NOTE(review): `weight` is given the same symbol as `data` -- looks like an
# experiment rather than a usable layer; confirm intent.
net = mx.sym.FullyConnected(data=net, weight=net, name='fc1', num_hidden=128)
# Same call through the `symbol` module; it reuses the name 'fc1' and the
# result is never used below.
net2 = symbol.FullyConnected(data=net, weight=net, name='fc1', num_hidden=128)
# Print the two imported module objects.
print(sym)
print(symbol)
# Plot the first network; presumably render() writes the drawing to disk --
# TODO confirm output location and format.
mx.viz.plot_network(symbol=net).render()