def __init__(self):
    self.data = sym.Variable('image')
    self.label_box = sym.Variable('label_box')
    self.label_class = sym.Variable('label_class')
    self.label_score = sym.Variable('label_score')
    self.net = self.add_forward(self.data)
    self.error = self.add_loss(self.net)
def weight_sharing_residual_network(graph):
    X = symbol.Variable('data')
    for index, node in enumerate(graph):
        # one weight/bias pair per graph node, reused across `times` repeated convolutions
        weight = symbol.Variable('convolution_weight_%d' % index)
        bias = symbol.Variable('convolution_bias_%d' % index)
        kwargs, activation, times = node  # `activation` is unpacked but not applied here
        for t in range(times):
            X = symbol.Convolution(data=X, weight=weight, bias=bias, **kwargs)
    return X
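# A minimal usage sketch for weight_sharing_residual_network. The node format
# (conv kwargs, activation name, repeat count) is inferred from the unpacking
# above; the concrete values are illustrative assumptions.
graph = [
    (dict(kernel=(3, 3), num_filter=32, pad=(1, 1)), 'relu', 2),
    (dict(kernel=(3, 3), num_filter=64, pad=(1, 1), stride=(2, 2)), 'relu', 1),
]
net = weight_sharing_residual_network(graph)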
def batch_dot(left, right):
    # assert left.shape[0] == right.shape[0] and left.shape[2] == right.shape[1]
    left_symbol = symbol.Variable('left')
    right_symbol = symbol.Variable('right')
    result_symbol = symbol.batch_dot(left_symbol, right_symbol)
    shapes = {'left': left.shape, 'right': right.shape}
    kwargs = {'left': left, 'right': right}
    return Function(result_symbol, shapes)(**kwargs)
def batch_dot(left, right):
    # wraps mxnet.symbol.batch_dot
    left_symbol = symbol.Variable('left')
    right_symbol = symbol.Variable('right')
    result_symbol = symbol.batch_dot(left_symbol, right_symbol)
    shapes = {'left': left.shape, 'right': right.shape}
    kwargs = {'left': left, 'right': right}
    return Function(result_symbol, shapes)(**kwargs)
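# A minimal usage sketch for the batch_dot wrapper. The `Function` helper it
# relies on is not shown in this snippet, so treat this as the intended call
# pattern rather than a verified run; shapes follow mxnet's
# (batch, M, K) x (batch, K, N) -> (batch, M, N) batch_dot convention.
import mxnet as mx

left = mx.nd.ones((2, 3, 4))
right = mx.nd.ones((2, 4, 5))
out = batch_dot(left, right)  # expected shape: (2, 3, 5)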
def getsymbol(num_classes=136):
    # define a VGG-style regression network
    data = mxy.Variable(name="data")
    label = mxy.Variable(name="label")
    # group 1
    conv1_1 = mxy.Convolution(data=data, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_1")
    relu1_1 = mxy.Activation(data=conv1_1, act_type="relu", name="relu1_1")
    pool1 = mxy.Pooling(data=relu1_1, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool1")
    # group 2
    conv2_1 = mxy.Convolution(data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_1")
    relu2_1 = mxy.Activation(data=conv2_1, act_type="relu", name="relu2_1")
    pool2 = mxy.Pooling(data=relu2_1, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool2")
    # group 3
    conv3_1 = mxy.Convolution(data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_1")
    relu3_1 = mxy.Activation(data=conv3_1, act_type="relu", name="relu3_1")
    conv3_2 = mxy.Convolution(data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_2")
    relu3_2 = mxy.Activation(data=conv3_2, act_type="relu", name="relu3_2")
    pool3 = mxy.Pooling(data=relu3_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool3")
    # group 4
    conv4_1 = mxy.Convolution(data=pool3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_1")
    relu4_1 = mxy.Activation(data=conv4_1, act_type="relu", name="relu4_1")
    conv4_2 = mxy.Convolution(data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_2")
    relu4_2 = mxy.Activation(data=conv4_2, act_type="relu", name="relu4_2")
    pool4 = mxy.Pooling(data=relu4_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool4")
    # group 5
    conv5_1 = mxy.Convolution(data=pool4, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_1")
    relu5_1 = mxy.Activation(data=conv5_1, act_type="relu", name="relu5_1")
    conv5_2 = mxy.Convolution(data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_2")
    relu5_2 = mxy.Activation(data=conv5_2, act_type="relu", name="relu5_2")
    pool5 = mxy.Pooling(data=relu5_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool5")
    # group 6
    flatten = mxy.Flatten(data=pool5, name="flatten")
    fc6 = mxy.FullyConnected(data=flatten, num_hidden=4096, name="fc6")
    relu6 = mxy.Activation(data=fc6, act_type="relu", name="relu6")
    drop6 = mxy.Dropout(data=relu6, p=0.5, name="drop6")
    # group 7
    fc7 = mxy.FullyConnected(data=drop6, num_hidden=4096, name="fc7")
    relu7 = mxy.Activation(data=fc7, act_type="relu", name="relu7")
    drop7 = mxy.Dropout(data=relu7, p=0.5, name="drop7")
    # output: regression over num_classes coordinates
    fc8 = mxy.FullyConnected(data=drop7, num_hidden=num_classes, name="fc8")
    loc_loss = mxy.LinearRegressionOutput(data=fc8, label=label, name="loc_loss")
    # loc_loss_ = mxy.smooth_l1(name="loc_loss_", data=(fc8 - label), scalar=1.0)
    # loc_loss_ = mxy.smooth_l1(name="loc_loss_", data=fc8, scalar=1.0)
    # loc_loss = mx.sym.MakeLoss(name='loc_loss', data=loc_loss_)
    return loc_loss
def r_lstm(seq_len, num_hidden, C):
    import my_layer as ml
    T = seq_len
    cs = [S.Variable('c')]
    hs = [S.Variable('h')]
    preds = []
    datas = [S.Variable('data%d' % i) for i in range(T)]
    param = LSTMParam(x2g_weight=S.Variable("x2g_weight"),
                      x2g_bias=S.Variable("x2g_bias"),
                      h2g_weight=S.Variable("h2g_weight"),
                      h2g_bias=S.Variable("h2g_bias"),
                      Y_weight=S.Variable("Y_weight"),
                      Y_bias=S.Variable("Y_bias"))
    for t in range(T):
        pred, c, h = r_lstm_step(datas[t], num_hidden, C, c=cs[-1], h=hs[-1], param=param)
        pred = S.LogisticRegressionOutput(data=pred, name='logis%d' % t)
        if t != 0:
            # custom op ties consecutive predictions together as a regularizer
            useless = S.Custom(prev_data=preds[-1], this_data=pred,
                               name='regloss', op_type='regloss')
        preds.append(pred)
        cs.append(c)
        hs.append(h)
    return S.Group(preds)
def unroll_lstm(seq_len, num_hidden, C, H, W):
    T = seq_len
    cs = [S.Variable('c')]
    hs = [S.Variable('h')]
    preds = []
    datas = [S.Variable('data%d' % i) for i in range(T)]
    param = LSTMParam(i2h_weight=S.Variable("i2h_weight"),
                      i2h_bias=S.Variable("i2h_bias"),
                      h2h_weight=S.Variable("h2h_weight"),
                      h2h_bias=S.Variable("h2h_bias"),
                      Y_weight=S.Variable("Y_weight"),
                      Y_bias=S.Variable("Y_bias"))
    for t in range(T):
        pred, c, h = LSTM(datas[t], num_hidden, C, H, W, c=cs[-1], h=hs[-1], param=param)
        pred = S.LogisticRegressionOutput(data=pred, name='logis%d' % t)
        preds.append(pred)
        cs.append(c)
        hs.append(h)
    return S.Group(preds)
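# A minimal usage sketch for unroll_lstm. The LSTM step function it calls is
# defined elsewhere, so this only shows the intended call signature; the
# sizes below are illustrative assumptions.
sym = unroll_lstm(seq_len=3, num_hidden=8, C=1, H=64, W=64)
print(sym.list_outputs())  # e.g. ['logis0_output', 'logis1_output', 'logis2_output']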
def review_network(net, use_symbol=False, timing=True, num_rep=1, dir_out='', print_model_size=False):
    """Inspect the network architecture and input/output.

    use_symbol: set True to inspect the network in detail
    timing: set True to estimate the inference time of the network
    num_rep: number of timed inference runs
    """
    # from my_func import get_model_size
    shape = (6, 4, 16, 160, 160)
    if use_symbol:
        x1 = symbol.Variable('x1')
        x2 = symbol.Variable('x2')
        y = net(x1, x2)
        if print_model_size:
            get_model_size(y, to_print=False)
        viz.plot_network(y,
                         shape={'x1': shape, 'x2': shape},
                         node_attrs={"fixedsize": "false"}).view('%sDenseMultipathNet' % dir_out)
    else:
        x1 = nd.random_normal(0.1, 0.02, shape=shape, ctx=ctx)
        x2 = nd.random_normal(0.1, 0.02, shape=shape, ctx=ctx)
        net.collect_params().initialize(initializer.Xavier(magnitude=2), ctx=ctx)
        net.hybridize(static_alloc=True, static_shape=True)
        if timing:
            # first run includes graph construction and memory allocation
            s1 = time.time()
            y = net(x1, x2)
            y.wait_to_read()
            print("First run: %.5f" % (time.time() - s1))
            import numpy as np
            times = np.zeros(num_rep)
            for t in range(num_rep):
                s2 = time.time()
                y = net(x1, x2)
                y.wait_to_read()
                times[t] = time.time() - s2
            print("Run with hybrid network: %.5f" % times.mean())
        else:
            y = net(x1, x2)
        print("Input size: ", x1.shape)
        print("Output size: ", y.shape)
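# A minimal usage sketch for review_network, assuming a two-input Gluon
# HybridBlock; `DenseMultipathNet` is a hypothetical stand-in, and the
# module-level `ctx`, `nd`, `initializer`, and `time` names used above must
# be in scope.
net = DenseMultipathNet()
review_network(net, use_symbol=False, timing=True, num_rep=10)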
def __init__(self, ntokens, rescale_loss, bptt, emsize, nhid, nlayers,
             dropout, num_proj, batch_size, k):
    out = rnn(bptt, ntokens, emsize, nhid, nlayers, dropout, num_proj, batch_size)
    rnn_out, self.last_states, self.lstm_args, self.state_names = out
    # decoder weight and bias
    decoder_w = S.var("decoder_weight", stype='row_sparse')
    decoder_b = S.var("decoder_bias", shape=(ntokens, 1), stype='row_sparse')
    # sampled softmax for training
    sample = S.var('sample', shape=(k,))
    prob_sample = S.var("prob_sample", shape=(k,))
    prob_target = S.var("prob_target")
    self.sample_names = ['sample', 'prob_sample', 'prob_target']
    logits, new_targets = sampled_softmax(ntokens, k, num_proj, rnn_out,
                                          decoder_w, decoder_b,
                                          [sample, prob_sample, prob_target])
    self.train_loss = cross_entropy_loss(logits, new_targets, rescale_loss=rescale_loss)
    # full softmax for testing
    eval_logits = S.FullyConnected(data=rnn_out, weight=decoder_w,
                                   num_hidden=ntokens, name='decode_fc', bias=decoder_b)
    label = S.Variable('label')
    label = S.reshape(label, shape=(-1,))
    self.eval_loss = cross_entropy_loss(eval_logits, label)
def tmpnet():
    x = sym.Variable('data')
    y = sym.Convolution(x, kernel=(3, 3), num_filter=32)
    y = sym.Activation(y, 'relu')
    y = sym.Convolution(y, kernel=(3, 3), num_filter=64, stride=(2, 2), num_group=2)
    y = sym.softmax(y)
    return y
def get(self, name, **kwargs):
    """Get the variable with the given name if it exists, or create a new one if missing.

    Parameters
    ----------
    name : str
        Name of the variable.
    **kwargs
        Additional keyword arguments passed to symbol.Variable.
    """
    name = self._prefix + name
    if name not in self._params:
        self._params[name] = symbol.Variable(name, **kwargs)
    return self._params[name]
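# A minimal sketch of the parameter cache in use. The owning class is not
# shown in this snippet, so `Params` below is a hypothetical stand-in with
# the `_prefix`/`_params` attributes the method expects; it assumes
# `from mxnet import symbol` is in scope.
class Params(object):
    def __init__(self, prefix=''):
        self._prefix = prefix
        self._params = {}
    get = get  # reuse the method defined above

p = Params(prefix='lstm_')
w = p.get('i2h_weight')          # creates symbol.Variable('lstm_i2h_weight')
assert p.get('i2h_weight') is w  # a second call returns the cached variable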
def create_symbol(symbol_name, net_define_json):
    symbol = None
    try:
        net_definition = json.loads(net_define_json)
        for component in net_definition:
            if component['type'] == 'Input':
                component_name = component['name']
                symbol = sym.Variable(component_name)
        symbol_json_store_root = mxserver_storage_config['symbol-json-root']
        if not os.path.exists(symbol_json_store_root):
            os.mkdir(symbol_json_store_root)
        symbol_store_path = symbol_json_store_root + symbol_name
        symbol.save(fname=symbol_store_path)
        return symbol_store_path
    except Exception:
        return None
def siamese():
    labels = mxs.Variable(name='label')
    flat_a, flat_b = siamese_simp_net()
    # Euclidean distance between the two embeddings
    distance = mxs.sqrt(mxs.sum(mxs.square(flat_a - flat_b), axis=1))
    # contrastive loss: pull matching pairs together, push non-matching pairs
    # beyond a margin of 1
    cl1 = labels * mxs.square(distance)
    cl2 = (1 - labels) * mxs.square(mxs.maximum(1 - distance, 0))
    contrastive_loss = mxs.MakeLoss(mxs.mean(cl1 + cl2))
    distance_output = mxs.BlockGrad(distance, name='distance')
    flat_a_output = mxs.BlockGrad(flat_a)
    flat_b_output = mxs.BlockGrad(flat_b)
    sym = mx.sym.Group(
        [contrastive_loss, distance_output, flat_a_output, flat_b_output])
    mod = mx.mod.Module(symbol=sym, context=mx.gpu(),
                        data_names=['data_a', 'data_b'], label_names=['label'])
    return mod
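# A minimal usage sketch for the siamese module above: bind a pair of image
# batches plus a 0/1 same/different label batch and run one forward pass.
# The (32, 3, 64, 64) shapes are assumptions, and a GPU is required because
# the module is created with context=mx.gpu().
mod = siamese()
mod.bind(data_shapes=[('data_a', (32, 3, 64, 64)), ('data_b', (32, 3, 64, 64))],
         label_shapes=[('label', (32,))])
mod.init_params()
batch = mx.io.DataBatch(data=[mx.nd.zeros((32, 3, 64, 64)), mx.nd.zeros((32, 3, 64, 64))],
                        label=[mx.nd.zeros((32,))])
mod.forward(batch)
loss, dist, emb_a, emb_b = mod.get_outputs()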
def lstm_unroll(num_lstm_layer, seq_len, num_hidden, num_label, dropout=0.):
    # embed_weight = mx.sym.Variable("embed_weight")
    cls_weight = mx.sym.Variable("cls_weight")
    cls_bias = mx.sym.Variable("cls_bias")
    param_cells = []
    last_states = []
    pred_all = []
    for i in range(num_lstm_layer):
        param_cells.append(LSTMParam(i2h_weight=mx.sym.Variable("l%d_i2h_weight" % i),
                                     i2h_bias=mx.sym.Variable("l%d_i2h_bias" % i),
                                     h2h_weight=mx.sym.Variable("l%d_h2h_weight" % i),
                                     h2h_bias=mx.sym.Variable("l%d_h2h_bias" % i)))
        state = LSTMState(c=mx.sym.Variable("l%d_init_c" % i),
                          h=mx.sym.Variable("l%d_init_h" % i))
        last_states.append(state)
    assert len(last_states) == num_lstm_layer

    # data and per-step label sequences
    data = mx.sym.Variable('data')
    label = mx.sym.Variable('softmax_label')
    timeseq = mx.sym.SliceChannel(data=data, num_outputs=seq_len, squeeze_axis=1)
    labelseq = mx.sym.SliceChannel(data=label, num_outputs=seq_len, squeeze_axis=1)

    # CNN parameters, shared across time steps
    layer_num = 10
    P = []
    for i in range(layer_num):
        P.append(S.Variable('c%d_weight' % i))
        P.append(S.Variable('c%d_bias' % i))
        P.append(S.Variable('bn%d_gamma' % i))
        P.append(S.Variable('bn%d_beta' % i))
    up_num = 3
    D = []
    for i in range(up_num):
        D.append(S.Variable('deconv%d_weight' % i))
        # D.append(S.Variable('deconv%d_bias' % i))

    for seqidx in range(seq_len):
        # embed the frame with the shared CNN
        hidden = timeseq[seqidx]
        hidden = cnn_forward(hidden, P, D)
        # stack LSTM layers
        for i in range(num_lstm_layer):
            dp_ratio = 0. if i == 0 else dropout
            next_state = lstm(num_hidden, indata=hidden,
                              prev_state=last_states[i],
                              param=param_cells[i],
                              seqidx=seqidx, layeridx=i, dropout=dp_ratio)
            hidden = next_state.h
            last_states[i] = next_state
        # decoder: 1x1 convolution to num_label channels, per-step logistic loss
        if dropout > 0.:
            hidden = mx.sym.Dropout(data=hidden, p=dropout)
        pred = mx.sym.Convolution(data=hidden, weight=cls_weight, bias=cls_bias,
                                  name='pred%d' % seqidx, kernel=(1, 1),
                                  num_filter=num_label, pad=(0, 0))
        pred = mx.sym.LogisticRegressionOutput(data=pred, label=labelseq[seqidx],
                                               name='logis%d' % seqidx)
        pred_all.append(pred)

    # Alternative decoder: concat hidden states over time and use a single
    # FullyConnected head; no big speed difference was observed between the
    # two approaches.
    # hidden_concat = mx.sym.Concat(*hidden_all, dim=0)
    # pred = mx.sym.FullyConnected(data=hidden_concat, num_hidden=num_label,
    #                              weight=cls_weight, bias=cls_bias, name='pred')
    return mx.sym.Group(pred_all)
def get_symbol(num_classes=136, image_shape=(3, 224, 224), **kwargs):
    (nchannel, height, width) = image_shape
    # attr = {'force_mirroring': 'true'}
    attr = {}
    # data
    data = mxy.Variable(name="data")
    label = mxy.Variable(name="label")
    if height <= 28:
        # a simpler version for small inputs
        conv1 = ConvFactory(data=data, kernel=(3, 3), pad=(1, 1), name="1",
                            num_filter=96, attr=attr)
        in3a = SimpleFactory(conv1, 32, 32, 'in3a', attr)
        in3b = SimpleFactory(in3a, 32, 48, 'in3b', attr)
        in3c = DownsampleFactory(in3b, 80, 'in3c', attr)
        in4a = SimpleFactory(in3c, 112, 48, 'in4a', attr)
        in4b = SimpleFactory(in4a, 96, 64, 'in4b', attr)
        in4c = SimpleFactory(in4b, 80, 80, 'in4c', attr)
        in4d = SimpleFactory(in4c, 48, 96, 'in4d', attr)
        in4e = DownsampleFactory(in4d, 96, 'in4e', attr)
        in5a = SimpleFactory(in4e, 176, 160, 'in5a', attr)
        in5b = SimpleFactory(in5a, 176, 160, 'in5b', attr)
        pool = mxy.Pooling(data=in5b, pool_type="avg", kernel=(7, 7),
                           name="global_pool", attr=attr)
    else:
        # stage 1
        conv1 = ConvFactory(data=data, num_filter=64, kernel=(7, 7),
                            stride=(2, 2), pad=(3, 3), name='1')
        pool1 = mxy.Pooling(data=conv1, kernel=(3, 3), stride=(2, 2),
                            name='pool_1', pool_type='max')
        # stage 2
        conv2red = ConvFactory(data=pool1, num_filter=64, kernel=(1, 1),
                               stride=(1, 1), name='2_red')
        conv2 = ConvFactory(data=conv2red, num_filter=192, kernel=(3, 3),
                            stride=(1, 1), pad=(1, 1), name='2')
        pool2 = mxy.Pooling(data=conv2, kernel=(3, 3), stride=(2, 2),
                            name='pool_2', pool_type='max')
        # stage 3
        in3a = InceptionFactoryA(pool2, 64, 64, 64, 64, 96, "avg", 32, '3a')
        in3b = InceptionFactoryA(in3a, 64, 64, 96, 64, 96, "avg", 64, '3b')
        in3c = InceptionFactoryB(in3b, 128, 160, 64, 96, '3c')
        # stage 4
        in4a = InceptionFactoryA(in3c, 224, 64, 96, 96, 128, "avg", 128, '4a')
        in4b = InceptionFactoryA(in4a, 192, 96, 128, 96, 128, "avg", 128, '4b')
        in4c = InceptionFactoryA(in4b, 160, 128, 160, 128, 160, "avg", 128, '4c')
        in4d = InceptionFactoryA(in4c, 96, 128, 192, 160, 192, "avg", 128, '4d')
        in4e = InceptionFactoryB(in4d, 128, 192, 192, 256, '4e')
        # stage 5
        in5a = InceptionFactoryA(in4e, 352, 192, 320, 160, 224, "avg", 128, '5a')
        in5b = InceptionFactoryA(in5a, 352, 192, 320, 192, 224, "max", 128, '5b')
        # global average pooling
        pool = mxy.Pooling(data=in5b, kernel=(7, 7), stride=(1, 1),
                           name="global_pool", pool_type='avg')
    # linear regressor
    flatten = mxy.Flatten(data=pool)
    fc1 = mxy.FullyConnected(data=flatten, num_hidden=num_classes)
    loc_loss = mxy.LinearRegressionOutput(data=fc1, label=label, name="loc_loss")
    return loc_loss
def r_lstm_step(X, num_hidden, C, c=None, h=None, idx='', param=None):
    if not isinstance(idx, str):
        idx = str(idx)
    if c is None:
        c = mx.sym.Variable(name='c%s' % idx)
    if h is None:
        h = mx.sym.Variable(name='h%s' % idx)
    if param is None:
        param = LSTMParam(x2g_weight=S.Variable("x2g_weight"),
                          x2g_bias=S.Variable("x2g_bias"),
                          h2g_weight=S.Variable("h2g_weight"),
                          h2g_bias=S.Variable("h2g_bias"),
                          Y_weight=S.Variable("Y_weight"),
                          Y_bias=S.Variable("Y_bias"))
    # input and hidden-state projections produce all four gates at once
    x2g = S.Convolution(name='x2g%s' % idx, data=X, weight=param.x2g_weight,
                        bias=param.x2g_bias, kernel=(5, 5),
                        num_filter=num_hidden * 4, pad=(2, 2))
    h2g = S.Convolution(name='h2g%s' % idx, data=h, weight=param.h2g_weight,
                        bias=param.h2g_bias, kernel=(5, 5),
                        num_filter=num_hidden * 4, pad=(2, 2))
    gates = x2g + h2g
    slice_gates = mx.sym.SliceChannel(gates, num_outputs=4, name='rnn_slice%s' % idx)
    in_gate = mx.sym.Activation(slice_gates[0], act_type="sigmoid", name='in_gate%s' % idx)
    in_transform = mx.sym.Activation(slice_gates[1], act_type="tanh", name='in_transform%s' % idx)
    forget_gate = mx.sym.Activation(slice_gates[2], act_type="sigmoid", name='forget_gate%s' % idx)
    out_gate = mx.sym.Activation(slice_gates[3], act_type="sigmoid", name='out_gate%s' % idx)
    c_this = (forget_gate * c) + (in_gate * in_transform)
    h_this = out_gate * mx.sym.Activation(c_this, act_type="tanh", name='tanh2h%s' % idx)
    # 1x1 convolution maps the hidden state to C output channels
    fc = S.Convolution(name='Y%s' % idx, data=h_this, weight=param.Y_weight,
                       bias=param.Y_bias, kernel=(1, 1), num_filter=C, pad=(0, 0))
    # states are returned through BlockGrad so gradients do not flow across steps
    c_this = mx.sym.BlockGrad(data=c_this)
    h_this = mx.sym.BlockGrad(data=h_this)
    return fc, c_this, h_this
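# A minimal sketch of a single r_lstm_step call, assuming the `S`/`mx` symbol
# aliases and the LSTMParam namedtuple used above are in scope. With c/h and
# param omitted, the step creates its own state variables and shared weights.
x = S.Variable('x')
fc, c_next, h_next = r_lstm_step(x, num_hidden=8, C=1, idx=0)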
def siamese_simp_net():
    def conv_bn_relu_pool_siamese(input_a, input_b, kernel, num_filter, pad, stride,
                                  name_postfix, use_pooling=False, p_kernel=None,
                                  p_stride=None, use_batch_norm=True):
        # the two branches share conv (and BN) parameters
        conv_weight = mxs.Variable(name='conv' + name_postfix + '_weight')
        conv_bias = mxs.Variable(name='conv' + name_postfix + '_bias')
        conv_a = mxs.Convolution(data=input_a, kernel=kernel, num_filter=num_filter,
                                 pad=pad, stride=stride,
                                 name='conv' + name_postfix + '_a',
                                 weight=conv_weight, bias=conv_bias)
        conv_b = mxs.Convolution(data=input_b, kernel=kernel, num_filter=num_filter,
                                 pad=pad, stride=stride,
                                 name='conv' + name_postfix + '_b',
                                 weight=conv_weight, bias=conv_bias)
        if use_batch_norm:
            bn_gamma = mxs.Variable(name='bn' + name_postfix + '_gamma')
            bn_beta = mxs.Variable(name='bn' + name_postfix + '_beta')
            bn_moving_mean = mxs.Variable(name='bn' + name_postfix + '_moving_mean')
            bn_moving_var = mxs.Variable(name='bn' + name_postfix + '_moving_var')
            batch_norm_a = mxs.BatchNorm(data=conv_a, name='bn' + name_postfix + '_a',
                                         gamma=bn_gamma, beta=bn_beta,
                                         moving_mean=bn_moving_mean,
                                         moving_var=bn_moving_var)
            batch_norm_b = mxs.BatchNorm(data=conv_b, name='bn' + name_postfix + '_b',
                                         gamma=bn_gamma, beta=bn_beta,
                                         moving_mean=bn_moving_mean,
                                         moving_var=bn_moving_var)
        else:
            batch_norm_a = conv_a
            batch_norm_b = conv_b
        relu_a = mxs.relu(data=batch_norm_a, name='relu' + name_postfix + '_a')
        relu_b = mxs.relu(data=batch_norm_b, name='relu' + name_postfix + '_b')
        if use_pooling:
            out_a = mxs.Pooling(data=relu_a, kernel=p_kernel, pool_type='max',
                                stride=p_stride, name='pool' + name_postfix + '_a')
            out_b = mxs.Pooling(data=relu_b, kernel=p_kernel, pool_type='max',
                                stride=p_stride, name='pool' + name_postfix + '_b')
        else:
            out_a = relu_a
            out_b = relu_b
        return out_a, out_b

    data_a = mxs.Variable('data_a')
    data_b = mxs.Variable('data_b')
    c1_a, c1_b = conv_bn_relu_pool_siamese(data_a, data_b, kernel=(3, 3), num_filter=64,
                                           pad=(1, 1), stride=(1, 1),
                                           name_postfix='1', use_pooling=False)
    c1_0_a, c1_0_b = conv_bn_relu_pool_siamese(c1_a, c1_b, kernel=(3, 3), num_filter=32,
                                               pad=(1, 1), stride=(1, 1),
                                               name_postfix='1_0', use_pooling=False)
    c2_a, c2_b = conv_bn_relu_pool_siamese(c1_0_a, c1_0_b, kernel=(3, 3), num_filter=32,
                                           pad=(1, 1), stride=(1, 1),
                                           name_postfix='2', use_pooling=False)
    c2_1_a, c2_1_b = conv_bn_relu_pool_siamese(c2_a, c2_b, kernel=(3, 3), num_filter=32,
                                               pad=(1, 1), stride=(1, 1), name_postfix='2_1',
                                               use_pooling=True, p_kernel=(2, 2), p_stride=(2, 2))
    c2_2_a, c2_2_b = conv_bn_relu_pool_siamese(c2_1_a, c2_1_b, kernel=(3, 3), num_filter=32,
                                               pad=(1, 1), stride=(1, 1),
                                               name_postfix='2_2', use_pooling=False)
    c3_a, c3_b = conv_bn_relu_pool_siamese(c2_2_a, c2_2_b, kernel=(3, 3), num_filter=32,
                                           pad=(1, 1), stride=(1, 1),
                                           name_postfix='3', use_pooling=False)
    # conv4, written out explicitly with the same weight sharing as the helper
    conv4_weight = mxs.Variable(name='conv4_weight')
    conv4_bias = mxs.Variable(name='conv4_bias')
    conv4_a = mxs.Convolution(data=c3_a, kernel=(3, 3), num_filter=64, pad=(1, 1),
                              stride=(1, 1), name='conv4_a',
                              weight=conv4_weight, bias=conv4_bias)  # xavier
    conv4_b = mxs.Convolution(data=c3_b, kernel=(3, 3), num_filter=64, pad=(1, 1),
                              stride=(1, 1), name='conv4_b',
                              weight=conv4_weight, bias=conv4_bias)  # xavier
    maxp4_a = mxs.Pooling(data=conv4_a, kernel=(2, 2), pool_type='max',
                          stride=(2, 2), name='pool4_a')
    maxp4_b = mxs.Pooling(data=conv4_b, kernel=(2, 2), pool_type='max',
                          stride=(2, 2), name='pool4_b')
    bn4_gamma = mxs.Variable(name='bn4_gamma')
    bn4_beta = mxs.Variable(name='bn4_beta')
    bn4_moving_mean = mxs.Variable(name='bn4_moving_mean')
    bn4_moving_var = mxs.Variable(name='bn4_moving_var')
    batch_norm_4_a = mxs.BatchNorm(data=maxp4_a, name='bn4_a', gamma=bn4_gamma,
                                   beta=bn4_beta, moving_mean=bn4_moving_mean,
                                   moving_var=bn4_moving_var)
    batch_norm_4_b = mxs.BatchNorm(data=maxp4_b, name='bn4_b', gamma=bn4_gamma,
                                   beta=bn4_beta, moving_mean=bn4_moving_mean,
                                   moving_var=bn4_moving_var)
    relu4_a = mxs.relu(data=batch_norm_4_a, name='relu4_a')
    relu4_b = mxs.relu(data=batch_norm_4_b, name='relu4_b')
    c4_1_a, c4_1_b = conv_bn_relu_pool_siamese(relu4_a, relu4_b, kernel=(3, 3), num_filter=64,
                                               pad=(1, 1), stride=(1, 1),
                                               name_postfix='4_1', use_pooling=False)
    c4_2_a, c4_2_b = conv_bn_relu_pool_siamese(c4_1_a, c4_1_b, kernel=(3, 3), num_filter=64,
                                               pad=(1, 1), stride=(1, 1), name_postfix='4_2',
                                               use_pooling=True, p_kernel=(2, 2), p_stride=(2, 2))
    c4_0_a, c4_0_b = conv_bn_relu_pool_siamese(c4_2_a, c4_2_b, kernel=(3, 3), num_filter=128,
                                               pad=(1, 1), stride=(1, 1),
                                               name_postfix='4_0', use_pooling=False)
    cccp4_a, cccp4_b = conv_bn_relu_pool_siamese(c4_0_a, c4_0_b, kernel=(1, 1), num_filter=256,
                                                 pad=(0, 0), stride=(1, 1),
                                                 name_postfix='_cccp4', use_pooling=False,
                                                 use_batch_norm=False)
    cccp5_a, cccp5_b = conv_bn_relu_pool_siamese(cccp4_a, cccp4_b, kernel=(1, 1), num_filter=64,
                                                 pad=(0, 0), stride=(1, 1), name_postfix='_cccp5',
                                                 use_pooling=True, p_kernel=(2, 2),
                                                 p_stride=(2, 2), use_batch_norm=False)
    cccp6_a, cccp6_b = conv_bn_relu_pool_siamese(cccp5_a, cccp5_b, kernel=(3, 3), num_filter=64,
                                                 pad=(2, 2), stride=(1, 1), name_postfix='_cccp6',
                                                 use_pooling=False, use_batch_norm=False)
    flat_a = mxs.flatten(cccp6_a)
    flat_b = mxs.flatten(cccp6_b)
    return flat_a, flat_b
import ipt
import mxnet as mx
import mxnet.symbol as S
from my_layer import *

data = S.Variable(name='data')

# 256x256
l1_1 = conv_relu('1', data=data, kernel=(5, 5), num_filter=32, pad=(2, 2))
l1_2 = conv_relu('2', data=l1_1, kernel=(3, 3), num_filter=32, pad=(1, 1))
l1_3 = conv_relu('3', data=l1_2, kernel=(3, 3), num_filter=64, pad=(1, 1), bn=True)
# try overlapping pooling in the future
p1 = maxpool(l1_3)

# 128x128
l2_1 = conv_relu('4', data=p1, kernel=(3, 3), num_filter=64, pad=(1, 1))
l2_2 = conv_relu('5', data=l2_1, kernel=(3, 3), num_filter=64, pad=(1, 1))
l2_3 = conv_relu('6', data=l2_2, kernel=(3, 3), num_filter=64, pad=(1, 1), bn=True)
p2 = maxpool(l2_3)
p1_5 = maxpool(p1)
p2 = p2 + p1_5  # skip connection from the first pooling stage

# 64x64
l3_1 = conv_relu('7', data=p2, kernel=(3, 3), num_filter=64, pad=(1, 1))
l3_2 = conv_relu('8', data=l3_1, kernel=(3, 3), num_filter=16, pad=(1, 1))
l3_3 = conv_relu('9', data=l3_2, kernel=(3, 3), num_filter=16, pad=(1, 1), bn=True)
l3_4 = S.Convolution(name='c10', data=l3_3, kernel=(1, 1), num_filter=1, pad=(0, 0))
pred = S.LogisticRegressionOutput(data=l3_4, name='softmax')


def e_net(*args):
    ...  # body truncated in the source
def conv_bn_relu_pool_siamese(input_a, input_b, kernel, num_filter, pad, stride,
                              name_postfix, use_pooling=False, p_kernel=None,
                              p_stride=None, use_batch_norm=True):
    conv_weight = mxs.Variable(name='conv' + name_postfix + '_weight')
    conv_bias = mxs.Variable(name='conv' + name_postfix + '_bias')
    conv_a = mxs.Convolution(data=input_a, kernel=kernel, num_filter=num_filter,
                             pad=pad, stride=stride,
                             name='conv' + name_postfix + '_a',
                             weight=conv_weight, bias=conv_bias)
    conv_b = mxs.Convolution(data=input_b, kernel=kernel, num_filter=num_filter,
                             pad=pad, stride=stride,
                             name='conv' + name_postfix + '_b',
                             weight=conv_weight, bias=conv_bias)
    if use_batch_norm:
        bn_gamma = mxs.Variable(name='bn' + name_postfix + '_gamma')
        bn_beta = mxs.Variable(name='bn' + name_postfix + '_beta')
        bn_moving_mean = mxs.Variable(name='bn' + name_postfix + '_moving_mean')
        bn_moving_var = mxs.Variable(name='bn' + name_postfix + '_moving_var')
        batch_norm_a = mxs.BatchNorm(data=conv_a, name='bn' + name_postfix + '_a',
                                     gamma=bn_gamma, beta=bn_beta,
                                     moving_mean=bn_moving_mean,
                                     moving_var=bn_moving_var)
        batch_norm_b = mxs.BatchNorm(data=conv_b, name='bn' + name_postfix + '_b',
                                     gamma=bn_gamma, beta=bn_beta,
                                     moving_mean=bn_moving_mean,
                                     moving_var=bn_moving_var)
    else:
        batch_norm_a = conv_a
        batch_norm_b = conv_b
    relu_a = mxs.relu(data=batch_norm_a, name='relu' + name_postfix + '_a')
    relu_b = mxs.relu(data=batch_norm_b, name='relu' + name_postfix + '_b')
    if use_pooling:
        out_a = mxs.Pooling(data=relu_a, kernel=p_kernel, pool_type='max',
                            stride=p_stride, name='pool' + name_postfix + '_a')
        out_b = mxs.Pooling(data=relu_b, kernel=p_kernel, pool_type='max',
                            stride=p_stride, name='pool' + name_postfix + '_b')
    else:
        out_a = relu_a
        out_b = relu_b
    return out_a, out_b
def get_symbol():
    num_classes = config.emb_size
    print('in_network', config)
    fc_type = config.net_output
    data = sym.Variable(name="data")
    # normalize input pixels to roughly [-1, 1]
    data = data - 127.5
    data = data * 0.0078125
    blocks = config.net_blocks
    conv_1 = Conv(data, num_filter=48, kernel=(5, 5), pad=(2, 2), stride=(2, 2), name="conv_1")
    if blocks[0] == 1:
        conv_2_dw = Conv(conv_1, num_group=48, num_filter=48, kernel=(3, 3),
                         pad=(1, 1), stride=(1, 1), name="conv_2_dw")
    else:
        conv_2_dw = Residual(conv_1, num_block=blocks[0], num_out=48, kernel=(3, 3),
                             stride=(1, 1), pad=(1, 1), num_group=48, name="res_2")
    conv_23 = DResidual(conv_2_dw, num_out=64, kernel=(5, 5), stride=(2, 2),
                        pad=(2, 2), num_group=128, name="dconv_23")
    conv_3a = Residual(conv_23, num_block=blocks[1] // 2, num_out=64, kernel=(3, 3),
                       stride=(1, 1), pad=(1, 1), num_group=128, name="res_3a")
    conv_3ase = SEModule(conv_3a, 64, "res_3ase")
    conv_3b = Residual(conv_3ase, num_block=blocks[1] - blocks[1] // 2, num_out=64,
                       kernel=(5, 5), stride=(1, 1), pad=(2, 2), num_group=128, name="res_3b")
    conv_3bse = SEModule(conv_3b, 64, "res_3bse")
    conv_34 = DResidual(conv_3bse, num_out=128, kernel=(5, 5), stride=(2, 2),
                        pad=(2, 2), num_group=256, name="dconv_34")
    conv_4a = Residual(conv_34, num_block=blocks[2] // 2, num_out=128, kernel=(3, 3),
                       stride=(1, 1), pad=(1, 1), num_group=256, name="res_4a")
    conv_4ase = SEModule(conv_4a, 128, "res_4ase")
    conv_4b = Residual(conv_4ase, num_block=blocks[2] - blocks[2] // 2, num_out=128,
                       kernel=(5, 5), stride=(1, 1), pad=(2, 2), num_group=256, name="res_4b")
    conv_4bse = SEModule(conv_4b, 128, "res_4bse")
    conv_45 = DResidual(conv_4bse, num_out=160, kernel=(5, 5), stride=(2, 2),
                        pad=(2, 2), num_group=512, name="dconv_45")
    conv_5a = Residual(conv_45, num_block=blocks[3] // 2, num_out=160, kernel=(3, 3),
                       stride=(1, 1), pad=(1, 1), num_group=480, name="res_5a")
    conv_5ase = SEModule(conv_5a, 160, "res_5ase")
    conv_5b = Residual(conv_5ase, num_block=blocks[3] - blocks[3] // 2, num_out=160,
                       kernel=(5, 5), stride=(1, 1), pad=(2, 2), num_group=480, name="res_5b")
    conv_5bse = SEModule(conv_5b, 160, "res_5bse")
    conv_6_sep = Conv(conv_5bse, num_filter=640, kernel=(1, 1), pad=(0, 0),
                      stride=(1, 1), name="conv_6sep")
    fc1 = symbol_utils.get_fc1(conv_6_sep, num_classes, fc_type, input_channel=640)
    return fc1
def convert_model_to_layers(net, syms=None, input_shape=(1, 3, 224, 224),
                            softmax=False, to_bgr=False, merge_bn=True):
    """
    Convert a Gluon model to Caffe layers.

    :param net: mxnet.gluon.nn.HybridBlock
        Gluon net to convert.
    :param syms: list of mxnet.symbol.Symbol, optional
        If None, the computation graph is constructed from `net`.
    :param input_shape: tuple
        Shape of the inputs.
    :param softmax: bool
        Append a softmax to the model.
    :param to_bgr: bool
        Convert the input type from RGB to BGR.
    :param merge_bn: bool
        Merge BatchNorm and Scale layers into Convolution layers.
    :return: list of caffe_pb2.LayerParameter
        Caffe layers of the model.
    """
    # A list to collect layers
    caffe_net = []
    # A list to collect names of visited symbols
    visited = []
    # Parameters from the Gluon model
    gluon_params = net.collect_params()

    # Generate the symbolic model
    if syms is None:
        input_ = symbol.Variable("data", shape=input_shape)
        syms = net(input_)
    if softmax:
        assert type(syms) != tuple
        syms = symbol.softmax(syms)

    # Convert the data layer
    convert_fn = _converter.get("data")
    layer = convert_fn(input_shape)
    caffe_net.append(layer)

    # Convert the remaining layers
    if type(syms) not in (tuple, list):
        syms = (syms,)
    for sym in syms:
        node_ops = _extract_node_ops(sym)
        for node in sym.get_internals():
            if node.name in visited:
                continue
            visited.append(node.name)
            # Basic attributes: name & op
            name = node.name
            op = node_ops[name]
            # Collect all children: inputs and parameters
            in_sym = node.get_children()
            if in_sym is None:
                # data layer
                continue
            # Collectors for bottoms and parameters
            bottoms = []
            params = []
            for s in in_sym:
                s_name = s.name
                if s_name != 'data' and node_ops[s_name] == 'null':
                    # Parameters
                    params.append(gluon_params[s_name].data().asnumpy())
                else:
                    # Inputs
                    bottoms.append(_clean_name(net, s_name))
            # Reorder the first layer's weights to BGR
            if all((to_bgr, op in ("Convolution", "FullyConnected"), "data" in bottoms)):
                print("To BGR:", node.name)
                params[0] = _weights_rgb2bgr(params[0])
            # Collector for tops
            tops = [_clean_name(net, out_name) for out_name in node.list_outputs()]
            # Get attributes
            attrs = node.list_attr()
            # Convert ReLU6 (clip to [0, 6]) to plain ReLU
            if op == 'clip':
                if attrs['a_min'] == '0' and attrs['a_max'] == '6':
                    op = 'Activation'
                    attrs = {"act_type": "relu"}
            # Get the convert function
            convert_fn = _converter.get(op, None)
            assert convert_fn is not None, f"unknown op: {op}"
            # Convert the Gluon layer to Caffe and add it to `caffe_net`
            layer = convert_fn(_clean_name(net, name), bottoms, tops, params, attrs)
            if op == "BatchNorm":
                # BatchNorm is converted into BatchNorm & Scale
                caffe_net.extend(layer)
            else:
                # Other layers
                caffe_net.append(layer)

    # Set ReLU & BatchNorm in-place
    _in_place(caffe_net)
    # Merge BatchNorm/Scale into Convolution
    if merge_bn:
        _merge_bn(caffe_net)
    return caffe_net
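# A minimal usage sketch for convert_model_to_layers on a model-zoo network.
# It assumes the converter helpers above (_converter, _clean_name, ...) are
# defined in the same module; the choice of MobileNetV2 is illustrative.
from mxnet.gluon.model_zoo import vision

net = vision.mobilenet_v2_1_0(pretrained=True)
layers = convert_model_to_layers(net, input_shape=(1, 3, 224, 224), merge_bn=True)
print(len(layers), "Caffe layers")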
def lstm_unroll(num_lstm_layer, seq_len, num_hidden, dropout=0., shapes=None):
    # embed_weight = mx.sym.Variable("embed_weight")
    cls_weight = mx.sym.Variable("cls_weight")
    cls_bias = mx.sym.Variable("cls_bias")
    param_cells = []
    last_states = []
    pred_all = []
    for i in range(num_lstm_layer):
        param_cells.append(LSTMParam(i2h_weight=mx.sym.Variable("l%d_i2h_weight" % i),
                                     i2h_bias=mx.sym.Variable("l%d_i2h_bias" % i),
                                     h2h_weight=mx.sym.Variable("l%d_h2h_weight" % i),
                                     h2h_bias=mx.sym.Variable("l%d_h2h_bias" % i)))
        state = LSTMState(c=mx.sym.Variable("l%d_init_c" % i),
                          h=mx.sym.Variable("l%d_init_h" % i))
        last_states.append(state)
    assert len(last_states) == num_lstm_layer

    # data and per-step label sequences
    data = mx.sym.Variable('data')
    label = mx.sym.Variable('softmax_label')
    timeseq = mx.sym.SliceChannel(data=data, num_outputs=seq_len, squeeze_axis=1)
    labelseq = mx.sym.SliceChannel(data=label, num_outputs=seq_len, squeeze_axis=1)

    # CNN parameters, shared across time steps
    layer_num = 10
    P = []
    for i in range(layer_num):
        P.append(S.Variable('c%d_weight' % i))
        P.append(S.Variable('c%d_bias' % i))
        P.append(S.Variable('bn%d_gamma' % i))
        P.append(S.Variable('bn%d_beta' % i))
    up_num = 3
    D = []
    for i in range(up_num):
        D.append(S.Variable('deconv%d_weight' % i))
        # D.append(S.Variable('deconv%d_bias' % i))

    for seqidx in range(seq_len):
        # embed the frame with the shared CNN, then flatten for the LSTM
        hidden = timeseq[seqidx]
        hidden = cnn_forward(hidden, P, D)
        hidden = mx.sym.Reshape(data=hidden, target_shape=(0, 1 * 256 * 256))
        # stack LSTM layers
        for i in range(num_lstm_layer):
            dp_ratio = 0. if i == 0 else dropout
            next_state = lstm(num_hidden, indata=hidden,
                              prev_state=last_states[i],
                              param=param_cells[i],
                              seqidx=seqidx, layeridx=i, dropout=dp_ratio)
            hidden = next_state.h
            last_states[i] = next_state
        # decoder: project back to a 1x256x256 map with a per-pixel logistic loss
        if dropout > 0.:
            hidden = mx.sym.Dropout(data=hidden, p=dropout)
        pred = mx.sym.FullyConnected(data=hidden, weight=cls_weight, bias=cls_bias,
                                     name='pred%d' % seqidx, num_hidden=1 * 256 * 256)
        pred = mx.sym.Reshape(data=pred, target_shape=(0, 1, 256, 256))
        pred = mx.sym.LogisticRegressionOutput(data=pred, label=labelseq[seqidx],
                                               name='logis%d' % seqidx)
        pred_all.append(pred)
    return mx.sym.Group(pred_all)
def symbol_mlp():
    data = symbol.Variable("data")
    first_layer = symbol.FullyConnected(data=data, num_hidden=20)
    second_layer = symbol.FullyConnected(data=first_layer, num_hidden=3)
    return data, second_layer
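# A quick shape check for symbol_mlp; the (8, 10) input shape is an
# illustrative assumption.
data, net = symbol_mlp()
arg_shapes, out_shapes, aux_shapes = net.infer_shape(data=(8, 10))
print(out_shapes)  # [(8, 3)]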
def convert_ssd_model(net, input_shape=(1, 3, 512, 512), to_bgr=False, merge_bn=True):
    """
    Convert an SSD-like model to Caffe.

    :param net: mxnet.gluon.nn.HybridBlock
        Gluon net to convert.
    :param input_shape: tuple
        Shape of the inputs.
    :param to_bgr: bool
        Convert the input type from RGB to BGR.
    :param merge_bn: bool
        Merge BatchNorm and Scale layers into Convolution layers.
    :return: (text_net, binary_weights)
        text_net: caffe_pb2.NetParameter
            Structure of the net.
        binary_weights: caffe_pb2.NetParameter
            Weights of the net.
    """
    # Create symbols
    in_ = symbol.Variable("data", shape=input_shape)
    __, scores_sym, __ = net(in_)

    # Add symbols for box_predictors and cls_predictors
    # box_predictors
    box_pred_name = net.box_predictors[0].predictor.name
    box_transpose = _find_symbol_by_bottomname(scores_sym, f"{box_pred_name}_fwd")
    box_flatten = _find_symbol_by_bottomname(scores_sym, box_transpose.name)
    box_concat = _find_symbol_by_bottomname(scores_sym, box_flatten.name)
    # cls_predictors
    cls_pred_name = net.class_predictors[0].predictor.name
    cls_transpose = _find_symbol_by_bottomname(scores_sym, f"{cls_pred_name}_fwd")
    cls_flatten = _find_symbol_by_bottomname(scores_sym, cls_transpose.name)
    cls_concat = _find_symbol_by_bottomname(scores_sym, cls_flatten.name)
    cls_reshape = _find_symbol_by_bottomname(scores_sym, cls_concat.name)
    cls_softmax = symbol.softmax(cls_reshape, axis=2)
    cls_flatten = symbol.flatten(cls_softmax)

    # Collect attributes needed by the PriorBox and DetectionOutput layers
    priorbox_attrs, detection_out_attrs = _extract_ssd_attrs(net)

    # Create fake symbols for the PriorBox layers
    priorboxes = []
    for i, box_pred in enumerate(net.box_predictors):
        pred_sym = _find_symbol_by_name(scores_sym, f"{box_pred.predictor.name}_fwd")
        # (ugly) Get the Convolution symbol of the predictor
        for c in pred_sym.get_children():
            if c.get_children() is not None:
                conv = c
                break
        # Create a new fake symbol for PriorBox
        priorbox = FakeSymbol(conv, name=f"{conv.name}_priorbox", _op="PriorBox",
                              **priorbox_attrs[i])
        priorboxes.append(priorbox)
    # Concat the outputs of the PriorBox symbols
    pbox_concat = symbol.concat(*priorboxes, dim=2)

    # Create a fake symbol for the DetectionOutput layer
    detection_out = FakeSymbol(box_concat, cls_flatten, pbox_concat, _in_num=3,
                               name="detection_out", _op="DetectionOutput",
                               **detection_out_attrs)
    return convert_model(net, detection_out, input_shape=input_shape,
                         to_bgr=to_bgr, merge_bn=merge_bn)
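# A minimal usage sketch for convert_ssd_model. GluonCV's SSD models expose
# the `box_predictors`/`class_predictors` attributes used above; the
# dependency on gluoncv is an assumption of this example.
from gluoncv import model_zoo

net = model_zoo.get_model('ssd_512_resnet50_v1_voc', pretrained=True)
text_net, binary_weights = convert_ssd_model(net, input_shape=(1, 3, 512, 512))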