def _parse_proto(prototxt_fname): """Parse Caffe prototxt into symbol string """ proto = caffe_parser.read_prototxt(prototxt_fname) # process data layer input_name, input_dim, layers = _get_input(proto) # only support single input, so always use `data` as the input data mapping = {input_name: 'data'} need_flatten = {input_name: False} symbol_string = "import mxnet as mx\ndata = mx.symbol.Variable(name='data')\n" flatten_count = 0 output_name = "" prev_name = None _output_name = {} # convert reset layers one by one for i, layer in enumerate(layers): type_string = '' param_string = '' skip_layer = False name = re.sub('[-/]', '_', layer.name) for k in range(len(layer.bottom)): if layer.bottom[k] in _output_name: _output_name[layer.bottom[k]]['count'] = _output_name[ layer.bottom[k]]['count'] + 1 else: _output_name[layer.bottom[k]] = {'count': 0} for k in range(len(layer.top)): if layer.top[k] in _output_name: _output_name[layer.top[k]]['count'] = _output_name[ layer.top[k]]['count'] + 1 else: _output_name[layer.top[k]] = {'count': 0, 'name': name} if layer.type == 'Convolution' or layer.type == 4: type_string = 'mx.symbol.Convolution' param_string = _convert_conv_param(layer.convolution_param) need_flatten[name] = True if layer.type == 'Deconvolution' or layer.type == 39: type_string = 'mx.symbol.Deconvolution' param_string = _convert_conv_param(layer.convolution_param) need_flatten[name] = True if layer.type == 'Pooling' or layer.type == 17: type_string = 'mx.symbol.Pooling' param_string = _convert_pooling_param(layer.pooling_param) need_flatten[name] = True if layer.type == 'ReLU' or layer.type == 18: type_string = 'mx.symbol.Activation' param_string = "act_type='relu'" param = layer.relu_param if hasattr(param, 'negative_slope'): if param.negative_slope > 0: type_string = 'mx.symbol.LeakyReLU' param_string = "act_type='leaky', slope=%f" % param.negative_slope need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'TanH' or layer.type == 23: type_string = 'mx.symbol.Activation' param_string = "act_type='tanh'" need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'Sigmoid' or layer.type == 19: type_string = 'mx.symbol.Activation' param_string = "act_type='sigmoid'" need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'LRN' or layer.type == 15: type_string = 'mx.symbol.LRN' param = layer.lrn_param param_string = "alpha=%f, beta=%f, knorm=%f, nsize=%d" % ( param.alpha, param.beta, param.k, param.local_size) need_flatten[name] = True if layer.type == 'InnerProduct' or layer.type == 14: type_string = 'mx.symbol.FullyConnected' param = layer.inner_product_param param_string = "num_hidden=%d, no_bias=%s" % (param.num_output, not param.bias_term) need_flatten[name] = False if layer.type == 'Dropout' or layer.type == 6: type_string = 'mx.symbol.Dropout' param = layer.dropout_param param_string = "p=%f" % param.dropout_ratio need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'Softmax' or layer.type == 20: type_string = 'mx.symbol.SoftmaxOutput' if layer.type == 'Flatten' or layer.type == 8: type_string = 'mx.symbol.Flatten' need_flatten[name] = False if layer.type == 'Split' or layer.type == 22: type_string = 'split' # will process later if layer.type == 'Concat' or layer.type == 3: type_string = 'mx.symbol.Concat' need_flatten[name] = True if layer.type == 'Crop': type_string = 'mx.symbol.Crop' need_flatten[name] = True param_string = 'center_crop=True' if layer.type == 'BatchNorm': type_string = 'mx.symbol.BatchNorm' param = layer.batch_norm_param # CuDNN requires eps to be greater than 1e-05 # We compensate for this change in convert_model epsilon = param.eps if (epsilon <= 1e-05): epsilon = 1e-04 # if next layer is scale, don't fix gamma fix_gamma = layers[i + 1].type != 'Scale' param_string = 'use_global_stats=%s, fix_gamma=%s, eps=%f' % ( param.use_global_stats, fix_gamma, epsilon) need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'Scale': assert layers[i - 1].type == 'BatchNorm' need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] skip_layer = True prev_name = re.sub('[-/]', '_', layers[i - 1].name) if layer.type == 'PReLU': type_string = 'mx.symbol.LeakyReLU' param = layer.prelu_param param_string = "act_type='prelu', slope=%f" % param.filler.value need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'Eltwise': type_string = 'mx.symbol.broadcast_add' param = layer.eltwise_param param_string = "" need_flatten[name] = False if layer.type == 'Reshape': type_string = 'mx.symbol.Reshape' need_flatten[name] = False param = layer.reshape_param param_string = "shape=(%s)" % (','.join(param.shape.dim), ) if layer.type == 'AbsVal': type_string = 'mx.symbol.abs' need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if skip_layer: assert len(layer.bottom) == 1 symbol_string += "%s = %s\n" % (name, prev_name) elif type_string == '': raise ValueError('Unknown layer %s!' % layer.type) elif type_string != 'split': bottom = layer.bottom if param_string != "": param_string = ", " + param_string if len(bottom) == 1: if need_flatten[mapping[bottom[ 0]]] and type_string == 'mx.symbol.FullyConnected': flatten_name = "flatten_%d" % flatten_count symbol_string += "%s=mx.symbol.Flatten(name='%s', data=%s)\n" % ( flatten_name, flatten_name, mapping[bottom[0]]) flatten_count += 1 need_flatten[flatten_name] = False bottom[0] = flatten_name mapping[bottom[0]] = bottom[0] symbol_string += "%s = %s(name='%s', data=%s %s)\n" % ( name, type_string, name, mapping[bottom[0]], param_string) else: if layer.type == 'Eltwise' and param.operation == 1 and len( param.coeff) > 0: symbol_string += "%s = " % name symbol_string += " + ".join([ "%s * %s" % (mapping[bottom[i]], param.coeff[i]) for i in range(len(param.coeff)) ]) symbol_string += "\n" else: symbol_string += "%s = %s(name='%s', *[%s] %s)\n" % ( name, type_string, name, ','.join( [mapping[x] for x in bottom]), param_string) for j in range(len(layer.top)): mapping[layer.top[j]] = name output_name = name output_name = [] for i in _output_name: if 'name' in _output_name[i] and _output_name[i]['count'] == 0: output_name.append(_output_name[i]['name']) return symbol_string, output_name, input_dim
def convert_model(prototxt_fname, caffemodel_fname, output_prefix=None): """Convert caffe model Parameters ---------- prototxt_fname : str Filename of the prototxt model definition caffemodel_fname : str Filename of the binary caffe model output_prefix : str, optinoal If given, then save the converted MXNet into output_prefx+'.json' and output_prefx+'.params' Returns ------- sym : Symbol Symbol convereted from prototxt arg_params : list of NDArray Argument parameters aux_params : list of NDArray Aux parameters input_dim : tuple Input dimension """ sym, input_dim = convert_symbol(prototxt_fname) arg_shapes, _, aux_shapes = sym.infer_shape(data=tuple(input_dim)) arg_names = sym.list_arguments() aux_names = sym.list_auxiliary_states() arg_shape_dic = dict(zip(arg_names, arg_shapes)) aux_shape_dic = dict(zip(aux_names, aux_shapes)) arg_params = {} aux_params = {} first_conv = True layers, names = caffe_parser.read_caffemodel(prototxt_fname, caffemodel_fname) layer_iter = caffe_parser.layer_iter(layers, names) layers_proto = caffe_parser.get_layers( caffe_parser.read_prototxt(prototxt_fname)) for layer_name, layer_type, layer_blobs in layer_iter: if layer_type == 'Convolution' or layer_type == 'InnerProduct' \ or layer_type == 4 or layer_type == 14 or layer_type == 'PReLU' \ or layer_type == 'Deconvolution' or layer_type == 39: if layer_type == 'PReLU': assert (len(layer_blobs) == 1) weight_name = layer_name + '_gamma' wmat = np.array(layer_blobs[0].data).reshape( arg_shape_dic[weight_name]) arg_params[weight_name] = mx.nd.zeros(wmat.shape) arg_params[weight_name][:] = wmat continue wmat_dim = [] if getattr(layer_blobs[0].shape, 'dim', None) is not None: if len(layer_blobs[0].shape.dim) > 0: wmat_dim = layer_blobs[0].shape.dim else: wmat_dim = [ layer_blobs[0].num, layer_blobs[0].channels, layer_blobs[0].height, layer_blobs[0].width ] else: wmat_dim = list(layer_blobs[0].shape) wmat = np.array(layer_blobs[0].data).reshape(wmat_dim) channels = wmat_dim[1] if channels == 3 or channels == 4: # RGB or RGBA if first_conv: # Swapping BGR of caffe into RGB in mxnet wmat[:, [0, 2], :, :] = wmat[:, [2, 0], :, :] assert (wmat.flags['C_CONTIGUOUS'] is True) sys.stdout.write('converting layer {0}, wmat shape = {1}'.format( layer_name, wmat.shape)) if len(layer_blobs) == 2: bias = np.array(layer_blobs[1].data) bias = bias.reshape((bias.shape[0], 1)) assert (bias.flags['C_CONTIGUOUS'] is True) bias_name = layer_name + "_bias" if bias_name not in arg_shape_dic: print(bias_name + ' not found in arg_shape_dic.') continue bias = bias.reshape(arg_shape_dic[bias_name]) arg_params[bias_name] = mx.nd.zeros(bias.shape) arg_params[bias_name][:] = bias sys.stdout.write(', bias shape = {}'.format(bias.shape)) sys.stdout.write('\n') sys.stdout.flush() wmat = wmat.reshape((wmat.shape[0], -1)) weight_name = layer_name + "_weight" if weight_name not in arg_shape_dic: print(weight_name + ' not found in arg_shape_dic.') continue wmat = wmat.reshape(arg_shape_dic[weight_name]) arg_params[weight_name] = mx.nd.zeros(wmat.shape) arg_params[weight_name][:] = wmat if first_conv and (layer_type == 'Convolution' or layer_type == 4): first_conv = False elif layer_type == 'Scale': if 'scale' in layer_name: bn_name = layer_name.replace('scale', 'bn') elif 'sc' in layer_name: bn_name = layer_name.replace('sc', 'bn') else: assert False, 'Unknown name convention for bn/scale' gamma = np.array(layer_blobs[0].data) beta = np.array(layer_blobs[1].data) # beta = np.expand_dims(beta, 1) beta_name = '{}_beta'.format(bn_name) gamma_name = '{}_gamma'.format(bn_name) beta = beta.reshape(arg_shape_dic[beta_name]) gamma = gamma.reshape(arg_shape_dic[gamma_name]) arg_params[beta_name] = mx.nd.zeros(beta.shape) arg_params[gamma_name] = mx.nd.zeros(gamma.shape) arg_params[beta_name][:] = beta arg_params[gamma_name][:] = gamma assert gamma.flags['C_CONTIGUOUS'] is True assert beta.flags['C_CONTIGUOUS'] is True print('converting scale layer, beta shape = {}, gamma shape = {}'. format(beta.shape, gamma.shape)) elif layer_type == 'BatchNorm': bn_name = layer_name mean = np.array(layer_blobs[0].data) var = np.array(layer_blobs[1].data) rescale_factor = layer_blobs[2].data[0] if rescale_factor != 0: rescale_factor = 1 / rescale_factor mean_name = '{}_moving_mean'.format(bn_name) var_name = '{}_moving_var'.format(bn_name) mean = mean.reshape(aux_shape_dic[mean_name]) var = var.reshape(aux_shape_dic[var_name]) aux_params[mean_name] = mx.nd.zeros(mean.shape) aux_params[var_name] = mx.nd.zeros(var.shape) # Get the original epsilon for idx, layer in enumerate(layers_proto): if layer.name == bn_name or re.sub('[-/]', '_', layer.name) == bn_name: bn_index = idx eps_caffe = layers_proto[bn_index].batch_norm_param.eps # Compensate for the epsilon shift performed in convert_symbol eps_symbol = float(sym.attr_dict()[bn_name + '_moving_mean']['eps']) eps_correction = eps_caffe - eps_symbol # Fill parameters aux_params[mean_name][:] = mean * rescale_factor aux_params[var_name][:] = var * rescale_factor + eps_correction assert var.flags['C_CONTIGUOUS'] is True assert mean.flags['C_CONTIGUOUS'] is True print( 'converting batchnorm layer, mean shape = {}, var shape = {}'. format(mean.shape, var.shape)) fix_gamma = layers_proto[bn_index + 1].type != 'Scale' if fix_gamma: gamma_name = '{}_gamma'.format(bn_name) gamma = np.array(np.ones(arg_shape_dic[gamma_name])) beta_name = '{}_beta'.format(bn_name) beta = np.array(np.zeros(arg_shape_dic[beta_name])) arg_params[beta_name] = mx.nd.zeros(beta.shape) arg_params[gamma_name] = mx.nd.zeros(gamma.shape) arg_params[beta_name][:] = beta arg_params[gamma_name][:] = gamma assert gamma.flags['C_CONTIGUOUS'] is True assert beta.flags['C_CONTIGUOUS'] is True else: print('\tskipping layer {} of type {}'.format( layer_name, layer_type)) assert len(layer_blobs) == 0 if output_prefix is not None: model = mx.mod.Module(symbol=sym, label_names=[ prob_label(arg_names), ]) model.bind(data_shapes=[('data', tuple(input_dim))]) model.init_params(arg_params=arg_params, aux_params=aux_params) model.save_checkpoint(output_prefix, 0) return sym, arg_params, aux_params, input_dim
def convert_model(prototxt_fname, caffemodel_fname, output_prefix=None): """Convert caffe model Parameters ---------- prototxt_fname : str Filename of the prototxt model definition caffemodel_fname : str Filename of the binary caffe model output_prefix : str, optinoal If given, then save the converted MXNet into output_prefx+'.json' and output_prefx+'.params' Returns ------- sym : Symbol Symbol convereted from prototxt arg_params : list of NDArray Argument parameters aux_params : list of NDArray Aux parameters input_dim : tuple Input dimension """ sym, input_dim = convert_symbol(prototxt_fname) arg_shapes, _, aux_shapes = sym.infer_shape(data=tuple(input_dim)) arg_names = sym.list_arguments() aux_names = sym.list_auxiliary_states() arg_shape_dic = dict(zip(arg_names, arg_shapes)) aux_shape_dic = dict(zip(aux_names, aux_shapes)) arg_params = {} aux_params = {} first_conv = True layers, names = caffe_parser.read_caffemodel(prototxt_fname, caffemodel_fname) layer_iter = caffe_parser.layer_iter(layers, names) layers_proto = caffe_parser.get_layers(caffe_parser.read_prototxt(prototxt_fname)) for layer_name, layer_type, layer_blobs in layer_iter: if layer_type == 'Convolution' or layer_type == 'InnerProduct' \ or layer_type == 4 or layer_type == 14 or layer_type == 'PReLU': if layer_type == 'PReLU': assert (len(layer_blobs) == 1) wmat = layer_blobs[0].data weight_name = layer_name + '_gamma' arg_params[weight_name] = mx.nd.zeros(wmat.shape) arg_params[weight_name][:] = wmat continue wmat_dim = [] if getattr(layer_blobs[0].shape, 'dim', None) is not None: if len(layer_blobs[0].shape.dim) > 0: wmat_dim = layer_blobs[0].shape.dim else: wmat_dim = [layer_blobs[0].num, layer_blobs[0].channels, layer_blobs[0].height, layer_blobs[0].width] else: wmat_dim = list(layer_blobs[0].shape) wmat = np.array(layer_blobs[0].data).reshape(wmat_dim) channels = wmat_dim[1] if channels == 3 or channels == 4: # RGB or RGBA if first_conv: # Swapping BGR of caffe into RGB in mxnet wmat[:, [0, 2], :, :] = wmat[:, [2, 0], :, :] assert(wmat.flags['C_CONTIGUOUS'] is True) sys.stdout.write('converting layer {0}, wmat shape = {1}'.format( layer_name, wmat.shape)) if len(layer_blobs) == 2: bias = np.array(layer_blobs[1].data) bias = bias.reshape((bias.shape[0], 1)) assert(bias.flags['C_CONTIGUOUS'] is True) bias_name = layer_name + "_bias" bias = bias.reshape(arg_shape_dic[bias_name]) arg_params[bias_name] = mx.nd.zeros(bias.shape) arg_params[bias_name][:] = bias sys.stdout.write(', bias shape = {}'.format(bias.shape)) sys.stdout.write('\n') sys.stdout.flush() wmat = wmat.reshape((wmat.shape[0], -1)) weight_name = layer_name + "_weight" if weight_name not in arg_shape_dic: print(weight_name + ' not found in arg_shape_dic.') continue wmat = wmat.reshape(arg_shape_dic[weight_name]) arg_params[weight_name] = mx.nd.zeros(wmat.shape) arg_params[weight_name][:] = wmat if first_conv and (layer_type == 'Convolution' or layer_type == 4): first_conv = False elif layer_type == 'Scale': bn_name = layer_name.replace('scale', 'bn') gamma = layer_blobs[0].data beta = layer_blobs[1].data # beta = np.expand_dims(beta, 1) beta_name = '{}_beta'.format(bn_name) gamma_name = '{}_gamma'.format(bn_name) beta = beta.reshape(arg_shape_dic[beta_name]) gamma = gamma.reshape(arg_shape_dic[gamma_name]) arg_params[beta_name] = mx.nd.zeros(beta.shape) arg_params[gamma_name] = mx.nd.zeros(gamma.shape) arg_params[beta_name][:] = beta arg_params[gamma_name][:] = gamma assert gamma.flags['C_CONTIGUOUS'] is True assert beta.flags['C_CONTIGUOUS'] is True print('converting scale layer, beta shape = {}, gamma shape = {}'.format( beta.shape, gamma.shape)) elif layer_type == 'BatchNorm': bn_name = layer_name mean = layer_blobs[0].data var = layer_blobs[1].data rescale_factor = layer_blobs[2].data if rescale_factor != 0: rescale_factor = 1 / rescale_factor mean_name = '{}_moving_mean'.format(bn_name) var_name = '{}_moving_var'.format(bn_name) mean = mean.reshape(aux_shape_dic[mean_name]) var = var.reshape(aux_shape_dic[var_name]) aux_params[mean_name] = mx.nd.zeros(mean.shape) aux_params[var_name] = mx.nd.zeros(var.shape) # Get the original epsilon for idx, layer in enumerate(layers_proto): if layer.name == bn_name: bn_index = idx eps_caffe = layers_proto[bn_index].batch_norm_param.eps # Compensate for the epsilon shift performed in convert_symbol eps_symbol = float(sym.attr_dict()[bn_name + '_moving_mean']['eps']) eps_correction = eps_caffe - eps_symbol # Fill parameters aux_params[mean_name][:] = mean * rescale_factor aux_params[var_name][:] = var * rescale_factor + eps_correction assert var.flags['C_CONTIGUOUS'] is True assert mean.flags['C_CONTIGUOUS'] is True print('converting batchnorm layer, mean shape = {}, var shape = {}'.format( mean.shape, var.shape)) else: assert len(layer_blobs) == 0 print('\tskipping layer {} of type {}'.format(layer_name, layer_type)) if output_prefix is not None: model = mx.mod.Module(symbol=sym, label_names=['prob_label', ]) model.bind(data_shapes=[('data', tuple(input_dim))]) model.init_params(arg_params=arg_params, aux_params=aux_params) model.save_checkpoint(output_prefix, 0) return sym, arg_params, aux_params, input_dim
def _parse_proto(prototxt_fname): """Parse Caffe prototxt into symbol string """ proto = caffe_parser.read_prototxt(prototxt_fname) # process data layer input_name, input_dim, layers = _get_input(proto) # only support single input, so always use `data` as the input data mapping = {input_name: 'data'} need_flatten = {input_name: False} symbol_string = "import mxnet as mx\ndata = mx.symbol.Variable(name='data')\n" flatten_count = 0 output_name = "" prev_name = None # convert reset layers one by one for i, layer in enumerate(layers): type_string = '' param_string = '' skip_layer = False name = re.sub('[-/]', '_', layer.name) if layer.type == 'Convolution' or layer.type == 4: type_string = 'mx.symbol.Convolution' param_string = _convert_conv_param(layer.convolution_param) need_flatten[name] = True if layer.type == 'Deconvolution' or layer.type == 39: type_string = 'mx.symbol.Deconvolution' param_string = _convert_conv_param(layer.convolution_param) need_flatten[name] = True if layer.type == 'Pooling' or layer.type == 17: type_string = 'mx.symbol.Pooling' param_string = _convert_pooling_param(layer.pooling_param) need_flatten[name] = True if layer.type == 'ReLU' or layer.type == 18: type_string = 'mx.symbol.Activation' param_string = "act_type='relu'" need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'TanH' or layer.type == 23: type_string = 'mx.symbol.Activation' param_string = "act_type='tanh'" need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'Sigmoid' or layer.type == 19: type_string = 'mx.symbol.Activation' param_string = "act_type='sigmoid'" need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'LRN' or layer.type == 15: type_string = 'mx.symbol.LRN' param = layer.lrn_param param_string = "alpha=%f, beta=%f, knorm=%f, nsize=%d" % ( param.alpha, param.beta, param.k, param.local_size) need_flatten[name] = True if layer.type == 'InnerProduct' or layer.type == 14: type_string = 'mx.symbol.FullyConnected' param = layer.inner_product_param param_string = "num_hidden=%d, no_bias=%s" % ( param.num_output, not param.bias_term) need_flatten[name] = False if layer.type == 'Dropout' or layer.type == 6: type_string = 'mx.symbol.Dropout' param = layer.dropout_param param_string = "p=%f" % param.dropout_ratio need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'Softmax' or layer.type == 20: type_string = 'mx.symbol.SoftmaxOutput' if layer.type == 'Flatten' or layer.type == 8: type_string = 'mx.symbol.Flatten' need_flatten[name] = False if layer.type == 'Split' or layer.type == 22: type_string = 'split' # will process later if layer.type == 'Concat' or layer.type == 3: type_string = 'mx.symbol.Concat' need_flatten[name] = True if layer.type == 'Crop': type_string = 'mx.symbol.Crop' need_flatten[name] = True param_string = 'center_crop=True' if layer.type == 'BatchNorm': type_string = 'mx.symbol.BatchNorm' param = layer.batch_norm_param # CuDNN requires eps to be greater than 1e-05 # We compensate for this change in convert_model epsilon = param.eps if (epsilon <= 1e-05): epsilon = 1e-04 param_string = 'use_global_stats=%s, fix_gamma=False, eps=%f' % ( param.use_global_stats, epsilon) need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'Scale': assert layers[i-1].type == 'BatchNorm' need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] skip_layer = True prev_name = re.sub('[-/]', '_', layers[i-1].name) if layer.type == 'PReLU': type_string = 'mx.symbol.LeakyReLU' param = layer.prelu_param param_string = "act_type='prelu', slope=%f" % param.filler.value need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'Eltwise': type_string = 'mx.symbol.broadcast_add' param_string = "" need_flatten[name] = False if layer.type == 'Reshape': type_string = 'mx.symbol.Reshape' need_flatten[name] = False param = layer.reshape_param param_string = "shape=(%s)" % (','.join(param.shape.dim),) if layer.type == 'AbsVal': type_string = 'mx.symbol.abs' need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if skip_layer: assert len(layer.bottom) == 1 symbol_string += "%s = %s\n" % (name, prev_name) elif type_string == '': raise ValueError('Unknown layer %s!' % layer.type) elif type_string != 'split': bottom = layer.bottom if param_string != "": param_string = ", " + param_string if len(bottom) == 1: if need_flatten[mapping[bottom[0]]] and type_string == 'mx.symbol.FullyConnected': flatten_name = "flatten_%d" % flatten_count symbol_string += "%s=mx.symbol.Flatten(name='%s', data=%s)\n" % ( flatten_name, flatten_name, mapping[bottom[0]]) flatten_count += 1 need_flatten[flatten_name] = False bottom[0] = flatten_name mapping[bottom[0]] = bottom[0] symbol_string += "%s = %s(name='%s', data=%s %s)\n" % ( name, type_string, name, mapping[bottom[0]], param_string) else: symbol_string += "%s = %s(name='%s', *[%s] %s)\n" % ( name, type_string, name, ','.join([mapping[x] for x in bottom]), param_string) for j in range(len(layer.top)): mapping[layer.top[j]] = name output_name = name return symbol_string, output_name, input_dim
def _parse_proto(prototxt_fname): """Parse Caffe prototxt into symbol string """ proto = caffe_parser.read_prototxt(prototxt_fname) # process data layer input_name, input_dim, layers = _get_input(proto) # only support single input, so always use `data` as the input data mapping = {input_name: 'data'} need_flatten = {input_name: False} symbol_string = "import mxnet as mx\ndata = mx.symbol.Variable(name='data')\n" flatten_count = 0 output_name = "" prev_name = None # convert reset layers one by one for i, layer in enumerate(layers): type_string = '' param_string = '' skip_layer = False bottom_order = [] name = re.sub('[-/]', '_', layer.name) if layer.type == 'Convolution' or layer.type == 4: type_string = 'mx.symbol.Convolution' param_string = _convert_conv_param(layer.convolution_param) need_flatten[name] = True if layer.type == 'Deconvolution' or layer.type == 39: type_string = 'mx.symbol.Deconvolution' param_string = _convert_conv_param(layer.convolution_param) need_flatten[name] = True if layer.type == 'Pooling' or layer.type == 17: type_string = 'mx.symbol.Pooling' param_string = _convert_pooling_param(layer.pooling_param) need_flatten[name] = True if layer.type == 'ReLU' or layer.type == 18: type_string = 'mx.symbol.Activation' param_string = "act_type='relu'" need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'TanH' or layer.type == 23: type_string = 'mx.symbol.Activation' param_string = "act_type='tanh'" need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'Sigmoid' or layer.type == 19: type_string = 'mx.symbol.Activation' param_string = "act_type='sigmoid'" need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'LRN' or layer.type == 15: type_string = 'mx.symbol.LRN' param = layer.lrn_param param_string = "alpha=%f, beta=%f, knorm=%f, nsize=%d" % ( param.alpha, param.beta, param.k, param.local_size) need_flatten[name] = True if layer.type == 'InnerProduct' or layer.type == 14: type_string = 'mx.symbol.FullyConnected' param = layer.inner_product_param param_string = "num_hidden=%d, no_bias=%s" % ( param.num_output, not param.bias_term) need_flatten[name] = False if layer.type == 'Dropout' or layer.type == 6: type_string = 'mx.symbol.Dropout' param = layer.dropout_param param_string = "p=%f" % param.dropout_ratio need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'Softmax' or layer.type == 20: if layer.softmax_param.axis == 2: symbol_string += "%s = mx.symbol.transpose(%s, axes=(0,2,1))\n" %\ (mapping[layer.bottom[0]], mapping[layer.bottom[0]]) type_string = 'mx.symbol.SoftmaxActivation' param_string = "mode='channel'" need_flatten[name] = False else: type_string = 'mx.symbol.SoftmaxOutput' if layer.type == 'Flatten' or layer.type == 8: if 'softmax' in layer.bottom[0]: prev_name = re.sub('[-/]', '_', layers[i-1].name) skip_layer = True else: type_string = 'mx.symbol.Flatten' need_flatten[name] = False if layer.type == 'Split' or layer.type == 22: type_string = 'split' # will process later if layer.type == 'Concat' or layer.type == 3: type_string = 'mx.symbol.Concat' need_flatten[name] = True if layer.type == 'Crop': type_string = 'mx.symbol.Crop' need_flatten[name] = True param_string = 'center_crop=True' if layer.type == 'BatchNorm': type_string = 'mx.symbol.BatchNorm' param = layer.batch_norm_param # CuDNN requires eps to be greater than 1e-05 # We compensate for this change in convert_model epsilon = param.eps if (epsilon <= 1e-05): epsilon = 1e-04 # if next layer is scale, don't fix gamma fix_gamma = layers[i+1].type != 'Scale' param_string = 'use_global_stats=%s, fix_gamma=%s, eps=%f' % ( param.use_global_stats, fix_gamma, epsilon) need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'Scale': assert layers[i-1].type == 'BatchNorm' need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] skip_layer = True prev_name = re.sub('[-/]', '_', layers[i-1].name) if layer.type == 'PReLU': type_string = 'mx.symbol.LeakyReLU' param = layer.prelu_param param_string = "act_type='prelu', slope=%f" % param.filler.value need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'Eltwise': type_string = 'mx.symbol.broadcast_add' param_string = "" need_flatten[name] = False if layer.type == 'Reshape': type_string = 'mx.symbol.Reshape' param = layer.reshape_param param_string = 'shape=(' + ','.join([str(x) for x in list(param.shape.dim)]) + ')' need_flatten[name] = True if layer.type == 'AbsVal': type_string = 'mx.symbol.abs' need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'Normalize': bottom = re.sub('[-/]', '_', layer.bottom[0]) conv_layer = _find_layer(layers, bottom) assert conv_layer is not None param = layer.norm_param assert not param.across_spatial and not param.channel_shared assert param.scale_filler.type == 'constant' if conv_layer.type == 'Convolution': scale_name = "%s_scale" % name symbol_string += "%s=mx.sym.Variable(name='%s', shape=(1, %d, 1, 1), init=mx.init.Constant(%f))\n" % \ (scale_name, scale_name, conv_layer.convolution_param.num_output, param.scale_filler.value) symbol_string += "%s=mx.symbol.L2Normalization(name='%s', data=%s, mode='channel')\n" %\ (name, name, mapping[layer.bottom[0]]) symbol_string += "%s=mx.symbol.broadcast_mul(lhs=%s, rhs=%s)\n" %\ (name, scale_name, name) type_string = 'split' need_flatten[name] = True else: raise ValueError('Unknown/Invalid normalize layer!') if layer.type == 'Permute': type_string = 'mx.symbol.transpose' param_string = "axes=(%s)" % (','.join([str(x) for x in layer.permute_param.order])) need_flatten[name] = True from_name = '' if layer.type == 'PriorBox': param = layer.prior_box_param if layer.bottom[0] == 'data': bottom_order = [1] else: bottom_order = [0] try: import math min_size = param.min_size[0] / input_dim[2] max_size = math.sqrt(param.min_size[0] * param.max_size[0]) / input_dim[2] sizes = '(%f, %f)' %(min_size, max_size) except AttributeError: min_size = param.min_size[0] / input_dim[2] sizes = '(%f)' %(min_size) ars = list(param.aspect_ratio) ratios = [1.] for ar in ars: ratios.append(ar) if param.flip: ratios.append(1. / ar) ratios_string = '(' + ','.join(str(x) for x in ratios) + ')' clip = param.clip if (param.step_h > 0 or param.step_w > 0): step_h = param.step_h step_w = param.step_w elif param.step > 0: step_h = param.step step_w = param.step else: step_h = -1 step_w = -1 finput_dimh = float(input_dim[2]) finput_dimw = float(input_dim[3]) step = '(%f, %f)' % (step_h / finput_dimh, step_w / finput_dimw) assert param.offset == 0.5, "currently only support offset = 0.5" symbol_string += '%s = mx.contrib.symbol.MultiBoxPrior(%s, sizes=%s, ratios=%s, clip=%s, steps=%s, name="%s")\n' % \ (name, mapping[layer.bottom[0]], sizes, ratios_string, clip, step, name) symbol_string += '%s = mx.symbol.Flatten(data=%s)\n' % (name, name) type_string = 'split' need_flatten[name] = False if layer.type == 'DetectionOutput': bottom_order = [1, 0, 2] param = layer.detection_output_param assert param.share_location == True assert param.background_label_id == 0 nms_param = param.nms_param type_string = 'mx.contrib.symbol.MultiBoxDetection' param_string = "nms_threshold=%f, nms_topk=%d, clip=False" % \ (nms_param.nms_threshold, nms_param.top_k) if skip_layer: assert len(layer.bottom) == 1 symbol_string += "%s = %s\n" % (name, prev_name) elif type_string == '': raise ValueError('Unknown layer %s!' % layer.type) elif type_string != 'split': bottom = layer.bottom if param_string != "": param_string = ", " + param_string if len(bottom) == 1: # print(need_flatten) if need_flatten[mapping[bottom[0]]] and type_string == 'mx.symbol.FullyConnected': flatten_name = "flatten_%d" % flatten_count symbol_string += "%s=mx.symbol.Flatten(name='%s', data=%s)\n" % ( flatten_name, flatten_name, mapping[bottom[0]]) flatten_count += 1 need_flatten[flatten_name] = False bottom[0] = flatten_name mapping[bottom[0]] = bottom[0] symbol_string += "%s = %s(name='%s', data=%s %s)\n" % ( name, type_string, name, mapping[bottom[0]], param_string) else: if not bottom_order: bottom_order = range(len(bottom)) symbol_string += "%s = %s(name='%s', *[%s] %s)\n" % \ (name, type_string, name, ','.join([mapping[bottom[x]] for x in bottom_order]), param_string) if layer.type == 'Concat' and layer.concat_param.axis == 2: symbol_string += "%s = mx.symbol.Reshape(data=%s, shape=(0, -1, 4), name='%s')\n" %\ (name, name, name) for j in range(len(layer.top)): mapping[layer.top[j]] = name output_name = name return symbol_string, output_name, input_dim
def _parse_proto(prototxt_fname): """Parse Caffe prototxt into symbol string """ proto = caffe_parser.read_prototxt(prototxt_fname) # process data layer input_name, input_dim, layers = _get_input(proto) # only support single input, so always use `data` as the input data mapping = {input_name: 'data'} need_flatten = {input_name: False} symbol_string = "import mxnet as mx\ndata = mx.symbol.Variable(name='data')\n" flatten_count = 0 output_name = "" prev_name = None # convert reset layers one by one for i, layer in enumerate(layers): type_string = '' param_string = '' skip_layer = False bottom_order = [] name = re.sub('[-/]', '_', layer.name) if layer.type == 'Convolution' or layer.type == 4: type_string = 'mx.symbol.Convolution' param_string = _convert_conv_param(layer.convolution_param) need_flatten[name] = True if layer.type == 'Deconvolution' or layer.type == 39: type_string = 'mx.symbol.Deconvolution' param_string = _convert_conv_param(layer.convolution_param) need_flatten[name] = True if layer.type == 'Pooling' or layer.type == 17: type_string = 'mx.symbol.Pooling' param_string = _convert_pooling_param(layer.pooling_param) need_flatten[name] = True if layer.type == 'ReLU' or layer.type == 18: type_string = 'mx.symbol.Activation' param_string = "act_type='relu'" need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'TanH' or layer.type == 23: type_string = 'mx.symbol.Activation' param_string = "act_type='tanh'" need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'Sigmoid' or layer.type == 19: type_string = 'mx.symbol.Activation' param_string = "act_type='sigmoid'" need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'LRN' or layer.type == 15: type_string = 'mx.symbol.LRN' param = layer.lrn_param param_string = "alpha=%f, beta=%f, knorm=%f, nsize=%d" % ( param.alpha, param.beta, param.k, param.local_size) need_flatten[name] = True if layer.type == 'InnerProduct' or layer.type == 14: type_string = 'mx.symbol.FullyConnected' param = layer.inner_product_param param_string = "num_hidden=%d, no_bias=%s" % (param.num_output, not param.bias_term) need_flatten[name] = False if layer.type == 'Dropout' or layer.type == 6: type_string = 'mx.symbol.Dropout' param = layer.dropout_param param_string = "p=%f" % param.dropout_ratio need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'Softmax' or layer.type == 20: if layer.softmax_param.axis == 2: symbol_string += "%s = mx.symbol.transpose(%s, axes=(0,2,1))\n" %\ (mapping[layer.bottom[0]], mapping[layer.bottom[0]]) type_string = 'mx.symbol.SoftmaxActivation' param_string = "mode='channel'" need_flatten[name] = False else: type_string = 'mx.symbol.SoftmaxOutput' if layer.type == 'Flatten' or layer.type == 8: if 'softmax' in layer.bottom[0]: prev_name = re.sub('[-/]', '_', layers[i - 1].name) skip_layer = True else: type_string = 'mx.symbol.Flatten' need_flatten[name] = False if layer.type == 'Split' or layer.type == 22: type_string = 'split' # will process later if layer.type == 'Concat' or layer.type == 3: type_string = 'mx.symbol.Concat' need_flatten[name] = True if layer.type == 'Crop': type_string = 'mx.symbol.Crop' need_flatten[name] = True param_string = 'center_crop=True' if layer.type == 'BatchNorm': type_string = 'mx.symbol.BatchNorm' param = layer.batch_norm_param # CuDNN requires eps to be greater than 1e-05 # We compensate for this change in convert_model epsilon = param.eps if (epsilon <= 1e-05): epsilon = 1e-04 # if next layer is scale, don't fix gamma fix_gamma = layers[i + 1].type != 'Scale' param_string = 'use_global_stats=%s, fix_gamma=%s, eps=%f' % ( param.use_global_stats, fix_gamma, epsilon) need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'Scale': assert layers[i - 1].type == 'BatchNorm' need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] skip_layer = True prev_name = re.sub('[-/]', '_', layers[i - 1].name) if layer.type == 'PReLU': type_string = 'mx.symbol.LeakyReLU' param = layer.prelu_param param_string = "act_type='prelu', slope=%f" % param.filler.value need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'Eltwise': type_string = 'mx.symbol.broadcast_add' param_string = "" need_flatten[name] = False if layer.type == 'Reshape': type_string = 'mx.symbol.Reshape' param = layer.reshape_param param_string = 'shape=(' + ','.join( [str(x) for x in list(param.shape.dim)]) + ')' need_flatten[name] = True if layer.type == 'AbsVal': type_string = 'mx.symbol.abs' need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if layer.type == 'Normalize': bottom = re.sub('[-/]', '_', layer.bottom[0]) conv_layer = _find_layer(layers, bottom) assert conv_layer is not None param = layer.norm_param assert not param.across_spatial and not param.channel_shared assert param.scale_filler.type == 'constant' if conv_layer.type == 'Convolution': scale_name = "%s_scale" % name symbol_string += "%s=mx.sym.Variable(name='%s', shape=(1, %d, 1, 1), init=mx.init.Constant(%f))\n" % \ (scale_name, scale_name, conv_layer.convolution_param.num_output, param.scale_filler.value) symbol_string += "%s=mx.symbol.L2Normalization(name='%s', data=%s, mode='channel')\n" %\ (name, name, mapping[layer.bottom[0]]) symbol_string += "%s=mx.symbol.broadcast_mul(lhs=%s, rhs=%s)\n" %\ (name, scale_name, name) type_string = 'split' need_flatten[name] = True else: raise ValueError('Unknown/Invalid normalize layer!') if layer.type == 'Permute': type_string = 'mx.symbol.transpose' param_string = "axes=(%s)" % (','.join( [str(x) for x in layer.permute_param.order])) need_flatten[name] = True from_name = '' if layer.type == 'PriorBox': param = layer.prior_box_param if layer.bottom[0] == 'data': bottom_order = [1] else: bottom_order = [0] try: import math min_size = param.min_size[0] / input_dim[2] max_size = math.sqrt( param.min_size[0] * param.max_size[0]) / input_dim[2] sizes = '(%f, %f)' % (min_size, max_size) except AttributeError: min_size = param.min_size[0] / input_dim[2] sizes = '(%f)' % (min_size) ars = list(param.aspect_ratio) ratios = [1.] for ar in ars: ratios.append(ar) if param.flip: ratios.append(1. / ar) ratios_string = '(' + ','.join(str(x) for x in ratios) + ')' clip = param.clip if (param.step_h > 0 or param.step_w > 0): step_h = param.step_h step_w = param.step_w elif param.step > 0: step_h = param.step step_w = param.step else: step_h = -1 step_w = -1 finput_dimh = float(input_dim[2]) finput_dimw = float(input_dim[3]) step = '(%f, %f)' % (step_h / finput_dimh, step_w / finput_dimw) assert param.offset == 0.5, "currently only support offset = 0.5" symbol_string += '%s = mx.contrib.symbol.MultiBoxPrior(%s, sizes=%s, ratios=%s, clip=%s, steps=%s, name="%s")\n' % \ (name, mapping[layer.bottom[0]], sizes, ratios_string, clip, step, name) symbol_string += '%s = mx.symbol.Flatten(data=%s)\n' % (name, name) type_string = 'split' need_flatten[name] = False if layer.type == 'DetectionOutput': bottom_order = [1, 0, 2] param = layer.detection_output_param assert param.share_location == True assert param.background_label_id == 0 nms_param = param.nms_param type_string = 'mx.contrib.symbol.MultiBoxDetection' param_string = "nms_threshold=%f, nms_topk=%d, clip=False" % \ (nms_param.nms_threshold, nms_param.top_k) if skip_layer: assert len(layer.bottom) == 1 symbol_string += "%s = %s\n" % (name, prev_name) elif type_string == '': raise ValueError('Unknown layer %s!' % layer.type) elif type_string != 'split': bottom = layer.bottom if param_string != "": param_string = ", " + param_string if len(bottom) == 1: # print(need_flatten) if need_flatten[mapping[bottom[ 0]]] and type_string == 'mx.symbol.FullyConnected': flatten_name = "flatten_%d" % flatten_count symbol_string += "%s=mx.symbol.Flatten(name='%s', data=%s)\n" % ( flatten_name, flatten_name, mapping[bottom[0]]) flatten_count += 1 need_flatten[flatten_name] = False bottom[0] = flatten_name mapping[bottom[0]] = bottom[0] symbol_string += "%s = %s(name='%s', data=%s %s)\n" % ( name, type_string, name, mapping[bottom[0]], param_string) else: if not bottom_order: bottom_order = range(len(bottom)) symbol_string += "%s = %s(name='%s', *[%s] %s)\n" % \ (name, type_string, name, ','.join([mapping[bottom[x]] for x in bottom_order]), param_string) if layer.type == 'Concat' and layer.concat_param.axis == 2: symbol_string += "%s = mx.symbol.Reshape(data=%s, shape=(0, -1, 4), name='%s')\n" %\ (name, name, name) for j in range(len(layer.top)): mapping[layer.top[j]] = name output_name = name return symbol_string, output_name, input_dim
def _parse_proto(prototxt_fname): """Parse Caffe prototxt into symbol string """ proto = caffe_parser.read_prototxt(prototxt_fname) # process data layer input_name, input_dim, layer = _get_input(proto) # only support single input, so always use `data` as the input data mapping = {input_name: 'data'} need_flatten = {input_name: False} symbol_string = "import mxnet as mx\n" \ + "data = mx.symbol.Variable(name='data')\n" connection = dict() symbols = dict() top = dict() flatten_count = 0 output_name = "" prev_name = None # convert reset layers one by one for i in range(len(layer)): type_string = '' param_string = '' skip_layer = False name = re.sub('[-/]', '_', layer[i].name) if layer[i].type == 'Convolution' or layer[i].type == 4: type_string = 'mx.symbol.Convolution' param_string = _convert_conv_param(layer[i].convolution_param) need_flatten[name] = True if layer[i].type == 'Deconvolution' or layer[i].type == 39: type_string = 'mx.symbol.Deconvolution' param_string = _convert_conv_param(layer[i].convolution_param) need_flatten[name] = True if layer[i].type == 'Pooling' or layer[i].type == 17: type_string = 'mx.symbol.Pooling' param_string = _convert_pooling_param(layer[i].pooling_param) need_flatten[name] = True if layer[i].type == 'ReLU' or layer[i].type == 18: type_string = 'mx.symbol.Activation' param_string = "act_type='relu'" need_flatten[name] = need_flatten[mapping[layer[i].bottom[0]]] if layer[i].type == 'TanH' or layer[i].type == 23: type_string = 'mx.symbol.Activation' param_string = "act_type='tanh'" need_flatten[name] = need_flatten[mapping[layer[i].bottom[0]]] if layer[i].type == 'Sigmoid' or layer[i].type == 19: type_string = 'mx.symbol.Activation' param_string = "act_type='sigmoid'" need_flatten[name] = need_flatten[mapping[layer[i].bottom[0]]] if layer[i].type == 'LRN' or layer[i].type == 15: type_string = 'mx.symbol.LRN' param = layer[i].lrn_param param_string = "alpha=%f, beta=%f, knorm=%f, nsize=%d" % ( param.alpha, param.beta, param.k, param.local_size) need_flatten[name] = True if layer[i].type == 'InnerProduct' or layer[i].type == 14: type_string = 'mx.symbol.FullyConnected' param = layer[i].inner_product_param param_string = "num_hidden=%d, no_bias=%s" % (param.num_output, not param.bias_term) need_flatten[name] = False if layer[i].type == 'Dropout' or layer[i].type == 6: type_string = 'mx.symbol.Dropout' param = layer[i].dropout_param param_string = "p=%f" % param.dropout_ratio need_flatten[name] = need_flatten[mapping[layer[i].bottom[0]]] if layer[i].type == 'Softmax' or layer[i].type == 20: type_string = 'mx.symbol.SoftmaxOutput' if layer[i].type == 'Flatten' or layer[i].type == 8: type_string = 'mx.symbol.Flatten' need_flatten[name] = False if layer[i].type == 'Split' or layer[i].type == 22: type_string = 'split' # will process later if layer[i].type == 'Concat' or layer[i].type == 3: type_string = 'mx.symbol.Concat' need_flatten[name] = True if layer[i].type == 'Crop': type_string = 'mx.symbol.Crop' need_flatten[name] = True param_string = 'center_crop=True' if layer[i].type == 'BatchNorm': type_string = 'mx.symbol.BatchNorm' param = layer[i].batch_norm_param param_string = 'use_global_stats=%s, fix_gamma=False' % param.use_global_stats need_flatten[name] = need_flatten[mapping[layer[i].bottom[0]]] if layer[i].type == 'Scale': assert layer[i - 1].type == 'BatchNorm' need_flatten[name] = need_flatten[mapping[layer[i].bottom[0]]] skip_layer = True prev_name = re.sub('[-/]', '_', layer[i - 1].name) if layer[i].type == 'PReLU': type_string = 'mx.symbol.LeakyReLU' param = layer[i].prelu_param param_string = "act_type='prelu', slope=%f" % param.filler.value need_flatten[name] = need_flatten[mapping[layer[i].bottom[0]]] if layer[i].type == 'Eltwise': type_string = 'mx.symbol.broadcast_add' param_string = "" need_flatten[name] = False if layer[i].type == 'Reshape': type_string = 'mx.symbol.Reshape' need_flatten[name] = False param = layer[i].reshape_param param_string = "shape=(%s)" % (','.join(param.shape.dim), ) if skip_layer: assert len(layer[i].bottom) == 1 symbol_string += "%s = %s\n" % (name, prev_name) elif type_string == '': raise ValueError('Unknown layer %s!' % layer[i].type) elif type_string != 'split': bottom = layer[i].bottom if param_string != "": param_string = ", " + param_string if len(bottom) == 1: if need_flatten[mapping[bottom[ 0]]] and type_string == 'mx.symbol.FullyConnected': flatten_name = "flatten_%d" % flatten_count symbol_string += "%s=mx.symbol.Flatten(name='%s', data=%s)\n" % ( flatten_name, flatten_name, mapping[bottom[0]]) flatten_count += 1 need_flatten[flatten_name] = False bottom[0] = flatten_name mapping[bottom[0]] = bottom[0] symbol_string += "%s = %s(name='%s', data=%s %s)\n" % ( name, type_string, name, mapping[bottom[0]], param_string) else: symbol_string += "%s = %s(name='%s', *[%s] %s)\n" % ( name, type_string, name, ','.join( [mapping[x] for x in bottom]), param_string) for j in range(len(layer[i].top)): mapping[layer[i].top[j]] = name output_name = name return symbol_string, output_name, input_dim