class Keras2Emitter(Emitter):

    dtype_map = {
        graph_pb2.DT_FLOAT16 : "float16",
        graph_pb2.DT_FLOAT32 : "float32",
        graph_pb2.DT_FLOAT64 : "float64",
        graph_pb2.DT_INT16 : "int16",
        graph_pb2.DT_INT32 : "int32",
        graph_pb2.DT_INT64 : "int64",
        graph_pb2.DT_UINT8 : "uint8",
        graph_pb2.DT_UINT16 : "uint16"
    }

    def __init__(self, model):
        super(Keras2Emitter, self).__init__()
        from six import string_types as _string_types
        if isinstance(model, _string_types):
            network_path = model
        else:
            network_path = model[0]
            weight_path = model[1]

        self.IR_graph = IRGraph(network_path)
        self.IR_graph.build()
        self.yolo_parameter = []

    @property
    def header_code(self):
        return """import keras
from keras.models import Model
from keras import layers
import keras.backend as K
import numpy as np


def load_weights_from_file(weight_file):
    try:
        weights_dict = np.load(weight_file).item()
    except:
        weights_dict = np.load(weight_file, encoding='bytes').item()

    return weights_dict


def set_layer_weights(model, weights_dict):
    for layer in model.layers:
        if layer.name in weights_dict:
            cur_dict = weights_dict[layer.name]
            current_layer_parameters = list()
            if layer.__class__.__name__ == "BatchNormalization":
                if 'scale' in cur_dict:
                    current_layer_parameters.append(cur_dict['scale'])
                if 'bias' in cur_dict:
                    current_layer_parameters.append(cur_dict['bias'])
                current_layer_parameters.extend([cur_dict['mean'], cur_dict['var']])
            elif layer.__class__.__name__ == "Scale":
                if 'scale' in cur_dict:
                    current_layer_parameters.append(cur_dict['scale'])
                if 'bias' in cur_dict:
                    current_layer_parameters.append(cur_dict['bias'])
            elif layer.__class__.__name__ == "SeparableConv2D":
                current_layer_parameters = [cur_dict['depthwise_filter'], cur_dict['pointwise_filter']]
                if 'bias' in cur_dict:
                    current_layer_parameters.append(cur_dict['bias'])
            else:
                # rot weights
                current_layer_parameters = [cur_dict['weights']]
                if 'bias' in cur_dict:
                    current_layer_parameters.append(cur_dict['bias'])
            model.get_layer(layer.name).set_weights(current_layer_parameters)

    return model


def KitModel(weight_file = None):
    weights_dict = load_weights_from_file(weight_file) if not weight_file == None else None
"""

    def gen_code(self, phase):
        self.add_body(0, self.header_code)
        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                func(current_node)
            else:
                print("KerasEmitter has not supported operator [%s]." % (node_type))
                self.emit_UNKNOWN(current_node)

        self.add_body(1, "{:<15} = Model(inputs = [{}], outputs = [{}])".format(
            "model",
            ', '.join([self.IR_graph.get_node(i).real_variable_name for i in self.IR_graph.input_layers]),
            ', '.join([self.IR_graph.get_node(i).real_variable_name for i in self.IR_graph.output_layers])))
        self.add_body(1, ["set_layer_weights(model, weights_dict)", "return model"])

        # Emit the helper definitions (custom layers, grouped conv, etc.) that
        # the generated operators referenced.
        for i in self.used_layers:
            func = getattr(self, "_layer_" + i)
            func()

        return self.body_code

    @staticmethod
    def shapeToStr(shapes):
        return ', '.join('%s' % i for i in filter(lambda x: x > 0, shapes))

    def _emit_activation(self, IR_node, op):
        self.add_body(1, "{:<15} = layers.Activation(name='{}', activation='{}')({})".format(
            IR_node.variable_name,
            IR_node.name,
            op,
            self.parent_variable_name(IR_node)))

    def _emit_merge(self, IR_node, func):
        inputs = ', '.join('%s' % self.IR_graph.get_node(i).real_variable_name for i in IR_node.in_edges)
        axis = ' axis = {},'.format(IR_node.get_attr('axis')) if 'axis' in IR_node.layer.attr else ""
        self.add_body(1, "{:<15} = layers.{}(name = '{}',{} inputs = [{}])".format(
            IR_node.variable_name,
            func,
            IR_node.name,
            axis,
            inputs))

    @staticmethod
    def _convert_padding(padding):
        padding = convert_onnx_pad_to_tf(padding)[1:-1]
        for idx, pad in enumerate(padding):
            padding[idx] = tuple(pad)
        padding = tuple(padding)
        return padding

    def _defuse_padding(self, IR_node):
        # Turn IR padding into either a Keras padding mode or an explicit
        # ZeroPadding layer inserted before the op.
        auto_pad = IR_node.get_attr('auto_pad')
        if auto_pad:
            input_node = self.parent_variable_name(IR_node)
            if auto_pad == 'VALID':
                padding = 'valid'
            elif auto_pad.startswith("SAME"):
                padding = 'same'
            else:
                assert False
            return input_node, padding
        else:
            padding = IR_node.get_attr("pads")
            padding = self._convert_padding(padding)
            if is_valid_padding(padding) == False:
                input_node = IR_node.variable_name + '_input'
                self.add_body(1, "{:<15} = layers.ZeroPadding{}D(padding = {})({})".format(
                    input_node,
                    len(padding),
                    padding,
                    self.parent_variable_name(IR_node)))
            else:
                input_node = self.parent_variable_name(IR_node)

            return input_node, 'valid'

    def _emit_convolution(self, IR_node, conv_type):
        self.used_layers.add('Conv')
        # assert IR_node.get_attr('group', 1) == 1
        group = IR_node.get_attr("group", 1)

        if conv_type.endswith('Transpose'):
            filters = IR_node.get_attr('kernel_shape')[-2]
        else:
            filters = IR_node.get_attr('kernel_shape')[-1]

        filters_str = 'filters={}'.format(filters) if conv_type.startswith('layer') else 'depth_multiplier={}'.format(filters)

        input_node, padding = self._defuse_padding(IR_node)

        dilations = IR_node.get_attr('dilations')
        if not dilations:
            dilations = [1] * len(IR_node.get_attr('kernel_shape'))

        self.add_body(1, "{:<15} = convolution(weights_dict, name='{}', input={}, group={}, conv_type='{}', {}, kernel_size={}, strides={}, dilation_rate={}, padding='{}', use_bias={})".format(
            IR_node.variable_name,
            IR_node.name,
            input_node,
            group,
            conv_type,
            filters_str,
            tuple(IR_node.get_attr('kernel_shape')[:-2]),
            tuple(IR_node.get_attr('strides')[1:-1]),
            tuple(dilations[1:-1]),
            padding,
            IR_node.get_attr('use_bias')))

    def emit_ConvTranspose(self, IR_node):
        dim = len(IR_node.get_attr('kernel_shape')) - 2
        self._emit_convolution(IR_node, 'layers.Conv{}DTranspose'.format(dim))

    def emit_Conv(self, IR_node):
        dim = len(IR_node.get_attr('kernel_shape')) - 2
        self._emit_convolution(IR_node, 'layers.Conv{}D'.format(dim))

    #############
    # Operators #
    #############

    def emit_UNKNOWN(self, IR_node):
        print(IR_node.name)

    def emit_Add(self, IR_node):
        self._emit_merge(IR_node, "add")

    def emit_DataInput(self, IR_node):
        shape_str = IRGraph.shapeToStr(IR_node.IR_layer.attr["shape"].shape)
        dtype_str = ", dtype = '{}'".format(self.dtype_map[IR_node.layer.attr['dtype'].type]) if 'dtype' in IR_node.layer.attr else ""
        self.add_body(1, "{:<15} = layers.Input(name = '{}', shape = ({},) {})".format(
            IR_node.variable_name,
            IR_node.name,
            shape_str,
            dtype_str))

    def emit_Dropout(self, IR_node):
        seed = 'None'
        if 'seed' in IR_node.IR_layer.attr:
            seed = IR_node.IR_layer.attr['seed'].i

        self.add_body(1, "{:<15} = layers.Dropout(name = '{}', rate = {}, seed = {})({})".format(
            IR_node.variable_name,
            IR_node.name,
            IR_node.IR_layer.attr["keep_prob"].f,
            seed,
            self.parent_variable_name(IR_node)))

    def emit_FullyConnected(self, IR_node):
        self.add_body(1, "{:<15} = layers.Dense(name = '{}', units = {}, use_bias = {})({})".format(
            IR_node.variable_name,
            IR_node.name,
            IR_node.get_attr('units'),
            IR_node.get_attr('use_bias'),
            self.parent_variable_name(IR_node)))

    def emit_Flatten(self, IR_node):
        self.used_layers.add('Flatten')
        self.add_body(1, "{:<15} = __flatten(name = '{}', input = {})".format(
            IR_node.variable_name,
            IR_node.name,
            self.parent_variable_name(IR_node)))

    def emit_Pool(self, IR_node):
        dim = len(IR_node.get_attr("strides")) - 2

        pooling_type = IR_node.get_attr('pooling_type')
        if pooling_type == "MAX":
            pool_name = "MaxPooling{}D".format(dim)
        elif pooling_type == "AVG":
            pool_name = "AveragePooling{}D".format(dim)
        else:
            print(pooling_type)
            assert False

        if IR_node.layer.attr['global_pooling'].b:
            self.add_body(1, "{:<15} = layers.Global{}(name = '{}')({})".format(
                IR_node.variable_name,
                pool_name,
                IR_node.name,
                self.parent_variable_name(IR_node)))
        else:
            dilations = IR_node.get_attr('dilations')
            if dilations:
                for e in IR_node.get_attr('dilations'):
                    assert e == 1

            pool_size = IR_node.get_attr('kernel_shape')[1:-1]
            pool_size = ', '.join('%s' % i for i in pool_size)
            strides = IR_node.get_attr('strides')[1:-1]
            strides = ', '.join('%s' % i for i in strides)

            input_node, padding = self._defuse_padding(IR_node)
            self.add_body(1, "{:<15} = layers.{}(name = '{}', pool_size = ({}), strides = ({}), padding = '{}')({})".format(
                IR_node.variable_name,
                pool_name,
                IR_node.name,
                pool_size,
                strides,
                padding,
                input_node))

    def emit_Reshape(self, IR_node):
        shape_str = self.shapeToStr(IR_node.IR_layer.attr["shape"].list.i)
        self.add_body(1, "{:<15} = layers.Reshape(name = '{}', target_shape = ({},))({})".format(
            IR_node.variable_name,
            IR_node.name,
            shape_str,
            self.parent_variable_name(IR_node)))

    def emit_Tanh(self, IR_node):
        self._emit_activation(IR_node, 'tanh')

    def emit_Relu(self, IR_node):
        self._emit_activation(IR_node, 'relu')

    def emit_Softmax(self, IR_node):
        self._emit_activation(IR_node, 'softmax')

    def emit_Sigmoid(self, IR_node):
        self._emit_activation(IR_node, 'sigmoid')

    def emit_Embedding(self, IR_node):
        self.add_body(1, "{:<15} = layers.Embedding(input_dim = {}, output_dim = {}, mask_zero = {})({})".format(
            IR_node.variable_name,
            IR_node.get_attr('input_dim'),
            IR_node.get_attr('output_dim'),
            IR_node.get_attr('mask_zero'),
            IR_node.in_edges[0]))

    def emit_RNNs(self, IR_node, func):
        # for Keras
        if "dropout" in IR_node.IR_layer.attr:
            dropout_str = ",dropout = {}, recurrent_dropout = {}".format(
                IR_node.IR_layer.attr['dropout'].f,
                IR_node.IR_layer.attr['recurrent_dropout'].f)
        else:
            dropout_str = ""

        code = "{:<15} = layers.{}(units = {}, use_bias = {} {})({})".format(
            IR_node.name,
            func,
            IR_node.IR_layer.attr['units'].i,
            IR_node.IR_layer.attr['use_bias'].b,
            dropout_str,
            IR_node.in_edges[0])

        return code

    def emit_LSTM(self, IR_node):
        return self.emit_RNNs(IR_node, "LSTM")

    def emit_GRU(self, IR_node):
        return self.emit_RNNs(IR_node, "GRU")

    def emit_Concat(self, IR_node):
        self._emit_merge(IR_node, "concatenate")

    def emit_BatchNorm(self, IR_node):
        axis = IR_node.layer.attr['axis'].i if 'axis' in IR_node.layer.attr else -1
        self.add_body(1, "{:<15} = layers.BatchNormalization(name = '{}', axis = {}, epsilon = {}, center = {}, scale = {})({})".format(
            IR_node.variable_name,
            IR_node.name,
            axis,
            IR_node.layer.attr['epsilon'].f,
            IR_node.layer.attr['bias'].b,
            IR_node.layer.attr['scale'].b,
            self.parent_variable_name(IR_node)))

    def emit_Scale(self, IR_node):
        axis = IR_node.layer.attr['axis'].i if 'axis' in IR_node.layer.attr else -1
        self.add_body(1, "{:<15} = layers.Scale(name = '{}', axis = {}, center = {}, scale = {})({})".format(
            IR_node.variable_name,
            IR_node.name,
            axis,
            IR_node.layer.attr['bias'].b,
            IR_node.layer.attr['scale'].b,
            self.parent_variable_name(IR_node)))

    def emit_Pad(self, IR_node):
        mode = IR_node.get_attr('mode', 'constant')
        if mode == "constant":
            func = "ZeroPadding"
        else:
            print(mode)
            raise NotImplementedError()

        dim = len(IR_node.get_attr('pads')) // 2 - 2

        padding = self._convert_padding(IR_node.get_attr('pads'))
        self.add_body(1, "{:<15} = layers.{}{}D(name='{}', padding={})({})".format(
            IR_node.variable_name,
            func,
            dim,
            IR_node.name,
            padding,
            self.parent_variable_name(IR_node)))

    def emit_Squeeze(self, IR_node):
        self.emit_Flatten(IR_node)

    def emit_ReduceMean(self, IR_node):
        axes = ', '.join('%s' % i for i in IR_node.get_attr('axes'))
        self.add_body(1, "{:<15} = layers.Lambda(lambda x: K.mean(x, axis=[{}], keepdims={}))({})".format(
            IR_node.variable_name,
            axes,
            IR_node.get_attr('keepdims'),
            self.parent_variable_name(IR_node)))

    def emit_LRN(self, IR_node):
        self.used_layers.add(IR_node.type)
        self.add_body(1, "{:<15} = LRN(size = {}, alpha = {}, beta = {}, k = {}, name = '{}')({})".format(
            IR_node.variable_name,
            IR_node.get_attr('size'),
            IR_node.get_attr('alpha'),
            IR_node.get_attr('beta'),
            IR_node.get_attr('k'),
            IR_node.name,
            self.parent_variable_name(IR_node)))

    def emit_SeparableConv(self, IR_node):
        assert len(IR_node.get_attr("strides")) == 4
        return self._emit_convolution(IR_node, "layers.SeparableConv2D")

    def emit_Relu6(self, IR_node):
        self.add_body(1, "{:<15} = layers.Activation(keras.applications.mobilenet.relu6, name = '{}')({})".format(
            IR_node.variable_name,
            IR_node.name,
            self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name))

    def emit_DepthwiseConv(self, IR_node):
        self._emit_convolution(IR_node, 'keras.applications.mobilenet.DepthwiseConv2D')

    def emit_Crop(self, IR_node):
        border = IR_node.get_attr('border')
        rank = len(border) // 2
        cropping = []
        for idx in range(rank):  # was xrange; range works on both Python 2 and 3
            cropping.append(tuple([border[idx * 2], border[idx * 2 + 1]]))

        self.add_body(1, "{:<15} = layers.Cropping{}D(cropping={}, name='{}')({})".format(
            IR_node.variable_name,
            rank,
            tuple(cropping),
            IR_node.name,
            self.parent_variable_name(IR_node)))

    def emit_LeakyRelu(self, IR_node):
        self.add_body(1, "{:<15} = layers.LeakyReLU(name='{}', alpha={})({})".format(
            IR_node.variable_name,
            IR_node.name,
            IR_node.get_attr('alpha'),
            self.parent_variable_name(IR_node)))

    def emit_upsample(self, IR_node):
        self.add_body(1, "{:<15} = layers.UpSampling2D(name='{}', size=({}, {}), data_format = 'channels_last')({})".format(
            IR_node.variable_name,
            IR_node.name,
            IR_node.get_attr('strides'),
            IR_node.get_attr('strides'),
            self.parent_variable_name(IR_node)))

    def emit_SpaceToDepth(self, IR_node):
        self.used_layers.add(IR_node.type)
        assert IR_node.get_attr('blocksize') == 2
        # TODO: arguments won't be saved in keras export model
        blocksize = "arguments={'blocksize': %d}" % 2
        self.add_body(1, "{:<15} = layers.Lambda(space_to_depth, {}, name='{}')({})".format(
            IR_node.variable_name,
            blocksize,
            IR_node.name,
            self.parent_variable_name(IR_node)))

    def emit_yolo(self, IR_node):
        self.used_layers.add('Yolo')
        # print(IR_node.layer)
        self.add_body(1, "{:<15} = {}".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node)))
        self.yolo_parameter = [
            IR_node.get_attr('anchors'),
            IR_node.get_attr('classes'),
            IR_node.get_attr("ignore_thresh"),
            IR_node.get_attr("jitter")]

    def _layer_Yolo(self):
        self.add_body(0, '''
def yolo_parameter():
    return {}
'''.format(self.yolo_parameter))

    def _layer_SpaceToDepth(self):
        self.add_body(0, '''
def space_to_depth(input, blocksize):
    import tensorflow as tf
    return tf.space_to_depth(input, block_size=blocksize)
''')

    def _layer_Flatten(self):
        self.add_body(0, '''
def __flatten(name, input):
    if input.shape.ndims > 2:
        return layers.Flatten(name = name)(input)
    else:
        return input
''')

    def _layer_LRN(self):
        self.add_body(0, '''
from keras.layers.core import Layer
class LRN(Layer):

    def __init__(self, size=5, alpha=0.0005, beta=0.75, k=2, **kwargs):
        self.n = size
        self.alpha = alpha
        self.beta = beta
        self.k = k
        super(LRN, self).__init__(**kwargs)

    def build(self, input_shape):
        self.shape = input_shape
        super(LRN, self).build(input_shape)

    def call(self, x, mask=None):
        half_n = self.n - 1
        squared = K.square(x)
        scale = self.k
        norm_alpha = self.alpha / (2 * half_n + 1)
        if K.image_dim_ordering() == "th":
            b, f, r, c = self.shape
            squared = K.expand_dims(squared, 0)
            squared = K.spatial_3d_padding(squared, padding=((half_n, half_n), (0, 0), (0, 0)))
            squared = K.squeeze(squared, 0)
            for i in range(half_n * 2 + 1):
                scale += norm_alpha * squared[:, i:i+f, :, :]
        else:
            b, r, c, f = self.shape
            squared = K.expand_dims(squared, -1)
            squared = K.spatial_3d_padding(squared, padding=((0, 0), (0, 0), (half_n, half_n)))
            squared = K.squeeze(squared, -1)
            for i in range(half_n * 2 + 1):
                scale += norm_alpha * squared[:, :, :, i:i+f]
        scale = K.pow(scale, self.beta)
        return x / scale

    def compute_output_shape(self, input_shape):
        return input_shape''')

    def _layer_Conv(self):
        # Emitted helper: Keras 2 has no native grouped convolution, so groups
        # are emulated by slicing channels, convolving each slice, and
        # concatenating; the weight dict is split to match.
        self.add_body(0, """
def convolution(weights_dict, name, input, group, conv_type, filters=None, **kwargs):
    if not conv_type.startswith('layer'):
        layer = keras.applications.mobilenet.DepthwiseConv2D(name=name, **kwargs)(input)
        return layer

    grouped_channels = int(filters / group)
    group_list = []

    if group == 1:
        func = getattr(layers, conv_type.split('.')[-1])
        layer = func(name = name, filters = filters, **kwargs)(input)
        return layer

    weight_groups = list()
    if not weights_dict == None:
        w = np.array(weights_dict[name]['weights'])
        weight_groups = np.split(w, indices_or_sections=group, axis=-1)

    for c in range(group):
        x = layers.Lambda(lambda z: z[:, :, :, c * grouped_channels:(c + 1) * grouped_channels])(input)
        x = layers.Conv2D(name=name + "_" + str(c), filters=grouped_channels, **kwargs)(x)
        weights_dict[name + "_" + str(c)] = dict()
        weights_dict[name + "_" + str(c)]['weights'] = weight_groups[c]
        group_list.append(x)

    layer = layers.concatenate(group_list, axis=-1)

    if 'bias' in weights_dict[name]:
        b = K.variable(weights_dict[name]['bias'], name=name + "_bias")
        layer = layer + b
    return layer""")
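# Usage sketch (not part of the emitter): a minimal example of how the Keras
# emitter is typically driven, assuming the base Emitter class provides run()
# as the Caffe and PyTorch emitters below suggest. The file names here are
# hypothetical; the IR graph/weight pair comes from an earlier conversion step.
#
#   emitter = Keras2Emitter(('resnet.pb', 'resnet.npy'))  # IR graph + IR weights
#   emitter.run('kit_model.py')                           # write the Keras builder script
#
#   # Then, from the generated file:
#   from kit_model import KitModel
#   model = KitModel(weight_file='resnet.npy')            # rebuild Model, load weights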
class CaffeEmitter(Emitter):

    def __init__(self, model):
        from six import string_types as _string_types
        super(CaffeEmitter, self).__init__()
        if isinstance(model, _string_types):
            network_path = model
        else:
            network_path = model[0]
            self._load_weights(model[1])

        self.IR_graph = IRGraph(network_path)
        super(CaffeEmitter, self)._build()

    @property
    def header_code(self):
        return """from __future__ import print_function
import numpy as np
import sys, argparse
import caffe
from caffe import layers as L
from caffe import params as P
from caffe import to_proto
from six import text_type as _text_type


__weights_dict = dict()

def load_weights(weight_file):
    if weight_file == None:
        return

    try:
        weights_dict = np.load(weight_file).item()
    except:
        weights_dict = np.load(weight_file, encoding='bytes').item()

    return weights_dict


def KitModel(weight_file = None):
    n = caffe.NetSpec()
"""

    @property
    def end_code(self):
        return """    return n

def make_net(prototxt):
    n = KitModel()
    with open(prototxt, 'w') as fpb:
        print(n.to_proto(), file=fpb)

def gen_weight(weight_file, model, prototxt):
    global __weights_dict
    __weights_dict = load_weights(weight_file)

    net = caffe.Net(prototxt, caffe.TRAIN)

    for key in __weights_dict:
        if 'weights' in __weights_dict[key]:
            net.params[key][0].data.flat = __weights_dict[key]['weights']
        elif 'mean' in __weights_dict[key]:
            net.params[key][0].data.flat = __weights_dict[key]['mean']
            net.params[key][1].data.flat = __weights_dict[key]['var']
            if 'scale' in __weights_dict[key]:
                net.params[key][2].data.flat = __weights_dict[key]['scale']
        elif 'scale' in __weights_dict[key]:
            net.params[key][0].data.flat = __weights_dict[key]['scale']
        if 'bias' in __weights_dict[key]:
            net.params[key][1].data.flat = __weights_dict[key]['bias']
        if 'gamma' in __weights_dict[key]:  # used for prelu, not sure if other layers use this too
            net.params[key][0].data.flat = __weights_dict[key]['gamma']
    net.save(model)
    return net


if __name__=='__main__':
    parser = argparse.ArgumentParser(description='Generate caffe model and prototxt')
    parser.add_argument('--weight_file', '-w', type=_text_type, default='IR weight file')
    parser.add_argument('--prototxt', '-p', type=_text_type, default='caffe_converted.prototxt')
    parser.add_argument('--model', '-m', type=_text_type, default='caffe_converted.caffemodel')
    args = parser.parse_args()
    # For some reason argparse gives us unicode, so we need to convert to str first
    make_net(str(args.prototxt))
    gen_weight(str(args.weight_file), str(args.model), str(args.prototxt))
"""

    def gen_code(self, phase='test'):
        self.phase = phase
        self.add_body(0, self.header_code)

        # for test
        # with open("graph.txt", 'w') as f:
        #     for layer in self.IR_graph.topological_sort:
        #         current_node = self.IR_graph.get_node(layer)
        #         print("========current_node=========\\n{}".format(current_node.layer), file=f)
        # test end

        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type
            # print("========current_node={}".format(current_node.layer))

            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                func(current_node)
            else:
                print("CaffeEmitter has not supported operator [%s]." % (node_type))
                self.emit_UNKNOWN(current_node)

        self.add_body(0, "")
        self.add_body(0, self.end_code)

        return self.body_code

    def run(self, dstNetworkPath, dstWeightPath=None, phase='test'):
        super(CaffeEmitter, self).run(dstNetworkPath, dstWeightPath, phase)
        if self.weight_loaded:
            self.save_weights(self.weights_dict, dstWeightPath)

    @staticmethod
    def _shapeToStr(shapes):
        return [dim.size if dim.size > 0 else 1 for dim in shapes.dim]

    def check_if_need_transpose(self, IR_node):
        # The IR stores fully-connected weights in NHWC order; Caffe expects
        # channel-first, so reshape/transpose when the parent has spatial dims.
        parent = self.IR_graph.get_parent(IR_node.name, [0])
        while parent.type == 'Flatten':
            parent = self.IR_graph.get_parent(parent.name, [0])
        dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim)
        if dim > 2:
            original_dims = self.weights_dict[IR_node.name]['weights'].shape
            dims = [i.size for i in parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]] + [-1]
            self.weights_dict[IR_node.name]['weights'] = np.reshape(
                self.weights_dict[IR_node.name]['weights'], dims)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(
                self.weights_dict[IR_node.name]['weights'],
                [dim - 2] + list(range(0, dim - 2)) + [dim - 1])
            self.weights_dict[IR_node.name]['weights'] = np.reshape(
                self.weights_dict[IR_node.name]['weights'], original_dims)

    def emit_Conv(self, IR_node):
        self.add_body(1, "n.{:<15} = L.Convolution(n.{}, kernel_size={}, stride={}, num_output={}, pad_h={}, pad_w={}, group={}, bias_term={}, ntop=1)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.get_attr('kernel_shape')[0],
            IR_node.get_attr('strides')[1],
            IR_node.get_attr('kernel_shape')[-1],
            IR_node.get_attr('pads')[1],
            IR_node.get_attr('pads')[2],
            IR_node.get_attr('group', 1),
            IR_node.get_attr('use_bias', False)))

        dim = len(IR_node.get_attr('strides')) - 2
        if self.weight_loaded:
            self.weights_dict[IR_node.name]['weights'] = np.transpose(
                self.weights_dict[IR_node.name]['weights'],
                [dim + 1, dim] + list(range(0, dim)))
            self.weights_dict[IR_node.variable_name] = self.weights_dict.pop(IR_node.name)

        # keys = []
        # for key in self.weights_dict[IR_node.name].keys():
        #     keys.append(key)
        # print("=======Layer: {}, keys: {}".format(IR_node.name, keys))

    def emit_Pool(self, IR_node):
        pooling_type = IR_node.get_attr('pooling_type')
        if pooling_type == 'MAX':
            pooling_type = P.Pooling.MAX
        elif pooling_type == 'AVG':
            pooling_type = P.Pooling.AVE
        elif pooling_type == 'STOCHASTIC':
            pooling_type = P.Pooling.STOCHASTIC
        else:
            raise ValueError

        if IR_node.layer.attr['global_pooling'].b:
            self.used_layers.add('GlobalPooling')
            self.add_body(1, "n.{:<15} = L.Pooling(n.{}, pool={}, stride={}, global_pooling=True, ntop=1)".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                pooling_type,
                IR_node.get_attr('strides')[1]))
        else:
            self.add_body(1, "n.{:<15} = L.Pooling(n.{}, pool={}, kernel_size={}, pad_h={}, pad_w={}, stride={}, ntop=1)".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                pooling_type,
                IR_node.get_attr('kernel_shape')[1],
                IR_node.get_attr('pads')[1],
                IR_node.get_attr('pads')[2],
                IR_node.get_attr('strides')[1]))

    def emit_UNKNOWN(self, IR_node):
        print(IR_node.IR_layer.name)

    def emit_DataInput(self, IR_node):
        shape = self._shapeToStr(IR_node.get_attr('shape'))
        shape = [shape[0], shape[-1]] + shape[1:-1]
        self.add_body(1, "n.{:<15} = L.Input(shape=[dict(dim={})], ntop=1)".format(
            IR_node.variable_name,
            shape))

    def emit_Dropout(self, IR_node):
        in_place = True
        self.add_body(1, "n.{:<15} = L.Dropout(n.{}, dropout_ratio={}, in_place={}, ntop=1)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            1 - IR_node.get_attr('keep_prob'),
            in_place))

    def emit_FullyConnected(self, IR_node):
        self.add_body(1, "n.{:<15} = L.InnerProduct(n.{}, num_output={}, bias_term={}, ntop=1)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.layer.attr["units"].i,
            IR_node.get_attr('use_bias', False)))

        if self.weight_loaded:
            self.check_if_need_transpose(IR_node)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(
                self.weights_dict[IR_node.name]['weights'], (1, 0))
            self.weights_dict[IR_node.variable_name] = self.weights_dict.pop(IR_node.name)

    def emit_BatchNorm(self, IR_node):
        self.add_body(1, "n.{:<15} = L.BatchNorm(n.{}, eps={}, use_global_stats={}, ntop=1)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.get_attr('epsilon'),
            self.phase == 'test'))

        scale_layer_var_name = IR_node.variable_name + "_scale"
        # Since the scale layer is "almost part" of the bn layer, we can safely use in_place here.
        self.add_body(1, "n.{:<15} = L.Scale(n.{}, bias_term={}, in_place=True, ntop=1)".format(
            scale_layer_var_name,
            IR_node.variable_name,
            IR_node.get_attr('bias', False)))

        if self.weight_loaded:
            self.weights_dict[scale_layer_var_name] = dict()
            if 'scale' in self.weights_dict[IR_node.name]:
                self.weights_dict[scale_layer_var_name]['scale'] = self.weights_dict[IR_node.name]['scale']
                # self.weights_dict[IR_node.name].pop('scale', None)
                self.weights_dict[IR_node.name]['scale'] = 1
            self.weights_dict[scale_layer_var_name]['bias'] = self.weights_dict[IR_node.name]['bias']
            self.weights_dict[IR_node.name].pop('bias', None)
            self.weights_dict[IR_node.variable_name] = self.weights_dict.pop(IR_node.name)

        IR_node.real_name = IR_node.name + "_scale"

    def emit_LRN(self, IR_node):
        self.add_body(1, "n.{:<15} = L.LRN(n.{}, local_size={}, alpha={}, beta={}, k={})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.get_attr('size') * 2 - 1,
            IR_node.get_attr('alpha'),
            IR_node.get_attr('beta'),
            IR_node.get_attr('k')))

    def emit_Add(self, IR_node):
        input_layers = ', '.join(
            ('n.' + self.IR_graph.get_parent(IR_node.name, [num]).real_variable_name)
            for num in range(0, len(IR_node.in_edges)))
        self.add_body(1, "n.{:<15} = L.Eltwise({}, operation=1, ntop=1)".format(
            IR_node.variable_name,
            input_layers))

    def emit_Flatten(self, IR_node):
        IR_node.real_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name

    def emit_Squeeze(self, IR_node):
        IR_node.real_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name

    def emit_Concat(self, IR_node):
        axis_array = (2, 3, 1, 0)
        axis = axis_array.index(IR_node.get_attr('axis'))
        input_layers = ', '.join(
            ('n.' + self.IR_graph.get_node(edge).real_variable_name)
            for edge in IR_node.in_edges)
        self.add_body(1, "n.{:<15} = L.Concat({}, axis={})".format(
            IR_node.variable_name,
            input_layers,
            axis))

    # def emit_Tanh(self, IR_node):
    #     self._emit_activation(IR_node, 'ops.tanh')

    def emit_Relu(self, IR_node):
        in_place = True
        self.add_body(1, "n.{:<15} = L.ReLU(n.{}, in_place={}, ntop=1)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            in_place))

    def emit_PRelu(self, IR_node):
        in_place = True
        self.add_body(1, "n.{:<15} = L.PReLU(n.{}, in_place={}, ntop=1)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            in_place))

    def emit_Softmax(self, IR_node):
        self.add_body(1, "n.{:<15} = L.Softmax(n.{}, ntop=1)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node)))

    def emit_Pad(self, IR_node):
        IR_node.real_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name
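# Workflow sketch for the generated Caffe script (hypothetical file names):
# KitModel() builds a caffe.NetSpec, make_net() dumps it to a prototxt, and
# gen_weight() copies the IR weight dict into a caffemodel. From the shell,
# using the argparse entry point the emitter writes into end_code:
#
#   python caffe_converted.py -w resnet.npy -p converted.prototxt -m converted.caffemodel
#
# or programmatically:
#
#   import caffe_converted
#   caffe_converted.make_net('converted.prototxt')
#   caffe_converted.gen_weight('resnet.npy', 'converted.caffemodel', 'converted.prototxt')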
class PytorchEmitter(Emitter): dtype_map = { graph_pb2.DT_FLOAT16: "torch.float16", graph_pb2.DT_FLOAT32: "torch.float32", graph_pb2.DT_FLOAT64: "torch.float64", graph_pb2.DT_INT16: "torch.int16", graph_pb2.DT_INT32: "torch.int32", graph_pb2.DT_INT64: "torch.int64", graph_pb2.DT_UINT8: "torch.uint8", graph_pb2.DT_UINT16: "torch.uint16" } # Base Functions def __init__(self, model): super(PytorchEmitter, self).__init__() if isinstance(model, _string_types): network_path = model else: network_path = model[0] weight_path = model[1] self.init_code = str() self.IR_graph = IRGraph(network_path) self.IR_graph.build() self._load_weights(weight_path) folder = Folder(self.IR_graph, self.weights_dict) folder.fold() def run(self, dstNetworkPath, dstWeightPath=None, phase='test'): super(PytorchEmitter, self).run(dstNetworkPath, dstWeightPath, phase) if self.weight_loaded: self.save_weights(self.weights_dict, dstWeightPath) def add_init(self, indent, codes): if isinstance(codes, _string_types): codes = [codes] for code in codes: self.init_code += (" " * indent) + code + '\n' def parent_variable_name(self, IR_node, path=[0], weight_type='weights'): if not IR_node.in_edges and IR_node.name in self.weights_dict.keys(): self.weights_dict[IR_node.name][weight_type] = self.weights_dict[ IR_node.name][weight_type] return "torch.from_numpy(__weights_dict['{}']['{}'])".format( IR_node.name, weight_type) return super(PytorchEmitter, self).parent_variable_name(IR_node, path) @property def header_code(self): return """import numpy as np import torch import torch.nn as nn import torch.nn.functional as F import math __weights_dict = dict() def load_weights(weight_file): if weight_file == None: return try: weights_dict = np.load(weight_file, allow_pickle=True).item() except: weights_dict = np.load(weight_file, allow_pickle=True, encoding='bytes').item() return weights_dict class KitModel(nn.Module): """ def gen_code(self, phase): self.add_init( 1, """ def __init__(self, weight_file): super(KitModel, self).__init__() global __weights_dict __weights_dict = load_weights(weight_file) """) self.add_body(1, "def forward(self, _x):") for layer in self.IR_graph.topological_sort: current_node = self.IR_graph.get_node(layer) node_type = current_node.type if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) line = func(current_node) if line: self.add_body(2, line) else: print("Pytorch Emitter has not supported operator [%s]." 
% (node_type)) self.emit_UNKNOWN(current_node) self.add_body( 2, "return {}".format(', '.join([ '_' + self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers if self.IR_graph.get_node(name).type != 'Pack' ]))) self.add_body(0, "") for i in self.used_layers: func = getattr(self, "_layer_" + i) func() self.add_body(0, "") for code in self.layers_codes.values(): self.add_body(0, code) return self.header_code + '\n' + self.init_code + '\n' + self.body_code def _defuse_padding(self, IR_node, extra_str=""): input_node = self.parent_variable_name(IR_node) if IR_node.get_attr('auto_pad') == 'VALID': return input_node if is_valid_padding(IR_node.get_attr("pads")) == True: return input_node padding = self._convert_padding(IR_node) input_node = IR_node.variable_name + '_pad' self.add_body( 2, "_{:<15} = F.pad(_{}, {}{})".format( input_node, self.parent_variable_name(IR_node), padding, extra_str)) return input_node def emit_Conv(self, IR_node): self.used_layers.add('Conv') dim = len(IR_node.get_attr('strides')) - 2 in_channels = IR_node.get_attr('kernel_shape')[-2] filter = IR_node.get_attr('kernel_shape')[-1] kernel = IR_node.get_attr('kernel_shape')[:-2] strides = IR_node.get_attr('strides')[1:-1] if IR_node.type == 'DepthwiseConv': group = in_channels filter *= group else: group = IR_node.get_attr('group', 1) self.add_init( 2, "self._{} = self.__conv({}, name='{}', in_channels={}, out_channels={}, kernel_size={}, stride={}, groups={}, bias={})" .format( IR_node.variable_name, dim, IR_node.name, in_channels, filter, tuple(kernel), tuple(strides), # padding, group, IR_node.get_attr('use_bias'))) input_node = self._defuse_padding(IR_node) code = "_{:<15} = self._{}(_{})".format(IR_node.variable_name, IR_node.variable_name, input_node) if self.weight_loaded: if IR_node.type == 'DepthwiseConv': self.weights_dict[IR_node.name]['weights'] = np.swapaxes( self.weights_dict[IR_node.name]['weights'], -1, -2) self.weights_dict[IR_node.name]['weights'] = np.transpose( self.weights_dict[IR_node.name]['weights'], [dim + 1, dim] + list(range(0, dim))) return code @staticmethod def is_ceil_mode(pads): lens = len(pads) for i in range(lens // 2 + 1, lens - 1): if pads[i] == pads[i - lens // 2]: return False else: return True def emit_Pool(self, IR_node): dim = len(IR_node.get_attr('strides')) - 2 if IR_node.get_attr('pooling_type') == "MAX": pool_name = "max_pool{}d".format(dim) # exstr = ", value=float('-Inf')" elif IR_node.get_attr('pooling_type') == "AVG": pool_name = "avg_pool{}d".format(dim) # exstr = "" else: raise ValueError() if IR_node.layer.attr['global_pooling'].b: code = "_{:<15} = F.{}(input = _{}, kernel_size = {}.size()[2:])".format( IR_node.variable_name, pool_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node)) return code else: if IR_node.get_attr('pooling_type') == "MAX": # Change to padding defuse input_node = self._defuse_padding(IR_node, ", value=float('-inf')") for e in IR_node.get_attr('dilations', []): assert e == 1 pool_size = IR_node.get_attr('kernel_shape')[1:-1] strides = IR_node.get_attr('strides')[1:-1] code = "_{:<15} = F.{}(_{}, kernel_size={}, stride={}, padding={}, ceil_mode={})".format( IR_node.variable_name, pool_name, input_node, tuple(pool_size), tuple(strides), 0, False) return code elif IR_node.get_attr('pooling_type') == "AVG": for e in IR_node.get_attr('dilations', []): assert e == 1 pool_size = IR_node.get_attr('kernel_shape')[1:-1] strides = IR_node.get_attr('strides')[1:-1] padding = IR_node.get_attr('pads')[1:dim] 
ceil_mode = self.is_ceil_mode(IR_node.get_attr('pads')) # input_node = self._defuse_padding(IR_node, exstr) code = "_{:<15} = F.{}(_{}, kernel_size={}, stride={}, padding={}, ceil_mode={}, count_include_pad=False)".format( IR_node.variable_name, pool_name, self.parent_variable_name(IR_node), tuple(pool_size), tuple(strides), tuple(padding), ceil_mode) return code else: raise ValueError() def emit_UNKNOWN(self, IR_node): print(IR_node.name) def emit_DataInput(self, IR_node): # Ignore it in Pytorch IR_node.real_name = 'x' def emit_Dropout(self, IR_node): code = "_{:<15} = F.dropout(input = _{}, p = {}, training = self.training, inplace = True)".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.layer.attr["keep_prob"].f) return code def check_if_need_transpose(self, IR_node): parent = self.IR_graph.get_parent(IR_node.name, [0]) while parent.type == 'Flatten' or parent.type == 'Dropout': parent = self.IR_graph.get_parent(parent.name, [0]) dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim) if dim > 2: original_dims = self.weights_dict[IR_node.name]['weights'].shape dims = [ i.size for i in parent.layer.attr['_output_shapes'].list.shape[0].dim[1:] ] + [-1] self.weights_dict[IR_node.name]['weights'] = np.reshape( self.weights_dict[IR_node.name]['weights'], dims) self.weights_dict[IR_node.name]['weights'] = np.transpose( self.weights_dict[IR_node.name]['weights'], [dim - 2] + list(range(0, dim - 2)) + [dim - 1]) self.weights_dict[IR_node.name]['weights'] = np.reshape( self.weights_dict[IR_node.name]['weights'], original_dims) def emit_FullyConnected(self, IR_node): self.used_layers.add(IR_node.type) in_features = 1 for i in self.IR_graph.get_parent( IR_node.name, [0]).layer.attr['_output_shapes'].list.shape[0].dim[1:]: in_features *= i.size if IR_node.get_attr('in_features') != None: in_features = IR_node.get_attr('in_features') self.add_init( 2, "self._{} = self.__dense(name = '{}', in_features = {}, out_features = {}, bias = {})" .format(IR_node.variable_name, IR_node.name, in_features, IR_node.layer.attr["units"].i, IR_node.IR_layer.attr["use_bias"].b)) input_node = self.parent_variable_name(IR_node) if len( self.IR_graph.get_parent( IR_node.name, [0]).get_attr('_output_shapes')[0].dim) > 2: input_node = "{}.view({}.size(0), -1)".format( input_node, input_node) code = "_{:<15} = self._{}(_{})".format(IR_node.variable_name, IR_node.variable_name, input_node) if self.weight_loaded: self.check_if_need_transpose(IR_node) self.weights_dict[IR_node.name]['weights'] = np.transpose( self.weights_dict[IR_node.name]['weights'], (1, 0)) return code def emit_Flatten(self, IR_node): parent = self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name code = "_{:<15} = _{}.view(_{}.size(0), -1)".format( IR_node.variable_name, parent, parent) return code def emit_Reshape(self, IR_node): shape_list = IR_node.get_attr('shape') shape_str = ','.join([str(int(i)) for i in shape_list]) code = "_{:<15} = torch.reshape(input = _{}, shape = ({}))".format( IR_node.variable_name, self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name, shape_str) return code def emit_Tanh(self, IR_node): code = "_{:<15} = F.tanh(_{})".format( IR_node.variable_name, self.parent_variable_name(IR_node, [0])) return code def emit_Relu(self, IR_node): code = "_{:<15} = F.relu(_{})".format( IR_node.variable_name, self.parent_variable_name(IR_node, [0])) return code def emit_LeakyRelu(self, IR_node): code = "_{:<15} = F.leaky_relu(_{}, negative_slope={})".format( IR_node.variable_name, 
self.parent_variable_name(IR_node, [0]), IR_node.get_attr('alpha')) return code def emit_Relu6(self, IR_node): code = "_{:<15} = F.relu6(_{})".format( IR_node.variable_name, self.parent_variable_name(IR_node, [0])) return code def emit_Softmax(self, IR_node): code = "_{:<15} = F.softmax(_{})".format( IR_node.variable_name, self.parent_variable_name(IR_node, [0])) return code def emit_Sigmoid(self, IR_node): code = "_{:<15} = F.sigmoid(_{})".format( IR_node.variable_name, self.parent_variable_name(IR_node)) return code def emit_Embedding(self, IR_node): self.used_layers.add("Embedding") self.add_init( 2, "self._{} = self.__embedding('{}', num_embeddings={}, embedding_dim={})" .format( IR_node.variable_name, IR_node.name, IR_node.get_attr('input_dim'), #2-D IR_node.get_attr('output_dim'))) code = "_{:<15} = self._{}(_{})".format( IR_node.variable_name, IR_node.variable_name, "torch.LongTensor(np.array({}))".format( self.parent_variable_name(IR_node))) return code def emit_RNNs(self, IR_node, func): raise NotImplementedError() # for Keras if "dropout" in IR_node.IR_layer.attr: dropout_str = ",dropout = {}, recurrent_dropout = {}".format( IR_node.IR_layer.attr['dropout'].f, IR_node.IR_layer.attr['recurrent_dropout'].f) else: dropout_str = "" code = "_{:<15} = {}(units = {}, use_bias = {} {})(_{})".format( IR_node.name, func, IR_node.IR_layer.attr['units'].i, IR_node.IR_layer.attr['use_bias'].b, dropout_str, IR_node.in_edges[0]) return code def emit_LSTM(self, IR_node): return self.emit_RNNs(IR_node, "LSTM") def emit_GRU(self, IR_node): return self.emit_RNNs(IR_node, "GRU") def emit_Add(self, IR_node): code = "_{:<15} = _{} + _{}".format( IR_node.variable_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node, [1])) return code def emit_Sub(self, IR_node): code = "_{:<15} = {}".format( IR_node.variable_name, ' - '.join('_' + self.parent_variable_name(IR_node, [idx]) for idx in range(len(IR_node.in_edges)))) return code def emit_Mul(self, IR_node): code = "_{:<15} = {}".format( IR_node.variable_name, ' * '.join('_' + self.parent_variable_name(IR_node, [idx]) for idx in range(len(IR_node.in_edges)))) return code def emit_MatMul(self, IR_node): code = "_{:<15} = torch.matmul({})".format( IR_node.variable_name, ', '.join('_%s' % self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges)) return code def emit_Constant(self, IR_node): if IR_node.get_attr('value'): value = IR_node.get_attr('value') if not isinstance(value, list): value = [value] code = "self._{:<15} = torch.autograd.Variable(torch.Tensor({}), requires_grad=False)".format( IR_node.variable_name, value) else: code = "self._{:<15} = torch.autograd.Variable(torch.from_numpy(__weights_dict['{}']['value']), requires_grad=False)".format( IR_node.variable_name, IR_node.name) # self.add_init(2, "self.{:<15} = torch.from_numpy(__weights_dict['{}']['value'])".format( # IR_node.variable_name, # IR_node.name)) IR_node.real_name = "self."
+ IR_node.variable_name return code def _convert_axis(self, IR_node, axis): ndim = len( self.IR_graph.get_parent(IR_node.name, [0]).get_attr('_output_shapes')[0].dim) if axis == 0: return 0 elif axis == ndim - 1: return 1 else: return axis + 1 def emit_Concat(self, IR_node): axis = self._convert_axis(IR_node, IR_node.get_attr('axis')) code = "_{:<15} = torch.cat(({}), {})".format( IR_node.variable_name, ', '.join('_' + self.parent_variable_name(IR_node, [idx]) for idx in range(len(IR_node.in_edges))), axis, ) return code def emit_BatchNorm(self, IR_node): self.used_layers.add(IR_node.type) dim = len(IR_node.layer.attr['_output_shapes'].list.shape[0].dim) - 2 output_shape = IR_node.layer.attr['_output_shapes'].list.shape[0] if IR_node.get_attr('data_format', "NHWC") == "NCHW": num_features = output_shape.dim[1].size else: num_features = output_shape.dim[-1].size # fix so that we don't end up with untrainable batch norm layers momentum = IR_node.layer.attr['momentum'].f momentum = 0.01 if momentum < 1e-10 else momentum self.add_init( 2, "self._{} = self.__batch_normalization({}, '{}', num_features={}, eps={}, momentum={})" .format( IR_node.variable_name, dim, IR_node.name, num_features, IR_node.layer.attr['epsilon'].f, momentum, )) code = "_{:<15} = self._{}(_{})".format( IR_node.variable_name, IR_node.variable_name, self.parent_variable_name(IR_node)) return code def emit_Scale(self, IR_node): self.used_layers.add(IR_node.type) dim = len(IR_node.layer.attr['_output_shapes'].list.shape[0].dim) - 2 self.add_init( 2, "self._{} = self.__scale({}, '{}', num_features={})".format( IR_node.variable_name, dim, IR_node.name, IR_node.layer. attr['_output_shapes'].list.shape[0].dim[-1].size)) code = "_{:<15} = self._{}(_{})".format( IR_node.variable_name, IR_node.variable_name, self.parent_variable_name(IR_node)) return code def emit_Squeeze(self, IR_node): code = "_{:<15} = torch.squeeze(_{})".format( IR_node.variable_name, self.parent_variable_name(IR_node)) return code @staticmethod def _convert_padding(IR_node): padding = IR_node.get_attr('pads') padding = convert_onnx_pad_to_tf(padding)[1:-1] new_padding = [] for pad in padding: new_padding.insert(0, pad) return tuple(np.array(new_padding).reshape(-1).tolist()) def emit_Pad(self, IR_node): if IR_node.get_attr('mode').lower() == 'constant': mode = "mode = 'constant', value = {}".format(0) elif IR_node.get_attr('mode').lower() == 'reflect': mode = "mode = 'reflect'" elif IR_node.get_attr('mode').upper() == 'SYMMETRIC': mode = "mode = 'replicate'" else: assert False padding = self._convert_padding(IR_node) code = "_{:<15} = F.pad(_{}, {}, {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), padding, mode) return code def emit_ReduceMean(self, IR_node): axes = [ self._convert_axis(IR_node, x) for x in IR_node.get_attr('axes') ] input_node = self.parent_variable_name(IR_node) codes = [] for axis in sorted(axes, reverse=True): code = "_{:<15} = torch.mean(_{}, {}, {})".format( IR_node.variable_name, input_node, axis, IR_node.get_attr("keepdims")) codes.append(code) input_node = IR_node.variable_name return codes def emit_LRN(self, IR_node): code = "_{:<15} = F.local_response_norm(_{}, size={}, alpha={}, beta={}, k={})".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('size') * 2 - 1, IR_node.get_attr('alpha'), IR_node.get_attr('beta'), IR_node.get_attr('k', 1)) return code def emit_DepthwiseConv(self, IR_node): return self.emit_Conv(IR_node) def emit_Const(self, IR_node): if 'dtype' in 
IR_node.layer.attr: dtype_str = "dtype={}".format( self.dtype_map[IR_node.layer.attr['dtype'].type]) if 'int' in dtype_str: code = "_{:<15} = torch.tensor({}, {})".format( IR_node.variable_name, IR_node.layer.attr['value'].i, dtype_str) else: code = "_{:<15} = torch.tensor({}, {})".format( IR_node.variable_name, IR_node.layer.attr['value'].f, dtype_str) else: dtype_str = "dtype=torch.float32" code = "_{:<15} = torch.tensor({}, {})".format( IR_node.variable_name, IR_node.layer.attr['value'].f, dtype_str) return code def emit_Shape(self, IR_node): code = "_{:<15} = torch.Tensor(list(_{}.size()))".format( IR_node.variable_name, self.parent_variable_name(IR_node)) return code def emit_Pack(self, IR_node): code = "_{:<15} = {}".format( IR_node.variable_name, '[' + ','.join('_%s' % self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges) + ']', ) return code def emit_Slice(self, IR_node): starts = IR_node.get_attr('starts') if len(starts) > 1: starts = [starts[0], starts[-1]] + starts[1:-1] ends = IR_node.get_attr('ends') if len(ends) > 1: ends = [ends[0], ends[-1]] + ends[1:-1] extra_str = "" for idx, _ in enumerate(starts): if idx: extra_str += ", " extra_str += "{}:".format(starts[idx]) if ends[idx]: extra_str += "{}".format(ends[idx]) shrink_mask = IR_node.get_attr('shrink_axis_mask') if shrink_mask: mask = [int(s) for s in bin(shrink_mask)[2:][::-1]] shrink_str = '[' + ','.join(':' if bit == 0 else '0' for bit in mask) + ']' else: shrink_str = '' code = "_{:<15} = _{}[{}]{}".format(IR_node.variable_name, self.parent_variable_name(IR_node), extra_str, shrink_str) return code def emit_Split(self, IR_node): if isinstance(IR_node.get_attr('split'), list): split_str = IR_node.get_attr('split') else: num_split = IR_node.get_attr('split') split_str = "math.ceil(_{}.shape[{}]/{})".format( self.parent_variable_name(IR_node), IR_node.get_attr('axis'), num_split) code = "_{:<15} = torch.split(_{}, {}, dim={})".format( IR_node.variable_name, self.parent_variable_name(IR_node), split_str, IR_node.get_attr('axis'), ) return code def emit_Unstack(self, IR_node): code = "_{:<15} = torch.unbind(_{}, dim={})".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('axis')) return code def emit_Fill(self, IR_node): code = "_{:<15} = torch.full(_{}.int().numpy().tolist(), {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('value')) return code def emit_Gather(self, IR_node): pass def emit_Unsqueeze(self, IR_node): code = "_{:<15} = _{}.unsqueeze({})".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('axes')[0]) return code def emit_Transpose(self, IR_node): code = "_{:<15} = _{}.permute(_{})".format( IR_node.variable_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node, [1])) return code def emit_Minimum(self, IR_node): code = "_{:<15} = torch.min(_{}, _{})".format( IR_node.variable_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node, [1])) return code def emit_Maxmum(self, IR_node): # 'Maxmum' spelling matches the IR op type used for dispatch code = "_{:<15} = torch.max(_{}, _{})".format( IR_node.variable_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node, [1])) return code def emit_Square(self, IR_node): code = "_{:<15} = _{}.pow(2)".format( IR_node.variable_name, self.parent_variable_name(IR_node)) return code def emit_PRelu(self, IR_node): self.used_layers.add(IR_node.type) self.add_init( 2, "self._{} = self.__prelu(name='{}')".format( IR_node.variable_name, IR_node.name)) code =
"_{:<15} = self._{}(_{})".format( IR_node.real_variable_name, IR_node.variable_name, self.parent_variable_name(IR_node)) if self.weight_loaded: self.weights_dict[IR_node.name][ 'weights'.decode()] = self.weights_dict[IR_node.name]['gamma'] return code def emit_Cast(self, IR_node): dstType = IR_node.get_attr('dstType') if dstType == 'float': dst = 'torch.FloatTensor' elif dstType == 'double': dst = 'torch.DoubleTensor' elif dstType == 'int': dst = 'torch.IntTensor' code = "_{:<15} = _{}.type({})".format( IR_node.real_variable_name, self.parent_variable_name(IR_node), dst) return code def emit_Scope(self, IR_node): input_vars = [ '_' + self.parent_variable_name(IR_node, [idx]) for idx in range(len(IR_node.in_edges)) ] code = "_{:<15} = self.__{}({})".format(IR_node.real_variable_name, IR_node.pattern, ', '.join(input_vars)) self._gen_scope_code(IR_node) return code def _gen_scope_code(self, scope_node): def _scope_func(scope_name, params, code, return_var): code = """ def __{}({}): {} return {} """.format(scope_name, params, code, ', '.join(return_var)) return code if not self.layers_codes.get(scope_node.pattern, None): body_code = str() for node_name in scope_node.topology_list: node = self.IR_graph.get_node(node_name) node_type = node.type if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) line = func(node) if line != None: body_code += " " + line + '\n' else: print("PytorchEmitter has not supported operator [%s]." % (node_type)) self.emit_UNKNOWN(node) # param_code does not need parameter slice. input_params = scope_node.input_params input_params.insert(0, "self") param_code = ', '.join(input_params) function_code = _scope_func(scope_node.pattern, param_code, body_code, scope_node.return_variables) self.layers_codes[scope_node.pattern] = function_code def _layer_PRelu(self): self.add_body( 0, """ @staticmethod def __prelu(name): class CompliantPReLU(nn.Module): def __init__(self, num_parameters=1, init=0.25): super(CompliantPReLU, self).__init__() self.prelu = torch.nn.PReLU(num_parameters, init) def forward(self, x): # standard PReLU for training. Use ReLU only during eval if self.training: return self.prelu(x) else: # Warning. Logic not exported (detached). 
Layer needs to have same dimensions each pass m_shape = [-1 if idx == 1 else 1 for idx in range(len(x.data.detach().shape))] m = self.prelu.weight.view(*m_shape) return F.relu(x) - m * F.relu(-x) @property def weight(self): return self.prelu.weight @property def num_parameters(self): return self.prelu.num_parameters @property def state_dict(self): return self.prelu.state_dict weights = torch.from_numpy(__weights_dict[name]['weights']) layer = CompliantPReLU(num_parameters=len(weights)) layer.state_dict()['weight'].copy_(weights) return layer """) def _layer_Embedding(self): self.add_body( 0, """ @staticmethod def __embedding(name, **kwargs): layer = nn.Embedding(**kwargs) layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights'])) return layer """) def _layer_Conv(self): self.add_body( 0, """ @staticmethod def __conv(dim, name, **kwargs): if dim == 1: layer = nn.Conv1d(**kwargs) elif dim == 2: layer = nn.Conv2d(**kwargs) elif dim == 3: layer = nn.Conv3d(**kwargs) else: raise NotImplementedError() layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights'])) if 'bias' in __weights_dict[name]: layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias'])) return layer""") def _layer_FullyConnected(self): self.add_body( 0, """ @staticmethod def __dense(name, **kwargs): layer = nn.Linear(**kwargs) layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights'])) if 'bias' in __weights_dict[name]: layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias'])) return layer""") def _layer_BatchNorm(self): self.add_body( 0, """ @staticmethod def __batch_normalization(dim, name, **kwargs): if dim == 0 or dim == 1: layer = nn.BatchNorm1d(**kwargs) elif dim == 2: layer = nn.BatchNorm2d(**kwargs) elif dim == 3: layer = nn.BatchNorm3d(**kwargs) else: raise NotImplementedError() if 'scale' in __weights_dict[name]: layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['scale'])) else: layer.weight.data.fill_(1) if 'bias' in __weights_dict[name]: layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias'])) else: layer.bias.data.fill_(0) layer.state_dict()['running_mean'].copy_(torch.from_numpy(__weights_dict[name]['mean'])) layer.state_dict()['running_var'].copy_(torch.from_numpy(__weights_dict[name]['var'])) return layer""") def _layer_Scale(self): self.add_body( 0, """ class _Scale(nn.Module): def __init__(self, num_features, affine=True): super(KitModel._Scale, self).__init__() self.num_features = num_features self.affine = affine self.running_mean = torch.zeros(num_features) self.running_var = torch.ones(num_features) self.training = False self.eps = 1e-5 if self.affine: self.weight = nn.Parameter(torch.Tensor(num_features)) self.bias = nn.Parameter(torch.Tensor(num_features)) else: self.register_parameter('weight', None) self.register_parameter('bias', None) self.reset_parameters() def reset_parameters(self): if self.affine: self.weight.data.uniform_() self.bias.data.zero_() def _check_input_dim(self, input): raise NotImplementedError def forward(self, input): self._check_input_dim(input) return F.batch_norm( input, self.running_mean, self.running_var, self.weight, self.bias, self.training, 0, self.eps) class Scale1d(_Scale): def _check_input_dim(self, input): if input.dim() != 2 and input.dim() != 3: raise ValueError('expected 2D or 3D input (got {}D input)' .format(input.dim())) class Scale2d(_Scale): def
_check_input_dim(self, input): if input.dim() != 4: raise ValueError('expected 4D input (got {}D input)' .format(input.dim())) class Scale3d(_Scale): def _check_input_dim(self, input): if input.dim() != 5: raise ValueError('expected 5D input (got {}D input)' .format(input.dim())) @staticmethod def __scale(dim, name, **kwargs): if dim == 1: layer = KitModel.Scale1d(**kwargs) elif dim == 2: layer = KitModel.Scale2d(**kwargs) elif dim == 3: layer = KitModel.Scale3d(**kwargs) else: raise NotImplementedError() if 'scale' in __weights_dict[name]: layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['scale'])) else: layer.weight.data.fill_(1) if 'bias' in __weights_dict[name]: layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias'])) else: layer.bias.data.fill_(0) return layer""")
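# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the emitter): PytorchEmitter
# generates a standalone script defining KitModel plus the __conv / __dense /
# __batch_normalization builders above. Assuming the emitted script was saved
# as "pytorch_kit_model.py" next to a converted weight file "weights.npy"
# (both file names are hypothetical), the generated network can be loaded and
# run like this:
def _demo_run_generated_pytorch_model():
    import numpy as np
    import torch
    from pytorch_kit_model import KitModel  # hypothetical emitted module

    model = KitModel(weight_file='weights.npy')
    model.eval()  # emit_Dropout and the PReLU shim both branch on self.training
    # NCHW input shape for a typical image model; adjust to the converted net
    x = torch.from_numpy(np.random.rand(1, 3, 224, 224).astype(np.float32))
    with torch.no_grad():
        return model(x)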
class CntkEmitter(Emitter): dtype_map = { graph_pb2.DT_FLOAT16: "np.float16", graph_pb2.DT_FLOAT32: "np.float32", graph_pb2.DT_FLOAT64: "np.float64", graph_pb2.DT_INT16: "np.int16", graph_pb2.DT_INT32: "np.int32", graph_pb2.DT_INT64: "np.int64", graph_pb2.DT_UINT8: "np.uint8", graph_pb2.DT_UINT16: "np.uint16" } def __init__(self, model): from six import string_types as _string_types super(CntkEmitter, self).__init__() if isinstance(model, _string_types): network_path = model else: network_path = model[0] self._load_weights(model[1]) self.IR_graph = IRGraph(network_path) super(CntkEmitter, self)._build() @property def header_code(self): return """import numpy as np import cntk from cntk import ops, layers from cntk.contrib.crosstalkcaffe.unimodel.cntkinstance import BlockApiSetup __weights_dict = dict() def load_weights(weight_file): if weight_file == None: return try: weights_dict = np.load(weight_file).item() except: weights_dict = np.load(weight_file, encoding='bytes').item() return weights_dict def KitModel(weight_file = None): global __weights_dict __weights_dict = load_weights(weight_file) """ def gen_code(self, phase='test'): self.phase = phase self.add_body(0, self.header_code) for layer in self.IR_graph.topological_sort: current_node = self.IR_graph.get_node(layer) node_type = current_node.type if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) func(current_node) else: print("CntkEmitter has not supported operator [%s]." % (node_type)) self.emit_UNKNOWN(current_node) self.add_body( 1, "return {}".format(','.join([ self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers ]))) self.add_body(0, "") for i in self.used_layers: func = getattr(self, "_layer_" + i) func() return self.body_code @staticmethod def _shapeToStr(shapes): new_shape = filter(lambda x: x > -1, [dim.size for dim in shapes.dim]) return ', '.join('%s' % i for i in new_shape) @staticmethod def is_valid_padding(auto_pad, pads): """ different from utils.is_valid_padding """ if auto_pad: if auto_pad == 'VALID': return True elif auto_pad.startswith('SAME'): return False else: raise ValueError("Unknown padding type [{}].".format(auto_pad)) else: lens = len(pads) assert lens % 2 == 0 for i in range(0, lens // 2): if pads[i] != 0: return False return True @staticmethod def is_ceil_mode(pads): # ceil mode shows up as asymmetric (end > begin) spatial padding lens = len(pads) return any(pads[i] != pads[i - lens // 2] for i in range(lens // 2 + 1, lens - 1)) def emit_Conv(self, IR_node): if self.weight_loaded: self.used_layers.add(IR_node.type) dim = len(IR_node.get_attr('strides')) - 2 padding = not self.is_valid_padding(IR_node.get_attr('auto_pad'), IR_node.get_attr('pads')) padding = [False] + [padding] * dim self.add_body( 1, "{:<15} = convolution({}, strides={}, auto_padding={}, dilation={}, groups={}, name='{}')" .format(IR_node.variable_name, self.parent_variable_name(IR_node), tuple(IR_node.get_attr('strides')[1:-1]), padding, tuple(IR_node.get_attr('dilations', [1])), IR_node.get_attr('group', 1), IR_node.name)) else: self.add_body( 1, "{:<15} = Convolution(name = '{}', num_filters = {}, filter_shape = ({}), strides = ({},), pad = {}, bias = {})({})\n" .format( IR_node.variable_name, IR_node.name, IR_node.get_attr('kernel_shape')[-1], ', '.join('%s' % i for i in IR_node.layer.attr["kernel_shape"].list.i[:-2]), ', '.join( '%s' % i for i in IR_node.layer.attr['strides'].list.i[1:-1]), IR_node.get_attr('auto_pad') != 'VALID', IR_node.get_attr('use_bias'), self.parent_variable_name(IR_node))) def
emit_Pool(self, IR_node): input_node = self.IR_graph.get_node( IR_node.in_edges[0]).real_variable_name if IR_node.layer.attr['global_pooling'].b: self.used_layers.add('GlobalPooling') self.add_body( 1, "{:<15} = global_pooling({}, '{}', name = '{}')".format( IR_node.variable_name, input_node, IR_node.get_attr('pooling_type'), IR_node.name)) else: for e in IR_node.get_attr('dilations', []): assert e == 1 dim = len(IR_node.get_attr('kernel_shape')) - 2 padding = not self.is_valid_padding(IR_node.get_attr('auto_pad'), IR_node.get_attr('pads')) padding = [False] + [padding] * dim ceil_out_dim = self.is_ceil_mode(IR_node.get_attr('pads')) pooling_type = IR_node.get_attr('pooling_type') if pooling_type == 'MAX': pooling_type = cntk.MAX_POOLING elif pooling_type == 'AVG': pooling_type = cntk.AVG_POOLING else: raise ValueError if self.weight_loaded: self.used_layers.add(IR_node.type) self.add_body( 1, "{:<15} = pooling({}, pooling_type={}, pooling_window_shape={}, strides={}, auto_padding={}, ceil_out_dim={})" .format(IR_node.variable_name, input_node, pooling_type, tuple(IR_node.get_attr('kernel_shape')[1:-1]), tuple(IR_node.get_attr('strides')[1:-1]), padding, ceil_out_dim)) else: raise NotImplementedError def emit_UNKNOWN(self, IR_node): print(IR_node.IR_layer.name) def emit_DataInput(self, IR_node): shape_str = self._shapeToStr(IR_node.IR_layer.attr["shape"].shape) dtype_str = ", dtype = {}".format( self.dtype_map[IR_node.layer.attr['dtype']. type]) if 'dtype' in IR_node.layer.attr else "" self.add_body( 1, "{:<15} = cntk.input_variable(({},) {}, name='{}')".format( IR_node.variable_name, shape_str, dtype_str, IR_node.name)) def emit_Dropout(self, IR_node): parent = self.IR_graph.get_parent(IR_node.name, [0]) if self.phase == 'train': self.add_body( 1, "{:<15} = Dropout({}, name = '{}')({})".format( IR_node.variable_name, 1 - IR_node.get_attr('keep_prob'), IR_node.name, parent.real_variable_name)) else: IR_node.real_name = parent.real_name def emit_FullyConnected(self, IR_node): input_node = self.parent_variable_name(IR_node) if self.weight_loaded: self.used_layers.add(IR_node.type) self.add_body( 1, "{:<15} = dense({}, name = '{}')".format( IR_node.variable_name, input_node, IR_node.name)) else: self.add_body( 1, "{:<15} = Dense({}, bias = {}, name = '{}')({})".format( IR_node.variable_name, IR_node.layer.attr["units"].i, IR_node.layer.attr['use_bias'].b, IR_node.name, input_node)) def emit_Flatten(self, IR_node): self.add_body( 1, "{:<15} = ops.reshape({}, (-1,), name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.name)) def emit_Reshape(self, IR_node): self.add_body( 1, "{:<15} = cntk.reshape({}, shape={}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), tuple(IR_node.get_attr('shape')), IR_node.name)) def _emit_activation(self, IR_node, op_name): self.add_body( 1, "{:<15} = layers.Activation(activation = {}, name = '{}')({})". 
format(IR_node.variable_name, op_name, IR_node.name, self.parent_variable_name(IR_node))) def emit_Tanh(self, IR_node): self._emit_activation(IR_node, 'ops.tanh') def emit_Relu(self, IR_node): self._emit_activation(IR_node, 'ops.relu') def emit_Softmax(self, IR_node): self._emit_activation(IR_node, 'ops.softmax') def emit_Sigmoid(self, IR_node): self._emit_activation(IR_node, 'ops.sigmoid') def emit_RNNs(self, IR_node, func): raise NotImplementedError() def emit_LSTM(self, IR_node): return self.emit_RNNs(IR_node, "LSTM") def emit_GRU(self, IR_node): return self.emit_RNNs(IR_node, "GRU") def emit_Add(self, IR_node): if len(IR_node.in_edges) > 1: inputs = ' + '.join( self.IR_graph.get_node(i).real_variable_name for i in IR_node.in_edges) self.add_body(1, "{:<15} = {}".format(IR_node.variable_name, inputs)) def emit_Sub(self, IR_node): if len(IR_node.in_edges) > 1: inputs = ' - '.join( self.IR_graph.get_node(i).real_variable_name for i in IR_node.in_edges) self.add_body(1, "{:<15} = {}".format(IR_node.variable_name, inputs)) def emit_Mul(self, IR_node): if len(IR_node.in_edges) > 1: inputs = ' * '.join( self.IR_graph.get_node(i).real_variable_name for i in IR_node.in_edges) self.add_body(1, "{:<15} = {}".format(IR_node.variable_name, inputs)) def emit_Constant(self, IR_node): self.add_body( 1, "{:<15} = cntk.Constant(value=__weights_dict['{}']['value'])". format(IR_node.variable_name, IR_node.name)) def emit_Concat(self, IR_node): inputs = ', '.join( self.IR_graph.get_node(i).real_variable_name for i in IR_node.in_edges) self.add_body( 1, "{:<15} = cntk.splice({}, axis={}, name='{}')".format( IR_node.variable_name, inputs, IR_node.get_attr('axis') - 1, IR_node.name)) def emit_BatchNorm(self, IR_node): self.used_layers.add(IR_node.type) self.add_body( 1, "{:<15} = batch_normalization({}, epsilon={}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('epsilon'), IR_node.name)) def emit_Pad(self, IR_node): if IR_node.get_attr('mode').lower() == 'constant': mode = 'mode = ops.CONSTANT_PAD, constant_value = {}'.format( IR_node.get_attr('constant_values', 0.0)) elif IR_node.get_attr('mode').lower() == 'reflect': mode = 'mode = ops.REFLECT_PAD' elif IR_node.get_attr('mode').upper() == 'SYMMETRIC': mode = 'mode = ops.SYMMETRIC_PAD' else: raise ValueError("Unsupported padding mode [{}].".format(IR_node.get_attr('mode'))) padding = IR_node.get_attr('pads') padding = convert_onnx_pad_to_tf(padding)[1:] self.add_body( 1, "{:<15} = ops.pad({}, pattern={}, {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), padding, mode)) def emit_Squeeze(self, IR_node): IR_node.real_name = self.IR_graph.get_node( IR_node.in_edges[0]).real_name def emit_Log(self, IR_node): self.add_body( 1, "{:<15} = cntk.log({}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.name)) def emit_Exp(self, IR_node): self.add_body( 1, "{:<15} = cntk.exp({}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.name)) def emit_Reciprocal(self, IR_node): self.add_body( 1, "{:<15} = cntk.reciprocal({}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.name)) def emit_ReduceMean(self, IR_node): self.add_body( 1, "{:<15} = ops.reduce_mean({}, axis = ({}), name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), ', '.join('%s' % (i - 1) for i in IR_node.get_attr('axes')), IR_node.name)) def emit_LRN(self, IR_node): self.used_layers.add(IR_node.type) self.add_body( 1, "{:<15} = lrn({}, k=1, n={}, alpha={}, beta={}, name='{}')".format( IR_node.variable_name,
self.parent_variable_name(IR_node), IR_node.layer.attr['size'].i, IR_node.layer.attr['alpha'].f, IR_node.layer.attr['beta'].f, IR_node.name)) def _layer_LRN(self): self.add_body( 0, """ def lrn(input, **kwargs): dim = len(input.output.shape) input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1))) layer = BlockApiSetup.lrn(**kwargs)(input) layer = cntk.transpose(layer, list(range(1, dim)) + [0]) return layer """) def _layer_FullyConnected(self): self.add_body( 0, """ def dense(input, name, **kwargs): w = __weights_dict[name]['weights'] b = __weights_dict[name]['bias'] if 'bias' in __weights_dict[name] else None return BlockApiSetup.linear(output_shape=w.shape[1], input_shape=w.shape[0], scale_init=w, bias_init=b, name=name, **kwargs)(input) """) def _layer_Conv(self): self.add_body( 0, """ def convolution(input, name, **kwargs): dim = __weights_dict[name]['weights'].ndim weight = np.transpose(__weights_dict[name]['weights'], [dim - 1, dim - 2] + list(range(0, dim - 2))) w = cntk.Parameter(init=weight, name=name + '_weight') input = cntk.transpose(input, [dim - 2] + list(range(0, dim - 2))) layer = ops.convolution(w, input, **kwargs) if 'bias' in __weights_dict[name]: bias = np.reshape(__weights_dict[name]['bias'], [-1] + [1] * (dim - 2)) b = cntk.Parameter(init=bias, name=name + '_bias') layer = layer + b layer = cntk.transpose(layer, list(range(1, dim - 1)) + [0]) return layer """) def _layer_Pool(self): self.add_body( 0, """ def pooling(input, **kwargs): dim = len(input.output.shape) input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1))) layer = ops.pooling(input, **kwargs) layer = cntk.transpose(layer, list(range(1, dim)) + [0]) return layer """) def _layer_GlobalPooling(self): self.add_body( 0, """ def global_pooling(input, type, **kwargs): dim = len(input.output.shape) input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1))) layer = layers.GlobalMaxPooling(**kwargs)(input) if type == 'MAX' else layers.GlobalAveragePooling(**kwargs)(input) layer = cntk.transpose(layer, list(range(1, dim)) + [0]) return layer """) def _layer_BatchNorm(self): self.add_body( 0, """ def batch_normalization(input, name, epsilon, **kwargs): mean = cntk.Parameter(init = __weights_dict[name]['mean'], name = name + "_mean") var = cntk.Parameter(init = __weights_dict[name]['var'], name = name + "_var") layer = (input - mean) / cntk.sqrt(var + epsilon) if 'scale' in __weights_dict[name]: scale = cntk.Parameter(init = __weights_dict[name]['scale'], name = name + "_scale") layer = scale * layer if 'bias' in __weights_dict[name]: bias = cntk.Parameter(init = __weights_dict[name]['bias'], name = name + "_bias") layer = layer + bias return layer """)
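# ---------------------------------------------------------------------------
# Sketch of driving an emitter end to end (illustrative). Each emitter is
# constructed from the IR network file plus the IR weight file and produces
# the generated source via gen_code(); the file names below are hypothetical.
def _demo_emit_cntk_code():
    emitter = CntkEmitter(('converted.pb', 'converted.npy'))  # IR graph + weights
    code = emitter.gen_code(phase='test')  # phase='train' would keep Dropout layers
    with open('cntk_kit_model.py', 'w') as f:
        f.write(code)
    return code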
class TensorflowEmitter(Emitter): dtype_map = { graph_pb2.DT_FLOAT16: "tf.float16", graph_pb2.DT_FLOAT32: "tf.float32", graph_pb2.DT_FLOAT64: "tf.float64", graph_pb2.DT_INT16: "tf.int16", graph_pb2.DT_INT32: "tf.int32", graph_pb2.DT_INT64: "tf.int64", graph_pb2.DT_UINT8: "tf.uint8", graph_pb2.DT_UINT16: "tf.uint16" } @property def header_code(self): return """import tensorflow as tf __weights_dict = dict() is_train = {} def load_weights(weight_file): import numpy as np if weight_file == None: return try: weights_dict = np.load(weight_file, allow_pickle=True).item() except: weights_dict = np.load(weight_file, allow_pickle=True, encoding='bytes').item() return weights_dict def KitModel(weight_file = None): global __weights_dict __weights_dict = load_weights(weight_file) """.format(self.trainable) def __init__(self, model): super(TensorflowEmitter, self).__init__() from six import string_types as _string_types if isinstance(model, _string_types): network_path = model else: network_path = model[0] self._load_weights(model[1]) self.IR_graph = IRGraph(network_path) super(TensorflowEmitter, self)._build() folder = Folder(self.IR_graph, self.weights_dict) folder.fold() def gen_code(self, phase): self.trainable = (phase == 'train') self.add_body(0, self.header_code) for layer in self.IR_graph.topological_sort: current_node = self.IR_graph.get_node(layer) node_type = current_node.type if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) line = func(current_node) if line != None: self.add_body(1, line) else: print("TensorflowEmitter has not supported operator [%s]." % (node_type)) self.emit_UNKNOWN(current_node) self.add_body( 1, "return {}, {}".format( ', '.join([ self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.input_layers if self.IR_graph.get_node(name).type != 'Const' and not self.IR_graph.get_node(name).get_attr('feed_weights') ]), ', '.join([ self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers if self.IR_graph.get_node(name).type != 'Pack' and self.IR_graph.get_node(name).type != 'Shape' ]))) self.add_body(0, "") for i in self.used_layers: func = getattr(self, "_layer_" + i) func() self.add_body(0, "") for code in self.layers_codes.values(): self.add_body(0, code) return self.body_code def parent_variable_name(self, IR_node, path=[0]): if not IR_node.in_edges and IR_node.name in self.weights_dict.keys(): return "tf.constant(__weights_dict['{}']['weights'], name='{}')".format( IR_node.name, IR_node.name) return super(TensorflowEmitter, self).parent_variable_name(IR_node, path) @staticmethod def _shapeToStr(shapes): ret = [dim.size if dim.size != -1 else 'None' for dim in shapes.dim] return ', '.join('%s' % i for i in ret) def emit_Conv(self, IR_node): self.used_layers.add(IR_node.type) strides_str = ', '.join('%s' % i for i in IR_node.get_attr('strides')[1:-1]) input_node, padding = self._defuse_padding(IR_node) data_format = IR_node.get_attr('data_format') code = "{:<15} = convolution({}, group={}, strides=[{}], padding='{}', name='{}')".format( IR_node.variable_name, input_node, IR_node.get_attr('group', 1), strides_str, padding, IR_node.name) return code def _defuse_padding(self, IR_node, extra_str=""): auto_pad = IR_node.get_attr('auto_pad') if auto_pad: input_node = self.parent_variable_name(IR_node) if auto_pad == 'VALID': padding = 'VALID' elif auto_pad.startswith("SAME"): padding = 'SAME' else: raise ValueError("Unknown padding type [{}].".format(auto_pad)) return input_node, padding else: padding = 
IR_node.get_attr("pads") padding = convert_onnx_pad_to_tf(padding) if not is_valid_padding(padding): input_node = IR_node.variable_name + '_pad' self.add_body( 1, "{:<15} = tf.pad({}, paddings = {}{})".format( input_node, self.parent_variable_name(IR_node), padding, extra_str)) else: input_node = self.parent_variable_name(IR_node) return input_node, 'VALID' def emit_Constant(self, IR_node): if 'dtype' in IR_node.layer.attr: dtype_str = "{}".format( self.dtype_map[IR_node.layer.attr['dtype'].type]) else: dtype_str = "tf.float32" code = "{:<15} = tf.constant({}, dtype={}, name='{}')".format( IR_node.variable_name, "__weights_dict['{}']['value']".format(IR_node.name) if IR_node.get_attr('value') == None else IR_node.get_attr('value'), dtype_str, IR_node.name) return code def emit_Pool(self, IR_node): pooling_type = IR_node.get_attr('pooling_type') if pooling_type == 'MAX': op = 'max_pool' padding_const = ", constant_values=float('-Inf')" elif pooling_type == 'AVG': op = 'avg_pool' padding_const = "" else: raise ValueError("unknown pooling type [{}].".format(pooling_type)) arrlen = len(IR_node.get_attr('strides')) dim_str = '3d' if arrlen == 5 else "" if IR_node.layer.attr['global_pooling'].b: code = "{:<15} = tf.nn.{}{}({}, [1] + {}.get_shape().as_list()[1:-1] + [1], strides = [1] * {}, padding = 'VALID', name = '{}')".format( IR_node.variable_name, op, dim_str, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node), arrlen, IR_node.name) else: dim = len(IR_node.get_attr("strides")) - 2 dilations = IR_node.get_attr('dilations') if dilations: for e in IR_node.get_attr('dilations'): assert e == 1 pool_size = IR_node.get_attr('kernel_shape')[1:-1] strides = IR_node.get_attr('strides')[1:-1] padding = IR_node.get_attr('pads')[1:dim] if pooling_type == "AVG" and pool_size.count( pool_size[0] ) == len(pool_size) and strides[0] == 1 and strides.count( strides[0]) == len(strides) and padding.count( padding[0]) == len( padding) and pool_size[0] == padding[0] * 2 + 1: kernel_shape_str = ', '.join( '%s' % i for i in IR_node.get_attr('kernel_shape')) strides_str = ', '.join('%s' % i for i in IR_node.get_attr('strides')) code = "{:<15} = tf.nn.{}{}({}, [{}], [{}], padding='{}', name='{}')".format( IR_node.variable_name, op, dim_str, self.parent_variable_name(IR_node), kernel_shape_str, strides_str, 'SAME', IR_node.name) else: kernel_shape_str = ', '.join( '%s' % i for i in IR_node.get_attr('kernel_shape')) strides_str = ', '.join('%s' % i for i in IR_node.get_attr('strides')) input_node, padding = self._defuse_padding( IR_node, padding_const) code = "{:<15} = tf.nn.{}{}({}, [{}], [{}], padding='{}', name='{}')".format( IR_node.variable_name, op, dim_str, input_node, kernel_shape_str, strides_str, padding, IR_node.name) return code def emit_UNKNOWN(self, IR_node): print(IR_node.name) def emit_Add(self, IR_node): code = "{:<15} = {}".format( IR_node.variable_name, ' + '.join('%s' % self.parent_variable_name(IR_node, [idx]) for idx in range(len(IR_node.in_edges)))) return code def emit_DataInput(self, IR_node): assert not IR_node.in_edges shape_str = self._shapeToStr(IR_node.layer.attr["shape"].shape) if 'dtype' in IR_node.layer.attr: dtype_str = "{}, ".format( self.dtype_map[IR_node.layer.attr['dtype'].type]) else: dtype_str = "tf.float32," code = "{:<15} = tf.placeholder({} shape = ({}), name = '{}')".format( IR_node.variable_name, dtype_str, shape_str, IR_node.name) return code def emit_Dropout(self, IR_node): parent = self.IR_graph.get_parent(IR_node.name, [0]) if self.trainable: 
self.add_body( 1, "{:<15} = Dropout(name = '{}', dropout_rate = {})({})".format( IR_node.variable_name, IR_node.name, 1 - IR_node.IR_layer.attr["keep_prob"].f, parent.real_variable_name)) else: IR_node.real_name = parent.real_name def emit_FullyConnected(self, IR_node): if IR_node.name in self.weights_dict and 'weights' in self.weights_dict[ IR_node.name]: kernel_str = "kernel_initializer = tf.constant_initializer(__weights_dict['{}']['weights']), ".format( IR_node.name) else: kernel_str = "" if IR_node.name in self.weights_dict and 'bias' in self.weights_dict[ IR_node.name]: bias_str = "bias_initializer = tf.constant_initializer(__weights_dict['{}']['bias']), ".format( IR_node.name) else: bias_str = "" # check whether flatten operator should be added parent = self.IR_graph.get_parent(IR_node.name, [0]) parent_shape = shape_to_list(parent.get_attr('_output_shapes')[0]) if len(parent_shape) > 2: # flatten is needed self.add_body( 1, "{:<15} = tf.contrib.layers.flatten({})".format( IR_node.variable_name + '_flatten', self.parent_variable_name(IR_node))) code = "{:<15} = tf.layers.dense({}, {}, {}{}use_bias = {})".format( IR_node.variable_name, IR_node.variable_name + '_flatten', IR_node.layer.attr['units'].i, kernel_str, bias_str, IR_node.layer.attr['use_bias'].b) return code else: code = "{:<15} = tf.layers.dense({}, {}, {}{}use_bias = {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.layer.attr['units'].i, kernel_str, bias_str, IR_node.layer.attr['use_bias'].b) return code def emit_UpSampling2D(self, IR_node): scales = IR_node.get_attr('scales') scales = tuple(scales) code = "{:<15} = tf.keras.layers.UpSampling2D(size={})({})".format( IR_node.variable_name, scales, self.parent_variable_name(IR_node)) return code def emit_Flatten(self, IR_node): #self._emit_unary_operation(IR_node, "contrib.layers.flatten") code = "{:<15} = tf.contrib.layers.flatten({})".format( IR_node.variable_name, self.parent_variable_name(IR_node)) return code def emit_Mul(self, IR_node): code = "{:<15} = {}".format( IR_node.variable_name, ' * '.join('%s' % self.parent_variable_name(IR_node, [idx]) for idx in range(len(IR_node.in_edges)))) return code def emit_Const(self, IR_node): if 'dtype' in IR_node.layer.attr: dtype_str = "dtype={}".format( self.dtype_map[IR_node.layer.attr['dtype'].type]) if 'int' in dtype_str: code = "{:<15} = tf.constant({}, {}, shape=(1,))".format( IR_node.variable_name, IR_node.layer.attr['value'].i, dtype_str) else: code = "{:<15} = tf.constant({}, {}, shape=(1,))".format( IR_node.variable_name, IR_node.layer.attr['value'].f, dtype_str) else: dtype_str = "dtype=tf.float32" code = "{:<15} = tf.constant({}, {}, shape=(1,))".format( IR_node.variable_name, IR_node.layer.attr['value'].f, dtype_str) return code def emit_Transpose(self, IR_node): code = "{:<15} = tf.transpose(a = {}, perm = {})".format( IR_node.variable_name, self.parent_variable_name(IR_node, [0]), self.parent_variable_name(IR_node, [1])) return code def emit_Gather(self, IR_node): variable_str = "tf.convert_to_tensor(__weights_dict['{}']['weights'])".format( IR_node.name) code = "{:<15} = tf.gather(params = {}, indices = {}, axis = {})".format( IR_node.variable_name, variable_str, self.parent_variable_name(IR_node), IR_node.get_attr('axis')) return code def emit_Unstack(self, IR_node): code = "{:<15} = tf.unstack(value={}, num={}, axis={})".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('num'), IR_node.get_attr('axis')) return code def emit_Reshape(self, IR_node): 
code = "{:<15} = tf.reshape({}, [{}], '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), ', '.join('%s' % i for i in IR_node.get_attr('shape')), IR_node.name) return code def emit_Sub(self, IR_node): code = "{:<15} = {}".format( IR_node.variable_name, ' - '.join('%s' % self.parent_variable_name(IR_node, [idx]) for idx in range(len(IR_node.in_edges)))) return code def emit_Div(self, IR_node): code = "{:<15} = tf.div({}, {}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node, [1]), IR_node.name) return code def _emit_unary_operation(self, IR_node, op_name): code = "{:<15} = tf.{}({}, name = '{}')".format( IR_node.variable_name, op_name, self.parent_variable_name(IR_node), IR_node.name) return code def emit_Tanh(self, IR_node): code = self._emit_unary_operation(IR_node, 'tanh') return code def emit_Elu(self, IR_node): return self._emit_unary_operation(IR_node, 'nn.elu') def emit_Relu(self, IR_node): return self._emit_unary_operation(IR_node, 'nn.relu') def emit_Relu6(self, IR_node): return self._emit_unary_operation(IR_node, 'nn.relu6') def emit_CRelu(self, IR_node): return self._emit_unary_operation(IR_node, 'nn.crelu') def emit_PRelu(self, IR_node): self.used_layers.add(IR_node.type) code = "{:<15} = prelu({}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.name) return code def emit_LeakyRelu(self, IR_node): self.add_body( 1, "{:<15} = tf.nn.leaky_relu({}, alpha={}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('alpha'), IR_node.name)) def emit_Softmax(self, IR_node): return self._emit_unary_operation(IR_node, 'nn.softmax') def emit_Sigmoid(self, IR_node): code = self._emit_unary_operation(IR_node, 'sigmoid') return code def emit_Embedding(self, IR_node): variable_str = "tf.convert_to_tensor(__weights_dict['{}']['weights'])".format( IR_node.name) code = "{:<15} = tf.nn.embedding_lookup(params = {}, ids = {})".format( IR_node.variable_name, variable_str, self.parent_variable_name(IR_node)) return code def emit_LSTM(self, IR_node): return self.emit_RNNs(IR_node, "LSTM") def emit_GRU(self, IR_node): return self.emit_RNNs(IR_node, "GRU") def emit_Concat(self, IR_node): code = "{:<15} = tf.concat([{}], {}, name = '{}')".format( IR_node.variable_name, ', '.join( self.parent_variable_name(IR_node, [idx]) for idx in range(len(IR_node.in_edges))), IR_node.layer.attr['axis'].i, IR_node.name) return code def emit_BatchNorm(self, IR_node): self.used_layers.add(IR_node.type) code = "{:<15} = batch_normalization({}, variance_epsilon={}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('epsilon'), IR_node.name) return code def emit_Scale(self, IR_node): self.used_layers.add(IR_node.type) code = "{:<15} = scale({}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.name) return code def emit_Pad(self, IR_node): padding = IR_node.get_attr('pads') padding = convert_onnx_pad_to_tf(padding) mode = IR_node.get_attr('mode', 'constant') mode = mode.lower() if mode == 'constant' or mode == 'reflect': mode = mode.upper() elif mode == 'edge': mode = 'SYMMETRIC' else: raise NotImplementedError( "Not support padding mode {}.".format(mode)) code = "{:<15} = tf.pad({}, {}, '{}', name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), padding, mode, IR_node.variable_name) return code def emit_Squeeze(self, IR_node): code = "{:<15} = 
tf.squeeze({}, [{}], name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), ', '.join('%s' % axis for axis in IR_node.layer.attr['axes'].list.i), IR_node.name) return code def emit_ReduceMean(self, IR_node): code = "{:<15} = tf.reduce_mean({}, [{}], {}, name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), ','.join('%s' % i for i in IR_node.get_attr('axes')), IR_node.get_attr('keepdims'), IR_node.name) return code def emit_LRN(self, IR_node): code = "{:<15} = tf.nn.lrn({}, depth_radius={}, bias={}, alpha={}, beta={}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('size') - 1, IR_node.get_attr('bias', 1), IR_node.get_attr('alpha') / (IR_node.get_attr('size') * 2 - 1), IR_node.get_attr('beta'), IR_node.name) return code def emit_SeparableConv(self, IR_node): self.used_layers.add(IR_node.type) strides_str = ', '.join('%s' % i for i in IR_node.get_attr('strides')) input_node, padding = self._defuse_padding(IR_node) code = "{:<15} = separable_convolution({}, strides = [{}], padding = '{}', name = '{}')".format( IR_node.variable_name, input_node, strides_str, padding, IR_node.name) return code def emit_DepthwiseConv(self, IR_node): self.used_layers.add(IR_node.type) strides_str = ', '.join('%s' % i for i in IR_node.layer.attr['strides'].list.i) input_node, padding = self._defuse_padding(IR_node) code = "{:<15} = depthwise_convolution({}, strides = [{}], padding = '{}', name = '{}')".format( IR_node.variable_name, input_node, strides_str, padding, IR_node.name) return code def emit_Crop(self, IR_node): border = IR_node.get_attr('border') assert len(border) == 4 output_shape = IR_node.get_attr('_output_shapes')[0] output_shape = shape_to_list(output_shape) code = "{:<15} = tf.image.crop_to_bounding_box({}, offset_height={}, offset_width={}, target_height={}, target_width={})".format( IR_node.variable_name, self.parent_variable_name(IR_node), border[0], border[1], output_shape[1], output_shape[2]) return code def emit_ConvTranspose(self, IR_node): self.used_layers.add(IR_node.type) output_shape = [1] + shape_to_list( IR_node.get_attr('_output_shapes')[0])[1:] input_node, padding = self._defuse_padding(IR_node) code = "{:<15} = convolution_transpose({}, output_shape={}, strides={}, padding='{}', name='{}')".format( IR_node.variable_name, input_node, output_shape, IR_node.get_attr('strides'), padding, IR_node.name) return code def emit_Slice(self, IR_node): extra_str = "" if IR_node.get_attr('begin_mask'): extra_str += ", begin_mask={}".format( IR_node.get_attr('begin_mask')) if IR_node.get_attr('end_mask') != None: extra_str += ", end_mask={}".format(IR_node.get_attr('end_mask')) if IR_node.get_attr('shrink_axis_mask') != None: extra_str += ", shrink_axis_mask={}".format( IR_node.get_attr('shrink_axis_mask')) if IR_node.get_attr('new_axis_mask') != None: extra_str += ", new_axis_mask={}".format( IR_node.get_attr('new_axis_mask')) if IR_node.get_attr('starts') != None: starts = IR_node.get_attr('starts') else: starts = self.parent_variable_name(IR_node, [1]) if IR_node.get_attr('ends') != None: ends = IR_node.get_attr('ends') else: ends = self.parent_variable_name(IR_node, [2]) if IR_node.get_attr('strides') != None: strides = IR_node.get_attr('strides') else: strides = self.parent_variable_name(IR_node, [3]) code = "{:<15} = tf.strided_slice({}, {}, {}, {} {}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), starts, ends, strides, extra_str, IR_node.name) return code 
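# Illustrative note (a sketch, not used by the emitter): emit_Slice above
# pastes the IR 'starts'/'ends'/'strides' attributes, or the node's extra
# Const inputs when an attribute is missing, straight into a tf.strided_slice
# call. With made-up values, the generated line is equivalent to:
@staticmethod
def _demo_strided_slice(x):
    import tensorflow as tf
    # shrink_axis_mask=1 drops axis 0, so a (2, 3, 4) input yields shape (3, 4)
    return tf.strided_slice(x, [0, 0, 0], [1, 3, 4], [1, 1, 1], shrink_axis_mask=1)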
def emit_Shape(self, IR_node): code = "{:<15} = tf.shape({}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.name) return code def emit_Pack(self, IR_node): code = "{:<15} = tf.stack({}, axis={}, name='{}')".format( IR_node.variable_name, '[' + ','.join('%s' % self.parent_variable_name(IR_node, [idx]) for idx in range(len(IR_node.in_edges))) + ']', IR_node.get_attr('axis'), IR_node.name) return code def emit_Split(self, IR_node): code = "{:<15} = tf.split({}, {}, {}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('split'), IR_node.get_attr('axis'), IR_node.name) return code def emit_Unsqueeze(self, IR_node): code = "{:<15} = tf.expand_dims({}, axis={}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('axes')[0], IR_node.name) return code def emit_Fill(self, IR_node): code = "{:<15} = tf.fill({}, {}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('value'), IR_node.name) return code def emit_Maxmum(self, IR_node): # 'Maxmum' spelling matches the IR op type used for dispatch code = "{:<15} = tf.maximum({}, {}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node, [1]), IR_node.name) return code def emit_Minimum(self, IR_node): code = "{:<15} = tf.minimum({}, {}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node, [1]), IR_node.name) return code def emit_Scope(self, IR_node): input_vars = [ self.parent_variable_name(IR_node, [idx]) for idx in range(len(IR_node.in_edges)) ] input_vars.append('__weights_dict') code = "{:<15} = _{}({})".format(IR_node.real_variable_name, IR_node.pattern, ', '.join(input_vars)) self._gen_scope_code(IR_node) return code def _gen_scope_code(self, scope_node): def _scope_func(scope_name, params, code, return_var): code = """ def _{}({}): {} return {} """.format(scope_name, params, code, ', '.join(return_var)) return code if not self.layers_codes.get(scope_node.pattern, None): body_code = str() for node_name in scope_node.topology_list: node = self.IR_graph.get_node(node_name) node_type = node.type if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) line = func(node) if line != None: body_code += " " + line + '\n' else: print( "TensorflowEmitter has not supported operator [%s]." % (node_type)) self.emit_UNKNOWN(node) # param_code does not need parameter slice.
input_params = scope_node.input_params input_params.append("__weights_dict") param_code = ', '.join(input_params) function_code = _scope_func(scope_node.pattern, param_code, body_code, scope_node.return_variables) self.layers_codes[scope_node.pattern] = function_code def _layer_Conv(self): self.add_body( 0, """ def convolution(input, name, group, **kwargs): w = tf.Variable(__weights_dict[name]['weights'], trainable=is_train, name=name + "_weight") if group == 1: layer = tf.nn.convolution(input, w, name=name, **kwargs) else: weight_groups = tf.split(w, num_or_size_splits=group, axis=-1) xs = tf.split(input, num_or_size_splits=group, axis=-1) convolved = [tf.nn.convolution(x, weight, name=name, **kwargs) for (x, weight) in zip(xs, weight_groups)] layer = tf.concat(convolved, axis=-1) if 'bias' in __weights_dict[name]: b = tf.Variable(__weights_dict[name]['bias'], trainable=is_train, name=name + "_bias") layer = layer + b return layer""") def _layer_PRelu(self): self.add_body( 0, """ def prelu(input, name): gamma = tf.Variable(__weights_dict[name]['gamma'], name=name + "_gamma", trainable=is_train) return tf.maximum(0.0, input) + gamma * tf.minimum(0.0, input) """) def _layer_BatchNorm(self): self.add_body( 0, """ def batch_normalization(input, name, **kwargs): mean = tf.Variable(__weights_dict[name]['mean'], name = name + "_mean", trainable = is_train) variance = tf.Variable(__weights_dict[name]['var'], name = name + "_var", trainable = is_train) offset = tf.Variable(__weights_dict[name]['bias'], name = name + "_bias", trainable = is_train) if 'bias' in __weights_dict[name] else None scale = tf.Variable(__weights_dict[name]['scale'], name = name + "_scale", trainable = is_train) if 'scale' in __weights_dict[name] else None return tf.nn.batch_normalization(input, mean, variance, offset, scale, name = name, **kwargs) """) def _layer_Scale(self): self.add_body( 0, """ def scale(input, name, **kwargs): mean = tf.Variable(__weights_dict[name]['scale_mean'], name = name + "_mean", trainable = is_train) variance = tf.Variable(__weights_dict[name]['scale_var'], name = name + "_var", trainable = is_train) offset = tf.Variable(__weights_dict[name]['bias'], name = name + "_bias", trainable = is_train) if 'bias' in __weights_dict[name] else None scale = tf.Variable(__weights_dict[name]['scale'], name = name + "_scale", trainable = is_train) if 'scale' in __weights_dict[name] else None return tf.nn.batch_normalization(input, mean, variance, offset, scale, variance_epsilon = 0, name = name) """) def _layer_SeparableConv(self): self.add_body( 0, """ def separable_convolution(input, name, **kwargs): depthwise = tf.Variable(__weights_dict[name]['depthwise_filter'], trainable = is_train, name = name + "_df") pointwise = tf.Variable(__weights_dict[name]['pointwise_filter'], trainable = is_train, name = name + "_pf") layer = tf.nn.separable_conv2d(input, depthwise, pointwise, **kwargs) if 'bias' in __weights_dict[name]: b = tf.Variable(__weights_dict[name]['bias'], trainable = is_train, name = name + "_bias") layer = layer + b return layer""") def _layer_DepthwiseConv(self): self.add_body( 0, """ def depthwise_convolution(input, name, **kwargs): depthwise = tf.Variable(__weights_dict[name]['weights'], trainable = is_train, name = name + "_df") layer = tf.nn.depthwise_conv2d(input, depthwise, **kwargs) if 'bias' in __weights_dict[name]: b = tf.Variable(__weights_dict[name]['bias'], trainable = is_train, name = name + "_bias") layer = layer + b return layer""") def _layer_ConvTranspose(self): self.add_body( 0, 
""" def convolution_transpose(input, name, **kwargs): w = tf.Variable(__weights_dict[name]['weights'], trainable=is_train, name=name + "_weight") dim = __weights_dict[name]['weights'].ndim - 2 if dim == 2: layer = tf.nn.conv2d_transpose(input, w, **kwargs) elif dim == 3: layer = tf.nn.conv3d_transpose(input, w, **kwargs) else: raise ValueError("Error dim number {} in ConvTranspose".format(dim)) if 'bias' in __weights_dict[name]: b = tf.Variable(__weights_dict[name]['bias'], trainable=is_train, name=name + "_bias") layer = layer + b return layer""")
class CoreMLEmitter(Emitter): def __init__(self, architecture, weight): super(CoreMLEmitter, self).__init__() if os.path.exists(architecture) == False: raise ValueError("IR architecture file [{}] is not found.".format(architecture)) else: self.IR_graph = IRGraph(architecture) self.IR_graph.build() if os.path.exists(weight) == False: raise ValueError("IR weight file [{}] is not found.".format(weight)) else: self._load_weights(weight) def _get_inout(self): input_features = [] output_features = [] for input_node in self.IR_graph.input_layers: shape = shape_to_list(self.IR_graph.get_node(input_node).get_attr('shape')) shape = _infer_coreml_input_shape(shape) input_features.append((str(input_node), shape)) print("CoreML Model Input Layer: [{}] {}".format(input_node, shape)) for output_node in self.IR_graph.output_layers: node = self.IR_graph.get_node(output_node) node.out_edges.append(node.name) shape = node.get_attr('_output_shapes') if shape: shape = shape_to_list(shape[0]) else: shape = [1] if shape == []: pre_output_node = self.IR_graph.get_node(node.in_edges[0]) pre_output_node.out_edges.append(pre_output_node.name) shape = pre_output_node.get_attr('_output_shapes') shape = shape_to_list(shape[0]) # else: shape = _infer_coreml_input_shape(shape) output_features.append((str(node.in_edges[0]), shape)) print("CoreML Model Output Layer: [{}] {}".format(output_node, shape)) return list(input_features), list(output_features) def _connect_coreml_layers(self): for layer in self.builder.nn_spec.layers: # for i, in_node in enumerate(layer.input): # layer.input[i] = self.IR_graph.get_node(in_node).real_name for i, out_node in enumerate(layer.output): layer.output[i] = self.IR_graph.get_node(out_node).real_name def gen_model(self, input_names=None, output_names=None, image_input_names=None, is_bgr=False, red_bias=0.0, green_bias=0.0, blue_bias=0.0, gray_bias=0.0, image_scale=1.0, class_labels=None, predicted_feature_name=None, predicted_probabilities_output=''): input_features, output_features = self._get_inout() # assert False is_classifier = class_labels is not None mode = 'classifier' if is_classifier else None self.builder = _NeuralNetworkBuilder(input_features, output_features, mode=mode) for layer in self.IR_graph.topological_sort: current_node = self.IR_graph.get_node(layer) print("Converting layer {}({})".format(current_node.name, current_node.type)) node_type = current_node.type if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) func(current_node) else: print("CoreMLEmitter has not supported operator [%s]." 
% (node_type)) self.emit_UNKNOWN(current_node) assert False # self._connect_coreml_layers() # Add classifier classes (if applicable) if is_classifier: classes_in = class_labels if isinstance(classes_in, _string_types): if not os.path.isfile(classes_in): raise ValueError("Path to class labels [{}] does not exist.".format(classes_in)) with open(classes_in, 'r') as f: classes = f.read() classes = classes.splitlines() elif type(classes_in) is list: # list[int or str] classes = classes_in else: raise ValueError('Class labels must be a list of integers / strings, or a file path') if predicted_feature_name is not None: self.builder.set_class_labels(classes, predicted_feature_name = predicted_feature_name, prediction_blob = predicted_probabilities_output) else: self.builder.set_class_labels(classes) # Set pre-processing parameters self.builder.set_pre_processing_parameters( image_input_names=[input_features[0][0]], #image_input_names, is_bgr=is_bgr, red_bias=red_bias, green_bias=green_bias, blue_bias=blue_bias, gray_bias=gray_bias, image_scale=image_scale) # Return the protobuf spec # model = _MLModel(self.builder.spec) print(self.builder.spec.description) return self.builder.spec, input_features, output_features @staticmethod def _get_padding(IR_node): auto_pad = IR_node.get_attr('auto_pad') if auto_pad is not None: if auto_pad == 'VALID': return auto_pad else: return 'SAME' pads = IR_node.get_attr('pads') if is_valid_padding(pads): return 'VALID' else: return 'SAME' def _emit_merge(self, IR_node, func): """ Convert concat layer to coreml. """ # Get input and output names input_names = [self.IR_graph.get_node(inp).real_name for inp in IR_node.in_edges] self.builder.add_elementwise(name=IR_node.name, input_names=input_names, output_name=IR_node.name, mode=func) def emit_Conv(self, IR_node): """ Convert convolution layer to coreml. """ has_bias = IR_node.get_attr('use_bias', False) is_deconv = False # TODO: Deconv # Get the weights.
output_channels = IR_node.get_attr('kernel_shape')[-1] # Dimensions and weights if is_deconv: raise NotImplementedError() height, width, n_filters, channels = weightList[0].shape W = weightList[0].transpose([0,1,3,2]) output_shape = output_blob_shape[:-1] else: W = self.weights_dict[IR_node.name]['weights'] height, width, channels, n_filters = W.shape output_shape = None b = self.weights_dict[IR_node.name]['bias'] if has_bias else None stride_height, stride_width = IR_node.get_attr('strides')[1], IR_node.get_attr('strides')[2] # Dilations dilations = IR_node.get_attr('dilations', [1, 1]) if is_deconv and not dilations == [1, 1]: raise ValueError("Unsupported non-unity dilation for Deconvolution layer") groups = IR_node.get_attr('groups', 1) kernel_channels = channels padding = self._get_padding(IR_node).lower() input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name # print(self.IR_graph.get_parent(IR_node.name, [0]).layer) # print(input_name) # print(IR_node.real_name) self.builder.add_convolution(name=IR_node.real_name, kernel_channels=kernel_channels, output_channels=output_channels, height=height, width=width, stride_height=stride_height, stride_width=stride_width, border_mode=padding, groups=groups, W=W, b=b, has_bias=has_bias, is_deconv=is_deconv, output_shape=output_shape, input_name=input_name, output_name=IR_node.real_name, dilation_factors=dilations) def emit_DepthwiseConv(self, IR_node): # depth-wise convolution input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name kernel_channels = 1 is_deconv = False has_bias = IR_node.get_attr('use_bias', False) depth_multiplier = IR_node.get_attr('kernel_shape')[-1] W = self.weights_dict[IR_node.name]['weights'] height, width, channels, n_filters = W.shape output_shape = None W = np.reshape(W,(height, width,1,channels * depth_multiplier)) b = self.weights_dict[IR_node.name]['bias'] if has_bias else None # Dilations dilations = IR_node.get_attr('dilations', [1, 1]) padding = self._get_padding(IR_node).lower() output_channels = W.shape[-1] groups = W.shape[-1] stride_height, stride_width = IR_node.get_attr('strides')[1], IR_node.get_attr('strides')[2] self.builder.add_convolution(name=IR_node.real_name, kernel_channels=kernel_channels, output_channels=output_channels, height=height, width=width, stride_height=stride_height, stride_width=stride_width, border_mode=padding, groups=groups, W=W, b=b, has_bias=has_bias, is_deconv=is_deconv, output_shape=output_shape, input_name=input_name, output_name=IR_node.real_name, dilation_factors=dilations) def emit_Pool(self, IR_node): """ Convert pooling layer to coreml. 
""" # Get input and output names input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name # Pooling layer type pooling_type = IR_node.get_attr('pooling_type') if pooling_type == 'MAX': layer_type_str = 'MAX' elif pooling_type == 'AVG': layer_type_str = 'AVERAGE' else: raise TypeError("Pooling type %s not supported" % pooling_type) # if it's global, set the global flag global_pooling = IR_node.get_attr('global_pooling', False) dim = len(IR_node.get_attr('strides')) - 2 if global_pooling: if dim == 2: height, width = (0, 0) stride_height = stride_width = 0 padding_type = 'VALID' elif dim == 1: raise NotImplementedError() global_pooling = False _, width, channels = keras_layer.input_shape height = 1 stride_height, stride_width = height, width padding_type = 'VALID' else: raise NotImplementedError() else: height, width = tuple(IR_node.get_attr('kernel_shape')[1:-1]) stride_height, stride_width = tuple(IR_node.get_attr('strides')[1:-1]) # Padding padding_type = self._get_padding(IR_node) self.builder.add_pooling(name=IR_node.name, height=height, width=width, stride_height=stride_height, stride_width=stride_width, layer_type=layer_type_str, padding_type=padding_type, input_name=input_name, output_name=IR_node.name, exclude_pad_area=True, is_global=global_pooling) def emit_UNKNOWN(self, IR_node): print(IR_node.name) def emit_Crop(self, IR_node): input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name output_name=IR_node.real_name is_1d = False border = IR_node.get_attr('border') if is_1d: raise ValueError("Unrecognized padding option: %s" % (str(border))) else: if type(border) is int: top = left = bottom = right = border elif type(border) is list: top, left = border[1], border [0] bottom, right = border[2], border [3] else: raise ValueError("Unrecognized padding option: %s" % (str(border))) # Now add the layer self.builder.add_crop(name = IR_node.name, left = left, right=right, top=top, bottom=bottom, offset = [0,0], input_names = [input_name], output_name=output_name ) # assert False def emit_DataInput(self, IR_node): """ Layers that can be skipped. """ return def emit_Dropout(self, IR_node): """ Layers that can be skipped (because they are train time only. """ IR_node.real_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name def emit_FullyConnected(self, IR_node): """ Convert a dense layer to coreml. """ # Get input and output names input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name output_name = IR_node.out_edges[0] has_bias = IR_node.get_attr('use_bias') # Get the weights from keras W = self.weights_dict[IR_node.name]['weights'].T Wb = self.weights_dict[IR_node.name]['bias'].T if has_bias else None output_channels, input_channels = W.shape self.builder.add_inner_product(name=IR_node.name, W=W, b=Wb, input_channels=input_channels, output_channels=output_channels, has_bias=has_bias, input_name=input_name, output_name=IR_node.name) def emit_Flatten(self, IR_node): """ Convert a flatten layer from keras to coreml. """ # Get input and output names input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name output_name = IR_node.out_edges[0] """ # blob_order == 0 if the input blob needs not be rearranged # blob_order == 1 if the input blob needs to be rearranged blob_order = 0 # using keras_layer.input.shape have a "?" 
(Dimension[None] at the front),
    # making a 3D tensor with an unknown batch size 4D:
    if len(keras_layer.input.shape) == 4:
        blob_order = 1
    """
        self.builder.add_flatten(name=IR_node.name, mode=1, input_name=input_name, output_name=IR_node.name)

    def emit_Reshape(self, IR_node):
        # helper name typo ('ShapetrToTuple') fixed; behaviour unchanged
        def shape_str_to_tuple(string, batch_none=False):
            ls = [int(item) for item in string.split(', ')]
            if batch_none:
                ls.insert(0, None)
            return tuple(ls)

        last_node = self.IR_graph.get_node(IR_node.in_edges[0]).layer
        input_shape_dims = last_node.attr["_output_shapes"].list.shape
        target_shape_dims = IR_node.IR_layer.attr["_output_shapes"].list.shape
        input_shape = shape_str_to_tuple(IRGraph.shapeToStr(input_shape_dims[0]), True)
        target_shape = shape_str_to_tuple(IRGraph.shapeToStr(target_shape_dims[0]))

        def get_coreml_target_shape(target_shape):
            if len(target_shape) == 1:    # (D,)
                coreml_shape = (1, target_shape[0], 1, 1)
            elif len(target_shape) == 2:  # (S, D)
                coreml_shape = target_shape + (1, 1)
            elif len(target_shape) == 3:  # (H, W, C)
                coreml_shape = (1, target_shape[2], target_shape[0], target_shape[1])
            else:
                coreml_shape = None
            return coreml_shape

        def get_mode(input_shape, target_shape):
            in_shape = input_shape[1:]
            if len(in_shape) == 3 or len(target_shape) == 3:
                return 1
            else:
                return 0

        input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name
        new_shape = get_coreml_target_shape(target_shape)
        mode = get_mode(input_shape, target_shape)
        self.builder.add_reshape(
            name=IR_node.real_name,
            input_name=input_name,
            output_name=IR_node.real_name,
            target_shape=new_shape,
            mode=mode)

    def emit_Tanh(self, IR_node):
        # Tanh is not supported by this emitter yet; the code below is unreachable.
        assert False
        code = "{:<15} = Activation(name = '{}', activation = tanh)({})".format(
            IR_node.replace_scope(IR_node.name),
            IR_node.name,
            IR_node.replace_scope(IR_node.in_edges[0]))
        return code

    def _emit_activation(self, IR_node, act, params=None):
        # Get input and output names
        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name
        output_name = IR_node.real_name
        self.builder.add_activation(name=IR_node.real_name,
                                    non_linearity=act,
                                    input_name=input_name,
                                    output_name=output_name,
                                    params=params)

    def emit_Relu(self, IR_node):
        self._emit_activation(IR_node, 'RELU')

    def emit_PRelu(self, IR_node):
        self._emit_activation(IR_node, 'PRELU', self.weights_dict[IR_node.name]['gamma'])

    def emit_Softmax(self, IR_node):
        # Get input and output names
        input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name
        output_name = IR_node.out_edges[0]
        self.builder.add_softmax(name=IR_node.name,
                                 input_name=input_name,
                                 output_name=IR_node.name)

    def emit_Sigmoid(self, IR_node):
        # Sigmoid is not supported by this emitter yet; the code below is unreachable.
        assert False
        code = "{:<15} = Activation(name = '{}', activation = 'sigmoid')({})".format(
            IR_node.replace_scope(IR_node.name),
            IR_node.name,
            IR_node.replace_scope(IR_node.in_edges[0]))
        return code

    def emit_Relu6(self, IR_node):
        layer = IR_node.real_name
        input_name, output_name = (IR_node.IR_layer.input[0], IR_node.IR_layer.name)
        relu_output_name = output_name + '_relu'
        self.builder.add_activation(layer, 'RELU', input_name, relu_output_name)
        # negate it
        neg_output_name = relu_output_name + '_neg'
        self.builder.add_activation(layer + '__neg__', 'LINEAR', relu_output_name,
                                    neg_output_name, [-1.0, 0])
        # apply threshold
        clip_output_name = relu_output_name + '_clip'
        self.builder.add_unary(layer + '__clip__', neg_output_name, clip_output_name,
                               'threshold', alpha=-6.0)
        # negate it back
        self.builder.add_activation(layer + '_neg2', 'LINEAR', clip_output_name,
                                    output_name, [-1.0, 0])
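    # emit_Relu6 composes relu6 from primitives CoreML does expose, using the
    # identity relu6(x) = min(relu(x), 6) = -max(-relu(x), -6); the 'threshold'
    # unary computes max(x, alpha), hence alpha = -6.0 after the first negation.
    # Worked example: x = 10 -> relu: 10 -> negate: -10 -> threshold(-6): -6 -> negate: 6,
    # while x = 3 -> relu: 3 -> negate: -3 -> threshold(-6): -3 -> negate: 3.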
def emit_Gather(self, IR_node): raise NotImplementedError() W = self.weights_dict[IR_node.name]['weights'] if W.ndim == 2: vocab_size = W.shape[0] output_channels = W.shape[1] builder.add_embedding( name=IR_node.real_name, W = W, b = None, input_dim = vocab_size, output_channels = output_channels, has_bias=False, input_name=input_name, output_name=IR_node.real_name) else: raise NotImplementedError() def emit_RNNs(self, IR_node, func): assert False # for Keras if "dropout" in IR_node.IR_layer.attr: dropout_str = ",dropout = {}, recurrent_dropout = {}".format( IR_node.IR_layer.attr['dropout'].f, IR_node.IR_layer.attr['recurrent_dropout'].f) else: dropout_str = "" code = "{:<15} = {}(units = {}, use_bias = {} {})({})".format( IR_node.name, func, IR_node.IR_layer.attr['units'].i, IR_node.IR_layer.attr['use_bias'].b, dropout_str, IR_node.in_edges[0]) return code def emit_LSTM(self, IR_node): return self.emit_RNNs(IR_node, "LSTM") def emit_GRU(self, IR_node): return self.emit_RNNs(IR_node, "GRU") def emit_Add(self, IR_node): self._emit_merge(IR_node, 'ADD') def emit_Concat(self, IR_node): self._emit_merge(IR_node, "CONCAT") def emit_BatchNorm(self, IR_node): """ Convert a Batch Normalization layer. """ # Get input and output names input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name # print(input_name) # print(IR_node.real_name) axis = IR_node.get_attr('axis', -1) nb_channels = IR_node.get_attr('_output_shapes')[0].dim[axis].size # Set parameters # Parameter arrangement in Keras: gamma, beta, mean, variance weights = self.weights_dict[IR_node.name] mean = weights['mean'] std = weights['var'] gamma = weights.get('scale', np.ones(mean.shape)) beta = weights.get('bias', np.zeros(mean.shape)) # compute adjusted parameters variance = std * std f = 1.0 / np.sqrt(std + IR_node.get_attr('epsilon')) gamma1 = gamma*f beta1 = beta - gamma*mean*f mean[:] = 0.0 #mean variance[:] = 1.0 - .00001 #stddev self.builder.add_batchnorm( name=IR_node.real_name, channels = nb_channels, gamma = gamma1, beta = beta1, mean = mean, variance = variance, input_name = input_name, output_name=IR_node.real_name) # assert False def emit_Pad(self, IR_node): input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name output_name=IR_node.real_name is_1d = False padding = IR_node.get_attr('pads') if is_1d: raise ValueError("Unrecognized padding option: %s" % (str(padding))) else: if type(padding) is int: top = left = bottom = right = padding elif type(padding) is list: top, left = padding[1], padding [2] bottom, right = padding[5], padding [6] else: raise ValueError("Unrecognized padding option: %s" % (str(padding))) # Now add the layer self.builder.add_padding(name = IR_node.name, left = left, right=right, top=top, bottom=bottom, value = 0, input_name = input_name, output_name=output_name ) def emit_Squeeze(self, IR_node): self.emit_Flatten(IR_node) # if IR_node.name != "MMdnn_Output" : # self.emit_Flatten(IR_node) # self.emit_Reshape(IR_node) def emit_SeparableConv(self, IR_node): input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name output_name = output_name=IR_node.real_name assert len(IR_node.get_attr("strides")) == 4 strides = IR_node.get_attr('strides') stride_height, stride_width = (strides[1], strides[2]) # Get the weights W0 = self.weights_dict[IR_node.name]['depthwise_filter'] W1 = self.weights_dict[IR_node.name]['pointwise_filter'] padding = IR_node.get_attr('auto_pad').split('_')[0].lower() has_bias = IR_node.get_attr('use_bias') b = self.weights_dict[IR_node.name]['bias'] if has_bias 
else None output_blob_shape = IR_node.get_attr('_output_shapes') shape = shape_to_list(output_blob_shape[0]) output_channels = shape[-1] height, width, input_channels, depth_mult = W0.shape W0 = np.reshape(W0, (height, width, 1, input_channels * depth_mult)) intermediate_name = input_name + '_intermin_' self.builder.add_convolution(name = IR_node.name + '_step_1', kernel_channels = 1, output_channels = input_channels * depth_mult, height = height, width = width, stride_height = stride_height, stride_width = stride_width, border_mode = padding, groups = input_channels, W = W0, b = None, has_bias = False, is_deconv = False, output_shape = None, input_name = input_name, output_name = intermediate_name, dilation_factors = [1,1]) self.builder.add_convolution(name = IR_node.name + '_step_2', kernel_channels = input_channels * depth_mult, output_channels = output_channels, height = 1, width = 1, stride_height = 1, stride_width = 1, border_mode = padding, groups = 1, W = W1, b = b, has_bias = has_bias, is_deconv = False, output_shape = None, input_name = intermediate_name, output_name = output_name, dilation_factors = [1,1])
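# A minimal standalone sketch (illustrative, not part of the emitter) of the
# batch-norm folding done in CoreMLEmitter.emit_BatchNorm above: gamma and beta
# are rescaled so that the stored mean and variance can be reset to 0 and ~1
# while the layer output stays numerically unchanged.
def _demo_fold_batchnorm():
    import numpy as np
    x = np.linspace(-1.0, 1.0, 8)
    gamma, beta = np.full(8, 0.5), np.full(8, 0.1)
    mean, var, epsilon = np.full(8, 0.2), np.full(8, 0.9), 1e-5
    f = 1.0 / np.sqrt(var + epsilon)
    gamma1 = gamma * f                 # folded scale
    beta1 = beta - gamma * mean * f    # folded bias
    y_ref = gamma * (x - mean) * f + beta
    y_folded = gamma1 * x + beta1      # mean = 0, variance ~= 1 in the emitted layer
    assert np.allclose(y_ref, y_folded)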
class TensorflowEmitter(Emitter): dtype_map = { graph_pb2.DT_FLOAT16 : "tf.float16", graph_pb2.DT_FLOAT32 : "tf.float32", graph_pb2.DT_FLOAT64 : "tf.float64", graph_pb2.DT_INT16 : "tf.int16", graph_pb2.DT_INT32 : "tf.int32", graph_pb2.DT_INT64 : "tf.int64", graph_pb2.DT_UINT8 : "tf.uint8", graph_pb2.DT_UINT16 : "tf.uint16" } @property def header_code(self): return """import tensorflow as tf __weights_dict = dict() is_train = {} def load_weights(weight_file): import numpy as np if weight_file == None: return try: weights_dict = np.load(weight_file).item() except: weights_dict = np.load(weight_file, encoding='bytes').item() return weights_dict def KitModel(weight_file = None): global __weights_dict __weights_dict = load_weights(weight_file) """.format(self.trainable) def __init__(self, model): super(TensorflowEmitter, self).__init__() from six import string_types as _string_types if isinstance(model, _string_types): network_path = model else: network_path = model[0] self._load_weights(model[1]) self.IR_graph = IRGraph(network_path) super(TensorflowEmitter, self)._build() def gen_codes(self, phase): self.trainable = (phase == 'train') self.add_body(0, self.header_code) for layer in self.IR_graph.topological_sort: current_node = self.IR_graph.get_node(layer) node_type = current_node.type if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) func(current_node) else: print("TensorflowEmitter has not supported operator [%s]." % (node_type)) self.emit_UNKNOWN(current_node) self.add_body(1, "return {}, {}\n".format( ', '.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.input_layers]), ', '.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers]))) self.add_body(0, "") for i in self.used_layers: func = getattr(self, "_layer_" + i) func() return self.body_codes @staticmethod def _shapeToStr(shapes): ret = [dim.size if dim.size != -1 else 'None' for dim in shapes.dim] return ', '.join('%s' % i for i in ret) def emit_Convolution(self, IR_node): self.used_layers.add(IR_node.type) strides_str = ', '.join('%s' % i for i in IR_node.layer.attr['strides'].list.i[1:-1]) code = "{:<15} = convolution({}, strides = [{}], padding = '{}', name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), strides_str, IR_node.layer.attr['padding'].s.decode('utf-8'), IR_node.name) self.add_body(1, code) def emit_Pool(self, IR_node): op = 'max_pool' if IR_node.layer.attr['pooling_type'].s == b'MAX' else 'avg_pool' arrlen = len(IR_node.layer.attr['strides'].list.i) dim_str = '3d' if arrlen == 5 else "" if IR_node.layer.attr['global_pooling'].b: self.add_body(1, "{:<15} = tf.nn.{}{}({}, [1] + {}.get_shape().as_list()[1:-1] + [1], strides = [1] * {}, padding = 'VALID', name = '{}')".format( IR_node.variable_name, op, dim_str, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node), arrlen, IR_node.name)) else: kernel_shape_str = ', '.join('%s' % i for i in IR_node.layer.attr['window_shape'].list.i) strides_str = ', '.join('%s' % i for i in IR_node.layer.attr['strides'].list.i) self.add_body(1, "{:<15} = tf.nn.{}{}({}, [{}], [{}], padding = '{}', name = '{}')".format( IR_node.variable_name, op, dim_str, self.parent_variable_name(IR_node), kernel_shape_str, strides_str, IR_node.layer.attr['padding'].s.decode('utf-8'), IR_node.name)) def emit_UNKNOWN(self, IR_node): print(IR_node.name) def emit_DataInput(self, IR_node): assert not IR_node.in_edges shape_str = 
self._shapeToStr(IR_node.layer.attr["shape"].shape) if 'dtype' in IR_node.layer.attr: dtype_str = "{}, ".format(self.dtype_map[IR_node.layer.attr['dtype'].type]) else: dtype_str = "tf.float32," code = "{:<15} = tf.placeholder({} shape = ({}), name = '{}')".format( IR_node.variable_name, dtype_str, shape_str, IR_node.name ) self.add_body(1, code) def emit_Dropout(self, IR_node): parent = self.IR_graph.get_parent(IR_node.name, [0]) if self.trainable: self.add_body(1, "{:<15} = Dropout(name = '{}', dropout_rate = {})({})".format( IR_node.variable_name, IR_node.name, 1 - IR_node.IR_layer.attr["keep_prob"].f, parent.real_variable_name)) else: IR_node.real_name = parent.real_name def emit_FullyConnected(self, IR_node): if IR_node.name in self.weights_dict and 'weights' in self.weights_dict[IR_node.name]: kernel_str = "kernel_initializer = tf.constant_initializer(__weights_dict['{}']['weights']), ".format(IR_node.name) else: kernel_str = "" if IR_node.name in self.weights_dict and 'bias' in self.weights_dict[IR_node.name]: bias_str = "bias_initializer = tf.constant_initializer(__weights_dict['{}']['bias']), ".format(IR_node.name) else: bias_str = "" code = "{:<15} = tf.layers.dense({}, {}, {}{}use_bias = {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.layer.attr['units'].i, kernel_str, bias_str, IR_node.layer.attr['use_bias'].b) self.add_body(1, code) def emit_Flatten(self, IR_node): #self._emit_unary_operation(IR_node, "contrib.layers.flatten") self.add_body(1, "{:<15} = tf.contrib.layers.flatten({})".format( IR_node.variable_name, self.parent_variable_name(IR_node))) def emit_Reshape(self, IR_node): self.add_body(1, "{:<15} = tf.reshape({}, [{}], '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), ', '.join('%s' % i for i in IR_node.layer.attr["shape"].list.i), IR_node.name)) def _emit_unary_operation(self, IR_node, op_name): self.add_body(1, "{:<15} = tf.{}({}, name = '{}')".format( IR_node.variable_name, op_name, self.parent_variable_name(IR_node), IR_node.name)) def emit_Tanh(self, IR_node): self._emit_unary_operation(IR_node, 'tanh') def emit_Elu(self, IR_node): self._emit_unary_operation(IR_node, 'nn.elu') def emit_Relu(self, IR_node): self._emit_unary_operation(IR_node, 'nn.relu') def emit_Relu6(self, IR_node): self._emit_unary_operation(IR_node, 'nn.relu6') def emit_CRelu(self, IR_node): self._emit_unary_operation(IR_node, 'nn.crelu') def emit_Softmax(self, IR_node): self._emit_unary_operation(IR_node, 'nn.softmax') def emit_Sigmoid(self, IR_node): self._emit_unary_operation(IR_node, 'sigmoid') def emit_Embedding(self, IR_node): raise NotImplementedError() ret = "{:<15} = Embedding(input_dim = {}, output_dim = {}, mask_zero = {})({})".format( IR_node.name, IR_node.IR_layer.attr['input_dim'].i, IR_node.IR_layer.attr['output_dim'].i, IR_node.IR_layer.attr['mask_zero'].b, IR_node.in_edges[0]) return ret def emit_RNNs(self, IR_node, func): assert False def emit_LSTM(self, IR_node): return self.emit_RNNs(IR_node, "LSTM") def emit_GRU(self, IR_node): return self.emit_RNNs(IR_node, "GRU") def emit_Add(self, IR_node): self.add_body(1, "{:<15} = {}".format( IR_node.variable_name, ' +'.join('%s' % self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges))) def emit_Concat(self, IR_node): self.add_body(1, "{:<15} = tf.concat([{}], {}, name = '{}')".format( IR_node.variable_name, ', '.join(self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges), IR_node.layer.attr['axis'].i, IR_node.name)) def emit_BatchNorm(self, 
IR_node): self.used_layers.add(IR_node.type) self.add_body(1, "{:<15} = batch_normalization({}, variance_epsilon = {}, name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.layer.attr['epsilon'].f, IR_node.name)) def emit_Pad(self, IR_node): padding_str = ', '.join('[%s, %s]' % (IR_node.layer.attr['paddings'].list.i[idx], IR_node.layer.attr['paddings'].list.i[idx + 1]) for idx in range(0, len(IR_node.layer.attr['paddings'].list.i), 2)) mode_str = "" if 'mode' in IR_node.layer.attr: mode_str = ", mode = '{}'".format(IR_node.layer.attr['mode'].s.decode('utf-8')) code = "{:<15} = tf.pad({}, paddings = ({}){}, name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), padding_str, mode_str, IR_node.variable_name ) self.add_body(1, code) def emit_Squeeze(self, IR_node): self.add_body(1, "{:<15} = tf.squeeze({}, [{}], name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), ', '.join('%s' % axis for axis in IR_node.layer.attr['axes'].list.i), IR_node.name)) def emit_ReduceMean(self, IR_node): self.add_body(1, "{:<15} = tf.reduce_mean({}, [{}], {}, name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), ','.join('%s' % i for i in IR_node.layer.attr['axes'].list.i), IR_node.layer.attr['keepdims'].b, IR_node.name)) def emit_LRN(self, IR_node): self.add_body(1, "{:<15} = tf.nn.lrn({}, {}, alpha = {}, beta = {}, name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.layer.attr['size'].i - 1, IR_node.layer.attr['alpha'].f / (IR_node.layer.attr['size'].i * 2 - 1), IR_node.layer.attr['beta'].f, IR_node.name)) def emit_SeparableConv(self, IR_node): self.used_layers.add(IR_node.type) strides_str = ', '.join('%s' % i for i in IR_node.layer.attr['strides'].list.i) self.add_body(1, "{:<15} = separable_convolution({}, strides = [{}], padding = '{}', name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), strides_str, IR_node.layer.attr['padding'].s.decode('utf-8'), IR_node.name)) def emit_DepthwiseConv(self, IR_node): self.used_layers.add(IR_node.type) strides_str = ', '.join('%s' % i for i in IR_node.layer.attr['strides'].list.i) self.add_body(1, "{:<15} = depthwise_convolution({}, strides = [{}], padding = '{}', name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), strides_str, IR_node.layer.attr['padding'].s.decode('utf-8'), IR_node.name)) def _layer_Convolution(self): self.add_body(0, """ def convolution(input, name, **kwargs): w = tf.Variable(__weights_dict[name]['weights'], trainable = is_train, name = name + "_weight") layer = tf.nn.convolution(input, w, **kwargs) if 'bias' in __weights_dict[name]: b = tf.Variable(__weights_dict[name]['bias'], trainable = is_train, name = name + "_bias") layer = layer + b return layer""") def _layer_BatchNorm(self): self.add_body(0, """ def batch_normalization(input, name, **kwargs): mean = tf.Variable(__weights_dict[name]['mean'], name = name + "_mean", trainable = is_train) variance = tf.Variable(__weights_dict[name]['var'], name = name + "_var", trainable = is_train) offset = tf.Variable(__weights_dict[name]['bias'], name = name + "_bias", trainable = is_train) if 'bias' in __weights_dict[name] else None scale = tf.Variable(__weights_dict[name]['scale'], name = name + "_scale", trainable = is_train) if 'scale' in __weights_dict[name] else None return tf.nn.batch_normalization(input, mean, variance, offset, scale, name = name, **kwargs) """) def _layer_SeparableConv(self): 
self.add_body(0, """ def separable_convolution(input, name, **kwargs): depthwise = tf.Variable(__weights_dict[name]['depthwise_filter'], trainable = is_train, name = name + "_df") pointwise = tf.Variable(__weights_dict[name]['pointwise_filter'], trainable = is_train, name = name + "_pf") layer = tf.nn.separable_conv2d(input, depthwise, pointwise, **kwargs) if 'bias' in __weights_dict[name]: b = tf.Variable(__weights_dict[name]['bias'], trainable = is_train, name = name + "_bias") layer = layer + b return layer""") def _layer_DepthwiseConv(self): self.add_body(0, """ def depthwise_convolution(input, name, **kwargs): depthwise = tf.Variable(__weights_dict[name]['weights'], trainable = is_train, name = name + "_df") layer = tf.nn.depthwise_conv2d(input, depthwise, **kwargs) if 'bias' in __weights_dict[name]: b = tf.Variable(__weights_dict[name]['bias'], trainable = is_train, name = name + "_bias") layer = layer + b return layer""")
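# For orientation (illustrative sketch, not captured emitter output): given a
# single conv + relu graph, the TensorflowEmitter above produces a script of
# roughly this shape, with convolution() emitted once by _layer_Convolution and
# all parameters served from __weights_dict:
#
#     data            = tf.placeholder(tf.float32, shape = (None, 224, 224, 3), name = 'data')
#     conv1           = convolution(data, strides = [1, 1], padding = 'SAME', name = 'conv1')
#     relu1           = tf.nn.relu(conv1, name = 'relu1')
#     return data, relu1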
class PytorchEmitter(Emitter): dtype_map = { graph_pb2.DT_FLOAT16: "float16", graph_pb2.DT_FLOAT32: "float32", graph_pb2.DT_FLOAT64: "float64", graph_pb2.DT_INT16: "int16", graph_pb2.DT_INT32: "int32", graph_pb2.DT_INT64: "int64", graph_pb2.DT_UINT8: "uint8", graph_pb2.DT_UINT16: "uint16" } # Base Functions def __init__(self, model): super(PytorchEmitter, self).__init__() if isinstance(model, _string_types): network_path = model else: network_path = model[0] weight_path = model[1] self.init_codes = str() self.IR_graph = IRGraph(network_path) self.IR_graph.build() self._load_weights(weight_path) def run(self, dstNetworkPath, dstWeightPath=None, phase='test'): super(PytorchEmitter, self).run(dstNetworkPath, dstWeightPath, phase) if self.weight_loaded: self.save_weights(self.weights_dict, dstWeightPath) def add_init(self, indent, codes): if isinstance(codes, _string_types): codes = [codes] for code in codes: self.init_codes += (" " * indent) + code + '\n' @property def header_code(self): return """import numpy as np import torch import torch.nn as nn import torch.nn.functional as F __weights_dict = dict() def load_weights(weight_file): if weight_file == None: return try: weights_dict = np.load(weight_file).item() except: weights_dict = np.load(weight_file, encoding='bytes').item() return weights_dict class KitModel(nn.Module): """ def gen_codes(self, phase): self.add_init( 1, """ def __init__(self, weight_file): super(KitModel, self).__init__() global __weights_dict __weights_dict = load_weights(weight_file) """) self.add_body(1, "def forward(self, x):") for layer in self.IR_graph.topological_sort: current_node = self.IR_graph.get_node(layer) node_type = current_node.type if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) line = func(current_node) else: print("Pytorch Emitter has not supported operator [%s]." 
% (node_type))
                self.emit_UNKNOWN(current_node)

        self.add_body(2, "return {}".format(','.join(
            [self.IR_graph.get_node(name).real_variable_name
             for name in self.IR_graph.output_layers])))
        self.add_body(0, "")
        for i in self.used_layers:
            func = getattr(self, "_layer_" + i)
            func()
        return self.header_code + '\n' + self.init_codes + '\n' + self.body_codes

    def emit_Convolution(self, IR_node):
        # https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/conv.py
        self.used_layers.add(IR_node.type)
        dim = len(IR_node.IR_layer.attr["strides"].list.i) - 2
        in_channels = IR_node.IR_layer.attr["filter"].list.i[-2]
        out_channels = IR_node.IR_layer.attr["filter"].list.i[-1]
        kernel = IR_node.IR_layer.attr["filter"].list.i[:-2]
        strides = IR_node.IR_layer.attr["strides"].list.i[1:-1]
        use_bias = IR_node.IR_layer.attr["use_bias"].b
        if IR_node.IR_layer.attr["padding"].s == b'VALID':
            padding = 0
        else:
            # Approximate TF 'SAME' padding with symmetric (k - 1) // 2 per
            # spatial dim; exact for odd kernels with unit stride, asymmetric
            # cases would need an explicit F.pad before the convolution.
            padding = tuple((k - 1) // 2 for k in kernel)
        self.add_init(2, "self.{} = self.__convolution({}, name = '{}', in_channels = {}, out_channels = {}, kernel_size = ({}), stride = ({}), padding = {}, bias = {})".format(
            IR_node.variable_name,
            dim,
            IR_node.name,
            in_channels,
            out_channels,
            ','.join('%s' % id for id in kernel),
            ','.join('%s' % id for id in strides),
            padding,
            use_bias))
        self.add_body(2, "{:<15} = self.{}({})".format(
            IR_node.variable_name,
            IR_node.variable_name,
            self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name))
        if self.weight_loaded:
            # IR stores convolution weights as (H, W, C_in, C_out); PyTorch
            # wants (C_out, C_in, H, W), hence the axis permutation below.
            self.weights_dict[IR_node.name]['weights'] = np.transpose(
                self.weights_dict[IR_node.name]['weights'],
                [dim + 1, dim] + list(range(0, dim)))

    def emit_Pool(self, IR_node):
        dim = len(IR_node.IR_layer.attr["strides"].list.i) - 2
        if IR_node.layer.attr['pooling_type'].s == b"MAX":
            pool_name = "max_pool{}d".format(dim)
        elif IR_node.layer.attr['pooling_type'].s == b"AVG":
            pool_name = "avg_pool{}d".format(dim)
        else:
            assert False
        if IR_node.layer.attr['global_pooling'].b:
            raise NotImplementedError("Global pooling is not supported yet!")
        else:
            for e in IR_node.IR_layer.attr["dilation_rate"].list.i:
                assert e == 1
            if IR_node.IR_layer.attr["padding"].s == b'VALID':
                padding = 0
            else:
                # Kit TODO: to handle padding
                padding = 1
            pool_size = IR_node.IR_layer.attr['window_shape'].list.i[1:-1]
            strides = IR_node.IR_layer.attr['strides'].list.i[1:-1]
            self.add_body(2, "{:<15} = F.{}(input = {}, kernel_size = ({}), stride = ({}), padding = {})".format(
                IR_node.variable_name,
                pool_name,
                self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name,
                ','.join([str(id) for id in pool_size]),
                ','.join([str(id) for id in strides]),
                padding))

    def emit_UNKNOWN(self, IR_node):
        print(IR_node.name)

    def emit_DataInput(self, IR_node):
        # Ignore it in Pytorch
        IR_node.real_name = 'x'

    def emit_Dropout(self, IR_node):
        # F.dropout expects the drop probability, so convert the IR's keep_prob
        # (the TensorflowEmitter above does the same conversion).
        self.add_body(2, "{:<15} = F.dropout(input = {}, p = {}, training = self.training, inplace = True)".format(
            IR_node.variable_name,
            self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name,
            1 - IR_node.layer.attr["keep_prob"].f))

    def check_if_need_transpose(self, IR_node):
        parent = self.IR_graph.get_parent(IR_node.name, [0])
        while parent.type == 'Flatten':
            parent = self.IR_graph.get_parent(parent.name, [0])
        dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim)
        if dim > 2:
            original_dims = self.weights_dict[IR_node.name]['weights'].shape
            dims = [i.size for i in parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]] + [-1]
            self.weights_dict[IR_node.name]['weights'] = np.reshape(
                self.weights_dict[IR_node.name]['weights'], dims)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(
self.weights_dict[IR_node.name]['weights'], [dim - 2] + list(range(0, dim - 2)) + [dim - 1]) self.weights_dict[IR_node.name]['weights'] = np.reshape( self.weights_dict[IR_node.name]['weights'], original_dims) def emit_FullyConnected(self, IR_node): self.used_layers.add(IR_node.type) in_features = 1 for i in self.IR_graph.get_parent( IR_node.name, [0]).layer.attr['_output_shapes'].list.shape[0].dim[1:]: in_features *= i.size self.add_init( 2, "self.{} = self.__dense(name = '{}', in_features = {}, out_features = {}, bias = {})" .format(IR_node.variable_name, IR_node.name, in_features, IR_node.layer.attr["units"].i, IR_node.IR_layer.attr["use_bias"].b)) self.add_body( 2, "{:<15} = self.{}({})".format( IR_node.variable_name, IR_node.variable_name, self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name)) if self.weight_loaded: self.check_if_need_transpose(IR_node) self.weights_dict[IR_node.name]['weights'] = np.transpose( self.weights_dict[IR_node.name]['weights'], (1, 0)) def emit_Flatten(self, IR_node): parent = self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name self.add_body( 2, "{:<15} = {}.view({}.size(0), -1)".format(IR_node.variable_name, parent, parent)) def emit_Reshape(self, IR_node): shape_str = IRGraph.shapeToStr(IR_node.IR_layer.attr["shape"].shape, True) self.add_body( 1, "{:<15} = Reshape(name = \"{}\", target_shape = ({}))({})".format( IR_node.variable_name, IR_node.name, shape_str, self.IR_graph.get_node( IR_node.in_edges[0]).real_variable_name)) def emit_Tanh(self, IR_node): code = "{:<15} = Activation(name = '{}', activation = 'tanh')({})".format( IR_node.replace_scope(IR_node.name), IR_node.name, IR_node.replace_scope(IR_node.in_edges[0])) return code def emit_Relu(self, IR_node): self.add_body( 2, "{:<15} = F.relu({})".format( IR_node.variable_name, self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name)) def emit_Softmax(self, IR_node): self.add_body( 2, "{:<15} = F.softmax({})".format( IR_node.variable_name, self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name)) def emit_Sigmoid(self, IR_node): code = "{:<15} = Activation(name = '{}', activation = 'sigmoid')({})".format( IR_node.replace_scope(IR_node.name), IR_node.name, IR_node.replace_scope(IR_node.in_edges[0])) return code def emit_Embedding(self, IR_node): ret = "{:<15} = Embedding(input_dim = {}, output_dim = {}, mask_zero = {})({})".format( IR_node.name, IR_node.IR_layer.attr['input_dim'].i, IR_node.IR_layer.attr['output_dim'].i, IR_node.IR_layer.attr['mask_zero'].b, IR_node.in_edges[0]) return ret def emit_RNNs(self, IR_node, func): # for Keras if "dropout" in IR_node.IR_layer.attr: dropout_str = ",dropout = {}, recurrent_dropout = {}".format( IR_node.IR_layer.attr['dropout'].f, IR_node.IR_layer.attr['recurrent_dropout'].f) else: dropout_str = "" code = "{:<15} = {}(units = {}, use_bias = {} {})({})".format( IR_node.name, func, IR_node.IR_layer.attr['units'].i, IR_node.IR_layer.attr['use_bias'].b, dropout_str, IR_node.in_edges[0]) return code def emit_LSTM(self, IR_node): return self.emit_RNNs(IR_node, "LSTM") def emit_GRU(self, IR_node): return self.emit_RNNs(IR_node, "GRU") def emit_Add(self, IR_node): code = Keras2Emitter._emit_merge(IR_node, "add") return code def emit_Concat(self, IR_node): code = Keras2Emitter._emit_merge(IR_node, "concatenate") return code def emit_BatchNorm(self, IR_node): code = "{:<15} = BatchNormalization(name = '{}', axis = {}, center = {}, scale = {})({})".format( IR_node.variable_name, IR_node.name, IR_node.IR_layer.attr['axis'].i, 
IR_node.IR_layer.attr['center'].b, IR_node.IR_layer.attr['scale'].b, IR_node.replace_scope(IR_node.in_edges[0])) return code def emit_pad(self, IR_node): if IR_node.IR_layer.attr['mode'].s == b"CONSTANT": func = "ZeroPadding" dim = len(IR_node.IR_layer.attr['padding'].list.i) // 2 padding_str = "" for idx in range(0, dim): padding_str += "({}, {}),".format( IR_node.IR_layer.attr['padding'].list.i[idx + idx], IR_node.IR_layer.attr['padding'].list.i[idx + idx + 1]) code = "{:<15} = {}{}D(name = \"{}\", padding = ({}))({})".format( IR_node.variable_name, func, dim, IR_node.name, padding_str, IR_node.replace_scope(IR_node.in_edges[0])) return code def emit_Squeeze(self, IR_node): raise NotImplementedError() input_name = IR_node.replace_scope( self.IR_graph.layer_name_map[IR_node.in_edges[0]]) self.forward_code += " {} = {}.view({}.size(0), -1)\n".format( IR_node.replace_scope(IR_node.name), input_name, input_name) def emit_Pad(self, IR_node): if IR_node.layer.attr['mode'].s == b'CONSTANT': mode = "mode = 'constant', value = {}".format(0) elif IR_node.layer.attr['mode'].s == b'REFLECT': mode = "mode = 'reflect'" elif IR_node.layer.attr['mode'].s == b'SYMMETRIC': mode = "mode = 'replicate'" else: assert False padding_str = ', '.join( '%s' % i for i in IR_node.layer.attr['paddings'].list.i[2:-2]) self.add_body( 2, "{:<15} = F.pad({}, ({}), {})".format( IR_node.variable_name, self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name, padding_str, mode)) def _layer_Convolution(self): self.add_body( 0, """ @staticmethod def __convolution(dim, name, **kwargs): if dim == 1: layer = nn.Conv1d(**kwargs) elif dim == 2: layer = nn.Conv2d(**kwargs) elif dim == 3: layer = nn.Conv3d(**kwargs) else: raise NotImplementedError() layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights'])) if 'bias' in __weights_dict[name]: layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias'])) return layer""") def _layer_FullyConnected(self): self.add_body( 0, """ @staticmethod def __dense(name, **kwargs): layer = nn.Linear(**kwargs) layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights'])) if 'bias' in __weights_dict[name]: layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias'])) return layer""")
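# Quick illustrative check (not part of the emitter) of the weight permutation
# used by PytorchEmitter.emit_Convolution: for dim == 2 the axis order
# [dim + 1, dim] + list(range(0, dim)) == [3, 2, 0, 1], mapping IR weights laid
# out as (H, W, C_in, C_out) onto PyTorch's expected (C_out, C_in, H, W).
def _demo_conv_weight_permutation():
    import numpy as np
    dim = 2
    w_hwio = np.zeros((3, 3, 16, 32))  # H, W, C_in, C_out
    w_oihw = np.transpose(w_hwio, [dim + 1, dim] + list(range(0, dim)))
    assert w_oihw.shape == (32, 16, 3, 3)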
class Keras2Emitter(Emitter): dtype_map = { graph_pb2.DT_FLOAT16 : "float16", graph_pb2.DT_FLOAT32 : "float32", graph_pb2.DT_FLOAT64 : "float64", graph_pb2.DT_INT16 : "int16", graph_pb2.DT_INT32 : "int32", graph_pb2.DT_INT64 : "int64", graph_pb2.DT_UINT8 : "uint8", graph_pb2.DT_UINT16 : "uint16" } def __init__(self, model): super(Keras2Emitter, self).__init__() from six import string_types as _string_types if isinstance(model, _string_types): network_path = model else: network_path = model[0] weight_path = model[1] self._load_weights(weight_path) self.IR_graph = IRGraph(network_path) self.IR_graph.build() self.yolo_parameter = [] self.region_parameter = [] self.layers_codes_count = dict() folder = Folder(self.IR_graph, self.weights_dict) folder.fold() @property def header_code(self): return """import keras from keras.models import Model from keras import layers import keras.backend as K import numpy as np from keras.layers.core import Lambda import tensorflow as tf weights_dict = dict() def load_weights_from_file(weight_file): try: weights_dict = np.load(weight_file).item() except: weights_dict = np.load(weight_file, encoding='bytes').item() return weights_dict def set_layer_weights(model, weights_dict): for layer in model.layers: if layer.name in weights_dict: cur_dict = weights_dict[layer.name] current_layer_parameters = list() if layer.__class__.__name__ == "BatchNormalization": if 'scale' in cur_dict: current_layer_parameters.append(cur_dict['scale']) if 'bias' in cur_dict: current_layer_parameters.append(cur_dict['bias']) current_layer_parameters.extend([cur_dict['mean'], cur_dict['var']]) elif layer.__class__.__name__ == "Scale": if 'scale' in cur_dict: current_layer_parameters.append(cur_dict['scale']) if 'bias' in cur_dict: current_layer_parameters.append(cur_dict['bias']) elif layer.__class__.__name__ == "SeparableConv2D": current_layer_parameters = [cur_dict['depthwise_filter'], cur_dict['pointwise_filter']] if 'bias' in cur_dict: current_layer_parameters.append(cur_dict['bias']) elif layer.__class__.__name__ == "Embedding": current_layer_parameters.append(cur_dict['weights']) else: # rot weights current_layer_parameters = [cur_dict['weights']] if 'bias' in cur_dict: current_layer_parameters.append(cur_dict['bias']) model.get_layer(layer.name).set_weights(current_layer_parameters) return model def KitModel(weight_file = None): global weights_dict weights_dict = load_weights_from_file(weight_file) if not weight_file == None else None """ def gen_code(self, phase): self.add_body(0, self.header_code) for layer in self.IR_graph.topological_sort: current_node = self.IR_graph.get_node(layer) node_type = current_node.type if hasattr(self, "emit_" + node_type): # print("Converting layer {}({})".format(current_node.name, node_type)) func = getattr(self, "emit_" + node_type) line = func(current_node) if line: self.add_body(1, line) else: print("KerasEmitter has not supported operator [%s]." 
% (node_type))
                self.emit_UNKNOWN(current_node)

        self.add_body(1, "{:<15} = Model(inputs = [{}], outputs = [{}])".format(
            "model",
            ', '.join([self.IR_graph.get_node(name).real_variable_name
                       for name in self.IR_graph.input_layers
                       if self.IR_graph.get_node(name).type != 'Const']),
            ', '.join([self.IR_graph.get_node(name).real_variable_name
                       for name in self.IR_graph.output_layers
                       if self.IR_graph.get_node(name).type != 'Pack'])))
        self.add_body(1, ["set_layer_weights(model, weights_dict)", "return model"])

        for i in self.used_layers:
            func = getattr(self, "_layer_" + i)
            func()
        self.add_body(0, "")
        for code in self.layers_codes.values():
            self.add_body(0, code)
        return self.body_code

    @staticmethod
    def shapeToStr(shapes):
        return ', '.join('%s' % i for i in filter(lambda x: x > 0, shapes))

    def _emit_activation(self, IR_node, op, in_scope=False):
        if in_scope:
            code = "{:<15} = keras.activations.get('{}')({})".format(
                IR_node.variable_name,
                op,
                self.parent_variable_name(IR_node))
        else:
            code = "{:<15} = layers.Activation(name='{}', activation='{}')({})".format(
                IR_node.variable_name,
                IR_node.name,
                op,
                self.parent_variable_name(IR_node))
        return code

    def _emit_merge(self, IR_node, func):
        if len(IR_node.in_edges) == 1:
            IR_node.in_edges.append(IR_node.in_edges[0])
        inputs = ', '.join('%s' % self.parent_variable_name(IR_node, i) for i in IR_node.in_edges)
        # include the axis argument in the emitted line, as the first
        # Keras2Emitter above does
        axis = ' axis = {},'.format(IR_node.get_attr('axis')) if 'axis' in IR_node.layer.attr else ""
        code = "{:<15} = layers.{}(name = '{}',{} inputs = [{}])".format(
            IR_node.variable_name,
            func,
            IR_node.name,
            axis,
            inputs)
        return code

    @staticmethod
    def _convert_padding(padding):
        padding = convert_onnx_pad_to_tf(padding)[1:-1]
        for idx, pad in enumerate(padding):
            padding[idx] = tuple(pad)
        return tuple(padding)

    def _defuse_padding(self, IR_node, in_scope=False):
        auto_pad = IR_node.get_attr('auto_pad')
        if auto_pad is not None and auto_pad.startswith("SAME"):
            input_node = self.parent_variable_name(IR_node)
            padding = 'same'
            return input_node, padding
        else:
            padding = IR_node.get_attr("pads")
            if padding is not None:
                padding = self._convert_padding(padding)
                if not is_valid_padding(padding):
                    input_node = IR_node.variable_name + '_input'
                    self.add_body(1, "{:<15} = layers.ZeroPadding{}D(padding = {})({})".format(
                        input_node,
                        len(padding),
                        padding,
                        self.parent_variable_name(IR_node)))
                else:
                    input_node = self.parent_variable_name(IR_node)
            else:
                input_node = self.parent_variable_name(IR_node)  # TODO
            return input_node, 'valid'

    def _emit_convolution(self, IR_node, conv_type):
        self.used_layers.add('Conv')
        group = IR_node.get_attr("group", 1)
        if conv_type.endswith('Transpose'):
            filters = IR_node.get_attr('kernel_shape')[-2]
        else:
            filters = IR_node.get_attr('kernel_shape')[-1]
        # depthwise convolutions take a depth multiplier instead of a filter count
        filters_str = 'filters={}'.format(filters) if not conv_type.endswith('DepthwiseConv2D') else 'depth_multiplier={}'.format(filters)
        input_node, padding = self._defuse_padding(IR_node)
        dilations = IR_node.get_attr('dilations')
        if not dilations or len(dilations) == 2:
            # reset the default dilation
            dilations = [1] * len(IR_node.get_attr('kernel_shape'))
        code = "{:<15} = convolution(weights_dict, name='{}', input={}, group={}, conv_type='{}', {}, kernel_size={}, strides={}, dilation_rate={}, padding='{}', use_bias={})".format(
            IR_node.variable_name,
            IR_node.name,
            input_node,
            group,
            conv_type,
            filters_str,
            tuple(IR_node.get_attr('kernel_shape')[:-2]),
            tuple(IR_node.get_attr('strides')[1:-1]),
            tuple(dilations[1:-1]),
padding, IR_node.get_attr('use_bias')) return code def emit_ConvTranspose(self, IR_node, in_scope=False): dim = len(IR_node.get_attr('kernel_shape')) - 2 return self._emit_convolution(IR_node, 'layers.Conv{}DTranspose'.format(dim)) def emit_Conv(self, IR_node, in_scope=False): dim = len(IR_node.get_attr('kernel_shape')) - 2 return self._emit_convolution(IR_node, 'layers.Conv{}D'.format(dim)) ############# # Operators # ############# def emit_UNKNOWN(self, IR_node, in_scope=False): print (IR_node.name) def emit_Mul(self, IR_node, in_scope=False): if in_scope: code = "{:<15} = {} * {}".format( IR_node.variable_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node, [1])) return code node_1 = self.IR_graph.get_node(IR_node.in_edges[0]) node_2 = self.IR_graph.get_node(IR_node.in_edges[1]) if node_1.type == 'Constant' or node_2.type == 'Constant': self.used_layers.add('Mul_Constant') if node_1.type == 'Constant': weight_factor = node_1.get_attr('value') code = "{:<15} = mul_constant(weight_factor={}, layer_name= {})".format( IR_node.variable_name, weight_factor, self.parent_variable_name(IR_node, [1])) else: weight_factor = node_2.get_attr('value') code = "{:<15} = mul_constant(weight_factor={}, layer_name= {})".format( IR_node.variable_name, weight_factor, self.parent_variable_name(IR_node)) else: self.used_layers.add('Mul') code = "{:<15} = my_mul(name='{}')([{}, {}])".format( IR_node.variable_name, IR_node.name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node, [1])) return code def emit_Sub(self, IR_node, in_scope=False): if in_scope: code = "{:<15} = {} - {}".format( IR_node.variable_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node, [1])) return code self.used_layers.add('Sub') code = "{:<15} = my_sub()({}, {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node, [1])) # code = self._emit_merge(IR_node, "subtract") return code def emit_Add(self, IR_node, in_scope=False): if in_scope: code = "{:<15} = {} + {}".format( IR_node.variable_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node, [1])) return code self.used_layers.add('Add') code = "{:<15} = my_add()([{}, {}])".format( IR_node.variable_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node, [1])) return code def emit_DataInput(self, IR_node, in_scope=False): shape_str = IRGraph.shapeToStr(IR_node.IR_layer.attr["shape"].shape) dtype_str = ", dtype = '{}'".format(self.dtype_map[IR_node.layer.attr['dtype'].type]) if 'dtype' in IR_node.layer.attr else "" code = "{:<15} = layers.Input(name = '{}', shape = ({},) {})".format( IR_node.variable_name, IR_node.name, shape_str, dtype_str) return code def emit_Dropout(self, IR_node, in_scope=False): seed = 'None' if 'seed' in IR_node.IR_layer.attr: seed = IR_node.IR_layer.attr['seed'].i code = "{:<15} = layers.Dropout(name = '{}', rate = {}, seed = {})({})".format( IR_node.variable_name, IR_node.name, IR_node.IR_layer.attr["keep_prob"].f, seed, self.parent_variable_name(IR_node)) return code def emit_FullyConnected(self, IR_node, in_scope=False): if in_scope: code = "{:<15} = K.bias_add(K.dot({}, K.variable(weights_dict['{}']['weights'])), K.variable(weights_dict['{}']['bias']))".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.name, IR_node.name) else: code = "{:<15} = layers.Dense(name = '{}', units = {}, use_bias = {})({})".format( IR_node.variable_name, IR_node.name, IR_node.get_attr('units'), 
IR_node.get_attr('use_bias'), self.parent_variable_name(IR_node)) return code def emit_Flatten(self, IR_node, in_scope=False): self.used_layers.add('Flatten') code = "{:<15} = __flatten(name = '{}', input = {})".format( IR_node.variable_name, IR_node.name, self.parent_variable_name(IR_node)) return code def emit_Pool(self, IR_node, in_scope=False): codes = list() dim = len(IR_node.get_attr("strides")) - 2 pooling_type = IR_node.get_attr('pooling_type') if pooling_type == "MAX": pool_name = "MaxPooling{}D".format(dim) elif pooling_type == "AVG": pool_name = "AveragePooling{}D".format(dim) else: print(pooling_type) assert False # TODO if IR_node.layer.attr['global_pooling'].b: shape_str = IR_node.get_attr("shape_coreml") if shape_str: shape_str = ','.join([str(i) for i in shape_str]) codes.append("{:<15} = layers.Global{}(name = '{}')({})".format( IR_node.variable_name+'before', pool_name, IR_node.name, self.parent_variable_name(IR_node))) # when converting from coreml model, reshape is needed after the global pooling codes.append("{:<15} = layers.Reshape(name = '{}', target_shape = ({},))({})".format( IR_node.variable_name, IR_node.name + 'reshape', shape_str, IR_node.variable_name+'before')) else: codes.append("{:<15} = layers.Global{}(name = '{}')({})".format( IR_node.variable_name, pool_name, IR_node.name, self.parent_variable_name(IR_node))) else: dilations = IR_node.get_attr('dilations') if dilations: for e in IR_node.get_attr('dilations'): assert e == 1 pool_size = IR_node.get_attr('kernel_shape')[1:-1] strides = IR_node.get_attr('strides')[1:-1] padding = IR_node.get_attr('pads')[1:dim] if pooling_type == "AVG" and pool_size.count(pool_size[0]) == len(pool_size) and strides[0] == 1 and strides.count(strides[0]) == len(strides) and padding.count(padding[0]) == len(padding) and pool_size[0] == padding[0]*2 + 1: pool_size = ', '.join('%s' % i for i in pool_size) strides = ', '.join('%s' % i for i in strides) codes.append("{:<15} = layers.{}(name = '{}', pool_size = ({}), strides = ({}), padding = '{}')({})".format( IR_node.variable_name, pool_name, IR_node.name, pool_size, strides, 'same', self.parent_variable_name(IR_node) )) else: pool_size = ', '.join('%s' % i for i in pool_size) strides = ', '.join('%s' % i for i in strides) input_node, padding = self._defuse_padding(IR_node) codes.append("{:<15} = layers.{}(name = '{}', pool_size = ({}), strides = ({}), padding = '{}')({})".format( IR_node.variable_name, pool_name, IR_node.name, pool_size, strides, padding, input_node)) return codes def emit_Reshape(self, IR_node, in_scope=False): shape_str = self.shapeToStr(IR_node.IR_layer.attr["shape"].list.i) code = "{:<15} = layers.Reshape(name = '{}', target_shape = ({},))({})".format( IR_node.variable_name, IR_node.name, shape_str, self.parent_variable_name(IR_node)) return code def emit_Tanh(self, IR_node, in_scope=False): code = self._emit_activation(IR_node, 'tanh', in_scope) return code def emit_Relu(self, IR_node, in_scope=False): code = self._emit_activation(IR_node, 'relu', in_scope) return code def emit_Softmax(self, IR_node, in_scope=False): code = self._emit_activation(IR_node, 'softmax', in_scope) return code def emit_Sigmoid(self, IR_node, in_scope=False): code = self._emit_activation(IR_node, 'sigmoid', in_scope) return code def emit_Embedding(self, IR_node, in_scope=False): code = "{:<15} = layers.Embedding(name = '{}', input_dim = {}, output_dim = {}, mask_zero = {})({})".format( IR_node.variable_name, IR_node.name, IR_node.get_attr('input_dim'), 
IR_node.get_attr('output_dim'), IR_node.get_attr('mask_zero'), IR_node.in_edges[0]) return code def emit_RNNs(self, IR_node, func): # for Keras if "dropout" in IR_node.IR_layer.attr: dropout_str = ",dropout = {}, recurrent_dropout = {}".format( IR_node.IR_layer.attr['dropout'].f, IR_node.IR_layer.attr['recurrent_dropout'].f) else: dropout_str = "" code = "{:<15} = layers.{}(units = {}, use_bias = {} {})({})".format( IR_node.name, func, IR_node.IR_layer.attr['units'].i, IR_node.IR_layer.attr['use_bias'].b, dropout_str, IR_node.in_edges[0]) return code def emit_LSTM(self, IR_node, in_scope=False): return self.emit_RNNs(IR_node, "LSTM") def emit_GRU(self, IR_node, in_scope=False): return self.emit_RNNs(IR_node, "GRU") def emit_Concat(self, IR_node, in_scope=False): inputs = ', '.join('%s' % self.parent_variable_name(IR_node, s) for s in IR_node.in_edges) if in_scope: code = "{:<15} = K.concatenate([{}])".format( IR_node.variable_name, inputs) else: code = self._emit_merge(IR_node, "concatenate") return code def emit_BatchNorm(self, IR_node, in_scope=False): axis = IR_node.layer.attr['axis'].i if 'axis' in IR_node.layer.attr else -1 code = "{:<15} = layers.BatchNormalization(name = '{}', axis = {}, epsilon = {}, center = {}, scale = {})({})".format( IR_node.variable_name, IR_node.name, axis, IR_node.layer.attr['epsilon'].f, IR_node.layer.attr['bias'].b, IR_node.layer.attr['scale'].b, self.parent_variable_name(IR_node)) return code def emit_Scale(self, IR_node, in_scope=False): self.used_layers.add('Scale') axis = IR_node.layer.attr['axis'].i if 'axis' in IR_node.layer.attr else -1 code = "{:<15} = Scale(name = '{}', axis = {}, center = {}, scale = {})({})".format( IR_node.variable_name, IR_node.name, axis, IR_node.layer.attr['use_bias'].b, True, self.parent_variable_name(IR_node)) return code def emit_Pad(self, IR_node, in_scope=False): mode = IR_node.get_attr('mode', 'constant') mode = mode.lower() if mode == "constant": func = "ZeroPadding" else: raise NotImplementedError() dim = len(IR_node.get_attr('pads')) // 2 - 2 padding = self._convert_padding(IR_node.get_attr('pads')) code = "{:<15} = layers.{}{}D(name='{}', padding={})({})".format( IR_node.variable_name, func, dim, IR_node.name, padding, self.parent_variable_name(IR_node)) return code def emit_Squeeze(self, IR_node, in_scope=False): return self.emit_Flatten(IR_node) def emit_ReduceMean(self, IR_node, in_scope=False): axes = ', '.join('%s' % i for i in IR_node.get_attr('axes')) code = "{:<15} = layers.Lambda(lambda x: K.mean(x, axis=[{}], keepdims={}))({})".format( IR_node.variable_name, axes, IR_node.get_attr('keepdims'), self.parent_variable_name(IR_node)) return code def emit_LRN(self, IR_node, in_scope=False): self.used_layers.add(IR_node.type) code = "{:<15} = LRN(size = {}, alpha = {}, beta = {}, k = {}, name = '{}')({})".format( IR_node.variable_name, IR_node.get_attr('size'), IR_node.get_attr('alpha'), IR_node.get_attr('beta'), IR_node.get_attr('k'), IR_node.name, self.parent_variable_name(IR_node)) return code def emit_Split(self, IR_node, in_scope=False): if in_scope: axis = IR_node.get_attr('axis') split_num = IR_node.get_attr('split') segment_len = "K.int_shape({})[{}]//{}".format(self.parent_variable_name(IR_node),axis, split_num) split_str = '[' + ','.join(':' for i in range(axis)) + ',{}:{},...]' split_strs = [] for i in range(split_num-1): split_strs.append(self.parent_variable_name(IR_node)+split_str.format(str(i)+'*'+ segment_len, str(i+1)+'*'+segment_len)) 
split_strs.append(self.parent_variable_name(IR_node)+split_str.format(str(split_num-1)+'*'+segment_len, '')) code = "{:<15} = {}".format(IR_node.variable_name, ', '.join(split_strs)) else: self.used_layers.add(IR_node.type) code = "{:<15} = __split(input={}, split_num={}, axis={})".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('split'), IR_node.get_attr('axis')) return code def emit_Unsqueeze(self, IR_node, in_scope=False): self.used_layers.add(IR_node.type) code = "{:<15} = __unsqueeze(input={}, axis={})".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('axes')[0]) return code def emit_Constant(self, IR_node, in_scope=False): if in_scope: if IR_node.get_attr('value'): code = "{:<15} = K.constant({})".format(IR_node.variable_name, IR_node.get_attr('value')) else: code = "{:<15} = K.constant(weights_dict['{}']['value'])".format(IR_node.variable_name, IR_node.name) return code else: pass def emit_Shape(self, IR_node, in_scope=False): self.used_layers.add(IR_node.type) code = "{:<15} = __shape(input={})".format( IR_node.variable_name, self.parent_variable_name(IR_node)) return code def emit_Fill(self, IR_node, in_scope=False): self.used_layers.add(IR_node.type) code = "{:<15} = __fill(input={}, value={})".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('value')) return code def emit_Slice(self, IR_node, in_scope=False): # It arouses some problems: # it can be implemented by Lambda Layer # https://github.com/keras-team/keras/issues/890 self.used_layers.add(IR_node.type) extra_str = "" if IR_node.get_attr('strides'): extra_str += "strides={}".format(IR_node.get_attr('strides')) if IR_node.get_attr('begin_mask'): extra_str += ", begin_mask={}".format(IR_node.get_attr('begin_mask')) if IR_node.get_attr('end_mask'): extra_str += ", end_mask={}".format(IR_node.get_attr('end_mask')) if IR_node.get_attr('shrink_axis_mask'): extra_str += ", shrink_axis_mask={}".format(IR_node.get_attr('shrink_axis_mask')) code = "{:<15} = __slice({}, {}, {}, {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('starts'), IR_node.get_attr('ends'), extra_str) return code def emit_Unstack(self, IR_node, in_scope=False): self.used_layers.add(IR_node.type) code = "{:<15} = __unstack(input={}, num={}, axis={})".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('num'), IR_node.get_attr('axis')) return code def emit_Pack(self, IR_node, in_scope=False): pass def emit_SeparableConv(self, IR_node, in_scope=False): assert len(IR_node.get_attr("strides")) == 4 return self._emit_convolution(IR_node, "layers.SeparableConv2D") def emit_Relu6(self, IR_node, in_scope=False): try: # Keras == 2.1.6 from keras.applications.mobilenet import relu6 str_relu6 = 'keras.applications.mobilenet.relu6' code = "{:<15} = layers.Activation({}, name = '{}')({})".format( IR_node.variable_name, str_relu6, IR_node.name, self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name) return code except: # Keras == 2.2.2 from keras.layers import ReLU code = "{:<15} = layers.ReLU(6, name = '{}')({})".format( IR_node.variable_name, IR_node.name, self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name) return code def emit_DepthwiseConv(self, IR_node, in_scope=False): try: from keras.applications.mobilenet import DepthwiseConv2D return self._emit_convolution(IR_node, 'keras.applications.mobilenet.DepthwiseConv2D') except: return self._emit_convolution(IR_node, 
'layers.DepthwiseConv2D') def emit_Crop(self, IR_node, in_scope=False): border = IR_node.get_attr('border') rank = len(border) // 2 cropping = [] for idx in range(rank): cropping.append(tuple([border[idx * 2], border[idx * 2 + 1]])) code = "{:<15} = layers.Cropping{}D(cropping={}, name='{}')({})".format( IR_node.variable_name, rank, tuple(cropping), IR_node.name, self.parent_variable_name(IR_node)) return code def emit_LeakyRelu(self, IR_node, in_scope=False): code = "{:<15} = layers.LeakyReLU(name='{}', alpha = {})({})".format( IR_node.variable_name, IR_node.name, IR_node.get_attr('alpha'), self.parent_variable_name(IR_node)) return code def emit_UpSampling2D(self, IR_node, in_scope=False): code = "{:<15} = layers.UpSampling2D(name='{}', size= ({}), data_format = 'channels_last')({})".format( IR_node.variable_name, IR_node.name, IR_node.get_attr('scales'), self.parent_variable_name(IR_node)) return code def emit_SpaceToDepth(self, IR_node, in_scope=False): self.used_layers.add(IR_node.type) assert IR_node.get_attr('blocksize') == 2 # TODO: arguments won't be saved in keras export model blocksize = "arguments={'blocksize': %d}" % 2 code = "{:<15} = layers.Lambda(space_to_depth, {}, name='{}')({})".format( IR_node.variable_name, blocksize, IR_node.name, self.parent_variable_name(IR_node)) return code def emit_Maxmum(self, IR_node, in_scope=False): if in_scope: code = "{:<15} = K.maximum({}, {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node, [1]) ) return code else: return self._emit_merge(IR_node, 'maximum') def emit_Minimum(self, IR_node, in_scope=False): if in_scope: code = "{:<15} = K.minimum({}, {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node, [1]) ) return code else: return self._emit_merge(IR_node, 'minimum') def emit_yolo(self, IR_node, in_scope=False): self.used_layers.add('Yolo') self.yolo_parameter = [IR_node.get_attr('anchors'), IR_node.get_attr('classes'), IR_node.get_attr("ignore_thresh"), IR_node.get_attr("jitter")] code = "{:<15} = {}".format( IR_node.variable_name, self.parent_variable_name(IR_node)) return code def emit_region(self, IR_node, in_scope=False): self.used_layers.add('Region') code = "{:<15} = {}".format( IR_node.variable_name, self.parent_variable_name(IR_node)) self.region_parameter = [IR_node.get_attr('anchors'), IR_node.get_attr('classes'), IR_node.get_attr("thresh"), IR_node.get_attr("softmax"), IR_node.get_attr("bias_match"), IR_node.get_attr("jitter"), IR_node.get_attr("num"), IR_node.get_attr("random"), IR_node.get_attr("coords"), IR_node.get_attr("absolute"), IR_node.get_attr("rescore"), IR_node.get_attr("class_scale"), IR_node.get_attr("object_scale"), IR_node.get_attr("noobject_scale"), IR_node.get_attr("coord_scale"), ] return code def emit_Scope(self, IR_node, in_scope=False): if hasattr(self, '_emit_' + IR_node.pattern): func = getattr(self, '_emit_' + IR_node.pattern) line = func(IR_node) return line input_vars = list() for idx, in_edge in enumerate(IR_node.in_edges): in_node = self.IR_graph.get_node(in_edge) if in_node.type == 'Scope' and len(in_node.return_variables) > 1 and ':' not in in_edge: # the input is a list var_name = ', '.join([(in_node.variable_name + "[%s]") %s for s in range(len(in_node.return_variables))]) input_vars.append(var_name) else: input_vars.append(self.parent_variable_name(IR_node, [idx])) code = "{:<15} = my_{}()([{}])".format( IR_node.real_variable_name, IR_node.pattern, ', '.join(input_vars))
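# Note: emit_Scope materializes each IR sub-graph pattern once as a
# generated keras.layers.Layer subclass named my_<pattern>;
# _gen_scope_code() below emits that class body and caches it in
# self.layers_codes, so repeated occurrences of a pattern share one
# definition.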
self._gen_scope_code(IR_node) return code def _gen_scope_code(self, scope_node): def _scope_func(scope_name, params, code, return_var): if len(return_var) > 1: return_var_code = '[{}]'.format(', '.join(return_var)) output_shape_code = ' self.output_shapes = [{}]\n'.format(', '.join(['K.int_shape(%s)' %s for s in return_var])) else: return_var_code = ', '.join(return_var) output_shape_code = ' self.output_shapes = K.int_shape({})\n'.format(return_var[0]) code = """ class my_{}(keras.layers.Layer): def __init__(self, **kwargs): super(my_{}, self).__init__(**kwargs) def call(self, inputs): {} {} {} return {} def compute_output_shape(self, input_shape): return self.output_shapes """.format(scope_name, scope_name, params, code, output_shape_code, return_var_code) return code if not self.layers_codes.get(scope_node.pattern, None): body_code = str() for node_name in scope_node.topology_list: node = self.IR_graph.get_node(node_name) node_type = node.type if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) line = func(node, True) if line != None: body_code += " " + line + '\n' else: print("KerasEmitter has not supported operator [%s]." % (node_type)) self.emit_UNKNOWN(node) # param_code does not need parameter slice. input_params = scope_node.input_params param_code = str() import re for i, p in enumerate(scope_node.in_edges): p_node = self.IR_graph.get_node(p) if p_node.type == 'Scope' and len(p_node.return_variables) > 1 and ':' not in p: # input is a list. param_code += " {} = [{}]\n".format(p_node.variable_name, ', '.join('inputs[%s]'%s for s in range(i, i + len(p_node.return_variables)))) else: param_code += " {} = inputs[{}]\n".format(p_node.variable_name, i) function_code = _scope_func(scope_node.pattern, param_code, body_code, scope_node.return_variables) self.layers_codes[scope_node.pattern] = function_code return body_code def _emit_h_zero(self, IR_node): if not self.layers_codes.get(IR_node.pattern, None): class_code = ''' class my_h_zero(keras.layers.Layer): def __init__(self, **kwargs): super(my_h_zero, self).__init__(**kwargs) def call(self, dummy): {:<15} = K.constant(np.full((1, {}), {})) return {} '''.format(IR_node.variable_name, IR_node.get_attr('fill_size'), IR_node.get_attr('fill_value'), IR_node.variable_name) self.layers_codes[IR_node.pattern] = class_code code = "{:<15} = my_h_zero()({})".format(IR_node.variable_name, self.parent_variable_name(IR_node)) return code def _layer_Yolo(self): self.add_body(0, ''' def yolo_parameter(): return {} '''.format(self.yolo_parameter)) def _layer_Region(self): self.add_body(0, ''' def region_parameter(): return {} '''.format(self.region_parameter)) def _layer_SpaceToDepth(self): self.add_body(0, ''' def space_to_depth(input, blocksize): import tensorflow as tf return tf.space_to_depth(input, block_size=blocksize) ''') def _layer_Flatten(self): self.add_body(0, ''' def __flatten(name, input): if input.shape.ndims > 2: return layers.Flatten(name = name)(input) else: return input ''') def _layer_LRN(self): self.add_body(0, ''' from keras.layers.core import Layer class LRN(Layer): def __init__(self, size=5, alpha=0.0005, beta=0.75, k=2, **kwargs): self.n = size self.alpha = alpha self.beta = beta self.k = k super(LRN, self).__init__(**kwargs) def build(self, input_shape): self.shape = input_shape super(LRN, self).build(input_shape) def call(self, x, mask=None): half_n = self.n - 1 squared = K.square(x) scale = self.k norm_alpha = self.alpha / (2 * half_n + 1) if K.image_dim_ordering() == "th": b, f, r, c = 
self.shape squared = K.expand_dims(squared, 0) squared = K.spatial_3d_padding(squared, padding=((half_n, half_n), (0, 0), (0,0))) squared = K.squeeze(squared, 0) for i in range(half_n*2+1): scale += norm_alpha * squared[:, i:i+f, :, :] else: b, r, c, f = self.shape squared = K.expand_dims(squared, -1) squared = K.spatial_3d_padding(squared, padding=((0, 0), (0,0), (half_n, half_n))) squared = K.squeeze(squared, -1) for i in range(half_n*2+1): scale += norm_alpha * squared[:, :, :, i:i+f] scale = K.pow(scale, self.beta) return x / scale def compute_output_shape(self, input_shape): return input_shape''') def _layer_Conv(self): self.add_body(0, """ def convolution(weights_dict, name, input, group, conv_type, filters=None, **kwargs): if not conv_type.startswith('layer'): layer = keras.applications.mobilenet.DepthwiseConv2D(name=name, **kwargs)(input) return layer elif conv_type == 'layers.DepthwiseConv2D': layer = layers.DepthwiseConv2D(name=name, **kwargs)(input) return layer inp_filters = K.int_shape(input)[-1] inp_grouped_channels = int(inp_filters / group) out_grouped_channels = int(filters / group) group_list = [] if group == 1: func = getattr(layers, conv_type.split('.')[-1]) layer = func(name = name, filters = filters, **kwargs)(input) return layer weight_groups = list() if not weights_dict == None: w = np.array(weights_dict[name]['weights']) weight_groups = np.split(w, indices_or_sections=group, axis=-1) for c in range(group): x = layers.Lambda(lambda z: z[..., c * inp_grouped_channels:(c + 1) * inp_grouped_channels])(input) x = layers.Conv2D(name=name + "_" + str(c), filters=out_grouped_channels, **kwargs)(x) weights_dict[name + "_" + str(c)] = dict() weights_dict[name + "_" + str(c)]['weights'] = weight_groups[c] group_list.append(x) layer = layers.concatenate(group_list, axis = -1) if 'bias' in weights_dict[name]: b = K.variable(weights_dict[name]['bias'], name = name + "_bias") layer = layer + b return layer""") def _layer_Scale(self): self.add_body(0, """ from keras.engine import Layer, InputSpec from keras import initializers from keras import backend as K class Scale(Layer): def __init__(self, axis=-1, center=True, scale=True, beta_initializer='zeros', gamma_initializer='ones', **kwargs): super(Scale, self).__init__(**kwargs) self.supports_masking = True self.axis = axis self.center = center self.scale = scale self.beta_initializer = initializers.get(beta_initializer) self.gamma_initializer = initializers.get(gamma_initializer) def build(self, input_shape): dim = input_shape[self.axis] if dim is None: raise ValueError('Axis ' + str(self.axis) + ' of ' 'input tensor should have a defined dimension ' 'but the layer received an input with shape ' + str(input_shape) + '.') self.input_spec = InputSpec(ndim=len(input_shape), axes={self.axis: dim}) shape = (dim,) if self.scale: self.gamma = self.add_weight(shape=shape, name='gamma', initializer=self.gamma_initializer) else: self.gamma = None if self.center: self.beta = self.add_weight(shape=shape, name='beta', initializer=self.beta_initializer) else: self.beta = None self.built = True def call(self, inputs, training=None): input_shape = K.int_shape(inputs) # Prepare broadcasting shape. 
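# For example (illustrative): with axis=-1 on an NHWC tensor of shape
# (N, H, W, C), gamma/beta have shape (C,), broadcast_shape becomes
# [1, 1, 1, C], and the reshaped gamma/beta broadcast over the batch
# and spatial dimensions.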
ndim = len(input_shape) reduction_axes = list(range(len(input_shape))) del reduction_axes[self.axis] broadcast_shape = [1] * len(input_shape) broadcast_shape[self.axis] = input_shape[self.axis] return K.reshape(self.gamma, broadcast_shape) * inputs + K.reshape(self.beta, broadcast_shape) def get_config(self): config = { 'axis': self.axis, 'center': self.center, 'scale': self.scale, } base_config = super(Scale, self).get_config() return dict(list(base_config.items()) + list(config.items())) def compute_output_shape(self, input_shape): return input_shape""") def _layer_Split(self): self.add_body(0, ''' def __split(input, split_num, axis): return Lambda(lambda x: tf.split(x, split_num, axis))(input) ''') def _layer_Unsqueeze(self): self.add_body(0, ''' def __unsqueeze(input, axis): return Lambda(lambda x: tf.expand_dims(x, axis))(input) ''') def _layer_Fill(self): self.add_body(0, ''' def __fill(input, value): class Fill(keras.layers.Layer): def call(self, input): if keras.backend.backend() =='tensorflow': output = tf.fill(input, value) else: raise NotImplementedError self.output_dim = [dim.value for dim in output.shape] return output def compute_output_shape(self, input_shape): return tuple(self.output_dim) # output = Lambda(lambda x: tf.fill(x, value))(input) output = Fill()(input) # return output ''') def _layer_Slice(self): self.add_body(0, ''' def __slice(input, start, end, **kargs): return Lambda(lambda x: tf.strided_slice(x, start, end, **kargs))(input) ''') def _layer_Unstack(self): self.add_body(0, ''' def __unstack(input, num, axis): return Lambda(lambda x: tf.unstack(x, num, axis))(input) ''') def _layer_Mul(self): self.add_body(0, ''' class my_mul(keras.layers.Layer): def __init__(self, **kwargs): super(my_mul, self).__init__(**kwargs) def call(self, inputs): res = inputs[0] * inputs[1] self.output_shapes = K.int_shape(res) return res def compute_output_shape(self, input_shape): return self.output_shapes ''') def _layer_Add(self): self.add_body(0, ''' class my_add(keras.layers.Layer): def __init__(self, **kwargs): super(my_add, self).__init__(**kwargs) def call(self, inputs): res = inputs[0] + inputs[1] self.output_shapes = K.int_shape(res) return res def compute_output_shape(self, input_shape): return self.output_shapes ''') def _layer_Sub(self): self.add_body(0, ''' class my_sub(keras.layers.Layer): def __init__(self, **kwargs): super(my_sub, self).__init__(**kwargs) def call(self, inputs): res = inputs[0] - inputs[1] self.output_shapes = K.int_shape(res) return res def compute_output_shape(self, input_shape): return self.output_shapes ''') def _layer_Shape(self): self.add_body(0, ''' def __shape(input): return Lambda(lambda x: tf.shape(x))(input) ''') # def _layer_Constant(self): # self.add_body(0, ''' # class my_constant(keras.layers.Layer): # def __init__(self, value, **kwargs): # super(my_constant, self).__init__(**kwargs) # self._value = value # # the input is dummy, just for creating keras graph. # def call(self, dummy): # res = K.constant(self._value) # self.output_shapes = K.int_shape(res) # return res # def compute_output_shape(self, input_shape): # return self.output_shapes # ''') def _layer_Mul_Constant(self): self.add_body(0, ''' def mul_constant(weight_factor, layer_name): weight = Lambda(lambda x: x*weight_factor) weight(layer_name) return weight.output ''')
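# --- Illustrative usage sketch (file names are hypothetical; run() is
# inherited from the Emitter base class, as in CaffeEmitter below) ---
#
#   emitter = Keras2Emitter(('ir_network.pb', 'ir_weights.npy'))
#   emitter.run('kit_model.py', None, 'test')
#
#   # the emitted script exposes KitModel(weight_file):
#   from kit_model import KitModel
#   model = KitModel('ir_weights.npy')
#   model.summary()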
class CaffeEmitter(Emitter): def __init__(self, model): from six import string_types as _string_types super(CaffeEmitter, self).__init__() if isinstance(model, _string_types): network_path = model else: network_path = model[0] self._load_weights(model[1]) self.IR_graph = IRGraph(network_path) super(CaffeEmitter, self)._build() @property def header_code(self): return """import numpy as np import sys, argparse import caffe from caffe import layers as L from caffe import params as P from caffe import to_proto from six import text_type as _text_type n = caffe.NetSpec() __weights_dict = dict() def load_weights(weight_file): if weight_file == None: return try: weights_dict = np.load(weight_file).item() except: weights_dict = np.load(weight_file, encoding='bytes').item() return weights_dict def KitModel(weight_file = None): """ @property def end_code(self): return """ def make_net(prototxt): KitModel() with open(prototxt, 'w') as fpb: print(n.to_proto(), file=fpb) def gen_weight(weight_file, model, prototxt): global __weights_dict __weights_dict = load_weights(weight_file) net = caffe.Net(prototxt, caffe.TRAIN) for key in __weights_dict: org_w = __weights_dict[key]['weights'] net.params[key][0].data.flat = __weights_dict[key]['weights'] if 'bias' in __weights_dict[key]: net.params[key][1].data.flat = __weights_dict[key]['bias'] net.save(model) if __name__=='__main__': parser = argparse.ArgumentParser(description='Generate caffe model and prototxt') parser.add_argument('--weight_file', '-w', type=_text_type, default='IR weight file') parser.add_argument('--prototxt', '-p', type=_text_type, default='caffe_converted.prototxt') parser.add_argument('--model', '-m', type=_text_type, default='caffe_converted.caffemodel') args = parser.parse_args() make_net(args.prototxt) gen_weight(args.weight_file, args.model, args.prototxt) """ def gen_code(self, phase = 'test'): self.phase = phase self.add_body(0, self.header_code) for layer in self.IR_graph.topological_sort: current_node = self.IR_graph.get_node(layer) node_type = current_node.type #print("========current_node={}".format(current_node.layer)) if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) func(current_node) else: print("CaffeEmitter has not supported operator [%s]." 
% (node_type)) self.emit_UNKNOWN(current_node) # self.add_body(1, "return n.{}".format( # ','.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers]))) self.add_body(0, "") #for test self.add_body(0,self.end_code) return self.body_code def run(self, dstNetworkPath, dstWeightPath = None, phase = 'test'): super(CaffeEmitter, self).run(dstNetworkPath, dstWeightPath, phase) if self.weight_loaded: self.save_weights(self.weights_dict, dstWeightPath) @staticmethod def _shapeToStr(shapes): return [dim.size if dim.size > 0 else 1 for dim in shapes.dim] def check_if_need_transpose(self, IR_node): parent = self.IR_graph.get_parent(IR_node.name, [0]) while parent.type == 'Flatten': parent = self.IR_graph.get_parent(parent.name, [0]) dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim) if dim > 2: original_dims = self.weights_dict[IR_node.name]['weights'].shape dims = [i.size for i in parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]] + [-1] self.weights_dict[IR_node.name]['weights'] = np.reshape(self.weights_dict[IR_node.name]['weights'], dims) self.weights_dict[IR_node.name]['weights'] = np.transpose(self.weights_dict[IR_node.name]['weights'], [dim - 2] + list(range(0, dim - 2)) + [dim - 1]) self.weights_dict[IR_node.name]['weights'] = np.reshape(self.weights_dict[IR_node.name]['weights'], original_dims) def emit_Conv(self, IR_node): self.used_layers.add(IR_node.type) self.add_body(1, "n.{:<15} = L.Convolution(n.{}, kernel_size={}, stride={}, num_output={}, pad={}, group={}, \ bias_term={}, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('kernel_shape')[0], IR_node.get_attr('strides')[1], IR_node.get_attr('kernel_shape')[-1], IR_node.get_attr('pads')[1], IR_node.get_attr('group', 1), IR_node.get_attr('use_bias', False))) dim = len(IR_node.get_attr('strides')) - 2 if self.weight_loaded: self.weights_dict[IR_node.name]['weights'] = np.transpose(self.weights_dict[IR_node.name]['weights'], [dim + 1, dim] + list(range(0, dim))) def emit_Pool(self, IR_node): self.used_layers.add(IR_node.type) pooling_type = IR_node.get_attr('pooling_type') if pooling_type == 'MAX': pooling_type = P.Pooling.MAX elif pooling_type == 'AVG': pooling_type = P.Pooling.AVE elif pooling_type == 'STOCHASTIC': pooling_type = P.Pooling.STOCHASTIC else: raise ValueError if IR_node.layer.attr['global_pooling'].b: self.used_layers.add('GlobalPooling') self.add_body(1, "n.{:<15} = L.Pooling(n.{}, pool={}, stride={}, global_pooling=True, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node), pooling_type, IR_node.get_attr('strides')[1])) else: self.add_body(1, "n.{:<15} = L.Pooling(n.{}, pool={}, kernel_size={}, stride={}, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node), pooling_type, IR_node.get_attr('kernel_shape')[1], IR_node.get_attr('strides')[1])) def emit_UNKNOWN(self, IR_node): print(IR_node.IR_layer.name) def emit_DataInput(self, IR_node): shape = self._shapeToStr(IR_node.get_attr('shape')) shape = [shape[0], shape[-1]] + shape[1:-1] self.add_body(1, "n.{:<15} = L.Input(shape=[dict(dim={})], ntop=1)".format( IR_node.variable_name, shape)) def emit_Dropout(self, IR_node): self.used_layers.add(IR_node.type) in_place = True self.add_body(1, "n.{:<15} = L.Dropout(n.{}, dropout_ratio={} , in_place={}, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node), 1 - IR_node.get_attr('keep_prob'), in_place)) def emit_FullyConnected(self, IR_node):
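# Caffe's InnerProduct stores weights as (num_output, input_dim) while
# the IR stores (input_dim, num_output); check_if_need_transpose()
# first reorders flattened NHWC activations to NCHW when the FC layer
# follows a >2-D feature map, then the matrix is transposed below.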
self.used_layers.add(IR_node.type) self.add_body(1, "n.{:<15} = L.InnerProduct(n.{}, num_output={}, bias_term={}, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.layer.attr["units"].i, IR_node.get_attr('use_bias', False))) if self.weight_loaded: self.check_if_need_transpose(IR_node) self.weights_dict[IR_node.name]['weights'] = np.transpose(self.weights_dict[IR_node.name]['weights'], (1, 0)) def emit_Flatten(self, IR_node): IR_node.real_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name # def emit_Tanh(self, IR_node): # self._emit_activation(IR_node, 'ops.tanh') def emit_Relu(self, IR_node): self.used_layers.add(IR_node.type) in_place = True self.add_body(1, "n.{:<15} = L.ReLU(n.{}, in_place={}, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node), in_place)) def emit_Softmax(self, IR_node): self.used_layers.add(IR_node.type) self.add_body(1, "n.{:<15} = L.Softmax(n.{}, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node)))
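# --- Illustrative sketch of driving the emitted Caffe script
# (hypothetical file names; flags as defined in end_code above) ---
#
#   python converted_caffe.py -w ir_weights.npy \
#       -p caffe_converted.prototxt -m caffe_converted.caffemodel
#
# make_net() writes the prototxt from the NetSpec; gen_weight() then
# reloads the IR weight dict and copies each blob into net.params
# before saving the .caffemodel.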
class CntkEmitter(Emitter): dtype_map = { graph_pb2.DT_FLOAT16: "np.float16", graph_pb2.DT_FLOAT32: "np.float32", graph_pb2.DT_FLOAT64: "np.float64", graph_pb2.DT_INT16: "np.int16", graph_pb2.DT_INT32: "np.int32", graph_pb2.DT_INT64: "np.int64", graph_pb2.DT_UINT8: "np.uint8", graph_pb2.DT_UINT16: "np.uint16" } def __init__(self, model): from six import string_types as _string_types super(CntkEmitter, self).__init__() if isinstance(model, _string_types): network_path = model else: network_path = model[0] self._load_weights(model[1]) self.IR_graph = IRGraph(network_path) super(CntkEmitter, self)._build() @property def header_code(self): return """import numpy as np import cntk from cntk import ops, layers from cntk.contrib.crosstalkcaffe.unimodel.cntkinstance import BlockApiSetup __weights_dict = dict() def load_weights(weight_file): if weight_file == None: return try: weights_dict = np.load(weight_file).item() except: weights_dict = np.load(weight_file, encoding='bytes').item() return weights_dict def KitModel(weight_file = None): global __weights_dict __weights_dict = load_weights(weight_file) """ def gen_code(self, phase='test'): self.phase = phase self.add_body(0, self.header_code) for layer in self.IR_graph.topological_sort: current_node = self.IR_graph.get_node(layer) node_type = current_node.type if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) func(current_node) else: print("CntkEmitter has not supported operator [%s]." % (node_type)) self.emit_UNKNOWN(current_node) self.add_body( 1, "return {}".format(','.join([ self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers ]))) self.add_body(0, "") for i in self.used_layers: func = getattr(self, "_layer_" + i) func() return self.body_code @staticmethod def _shapeToStr(shapes): new_shape = filter(lambda x: x > -1, [dim.size for dim in shapes.dim]) return ', '.join('%s' % i for i in new_shape) def emit_Convolution(self, IR_node): if self.weight_loaded: self.used_layers.add(IR_node.type) dim = len(IR_node.layer.attr['strides'].list.i) - 2 padding = [False ] + [IR_node.layer.attr['padding'].s == b'SAME'] * dim self.add_body( 1, "{:<15} = convolution({}, strides = ({},), auto_padding = [{}], name = '{}')" .format( IR_node.variable_name, self.parent_variable_name(IR_node), ', '.join( '%s' % i for i in IR_node.layer.attr['strides'].list.i[1:-1]), ', '.join('%s' % i for i in padding), IR_node.name)) else: self.add_body( 1, "{:<15} = Convolution(name = '{}', num_filters = {}, filter_shape = ({}), strides = ({},), pad = {}, bias = {})({})\n" .format( IR_node.variable_name, IR_node.name, IR_node.layer.attr["filter"].list.i[-1], ', '.join( '%s' % i for i in IR_node.layer.attr["kernel_size"].list.i[:-2]), ', '.join( '%s' % i for i in IR_node.layer.attr['strides'].list.i[1:-1]), IR_node.layer.attr['padding'].s == b'SAME', IR_node.layer.attr['use_bias'].b, self.parent_variable_name(IR_node))) def emit_Pool(self, IR_node): input_node = self.IR_graph.get_node( IR_node.in_edges[0]).real_variable_name if IR_node.layer.attr['global_pooling'].b: self.used_layers.add('GlobalPooling') self.add_body( 1, "{:<15} = global_pooling({}, '{}', name = '{}')".format( IR_node.variable_name, input_node, IR_node.layer.attr['pooling_type'].s.decode('utf-8'), IR_node.name)) else: for e in IR_node.IR_layer.attr["dilation_rate"].list.i: assert e == 1 pool_size = ', '.join( '%s' % id for id in IR_node.layer.attr['window_shape'].list.i[1:-1]) strides = ', '.join( '%s' % id for id in
IR_node.layer.attr['strides'].list.i[1:-1]) padding = IR_node.layer.attr['padding'].s == b'SAME' if self.weight_loaded: self.used_layers.add(IR_node.type) self.add_body( 1, "{:<15} = pooling({}, '{}', filter_shape = ({}), strides = ({}), pad = {}, name = '{}')" .format( IR_node.variable_name, input_node, IR_node.layer.attr['pooling_type'].s.decode('utf-8'), pool_size, strides, padding, IR_node.name)) else: raise NotImplementedError("") def emit_UNKNOWN(self, IR_node): print(IR_node.IR_layer.name) def emit_DataInput(self, IR_node): shape_str = self._shapeToStr(IR_node.IR_layer.attr["shape"].shape) dtype_str = ", dtype = {}".format( self.dtype_map[IR_node.layer.attr['dtype'].type]) if 'dtype' in IR_node.layer.attr else "" self.add_body( 1, "{:<15} = cntk.input_variable(({},) {}, name = '{}')\n".format( IR_node.variable_name, shape_str, dtype_str, IR_node.name)) def emit_Dropout(self, IR_node): parent = self.IR_graph.get_parent(IR_node.name, [0]) if self.phase == 'train': self.add_body( 1, "{:<15} = Dropout({}, name = '{}')({})".format( IR_node.variable_name, 1 - IR_node.IR_layer.attr["keep_prob"].f, IR_node.name, parent.real_variable_name)) else: IR_node.real_name = parent.real_name def emit_FullyConnected(self, IR_node): input_node = self.parent_variable_name(IR_node) if self.weight_loaded: self.used_layers.add(IR_node.type) self.add_body( 1, "{:<15} = dense({}, name = '{}')".format( IR_node.variable_name, input_node, IR_node.name)) else: self.add_body( 1, "{:<15} = Dense({}, bias = {}, name = '{}')({})".format( IR_node.variable_name, IR_node.layer.attr["units"].i, IR_node.layer.attr['use_bias'].b, IR_node.name, input_node)) def emit_Flatten(self, IR_node): self.add_body( 1, "{:<15} = ops.reshape({}, (-1,), name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.name)) def emit_Reshape(self, IR_node): self.add_body( 1, "{:<15} = cntk.reshape({}, shape = ({},), name = '{}')".format( IR_node.variable_name, self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name, ', '.join('%s' % i for i in IR_node.layer.attr["shape"].list.i), IR_node.name)) def _emit_activation(self, IR_node, op_name): self.add_body( 1, "{:<15} = layers.Activation(activation = {}, name = '{}')({})". format(IR_node.variable_name, op_name, IR_node.name, self.parent_variable_name(IR_node))) def emit_Tanh(self, IR_node): self._emit_activation(IR_node, 'ops.tanh') def emit_Relu(self, IR_node): self._emit_activation(IR_node, 'ops.relu') def emit_Softmax(self, IR_node): self._emit_activation(IR_node, 'ops.softmax') def emit_Sigmoid(self, IR_node): self._emit_activation(IR_node, 'ops.sigmoid') def emit_RNNs(self, IR_node, func): assert False def emit_LSTM(self, IR_node): return self.emit_RNNs(IR_node, "LSTM") def emit_GRU(self, IR_node): return self.emit_RNNs(IR_node, "GRU") def emit_Add(self, IR_node): if len(IR_node.in_edges) > 1: inputs = '+ '.join( self.IR_graph.get_node(i).real_variable_name for i in IR_node.in_edges) self.add_body(1, "{:<15} = {}".format(IR_node.variable_name, inputs)) def emit_Concat(self, IR_node): inputs = ', '.join( self.IR_graph.get_node(i).real_variable_name for i in IR_node.in_edges) self.add_body( 1, "{:<15} = cntk.splice({}, axis = {}, name = '{}')".format( IR_node.variable_name, inputs, IR_node.layer.attr['axis'].i - 1, IR_node.name)) def emit_BatchNorm(self, IR_node): self.used_layers.add(IR_node.type) self.add_body( 1, "{:<15} = batch_normalization({}, epsilon = {}, name = '{}')".
format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.layer.attr['epsilon'].f, IR_node.name)) def emit_Pad(self, IR_node): if IR_node.layer.attr['mode'].s == b'CONSTANT': mode = 'mode = ops.CONSTANT_PAD, constant_value = {}'.format( IR_node.layer.attr['constant_values'].f) elif IR_node.layer.attr['mode'].s == b'REFLECT': mode = 'mode = ops.REFLECT_PAD' elif IR_node.layer.attr['mode'].s == b'SYMMETRIC': mode = 'mode = ops.SYMMETRIC_PAD' else: assert False padding_str = ', '.join( '(%s, %s)' % (IR_node.layer.attr['paddings'].list.i[idx], IR_node.layer.attr['paddings'].list.i[idx + 1]) for idx in range(2, len(IR_node.layer.attr['paddings'].list.i), 2)) self.add_body( 1, "{:<15} = ops.pad({}, pattern = [{}], {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), padding_str, mode)) def emit_Squeeze(self, IR_node): IR_node.real_name = self.IR_graph.get_node( IR_node.in_edges[0]).real_name def emit_ReduceMean(self, IR_node): self.add_body( 1, "{:<15} = ops.reduce_mean({}, axis = ({}), name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), ', '.join('%s' % (i - 1) for i in IR_node.layer.attr['axes'].list.i), IR_node.name)) def emit_LRN(self, IR_node): self.used_layers.add(IR_node.type) self.add_body( 1, "{:<15} = lrn({}, k = 1, n = {}, alpha = {}, beta = {}, name = '{}')" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.layer.attr['size'].i, IR_node.layer.attr['alpha'].f, IR_node.layer.attr['beta'].f, IR_node.name)) def _layer_LRN(self): self.add_body( 0, """ def lrn(input, **kwargs): dim = len(input.output.shape) input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1))) layer = BlockApiSetup.lrn(**kwargs)(input) layer = cntk.transpose(layer, list(range(1, dim)) + [0]) return layer """) def _layer_FullyConnected(self): self.add_body( 0, """ def dense(input, name, **kwargs): w = __weights_dict[name]['weights'] b = __weights_dict[name]['bias'] if 'bias' in __weights_dict[name] else None return BlockApiSetup.linear(output_shape = w.shape[1], input_shape = w.shape[0], scale_init = w, bias_init = b, name = name, **kwargs)(input) """) def _layer_Convolution(self): self.add_body( 0, """ def convolution(input, name, **kwargs): dim = __weights_dict[name]['weights'].ndim weight = np.transpose(__weights_dict[name]['weights'], [dim - 1, dim - 2] + list(range(0, dim - 2))) w = cntk.Parameter(init = weight, name = name + '_weight') input = cntk.transpose(input, [dim - 2] + list(range(0, dim - 2))) layer = ops.convolution(w, input, **kwargs) if 'bias' in __weights_dict[name]: bias = np.reshape(__weights_dict[name]['bias'], [-1] + [1] * (dim - 2)) b = cntk.Parameter(init = bias, name = name + '_bias') layer = layer + b layer = cntk.transpose(layer, list(range(1, dim - 1)) + [0]) return layer """) def _layer_Pool(self): self.add_body( 0, """ def pooling(input, type, name, **kwargs): dim = len(input.output.shape) input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1))) layer = layers.MaxPooling(**kwargs)(input) if type == 'MAX' else layers.AveragePooling(**kwargs)(input) layer = cntk.transpose(layer, list(range(1, dim)) + [0]) return layer """) def _layer_GlobalPooling(self): self.add_body( 0, """ def global_pooling(input, type, **kwargs): dim = len(input.output.shape) input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1))) layer = layers.GlobalMaxPooling(**kwargs)(input) if type == 'MAX' else layers.GlobalAveragePooling(**kwargs)(input) layer = cntk.transpose(layer, list(range(1, dim)) + [0]) 
return layer """) def _layer_BatchNorm(self): self.add_body( 0, """ def batch_normalization(input, name, epsilon, **kwargs): mean = cntk.Parameter(init = __weights_dict[name]['mean'], name = name + "_mean") var = cntk.Parameter(init = __weights_dict[name]['var'], name = name + "_var") layer = (input - mean) / cntk.sqrt(var + epsilon) if 'scale' in __weights_dict[name]: scale = cntk.Parameter(init = __weights_dict[name]['scale'], name = name + "_scale") layer = scale * layer if 'bias' in __weights_dict[name]: bias = cntk.Parameter(init = __weights_dict[name]['bias'], name = name + "_bias") layer = layer + bias return layer """)
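# --- Illustrative sketch (hypothetical file names and input shape) ---
# The emitted CNTK script returns the output function(s) from
# KitModel(); a minimal consumer might look like:
#
#   import numpy as np
#   from cntk_kit_model import KitModel
#   model = KitModel('ir_weights.npy')
#   out = model.eval({model.arguments[0]:
#                     np.zeros((3, 224, 224), dtype=np.float32)})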
class TensorflowEmitter(Emitter): dtype_map = { graph_pb2.DT_FLOAT16 : "tf.float16", graph_pb2.DT_FLOAT32 : "tf.float32", graph_pb2.DT_FLOAT64 : "tf.float64", graph_pb2.DT_INT16 : "tf.int16", graph_pb2.DT_INT32 : "tf.int32", graph_pb2.DT_INT64 : "tf.int64", graph_pb2.DT_UINT8 : "tf.uint8", graph_pb2.DT_UINT16 : "tf.uint16" } @property def header_code(self): return """import tensorflow as tf __weights_dict = dict() is_train = {} def load_weights(weight_file): import numpy as np if weight_file == None: return try: weights_dict = np.load(weight_file).item() except: weights_dict = np.load(weight_file, encoding='bytes').item() return weights_dict def KitModel(weight_file = None): global __weights_dict __weights_dict = load_weights(weight_file) """.format(self.trainable) def __init__(self, model): super(TensorflowEmitter, self).__init__() from six import string_types as _string_types if isinstance(model, _string_types): network_path = model else: network_path = model[0] self._load_weights(model[1]) self.IR_graph = IRGraph(network_path) super(TensorflowEmitter, self)._build() def gen_code(self, phase): self.trainable = (phase == 'train') self.add_body(0, self.header_code) for layer in self.IR_graph.topological_sort: current_node = self.IR_graph.get_node(layer) node_type = current_node.type if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) func(current_node) else: print("TensorflowEmitter has not supported operator [%s]." % (node_type)) self.emit_UNKNOWN(current_node) self.add_body(1, "return {}, {}".format( ', '.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.input_layers]), ', '.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers]))) self.add_body(0, "") for i in self.used_layers: func = getattr(self, "_layer_" + i) func() return self.body_code @staticmethod def _shapeToStr(shapes): ret = [dim.size if dim.size != -1 else 'None' for dim in shapes.dim] return ', '.join('%s' % i for i in ret) def emit_Conv(self, IR_node): self.used_layers.add(IR_node.type) strides_str = ', '.join('%s' % i for i in IR_node.get_attr('strides')[1:-1]) input_node, padding = self._defuse_padding(IR_node) self.add_body(1, "{:<15} = convolution({}, group={}, strides=[{}], padding='{}', name='{}')".format( IR_node.variable_name, input_node, IR_node.get_attr('group', 1), strides_str, padding, IR_node.name)) def _defuse_padding(self, IR_node, extra_str=""): auto_pad = IR_node.get_attr('auto_pad') if auto_pad: input_node = self.parent_variable_name(IR_node) if auto_pad == 'VALID': padding = 'VALID' elif auto_pad.startswith("SAME"): padding = 'SAME' else: raise ValueError("Unknown padding type [{}].".format(auto_pad)) return input_node, padding else: padding = IR_node.get_attr("pads") padding = convert_onnx_pad_to_tf(padding) if is_valid_padding(padding) == False: input_node = IR_node.variable_name + '_pad' self.add_body(1, "{:<15} = tf.pad({}, paddings = {}{})".format( input_node, self.parent_variable_name(IR_node), padding, extra_str )) else: input_node = self.parent_variable_name(IR_node) return input_node, 'VALID' def emit_Pool(self, IR_node): pooling_type = IR_node.get_attr('pooling_type') if pooling_type == 'MAX': op = 'max_pool' padding_const = ", constant_values=float('-Inf')" elif pooling_type == 'AVG': op = 'avg_pool' padding_const = "" else: raise ValueError("unknown pooling type [{}].".format(pooling_type)) arrlen = len(IR_node.get_attr('strides')) dim_str = '3d' if arrlen == 5 else "" if 
IR_node.layer.attr['global_pooling'].b: self.add_body(1, "{:<15} = tf.nn.{}{}({}, [1] + {}.get_shape().as_list()[1:-1] + [1], strides = [1] * {}, padding = 'VALID', name = '{}')".format( IR_node.variable_name, op, dim_str, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node), arrlen, IR_node.name)) else: kernel_shape_str = ', '.join('%s' % i for i in IR_node.get_attr('kernel_shape')) strides_str = ', '.join('%s' % i for i in IR_node.get_attr('strides')) input_node, padding = self._defuse_padding(IR_node, padding_const) self.add_body(1, "{:<15} = tf.nn.{}{}({}, [{}], [{}], padding='{}', name='{}')".format( IR_node.variable_name, op, dim_str, input_node, kernel_shape_str, strides_str, padding, IR_node.name)) def emit_UNKNOWN(self, IR_node): print(IR_node.name) def emit_DataInput(self, IR_node): assert not IR_node.in_edges shape_str = self._shapeToStr(IR_node.layer.attr["shape"].shape) if 'dtype' in IR_node.layer.attr: dtype_str = "{}, ".format(self.dtype_map[IR_node.layer.attr['dtype'].type]) else: dtype_str = "tf.float32," code = "{:<15} = tf.placeholder({} shape = ({}), name = '{}')".format( IR_node.variable_name, dtype_str, shape_str, IR_node.name ) self.add_body(1, code) def emit_Dropout(self, IR_node): parent = self.IR_graph.get_parent(IR_node.name, [0]) if self.trainable: self.add_body(1, "{:<15} = Dropout(name = '{}', dropout_rate = {})({})".format( IR_node.variable_name, IR_node.name, 1 - IR_node.IR_layer.attr["keep_prob"].f, parent.real_variable_name)) else: IR_node.real_name = parent.real_name def emit_FullyConnected(self, IR_node): if IR_node.name in self.weights_dict and 'weights' in self.weights_dict[IR_node.name]: kernel_str = "kernel_initializer = tf.constant_initializer(__weights_dict['{}']['weights']), ".format(IR_node.name) else: kernel_str = "" if IR_node.name in self.weights_dict and 'bias' in self.weights_dict[IR_node.name]: bias_str = "bias_initializer = tf.constant_initializer(__weights_dict['{}']['bias']), ".format(IR_node.name) else: bias_str = "" code = "{:<15} = tf.layers.dense({}, {}, {}{}use_bias = {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.layer.attr['units'].i, kernel_str, bias_str, IR_node.layer.attr['use_bias'].b) self.add_body(1, code) def emit_Flatten(self, IR_node): #self._emit_unary_operation(IR_node, "contrib.layers.flatten") self.add_body(1, "{:<15} = tf.contrib.layers.flatten({})".format( IR_node.variable_name, self.parent_variable_name(IR_node))) def emit_Reshape(self, IR_node): self.add_body(1, "{:<15} = tf.reshape({}, [{}], '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), ', '.join('%s' % i for i in IR_node.get_attr('shape')), IR_node.name)) def _emit_unary_operation(self, IR_node, op_name): self.add_body(1, "{:<15} = tf.{}({}, name = '{}')".format( IR_node.variable_name, op_name, self.parent_variable_name(IR_node), IR_node.name)) def emit_Tanh(self, IR_node): self._emit_unary_operation(IR_node, 'tanh') def emit_Elu(self, IR_node): self._emit_unary_operation(IR_node, 'nn.elu') def emit_Relu(self, IR_node): self._emit_unary_operation(IR_node, 'nn.relu') def emit_Relu6(self, IR_node): self._emit_unary_operation(IR_node, 'nn.relu6') def emit_CRelu(self, IR_node): self._emit_unary_operation(IR_node, 'nn.crelu') def emit_Softmax(self, IR_node): self._emit_unary_operation(IR_node, 'nn.softmax') def emit_Sigmoid(self, IR_node): self._emit_unary_operation(IR_node, 'sigmoid') def emit_Embedding(self, IR_node): raise NotImplementedError() ret = "{:<15} = Embedding(input_dim = {}, 
output_dim = {}, mask_zero = {})({})".format( IR_node.name, IR_node.IR_layer.attr['input_dim'].i, IR_node.IR_layer.attr['output_dim'].i, IR_node.IR_layer.attr['mask_zero'].b, IR_node.in_edges[0]) return ret def emit_RNNs(self, IR_node, func): assert False def emit_LSTM(self, IR_node): return self.emit_RNNs(IR_node, "LSTM") def emit_GRU(self, IR_node): return self.emit_RNNs(IR_node, "GRU") def emit_Add(self, IR_node): self.add_body(1, "{:<15} = {}".format( IR_node.variable_name, ' +'.join('%s' % self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges))) def emit_Concat(self, IR_node): self.add_body(1, "{:<15} = tf.concat([{}], {}, name = '{}')".format( IR_node.variable_name, ', '.join(self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges), IR_node.layer.attr['axis'].i, IR_node.name)) def emit_BatchNorm(self, IR_node): self.used_layers.add(IR_node.type) self.add_body(1, "{:<15} = batch_normalization({}, variance_epsilon={}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('epsilon'), IR_node.name)) def emit_Pad(self, IR_node): padding = IR_node.get_attr('pads') padding = convert_onnx_pad_to_tf(padding) mode = IR_node.get_attr('mode', 'constant') if mode == 'constant' or mode == 'reflect': mode = mode.upper() elif mode == 'edge': mode = 'SYMMETRIC' else: raise NotImplementedError("Not support padding mode {}.".format(mode)) self.add_body(1, "{:<15} = tf.pad({}, {}, '{}', name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), padding, mode, IR_node.variable_name)) def emit_Squeeze(self, IR_node): self.add_body(1, "{:<15} = tf.squeeze({}, [{}], name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), ', '.join('%s' % axis for axis in IR_node.layer.attr['axes'].list.i), IR_node.name)) def emit_ReduceMean(self, IR_node): self.add_body(1, "{:<15} = tf.reduce_mean({}, [{}], {}, name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), ','.join('%s' % i for i in IR_node.get_attr('axes')), IR_node.get_attr('keepdims'), IR_node.name)) def emit_LRN(self, IR_node): self.add_body(1, "{:<15} = tf.nn.lrn({}, {}, alpha = {}, beta = {}, name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('size') - 1, IR_node.layer.attr['alpha'].f / (IR_node.layer.attr['size'].i * 2 - 1), IR_node.get_attr('beta'), IR_node.name)) def emit_SeparableConv(self, IR_node): self.used_layers.add(IR_node.type) strides_str = ', '.join('%s' % i for i in IR_node.get_attr('strides')) input_node, padding = self._defuse_padding(IR_node) self.add_body(1, "{:<15} = separable_convolution({}, strides = [{}], padding = '{}', name = '{}')".format( IR_node.variable_name, input_node, strides_str, padding, IR_node.name)) def emit_DepthwiseConv(self, IR_node): self.used_layers.add(IR_node.type) strides_str = ', '.join('%s' % i for i in IR_node.layer.attr['strides'].list.i) input_node, padding = self._defuse_padding(IR_node) self.add_body(1, "{:<15} = depthwise_convolution({}, strides = [{}], padding = '{}', name = '{}')".format( IR_node.variable_name, input_node, strides_str, padding, IR_node.name)) def _layer_Conv(self): self.add_body(0, """ def convolution(input, name, group, **kwargs): w = tf.Variable(__weights_dict[name]['weights'], trainable=is_train, name=name + "_weight") if group == 1: layer = tf.nn.convolution(input, w, **kwargs) else: weight_groups = tf.split(w, num_or_size_splits=group, axis=-1) xs = tf.split(input, num_or_size_splits=group, 
axis=-1) convolved = [tf.nn.convolution(x, weight, **kwargs) for (x, weight) in zip(xs, weight_groups)] layer = tf.concat(convolved, axis=-1) if 'bias' in __weights_dict[name]: b = tf.Variable(__weights_dict[name]['bias'], trainable=is_train, name=name + "_bias") layer = layer + b return layer""") def _layer_BatchNorm(self): self.add_body(0, """ def batch_normalization(input, name, **kwargs): mean = tf.Variable(__weights_dict[name]['mean'], name = name + "_mean", trainable = is_train) variance = tf.Variable(__weights_dict[name]['var'], name = name + "_var", trainable = is_train) offset = tf.Variable(__weights_dict[name]['bias'], name = name + "_bias", trainable = is_train) if 'bias' in __weights_dict[name] else None scale = tf.Variable(__weights_dict[name]['scale'], name = name + "_scale", trainable = is_train) if 'scale' in __weights_dict[name] else None return tf.nn.batch_normalization(input, mean, variance, offset, scale, name = name, **kwargs) """) def _layer_SeparableConv(self): self.add_body(0, """ def separable_convolution(input, name, **kwargs): depthwise = tf.Variable(__weights_dict[name]['depthwise_filter'], trainable = is_train, name = name + "_df") pointwise = tf.Variable(__weights_dict[name]['pointwise_filter'], trainable = is_train, name = name + "_pf") layer = tf.nn.separable_conv2d(input, depthwise, pointwise, **kwargs) if 'bias' in __weights_dict[name]: b = tf.Variable(__weights_dict[name]['bias'], trainable = is_train, name = name + "_bias") layer = layer + b return layer""") def _layer_DepthwiseConv(self): self.add_body(0, """ def depthwise_convolution(input, name, **kwargs): depthwise = tf.Variable(__weights_dict[name]['weights'], trainable = is_train, name = name + "_df") layer = tf.nn.depthwise_conv2d(input, depthwise, **kwargs) if 'bias' in __weights_dict[name]: b = tf.Variable(__weights_dict[name]['bias'], trainable = is_train, name = name + "_bias") layer = layer + b return layer""")
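# --- Illustrative sketch (TF 1.x session API; file names hypothetical) ---
# The emitted TensorFlow script's KitModel() returns the input and
# output tensors (see gen_code above), so inference is plain session code:
#
#   import tensorflow as tf
#   from tf_kit_model import KitModel
#   input_tensor, output_tensor = KitModel('ir_weights.npy')
#   with tf.Session() as sess:
#       sess.run(tf.global_variables_initializer())
#       preds = sess.run(output_tensor,
#                        feed_dict={input_tensor: batch})  # batch: NHWC array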
class CaffeEmitter(Emitter): def __init__(self, model): from six import string_types as _string_types super(CaffeEmitter, self).__init__() if isinstance(model, _string_types): network_path = model else: network_path = model[0] self._load_weights(model[1]) self.IR_graph = IRGraph(network_path) super(CaffeEmitter, self)._build() @property def header_code(self): return """from __future__ import print_function import numpy as np import sys, argparse import caffe from caffe import layers as L from caffe import params as P from caffe import to_proto from six import text_type as _text_type __weights_dict = dict() def load_weights(weight_file): if weight_file == None: return try: weights_dict = np.load(weight_file).item() except: weights_dict = np.load(weight_file, encoding='bytes').item() return weights_dict def KitModel(weight_file = None): n = caffe.NetSpec() """ @property def end_code(self): return """ return n def make_net(prototxt): n = KitModel() with open(prototxt, 'w') as fpb: print(n.to_proto(), file=fpb) def gen_weight(weight_file, model, prototxt): global __weights_dict __weights_dict = load_weights(weight_file) net = caffe.Net(prototxt, caffe.TRAIN) for key in __weights_dict: if 'weights' in __weights_dict[key]: net.params[key][0].data.flat = __weights_dict[key]['weights'] elif 'mean' in __weights_dict[key]: net.params[key][0].data.flat = __weights_dict[key]['mean'] net.params[key][1].data.flat = __weights_dict[key]['var'] if 'scale' in __weights_dict[key]: net.params[key][2].data.flat = __weights_dict[key]['scale'] elif 'scale' in __weights_dict[key]: net.params[key][0].data.flat = __weights_dict[key]['scale'] if 'bias' in __weights_dict[key]: net.params[key][1].data.flat = __weights_dict[key]['bias'] if 'gamma' in __weights_dict[key]: # used for prelu, not sure if other layers use this too net.params[key][0].data.flat = __weights_dict[key]['gamma'] net.save(model) return net if __name__=='__main__': parser = argparse.ArgumentParser(description='Generate caffe model and prototxt') parser.add_argument('--weight_file', '-w', type=_text_type, default='IR weight file') parser.add_argument('--prototxt', '-p', type=_text_type, default='caffe_converted.prototxt') parser.add_argument('--model', '-m', type=_text_type, default='caffe_converted.caffemodel') args = parser.parse_args() # For some reason argparse gives us unicode, so we need to convert to str first make_net(str(args.prototxt)) gen_weight(str(args.weight_file), str(args.model), str(args.prototxt)) """ def gen_code(self, phase='test'): self.phase = phase self.add_body(0, self.header_code) #for test # with open("graph.txt", 'w') as f: # for layer in self.IR_graph.topological_sort: # current_node = self.IR_graph.get_node(layer) # print("========current_node=========\n{}".format(current_node.layer), file=f) #test end for layer in self.IR_graph.topological_sort: current_node = self.IR_graph.get_node(layer) node_type = current_node.type #print("========current_node={}".format(current_node.layer)) if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) func(current_node) else: print("CaffeEmitter has not supported operator [%s]."
% (node_type)) self.emit_UNKNOWN(current_node) self.add_body(0, "") self.add_body(0, self.end_code) return self.body_code def run(self, dstNetworkPath, dstWeightPath=None, phase='test'): super(CaffeEmitter, self).run(dstNetworkPath, dstWeightPath, phase) if self.weight_loaded: self.save_weights(self.weights_dict, dstWeightPath) @staticmethod def _shapeToStr(shapes): return [dim.size if dim.size > 0 else 1 for dim in shapes.dim] def _get_symmetric_padding(self, IR_node): stride_h = IR_node.get_attr('strides')[1] stride_w = IR_node.get_attr('strides')[2] # check if have pad layer IR_parent_node = self.IR_graph.get_parent(IR_node.name, [0]) if IR_parent_node.type == 'Pad': pads = IR_parent_node.get_attr('pads') else: pads = IR_node.get_attr('pads') pad_h = pads[1] + (0 if pads[1] == pads[5] else stride_h) pad_w = pads[2] + (0 if pads[2] == pads[6] else stride_w) return pad_h, pad_w def check_if_need_transpose(self, IR_node): parent = self.IR_graph.get_parent(IR_node.name, [0]) while parent.type == 'Flatten' or parent.type == 'Dropout' or parent.type == 'Reshape': parent = self.IR_graph.get_parent(parent.name, [0]) dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim) if dim > 2: original_dims = self.weights_dict[IR_node.name]['weights'].shape dims = [ i.size for i in parent.layer.attr['_output_shapes'].list.shape[0].dim[1:] ] + [-1] self.weights_dict[IR_node.name]['weights'] = np.reshape( self.weights_dict[IR_node.name]['weights'], dims) self.weights_dict[IR_node.name]['weights'] = np.transpose( self.weights_dict[IR_node.name]['weights'], [dim - 2] + list(range(0, dim - 2)) + [dim - 1]) self.weights_dict[IR_node.name]['weights'] = np.reshape( self.weights_dict[IR_node.name]['weights'], original_dims) def emit_Conv(self, IR_node): # implement asymmetric paddings by applying symmetric padding then cropping pad_h, pad_w = self._get_symmetric_padding(IR_node) num_output = IR_node.get_attr('kernel_shape')[-1] if IR_node.type == "DepthwiseConv": num_group = IR_node.get_attr("kernel_shape")[-2] num_output = num_group * num_output else: num_group = IR_node.get_attr("group", 1) self.add_body( 1, "n.{:<15} = L.Convolution(n.{}, kernel_h={}, kernel_w={}, stride={}, num_output={}, pad_h={}, pad_w={}, group={}, \ bias_term={}, ntop=1)".format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('kernel_shape')[0], IR_node.get_attr('kernel_shape')[1], IR_node.get_attr('strides')[1], num_output, pad_h, pad_w, num_group, IR_node.get_attr('use_bias', False))) dim = len(IR_node.get_attr('strides')) - 2 if self.weight_loaded: if IR_node.type == "DepthwiseConv": self.weights_dict[IR_node.name]['weights'] = np.swapaxes( self.weights_dict[IR_node.name]['weights'], -1, -2) self.weights_dict[IR_node.name]['weights'] = np.transpose( self.weights_dict[IR_node.name]['weights'], [dim + 1, dim] + list(range(0, dim))) self.weights_dict[IR_node.variable_name] = self.weights_dict.pop( IR_node.name) self.check_if_need_crop(IR_node) # keys = [] # for key in self.weights_dict[IR_node.name].keys(): # keys.append(key) # print("=======Layer: {}, keys: {}".format(IR_node.name, keys)) def compute_output_shape(self, IR_node, kernel_h, kernel_w): parent_node = self.IR_graph.get_parent(IR_node.name, [0]) if parent_node.get_attr('_output_shapes'): shape = parent_node.get_attr('_output_shapes')[0] shape = shape_to_list(shape) h_i = shape[1] w_i = shape[2] pad_h, pad_w = self._get_symmetric_padding(IR_node) stride_h = IR_node.get_attr('strides')[1] stride_w = IR_node.get_attr('strides')[2] if 
IR_node.type == 'Pool': h_o = (h_i + 2 * pad_h - kernel_h + stride_h - 1) // stride_h + 1 w_o = (w_i + 2 * pad_w - kernel_w + stride_w - 1) // stride_w + 1 else: h_o = (h_i + 2 * pad_h - kernel_h) // stride_h + 1 w_o = (w_i + 2 * pad_w - kernel_w) // stride_w + 1 return h_o, w_o else: assert False def check_if_need_crop(self, IR_node): shape = IR_node.get_attr('_output_shapes')[0] shape = shape_to_list(shape) ir_ho = shape[1] ir_wo = shape[2] if ir_ho < 0 or ir_wo < 0: return if IR_node.type == 'Pool': k_h = IR_node.get_attr('kernel_shape')[1] k_w = IR_node.get_attr('kernel_shape')[2] else: k_h = IR_node.get_attr('kernel_shape')[0] k_w = IR_node.get_attr('kernel_shape')[1] caffe_ho, caffe_wo = self.compute_output_shape(IR_node, k_h, k_w) # if asymmetric padding, set offset to 1 pads = IR_node.get_attr('pads') offset = [ 0 if pads[1] == pads[5] else 1, 0 if pads[2] == pads[6] else 1 ] if caffe_ho > ir_ho or caffe_wo > ir_wo: crop_layer_variable_name = IR_node.variable_name + "_crop" self.add_body( 1, "n.{:<15} = L.Crop(n.{}, L.DummyData(shape=[dict(dim=[1, {}, {}, {}])], \ ntop=1), ntop=1, offset={})".format(crop_layer_variable_name, IR_node.variable_name, shape[3], ir_ho, ir_wo, offset)) # Change the layer name IR_node.real_name = IR_node.real_name + "_crop" def emit_Pool(self, IR_node): pooling_type = IR_node.get_attr('pooling_type') if pooling_type == 'MAX': pooling_type = P.Pooling.MAX elif pooling_type == 'AVG': pooling_type = P.Pooling.AVE elif pooling_type == 'STOCHASTIC': pooling_type = P.Pooling.STOCHASTIC else: raise ValueError() if IR_node.layer.attr['global_pooling'].b: self.add_body( 1, "n.{:<15} = L.Pooling(n.{}, pool={}, stride={}, global_pooling=True, ntop=1)" .format(IR_node.variable_name, self.parent_variable_name(IR_node), pooling_type, IR_node.get_attr('strides')[1])) else: pad_h, pad_w = self._get_symmetric_padding(IR_node) self.add_body( 1, "n.{:<15} = L.Pooling(n.{}, pool={}, kernel_size={}, pad_h={}, pad_w={}, stride={}, ntop=1)" .format(IR_node.variable_name, self.parent_variable_name(IR_node), pooling_type, IR_node.get_attr('kernel_shape')[1], pad_h, pad_w, IR_node.get_attr('strides')[1])) # check if need crop output shape self.check_if_need_crop(IR_node) def emit_ResizeBilinear(self, IR_node): shape = IR_node.get_attr("_output_shapes")[0] shape = shape_to_list(shape) self.add_body( 1, "n.{:<15} = L.ResizeBilinear(n.{}, height={}, width={}, ntop=1)". 
format(IR_node.variable_name, self.parent_variable_name(IR_node), shape[1], shape[2])) def emit_UNKNOWN(self, IR_node): print(IR_node.IR_layer.name) def emit_DataInput(self, IR_node): shape = self._shapeToStr(IR_node.get_attr('shape')) shape = [shape[0], shape[-1]] + shape[1:-1] self.add_body( 1, "n.{:<15} = L.Input(shape=[dict(dim={})], ntop=1)".format( IR_node.variable_name, shape)) def emit_Dropout(self, IR_node): in_place = True self.add_body( 1, "n.{:<15} = L.Dropout(n.{}, dropout_ratio={} , in_place={}, ntop=1)" .format(IR_node.variable_name, self.parent_variable_name(IR_node), 1 - IR_node.get_attr('keep_prob'), in_place)) def emit_FullyConnected(self, IR_node): self.add_body( 1, "n.{:<15} = L.InnerProduct(n.{}, num_output={}, bias_term={}, ntop=1)" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.layer.attr["units"].i, IR_node.get_attr('use_bias', False))) if self.weight_loaded: self.check_if_need_transpose(IR_node) self.weights_dict[IR_node.name]['weights'] = np.transpose( self.weights_dict[IR_node.name]['weights'], (1, 0)) self.weights_dict[IR_node.variable_name] = self.weights_dict.pop( IR_node.name) def emit_BatchNorm(self, IR_node): self.add_body( 1, "n.{:<15} = L.BatchNorm(n.{}, eps={}, use_global_stats={}, ntop=1)" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('epsilon'), self.phase == 'test')) scale_layer_var_name = IR_node.variable_name + "_scale" self.add_body( 1, "n.{:<15} = L.Scale(n.{}, bias_term={}, in_place=True, ntop=1)". format(scale_layer_var_name, IR_node.variable_name, IR_node.get_attr('bias', False))) if self.weight_loaded: self.weights_dict[scale_layer_var_name] = dict() if 'scale' in self.weights_dict[IR_node.name]: self.weights_dict[scale_layer_var_name][ 'scale'] = self.weights_dict[IR_node.name]['scale'] else: self.weights_dict[scale_layer_var_name]['scale'] = 1 self.weights_dict[IR_node.name]['scale'] = 1 if 'bias' in self.weights_dict[IR_node.name]: self.weights_dict[scale_layer_var_name][ 'bias'] = self.weights_dict[IR_node.name]['bias'] self.weights_dict[IR_node.name].pop('bias', None) # change the key "name" to "variable_name", in case of the layer name has invalid characters self.weights_dict[IR_node.variable_name] = self.weights_dict.pop( IR_node.name) IR_node.real_name = IR_node.name + "_scale" def emit_Scale(self, IR_node): self.add_body( 1, "n.{:<15} = L.Scale(n.{}, bias_term={}, in_place=True, ntop=1)". format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('use_bias', False))) if self.weight_loaded: self.weights_dict[IR_node.variable_name] = self.weights_dict.pop( IR_node.name) def emit_Constant(self, IR_node): IR_node_after = self.IR_graph.get_son(IR_node.name, [0]) shape = IR_node_after.get_attr("_output_shapes")[0] shape = shape_to_list(shape) self.add_body( 1, "n.{:<15} = L.DummyData(shape=[dict(dim=[1,{},{},{}])], data_filler=dict(type='constant', value={}), ntop=1)" .format(IR_node.variable_name, shape[-1], shape[1], shape[2], self.weights_dict[IR_node.name]['value'][0])) def emit_LRN(self, IR_node): self.add_body( 1, "n.{:<15} = L.LRN(n.{}, local_size={}, alpha={}, beta={}, k={})". format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('size') * 2 - 1, IR_node.get_attr('alpha'), IR_node.get_attr('beta'), IR_node.get_attr('k'))) def emit_Add(self, IR_node): input_layers = ', '.join( ('n.' 
+ self.IR_graph.get_parent(IR_node.name, [num]).real_variable_name) for num in range(0, len(IR_node.in_edges))) self.add_body( 1, "n.{:<15} = L.Eltwise({}, operation=1, ntop=1)".format( IR_node.variable_name, input_layers, )) def emit_Flatten(self, IR_node): IR_node.real_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name def emit_Squeeze(self, IR_node): shape = IR_node.get_attr("_output_shapes")[0] shape = shape_to_list(shape) if shape: dim_str = "'dim': {}".format(shape) dim_str = " reshape_param={'shape': { " + dim_str + '} }' self.add_body( 1, "n.{:<15} = L.Reshape(n.{}, {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), dim_str)) else: IR_node.real_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name def emit_Concat(self, IR_node): axis_array = (2, 3, 1, 0) axis = axis_array.index(IR_node.get_attr('axis')) input_layers = ', '.join( ('n.' + self.IR_graph.get_node(edge).real_variable_name) for edge in IR_node.in_edges) self.add_body( 1, "n.{:<15} = L.Concat({}, axis={})".format(IR_node.variable_name, input_layers, axis)) def emit_Sigmoid(self, IR_node): self.add_body( 1, "n.{:<15} = L.Sigmoid(n.{}, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node))) def emit_Relu(self, IR_node): in_place = True self.add_body( 1, "n.{:<15} = L.ReLU(n.{}, in_place={}, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node), in_place)) def emit_LeakyRelu(self, IR_node): in_place = True self.add_body( 1, "n.{:<15} = L.ReLU(n.{}, in_place={}, negative_slope={}, ntop=1)". format(IR_node.variable_name, self.parent_variable_name(IR_node), in_place, IR_node.IR_layer.attr['alpha'].f)) def emit_PRelu(self, IR_node): in_place = True self.add_body( 1, "n.{:<15} = L.PReLU(n.{}, in_place={}, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node), in_place)) def emit_Tanh(self, IR_node): self.add_body( 1, "n.{:<15} = L.TanH(n.{}, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node))) def emit_Softmax(self, IR_node): self.add_body( 1, "n.{:<15} = L.Softmax(n.{}, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node))) def emit_Pad(self, IR_node): IR_node.real_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name def reduction(self, IR_node, op, axes): # Convert NHWC (IR) to NCHW (Caffe): [0,1,2,3]->[0,3,1,2] if len(axes) == 1: assert (axes[0] == 2) elif len(axes) == 2: assert ((axes[0] == 1) and (axes[1] == 2)) self.add_body( 1, "n.{:<15} = L.Reduction(n.{}, operation={} , axis={} ,ntop=1)". 
format(IR_node.variable_name, self.parent_variable_name(IR_node), op, len(axes))) if IR_node.get_attr('keepdims') == True: shape = IR_node.get_attr("_output_shapes")[0] shape = shape_to_list(shape) shape = [1] + [shape[-1]] + shape[1:-1] dim_str = "'dim': {}".format(shape) dim_str = "{'shape': { " + dim_str + '} }' self.add_body( 1, "n.{:<15} = L.Reshape(n.{}, reshape_param={}) ".format( IR_node.variable_name + "_reshape", IR_node.real_variable_name, dim_str)) IR_node.real_name = IR_node.real_name + '_reshape' def emit_ReduceMean(self, IR_node): self.reduction(IR_node, 4, IR_node.get_attr('axes')) def emit_ReduceSum(self, IR_node): self.reduction(IR_node, 1, IR_node.get_attr('axes')) def emit_Relu6(self, IR_node): self.emit_Relu(IR_node) def emit_DepthwiseConv(self, IR_node): self.emit_Conv(IR_node) def emit_Const(self, IR_node): pass def emit_Shape(self, IR_node): pass def emit_Reshape(self, IR_node): # currently for the flatten layer self.add_body( 1, "n.{:<15} = L.Flatten(n.{})".format( IR_node.variable_name, self.parent_variable_name(IR_node), )) def emit_Slice(self, IR_node): pass def emit_Pack(self, IR_node): pass def emit_Abs(self, IR_node): self.add_body( 1, "n.{:<15} = L.AbsVal(n.{}, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node))) def emit_Sub(self, IR_node): input_layers = ', '.join( ('n.' + self.IR_graph.get_node(edge).real_variable_name) for edge in IR_node.in_edges) self.add_body( 1, "n.{:<15} = L.Eltwise({}, coeff = [1, -1], ntop=1)".format( IR_node.variable_name, input_layers)) def emit_Mul(self, IR_node): if len(IR_node.in_edges) == 2: input_layers = ', '.join( ('n.' + self.IR_graph.get_node(edge).real_variable_name) for edge in IR_node.in_edges) self.add_body( 1, "n.{:<15} = L.Eltwise({}, operation=0, ntop=1)".format( IR_node.variable_name, input_layers)) elif len(IR_node.in_edges) == 1: self.emit_Scale(IR_node) else: assert False def emit_UpSampling2D(self, IR_node): scales = IR_node.get_attr('scales') scale = tuple(scales)[0] shape = IR_node.get_attr('_output_shapes')[0] shape = shape_to_list(shape) self.add_body( 1, "n.{:<15} = L.Deconvolution(n.{}, convolution_param=dict(kernel_size={}, stride={}, pad={}, num_output={}, group={}, bias_term={}), param=[dict(lr_mult=0)], ntop=1)" .format(IR_node.variable_name, IR_node.in_edges[0], 2 * scale - scale % 2, scale, int(math.ceil( (scale - 1) / 2)), shape[-1], shape[-1], False))
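# Illustrative check (not part of the emitter): a minimal standalone sketch of
# the arithmetic behind emit_UpSampling2D above. Caffe has no native
# upsampling layer, so the emitter uses a fixed Deconvolution with
# kernel_size = 2*scale - scale % 2, stride = scale and
# pad = ceil((scale - 1) / 2). A Caffe deconvolution produces
# (in - 1) * stride - 2 * pad + kernel outputs, so these choices yield exactly
# scale * in. The helper below only re-derives that identity.
def _check_upsample_deconv_params():
    import math
    for scale in (2, 3, 4):
        kernel = 2 * scale - scale % 2
        stride = scale
        pad = int(math.ceil((scale - 1) / 2.0))
        for size in (7, 14, 28):
            out = (size - 1) * stride - 2 * pad + kernel
            assert out == scale * size, (scale, size, out)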
class CoreMLEmitter(Emitter): def __init__(self, architecture, weight): super(CoreMLEmitter, self).__init__() if os.path.exists(architecture) == False: raise ValueError( "IR architecture file [{}] is not found.".format(architecture)) else: self.IR_graph = IRGraph(architecture) self.IR_graph.build() if os.path.exists(weight) == False: raise ValueError( "IR weight file [{}] is not found.".format(weight)) else: self._load_weights(weight) def _get_inout(self): input_features = [] output_features = [] for input_node in self.IR_graph.input_layers: shape = shape_to_list( self.IR_graph.get_node(input_node).get_attr('shape')) shape = _infer_coreml_input_shape(shape) input_features.append((input_node.encode(), shape)) print("CoreML Model Input Layer: [{}] {}".format( input_node, shape)) for output_node in self.IR_graph.output_layers: node = self.IR_graph.get_node(output_node) node.out_edges.append(node.name) shape = node.get_attr('_output_shapes') if shape: shape = shape_to_list(shape[0]) else: shape = [1] shape = _infer_coreml_input_shape(shape) output_features.append((output_node.encode(), shape)) print("CoreML Model Output Layer: [{}] {}".format( output_node, shape)) return list(input_features), list(output_features) def _connect_coreml_layers(self): for layer in self.builder.nn_spec.layers: # for i, in_node in enumerate(layer.input): # layer.input[i] = self.IR_graph.get_node(in_node).real_name for i, out_node in enumerate(layer.output): layer.output[i] = self.IR_graph.get_node(out_node).real_name def gen_model(self, input_names=None, output_names=None, image_input_names=None, is_bgr=False, red_bias=0.0, green_bias=0.0, blue_bias=0.0, gray_bias=0.0, image_scale=1.0, class_labels=None, predicted_feature_name=None, predicted_probabilities_output=''): input_features, output_features = self._get_inout() is_classifier = class_labels is not None mode = 'classifier' if is_classifier else None self.builder = _NeuralNetworkBuilder(input_features, output_features, mode=mode) for layer in self.IR_graph.topological_sort: current_node = self.IR_graph.get_node(layer) print("Converting layer {}({})".format(current_node.name, current_node.type)) node_type = current_node.type if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) func(current_node) else: print("CoreMLEmitter has not supported operator [%s]." 
% (node_type)) self.emit_UNKNOWN(current_node) # self._connect_coreml_layers() # Add classifier classes (if applicable) if is_classifier: classes_in = class_labels if isinstance(classes_in, _string_types): if not os.path.isfile(classes_in): raise ValueError( "Path to class labels [{}] does not exist.".format( classes_in)) with open(classes_in, 'r') as f: classes = f.read() classes = classes.splitlines() elif type(classes_in) is list: # list[int or str] classes = classes_in else: raise ValueError( 'Class labels must be a list of integers / strings, or a file path' ) if predicted_feature_name is not None: self.builder.set_class_labels( classes, predicted_feature_name=predicted_feature_name, prediction_blob=predicted_probabilities_output) else: self.builder.set_class_labels(classes) # Set pre-processing parameters self.builder.set_pre_processing_parameters( image_input_names=[input_features[0][0]], #image_input_names, is_bgr=is_bgr, red_bias=red_bias, green_bias=green_bias, blue_bias=blue_bias, gray_bias=gray_bias, image_scale=image_scale) # Return the protobuf spec # model = _MLModel(self.builder.spec) print(self.builder.spec.description) return self.builder.spec @staticmethod def _get_padding(IR_node): auto_pad = IR_node.get_attr('auto_pad') if auto_pad is not None: if auto_pad == 'VALID': return auto_pad else: return 'SAME' pads = IR_node.get_attr('pads') if is_valid_padding(pads): return 'VALID' else: return 'SAME' def _emit_merge(self, IR_node, func): """ Convert concat layer to coreml. """ # Get input and output names input_names = [ self.IR_graph.get_node(inp).real_name for inp in IR_node.in_edges ] self.builder.add_elementwise(name=IR_node.name, input_names=input_names, output_name=IR_node.name, mode=func) def emit_Conv(self, IR_node): """ Convert convolution layer to coreml. """ has_bias = IR_node.get_attr('use_bias') is_deconv = False # TODO: Deconv # Get the weights.
output_channels = IR_node.get_attr('kernel_shape')[-1] # Dimensions and weights if is_deconv: raise NotImplementedError() else: W = self.weights_dict[IR_node.name]['weights'] height, width, channels, n_filters = W.shape output_shape = None b = self.weights_dict[IR_node.name]['bias'] if has_bias else None stride_height, stride_width = IR_node.get_attr( 'strides')[1], IR_node.get_attr('strides')[2] # Dilations dilations = IR_node.get_attr('dilations', [1, 1]) if is_deconv and not dilations == [1, 1]: raise ValueError( "Unsupported non-unity dilation for Deconvolution layer") groups = IR_node.get_attr('groups', 1) kernel_channels = channels padding = self._get_padding(IR_node).lower() self.builder.add_convolution( name=IR_node.real_name, kernel_channels=kernel_channels, output_channels=output_channels, height=height, width=width, stride_height=stride_height, stride_width=stride_width, border_mode=padding, groups=groups, W=W, b=b, has_bias=has_bias, is_deconv=is_deconv, output_shape=output_shape, input_name=self.parent_variable_name(IR_node), output_name=IR_node.real_name, dilation_factors=dilations) def emit_DepthwiseConv(self, IR_node): # depth-wise convolution kernel_channels = 1 is_deconv = False has_bias = IR_node.get_attr('use_bias') depth_multiplier = IR_node.get_attr('kernel_shape')[-1] W = self.weights_dict[IR_node.name]['weights'] height, width, channels, n_filters = W.shape output_shape = None W = np.reshape(W, (height, width, 1, channels * depth_multiplier)) b = self.weights_dict[IR_node.name]['bias'] if has_bias else None # Dilations dilations = IR_node.get_attr('dilations', [1, 1]) padding = self._get_padding(IR_node).lower() output_channels = W.shape[-1] groups = W.shape[-1] stride_height, stride_width = IR_node.get_attr( 'strides')[1], IR_node.get_attr('strides')[2] self.builder.add_convolution( name=IR_node.real_name, kernel_channels=kernel_channels, output_channels=output_channels, height=height, width=width, stride_height=stride_height, stride_width=stride_width, border_mode=padding, groups=groups, W=W, b=b, has_bias=has_bias, is_deconv=is_deconv, output_shape=output_shape, input_name=self.parent_variable_name(IR_node), output_name=IR_node.real_name, dilation_factors=dilations) def emit_Pool(self, IR_node): """ Convert pooling layer to coreml.
""" # Get input and output names input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name # Pooling layer type pooling_type = IR_node.get_attr('pooling_type') if pooling_type == 'MAX': layer_type_str = 'MAX' elif pooling_type == 'AVG': layer_type_str = 'AVERAGE' else: raise TypeError("Pooling type %s not supported" % pooling_type) # if it's global, set the global flag global_pooling = IR_node.get_attr('global_pooling', False) dim = len(IR_node.get_attr('strides')) - 2 if global_pooling: if dim == 2: height, width = (0, 0) stride_height = stride_width = 0 padding_type = 'VALID' elif dim == 1: raise NotImplementedError() global_pooling = False _, width, channels = keras_layer.input_shape height = 1 stride_height, stride_width = height, width padding_type = 'VALID' else: raise NotImplementedError() else: height, width = tuple(IR_node.get_attr('kernel_shape')[1:-1]) stride_height, stride_width = tuple( IR_node.get_attr('strides')[1:-1]) # Padding padding_type = self._get_padding(IR_node) self.builder.add_pooling(name=IR_node.name, height=height, width=width, stride_height=stride_height, stride_width=stride_width, layer_type=layer_type_str, padding_type=padding_type, input_name=input_name, output_name=IR_node.name, exclude_pad_area=True, is_global=global_pooling) def emit_UNKNOWN(self, IR_node): print(IR_node.name) def emit_DataInput(self, IR_node): """ Layers that can be skipped. """ return def emit_Dropout(self, IR_node): """ Layers that can be skipped (because they are train time only. """ IR_node.real_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name def emit_FullyConnected(self, IR_node): """ Convert a dense layer to coreml. """ # Get input and output names input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name output_name = IR_node.out_edges[0] has_bias = IR_node.get_attr('use_bias') # Get the weights from keras W = self.weights_dict[IR_node.name]['weights'].T Wb = self.weights_dict[IR_node.name]['bias'].T if has_bias else None output_channels, input_channels = W.shape self.builder.add_inner_product(name=IR_node.name, W=W, b=Wb, input_channels=input_channels, output_channels=output_channels, has_bias=has_bias, input_name=input_name, output_name=IR_node.name) def emit_Flatten(self, IR_node): """ Convert a flatten layer from keras to coreml. """ # Get input and output names input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name output_name = IR_node.out_edges[0] """ # blob_order == 0 if the input blob needs not be rearranged # blob_order == 1 if the input blob needs to be rearranged blob_order = 0 # using keras_layer.input.shape have a "?" 
(Dimension[None] at the front), # making a 3D tensor with unknown batch size 4D if len(keras_layer.input.shape) == 4: blob_order = 1 """ self.builder.add_flatten(name=IR_node.name, mode=1, input_name=input_name, output_name=IR_node.name) def emit_Reshape(self, IR_node): def ShapetrToTuple(string, batch_none=False): if batch_none == True: ls = [int(item) for item in string.split(', ')] ls.insert(0, None) return tuple(ls) else: ls = [int(item) for item in string.split(', ')] return tuple(ls) last_node = self.IR_graph.get_node(IR_node.in_edges[0]).layer input_shape_dims = last_node.attr["_output_shapes"].list.shape target_shape_dims = IR_node.IR_layer.attr["_output_shapes"].list.shape input_shape = ShapetrToTuple(IRGraph.shapeToStr(input_shape_dims[0]), True) target_shape = ShapetrToTuple(IRGraph.shapeToStr(target_shape_dims[0])) # print("input_shape, target_shape",input_shape,target_shape) def get_coreml_target_shape(target_shape): if len(target_shape) == 1: #(D,) coreml_shape = (1, target_shape[0], 1, 1) elif len(target_shape) == 2: #(S,D) coreml_shape = target_shape + (1, 1) elif len(target_shape) == 3: #(H,W,C) coreml_shape = (1, target_shape[2], target_shape[0], target_shape[1]) else: coreml_shape = None return coreml_shape def get_mode(input_shape, target_shape): in_shape = input_shape[1:] if len(in_shape) == 3 or len(target_shape) == 3: return 1 else: return 0 new_shape = get_coreml_target_shape(target_shape) mode = get_mode(input_shape, target_shape) self.builder.add_reshape(name=IR_node.real_name, input_name=self.parent_variable_name(IR_node), output_name=IR_node.real_name, target_shape=new_shape, mode=mode) def emit_Tanh(self, IR_node): assert False code = "{:<15} = Activation(name = '{}', activation = tanh)({})".format( IR_node.replace_scope(IR_node.name), IR_node.name, IR_node.replace_scope(IR_node.in_edges[0])) return code def _emit_activation(self, IR_node, act, params=None): # Get input and output names input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name output_name = IR_node.out_edges[0] self.builder.add_activation( name=IR_node.real_name, non_linearity=act, input_name=self.parent_variable_name(IR_node), output_name=IR_node.real_name, params=params) def emit_Relu(self, IR_node): self._emit_activation(IR_node, 'RELU') def emit_Softmax(self, IR_node): # Get input and output names input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name output_name = IR_node.out_edges[0] self.builder.add_softmax(name=IR_node.name, input_name=input_name, output_name=IR_node.name) def emit_Sigmoid(self, IR_node): assert False code = "{:<15} = Activation(name = '{}', activation = 'sigmoid')({})".format( IR_node.replace_scope(IR_node.name), IR_node.name, IR_node.replace_scope(IR_node.in_edges[0])) return code def emit_Relu6(self, IR_node): # print(IR_node.name) layer = IR_node.real_name input_name, output_name = (IR_node.IR_layer.input[0], IR_node.IR_layer.name) relu_output_name = output_name + '_relu' self.builder.add_activation(layer, 'RELU', input_name, relu_output_name) # negate it neg_output_name = relu_output_name + '_neg' self.builder.add_activation(layer + '__neg__', 'LINEAR', relu_output_name, neg_output_name, [-1.0, 0]) # apply threshold clip_output_name = relu_output_name + '_clip' self.builder.add_unary(layer + '__clip__', neg_output_name, clip_output_name, 'threshold', alpha=-6.0) # negate it back self.builder.add_activation(layer + '_neg2', 'LINEAR', clip_output_name, output_name, [-1.0, 0]) def emit_Gather(self, IR_node): raise NotImplementedError() W = 
self.weights_dict[IR_node.name]['weights'] if W.ndim == 2: vocab_size = W.shape[0] output_channels = W.shape[1] builder.add_embedding(name=IR_node.real_name, W=W, b=None, input_dim=vocab_size, output_channels=output_channels, has_bias=False, input_name=input_name, output_name=IR_node.real_name) else: raise NotImplementedError() def emit_RNNs(self, IR_node, func): assert False # for Keras if "dropout" in IR_node.IR_layer.attr: dropout_str = ",dropout = {}, recurrent_dropout = {}".format( IR_node.IR_layer.attr['dropout'].f, IR_node.IR_layer.attr['recurrent_dropout'].f) else: dropout_str = "" code = "{:<15} = {}(units = {}, use_bias = {} {})({})".format( IR_node.name, func, IR_node.IR_layer.attr['units'].i, IR_node.IR_layer.attr['use_bias'].b, dropout_str, IR_node.in_edges[0]) return code def emit_LSTM(self, IR_node): return self.emit_RNNs(IR_node, "LSTM") def emit_GRU(self, IR_node): return self.emit_RNNs(IR_node, "GRU") def emit_Add(self, IR_node): self._emit_merge(IR_node, 'ADD') def emit_Concat(self, IR_node): self._emit_merge(IR_node, "CONCAT") def emit_BatchNorm(self, IR_node): """ Convert a Batch Normalization layer. """ # Get input and output names input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name axis = IR_node.get_attr('axis', -1) nb_channels = IR_node.get_attr('_output_shapes')[0].dim[axis].size # Set parameters # Parameter arrangement in Keras: gamma, beta, mean, variance weights = self.weights_dict[IR_node.name] mean = weights['mean'] std = weights['var'] gamma = weights.get('scale', np.ones(mean.shape)) beta = weights.get('bias', np.zeros(mean.shape)) # compute adjusted parameters variance = std * std f = 1.0 / np.sqrt(std + IR_node.get_attr('epsilon')) gamma1 = gamma * f beta1 = beta - gamma * mean * f mean[:] = 0.0 #mean variance[:] = 1.0 - .00001 #stddev self.builder.add_batchnorm(name=IR_node.name, channels=nb_channels, gamma=gamma1, beta=beta1, mean=mean, variance=variance, input_name=input_name, output_name=IR_node.name) def emit_pad(self, IR_node): assert False if IR_node.IR_layer.attr['mode'].s == "CONSTANT": func = "ZeroPadding" dim = len(IR_node.IR_layer.attr['padding'].list.i) // 2 padding_str = "" for idx in range(0, dim): padding_str += "({}, {}),".format( IR_node.IR_layer.attr['padding'].list.i[idx + idx], IR_node.IR_layer.attr['padding'].list.i[idx + idx + 1]) code = "{:<15} = {}{}D(name = \"{}\", padding = ({}))({})".format( IR_node.replace_scope(IR_node.name), func, dim, IR_node.name, padding_str, IR_node.replace_scope(IR_node.in_edges[0])) return code def emit_Squeeze(self, IR_node): self.emit_Flatten(IR_node)
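# Illustrative check (not part of the emitter): CoreMLEmitter.emit_BatchNorm
# above folds the saved statistics into gamma/beta -- gamma1 = gamma * f and
# beta1 = beta - gamma * mean * f with f = 1 / sqrt(var + epsilon) -- and then
# hands the builder mean = 0 and variance ~= 1, so the CoreML layer computes
# the same normalization. The numbers below are made up for the check.
def _check_batchnorm_folding():
    import numpy as np
    rng = np.random.RandomState(0)
    x = rng.randn(8)
    gamma, beta = rng.rand(8), rng.randn(8)
    mean, var = rng.randn(8), rng.rand(8) + 0.5
    eps = 1e-5
    reference = gamma * (x - mean) / np.sqrt(var + eps) + beta
    f = 1.0 / np.sqrt(var + eps)
    gamma1, beta1 = gamma * f, beta - gamma * mean * f
    folded = gamma1 * (x - 0.0) / np.sqrt(1.0) + beta1
    assert np.allclose(reference, folded)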
class CntkEmitter(Emitter): dtype_map = { graph_pb2.DT_FLOAT16 : "np.float16", graph_pb2.DT_FLOAT32 : "np.float32", graph_pb2.DT_FLOAT64 : "np.float64", graph_pb2.DT_INT16 : "np.int16", graph_pb2.DT_INT32 : "np.int32", graph_pb2.DT_INT64 : "np.int64", graph_pb2.DT_UINT8 : "np.uint8", graph_pb2.DT_UINT16 : "np.uint16" } def __init__(self, model): from six import string_types as _string_types super(CntkEmitter, self).__init__() if isinstance(model, _string_types): network_path = model else: network_path = model[0] self._load_weights(model[1]) self.IR_graph = IRGraph(network_path) super(CntkEmitter, self)._build() @property def header_code(self): return """import numpy as np import cntk from cntk import ops, layers from cntk.contrib.crosstalkcaffe.unimodel.cntkinstance import BlockApiSetup __weights_dict = dict() def load_weights(weight_file): if weight_file == None: return try: weights_dict = np.load(weight_file).item() except: weights_dict = np.load(weight_file, encoding='bytes').item() return weights_dict def KitModel(weight_file = None): global __weights_dict __weights_dict = load_weights(weight_file) """ def gen_code(self, phase = 'test'): self.phase = phase self.add_body(0, self.header_code) for layer in self.IR_graph.topological_sort: current_node = self.IR_graph.get_node(layer) node_type = current_node.type if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) func(current_node) else: print("CntkEmitter has not supported operator [%s]." % (node_type)) self.emit_UNKNOWN(current_node) self.add_body(1, "return {}".format( ','.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers]))) self.add_body(0, "") for i in self.used_layers: func = getattr(self, "_layer_" + i) func() return self.body_code @staticmethod def _shapeToStr(shapes): new_shape = filter(lambda x:x >- 1, [dim.size for dim in shapes.dim]) return ', '.join('%s' % i for i in new_shape) @staticmethod def is_valid_padding(auto_pad, pads): """ different from utils.is_valid_padding """ if auto_pad: if auto_pad == 'VALID': return True elif auto_pad.startswith('SAME'): return False else: raise ValueError("Unknown padding type{}.".format(auto_pad)) else: lens = len(pads) assert lens % 2 == 0 for i in range(0, lens // 2): if pads[i] != 0: return False return True @staticmethod def is_ceil_mode(pads): lens = len(pads) for i in range(lens // 2 + 1, lens - 1): if pads[i] == pads[i - lens // 2]: return False else: return True def emit_Conv(self, IR_node): if self.weight_loaded: self.used_layers.add(IR_node.type) dim = len(IR_node.get_attr('strides')) - 2 padding = not self.is_valid_padding(IR_node.get_attr('auto_pad'), IR_node.get_attr('pads')) padding = [False] + [padding] * dim self.add_body(1, "{:<15} = convolution({}, strides={}, auto_padding={}, dilation={}, groups={}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), tuple(IR_node.get_attr('strides')[1:-1]), padding, tuple(IR_node.get_attr('dilations', [1])), IR_node.get_attr('group', 1), IR_node.name)) else: self.add_body(1, "{:<15} = Convolution(name = '{}', num_filters = {}, filter_shape = ({}), strides = ({},), pad = {}, bias = {})({})\n".format( IR_node.variable_name, IR_node.name, IR_node.get_attr('kernel_shape')[-1], ', '.join('%s' % i for i in IR_node.layer.attr["kernel_shape"].list.i[:-2]), ', '.join('%s' % i for i in IR_node.layer.attr['strides'].list.i[1:-1]), IR_node.get_attr('auto_pad') != 'VALID', IR_node.get_attr('use_bias'), self.parent_variable_name(IR_node))) def 
emit_Pool(self, IR_node): input_node = self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name if IR_node.layer.attr['global_pooling'].b: self.used_layers.add('GlobalPooling') self.add_body(1, "{:<15} = global_pooling({}, '{}', name = '{}')".format( IR_node.variable_name, input_node, IR_node.get_attr('pooling_type'), IR_node.name)) else: for e in IR_node.get_attr('dilations', []): assert e == 1 dim = len(IR_node.get_attr('kernel_shape')) - 2 padding = not self.is_valid_padding(IR_node.get_attr('auto_pad'), IR_node.get_attr('pads')) padding = [False] + [padding] * dim ceil_out_dim = self.is_ceil_mode(IR_node.get_attr('pads')) pooling_type = IR_node.get_attr('pooling_type') if pooling_type == 'MAX': pooling_type = cntk.MAX_POOLING elif pooling_type == 'AVG': pooling_type = cntk.AVG_POOLING else: raise ValueError if self.weight_loaded: self.used_layers.add(IR_node.type) self.add_body(1, "{:<15} = pooling({}, pooling_type={}, pooling_window_shape={}, strides={}, auto_padding={}, ceil_out_dim={})".format( IR_node.variable_name, input_node, pooling_type, tuple(IR_node.get_attr('kernel_shape')[1:-1]), tuple(IR_node.get_attr('strides')[1:-1]), padding, ceil_out_dim )) else: raise NotImplementedError def emit_UNKNOWN(self, IR_node): print(IR_node.IR_layer.name) def emit_DataInput(self, IR_node): shape_str = self._shapeToStr(IR_node.IR_layer.attr["shape"].shape) dtype_str = ", dtype = {}".format(self.dtype_map[IR_node.layer.attr['dtype'].type]) if 'dtype' in IR_node.layer.attr else "" self.add_body(1, "{:<15} = cntk.input_variable(({},) {}, name='{}')".format( IR_node.variable_name, shape_str, dtype_str, IR_node.name)) def emit_Dropout(self, IR_node): parent = self.IR_graph.get_parent(IR_node.name, [0]) if self.phase == 'train': self.add_body(1, "{:<15} = Dropout({}, name = '{}')({})".format( IR_node.variable_name, 1 - IR_node.get_attr('keep_prob'), IR_node.name, parent.real_variable_name)) else: IR_node.real_name = parent.real_name def emit_FullyConnected(self, IR_node): input_node = self.parent_variable_name(IR_node) if self.weight_loaded: self.used_layers.add(IR_node.type) self.add_body(1, "{:<15} = dense({}, name = '{}')".format( IR_node.variable_name, input_node, IR_node.name)) else: self.add_body(1, "{:<15} = Dense({}, bias = {}, name = '{}')({})".format( IR_node.variable_name, IR_node.layer.attr["units"].i, IR_node.layer.attr['use_bias'].b, IR_node.name, input_node)) def emit_Flatten(self, IR_node): self.add_body(1, "{:<15} = ops.reshape({}, (-1,), name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.name)) def emit_Reshape(self, IR_node): self.add_body(1, "{:<15} = cntk.reshape({}, shape={}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), tuple(IR_node.get_attr('shape')), IR_node.name)) def _emit_activation(self, IR_node, op_name): self.add_body(1, "{:<15} = layers.Activation(activation = {}, name = '{}')({})".format( IR_node.variable_name, op_name, IR_node.name, self.parent_variable_name(IR_node))) def emit_Tanh(self, IR_node): self._emit_activation(IR_node, 'ops.tanh') def emit_Relu(self, IR_node): self._emit_activation(IR_node, 'ops.relu') def emit_Softmax(self, IR_node): self._emit_activation(IR_node, 'ops.softmax') def emit_Sigmoid(self, IR_node): self._emit_activation(IR_node, 'ops.sigmoid') def emit_RNNs(self, IR_node, func): assert False def emit_LSTM(self, IR_node): return self.emit_RNNs(IR_node, "LSTM") def emit_GRU(self, IR_node): return self.emit_RNNs(IR_node, "GRU") def emit_Add(self, IR_node): if 
len(IR_node.in_edges) > 1: inputs = ' + '.join(self.IR_graph.get_node(i).real_variable_name for i in IR_node.in_edges) self.add_body(1, "{:<15} = {}".format( IR_node.variable_name, inputs)) def emit_Sub(self, IR_node): if len(IR_node.in_edges) > 1: inputs = ' - '.join(self.IR_graph.get_node(i).real_variable_name for i in IR_node.in_edges) self.add_body(1, "{:<15} = {}".format( IR_node.variable_name, inputs)) def emit_Mul(self, IR_node): if len(IR_node.in_edges) > 1: inputs = ' * '.join(self.IR_graph.get_node(i).real_variable_name for i in IR_node.in_edges) self.add_body(1, "{:<15} = {}".format( IR_node.variable_name, inputs)) def emit_Constant(self, IR_node): self.add_body(1, "{:<15} = cntk.Constant(value=__weights_dict['{}']['value'])".format( IR_node.variable_name, IR_node.name )) def emit_Concat(self, IR_node): inputs = ', '.join(self.IR_graph.get_node(i).real_variable_name for i in IR_node.in_edges) self.add_body(1, "{:<15} = cntk.splice({}, axis={}, name='{}')".format( IR_node.variable_name, inputs, IR_node.get_attr('axis') - 1, IR_node.name)) def emit_BatchNorm(self, IR_node): self.used_layers.add(IR_node.type) self.add_body(1, "{:<15} = batch_normalization({}, epsilon={}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('epsilon'), IR_node.name)) def emit_Pad(self, IR_node): if IR_node.get_attr('mode') == 'constant': mode = 'mode = ops.CONSTANT_PAD, constant_value = {}'.format(IR_node.get_attr('constant_values', 0.0)) elif IR_node.get_attr('mode') == 'reflect': mode = 'mode = ops.REFLECT_PAD' elif IR_node.get_attr('mode') == 'SYMMETRIC': mode = 'mode = ops.SYMMETRIC_PAD' else: assert False padding = IR_node.get_attr('pads') padding = convert_onnx_pad_to_tf(padding)[1:] self.add_body(1, "{:<15} = ops.pad({}, pattern={}, {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), padding, mode)) def emit_Squeeze(self, IR_node): IR_node.real_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name def emit_Log(self, IR_node): self.add_body(1, "{:<15} = _cntk.log({}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.name)) def emit_Exp(self, IR_node): self.add_body(1, "{:<15} = _cntk.exp({}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.name)) def emit_Reciprocal(self, IR_node): self.add_body(1, "{:<15} = _cntk.reciprocal({}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.name)) def emit_ReduceMean(self, IR_node): self.add_body(1, "{:<15} = ops.reduce_mean({}, axis = ({}), name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), ', '.join('%s' % (i - 1) for i in IR_node.get_attr('axes')), IR_node.name)) def emit_LRN(self, IR_node): self.used_layers.add(IR_node.type) self.add_body(1, "{:<15} = lrn({}, k=1, n={}, alpha={}, beta={}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.layer.attr['size'].i, IR_node.layer.attr['alpha'].f, IR_node.layer.attr['beta'].f, IR_node.name)) def _layer_LRN(self): self.add_body(0, """ def lrn(input, **kwargs): dim = len(input.output.shape) input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1))) layer = BlockApiSetup.lrn(**kwargs)(input) layer = cntk.transpose(layer, list(range(1, dim)) + [0]) return layer """) def _layer_FullyConnected(self): self.add_body(0, """ def dense(input, name, **kwargs): w = __weights_dict[name]['weights'] b = __weights_dict[name]['bias'] if 'bias' in __weights_dict[name] else 
None return BlockApiSetup.linear(output_shape=w.shape[1], input_shape=w.shape[0], scale_init=w, bias_init=b, name=name, **kwargs)(input) """) def _layer_Conv(self): self.add_body(0, """ def convolution(input, name, **kwargs): dim = __weights_dict[name]['weights'].ndim weight = np.transpose(__weights_dict[name]['weights'], [dim - 1, dim - 2] + list(range(0, dim - 2))) w = cntk.Parameter(init=weight, name=name + '_weight') input = cntk.transpose(input, [dim - 2] + list(range(0, dim - 2))) layer = ops.convolution(w, input, **kwargs) if 'bias' in __weights_dict[name]: bias = np.reshape(__weights_dict[name]['bias'], [-1] + [1] * (dim - 2)) b = cntk.Parameter(init=bias, name=name + '_bias') layer = layer + b layer = cntk.transpose(layer, list(range(1, dim - 1)) + [0]) return layer """) def _layer_Pool(self): self.add_body(0, """ def pooling(input, **kwargs): dim = len(input.output.shape) input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1))) layer = ops.pooling(input, **kwargs) layer = cntk.transpose(layer, list(range(1, dim)) + [0]) return layer """) def _layer_GlobalPooling(self): self.add_body(0, """ def global_pooling(input, type, **kwargs): dim = len(input.output.shape) input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1))) layer = layers.GlobalMaxPooling(**kwargs)(input) if type == 'MAX' else layers.GlobalAveragePooling(**kwargs)(input) layer = cntk.transpose(layer, list(range(1, dim)) + [0]) return layer """) def _layer_BatchNorm(self): self.add_body(0, """ def batch_normalization(input, name, epsilon, **kwargs): mean = cntk.Parameter(init = __weights_dict[name]['mean'], name = name + "_mean") var = cntk.Parameter(init = __weights_dict[name]['var'], name = name + "_var") layer = (input - mean) / cntk.sqrt(var + epsilon) if 'scale' in __weights_dict[name]: scale = cntk.Parameter(init = __weights_dict[name]['scale'], name = name + "_scale") layer = scale * layer if 'bias' in __weights_dict[name]: bias = cntk.Parameter(init = __weights_dict[name]['bias'], name = name + "_bias") layer = layer + bias return layer """)
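# Illustrative check (not part of the emitter): every generated CNTK helper
# above (lrn, convolution, pooling, global_pooling) brackets the wrapped op
# with a transpose to channels-first ([dim - 1] + range(0, dim - 1)) and a
# transpose back to channels-last (range(1, dim) + [0]), because the IR is
# channels-last while CNTK kernels expect channels-first. The check below
# verifies with plain numpy that the two permutations are mutual inverses.
def _check_cntk_transpose_roundtrip():
    import numpy as np
    for dim in (2, 3, 4):
        shape = tuple(2 + i for i in range(dim))
        x = np.arange(int(np.prod(shape))).reshape(shape)
        to_channels_first = [dim - 1] + list(range(0, dim - 1))
        to_channels_last = list(range(1, dim)) + [0]
        roundtrip = np.transpose(np.transpose(x, to_channels_first),
                                 to_channels_last)
        assert np.array_equal(roundtrip, x)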
class OnnxEmitter(Emitter): dtype_map = {graph_pb2.DT_FLOAT32: "TensorProto.FLOAT"} transpose_map = {1: 2, 2: 3, -1: 1} def __init__(self, architecture, weight): super(OnnxEmitter, self).__init__() if os.path.exists(architecture) == False: raise ValueError( "IR architecture file [{}] is not found.".format(architecture)) else: self.IR_graph = IRGraph(architecture) self.IR_graph.build() if os.path.exists(weight) == False: raise ValueError( "IR weight file [{}] is not found.".format(weight)) else: self._load_weights(weight) @property def header_code(self): return """import numpy as np from onnx import helper, TensorProto import onnx __weights_dict = dict() def load_weights(weight_file): if weight_file == None: return try: weights_dict = np.load(weight_file).item() except: weights_dict = np.load(weight_file, encoding='bytes').item() return weights_dict def KitModel(weight_file = None): global __weights_dict __weights_dict = load_weights(weight_file) """ def gen_code(self, phase): self.phase = phase self.add_body(0, self.header_code) self.inputs = [] self.outputs = [] self.nodes = [] self.initializer = [] for layer in self.IR_graph.topological_sort: current_node = self.IR_graph.get_node(layer) node_type = current_node.type if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) func(current_node) else: print("OnnxEmitter has not supported operator [%s]." % (node_type)) self.emit_UNKNOWN(current_node) self._process_output_layers() self.add_body( 1, "graph = helper.make_graph([{}], 'mmdnn', [{}], [{}], [{}])". format(', '.join(self.nodes), ', '.join(self.inputs), ', '.join(self.outputs), ', '.join(self.initializer))) self.add_body( 1, "return helper.make_model(graph, opset_imports=[helper.make_opsetid('', 6)])" ) return self.body_code def run(self, dstNetworkPath, dstWeightPath=None, phase='test'): super(OnnxEmitter, self).run(dstNetworkPath, dstWeightPath, phase) self.save_weights(self.weights_dict, dstWeightPath) def check_if_need_transpose(self, IR_node): parent = self.IR_graph.get_parent(IR_node.name, [0]) while parent.type == 'Flatten': parent = self.IR_graph.get_parent(parent.name, [0]) dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim) if dim > 2: original_dims = self.weights_dict[IR_node.name]['weights'].shape dims = [ i.size for i in parent.layer.attr['_output_shapes'].list.shape[0].dim[1:] ] + [-1] self.weights_dict[IR_node.name]['weights'] = np.reshape( self.weights_dict[IR_node.name]['weights'], dims) self.weights_dict[IR_node.name]['weights'] = np.transpose( self.weights_dict[IR_node.name]['weights'], [dim - 2] + list(range(0, dim - 2)) + [dim - 1]) self.weights_dict[IR_node.name]['weights'] = np.reshape( self.weights_dict[IR_node.name]['weights'], original_dims) def _process_output_layers(self): for name in self.IR_graph.output_layers: IR_node = self.IR_graph.get_node( self.IR_graph.get_node(name).real_name) shape_str = IRGraph.shapeToStr( IR_node.layer.attr["_output_shapes"].list.shape[0]) if IR_node.layer.attr['dtype'].type == graph_pb2.DT_UNDEFINED: IR_node.layer.attr['dtype'].type = graph_pb2.DT_FLOAT32 dtype_str = self.dtype_map[IR_node.layer.attr['dtype'].type] self.add_body( 1, "{:<15} = helper.make_tensor_value_info('{}', {}, ({},))".
format(IR_node.variable_name + '_out', IR_node.variable_name, dtype_str, shape_str)) self.outputs.append(IR_node.variable_name + '_out') def emit_DataInput(self, IR_node): shape = [ dim.size if dim.size != -1 else 1 for dim in IR_node.IR_layer.attr["shape"].shape.dim ] shape_str = ', '.join('%s' % i for i in shape) if IR_node.layer.attr['dtype'].type == graph_pb2.DT_UNDEFINED: IR_node.layer.attr['dtype'].type = graph_pb2.DT_FLOAT32 dtype_str = self.dtype_map[IR_node.layer.attr['dtype'].type] self.add_body( 1, "{:<15} = helper.make_tensor_value_info('{}', {}, ({},))".format( IR_node.variable_name + '_orig', IR_node.variable_name + '_orig', dtype_str, shape_str)) self.add_body( 1, "{:15} = helper.make_node('Transpose', inputs=['{}'], outputs=['{}'], perm=[0, 3, 1, 2])" .format(IR_node.variable_name, IR_node.variable_name + '_orig', IR_node.variable_name)) self.inputs.append(IR_node.variable_name + '_orig') self.nodes.append(IR_node.variable_name) def emit_Conv(self, IR_node): kernel_shape = list(IR_node.get_attr('kernel_shape'))[:-2] dilations = list( IR_node.get_attr('dilations', [1] * (len(kernel_shape) + 2)))[1:-1] group = IR_node.get_attr('group', 1) if IR_node.type == 'DepthwiseConv': group = IR_node.IR_layer.attr["kernel_shape"].list.i[-2] self.weights_dict[IR_node.name]['weights'] = np.swapaxes( self.weights_dict[IR_node.name]['weights'], -1, -2) pads = IR_node.get_attr('pads') pad_length = len(pads) pads = pads[1:pad_length // 2 - 1] + pads[pad_length // 2 + 1:pad_length - 1] strides = list(IR_node.get_attr('strides'))[1:-1] use_bias = IR_node.get_attr('use_bias') self.add_body( 1, "{:15} = __weights_dict['{}']['weights']".format( IR_node.variable_name + '_weight_array', IR_node.name)) self.add_body( 1, "{} = {}.transpose([3,2,0,1])".format( IR_node.variable_name + '_weight_array', IR_node.variable_name + '_weight_array')) self.add_body( 1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float)))" .format(IR_node.variable_name + '_weight', IR_node.variable_name + '_weight', IR_node.variable_name + '_weight_array', IR_node.variable_name + '_weight_array', IR_node.variable_name + '_weight_array')) if use_bias: self.add_body( 1, "{:15} = __weights_dict['{}']['bias']".format( IR_node.variable_name + '_bias_array', IR_node.name)) self.add_body( 1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float)))" .format(IR_node.variable_name + '_bias', IR_node.variable_name + '_bias', IR_node.variable_name + '_bias_array', IR_node.variable_name + '_bias_array', IR_node.variable_name + '_bias_array')) self.add_body( 1, "{:15} = helper.make_node('Conv', inputs=['{}', '{}', '{}'],outputs=['{}'], dilations={}, group={}, kernel_shape={}, pads={}, strides={})" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name + '_weight', IR_node.variable_name + '_bias', IR_node.variable_name, dilations, group, kernel_shape, pads, strides)) self.nodes.append(IR_node.variable_name + '_bias') else: self.add_body( 1, "{:15} = helper.make_node('Conv', inputs=['{}', '{}'],outputs=['{}'], dilations={}, group={}, kernel_shape={}, pads={}, strides={})" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name + '_weight', IR_node.variable_name, 
dilations, group, kernel_shape, pads, strides)) self.nodes.append(IR_node.variable_name + '_weight') self.nodes.append(IR_node.variable_name) def emit_BatchNorm(self, IR_node): epsilon = IR_node.get_attr('epsilon') if IR_node.get_attr('scale'): self.add_body( 1, "{:15} = __weights_dict['{}']['scale']".format( IR_node.variable_name + '_scale_array', IR_node.name)) else: self.add_body( 1, "{:15} = np.ndarray(__weights_dict['{}']['bias'].shape, dtype=__weights_dict['{}']['bias'].dtype)" .format(IR_node.variable_name + '_scale_array', IR_node.name, IR_node.name)) self.add_body( 1, "{:15}.fill(1)".format(IR_node.variable_name + '_scale_array')) self.add_body( 1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))" .format(IR_node.variable_name + '_scale', IR_node.variable_name + '_scale', IR_node.variable_name + '_scale_array', IR_node.variable_name + '_scale_array', IR_node.variable_name + '_scale_array')) self.add_body( 1, "{:15} = __weights_dict['{}']['bias']".format( IR_node.variable_name + '_bias_array', IR_node.name)) self.add_body( 1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))" .format(IR_node.variable_name + '_bias', IR_node.variable_name + '_bias', IR_node.variable_name + '_bias_array', IR_node.variable_name + '_bias_array', IR_node.variable_name + '_bias_array')) self.add_body( 1, "{:15} = __weights_dict['{}']['mean']".format( IR_node.variable_name + '_mean_array', IR_node.name)) self.add_body( 1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))" .format(IR_node.variable_name + '_mean', IR_node.variable_name + '_mean', IR_node.variable_name + '_mean_array', IR_node.variable_name + '_mean_array', IR_node.variable_name + '_mean_array')) self.add_body( 1, "{:15} = __weights_dict['{}']['var']".format( IR_node.variable_name + '_var_array', IR_node.name)) self.add_body( 1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))" .format(IR_node.variable_name + '_var', IR_node.variable_name + '_var', IR_node.variable_name + '_var_array', IR_node.variable_name + '_var_array', IR_node.variable_name + '_var_array')) self.add_body( 1, "{:15} = helper.make_node('BatchNormalization', inputs=['{}', '{}', '{}', '{}', '{}'],outputs=['{}'], epsilon={}, is_test={})" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name + '_scale', IR_node.variable_name + '_bias', IR_node.variable_name + '_mean', IR_node.variable_name + '_var', IR_node.variable_name, epsilon, 0 if self.phase == 'train' else 1)) self.nodes.append(IR_node.variable_name + '_scale') self.nodes.append(IR_node.variable_name + '_bias') self.nodes.append(IR_node.variable_name + '_mean') self.nodes.append(IR_node.variable_name + '_var') self.nodes.append(IR_node.variable_name) def emit_Relu(self, IR_node): self.add_body( 1, "{:15} = helper.make_node('Relu', inputs=['{}'], outputs=['{}'])". 
format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name)) self.nodes.append(IR_node.variable_name) def emit_Add(self, IR_node): input_layers = ', '.join(( "'" + self.IR_graph.get_parent(IR_node.name, [num]).real_variable_name) + "'" for num in range(0, len(IR_node.in_edges))) self.add_body( 1, "{:15} = helper.make_node('Add', inputs=[{}], outputs=['{}'])". format(IR_node.variable_name, input_layers, IR_node.variable_name)) self.nodes.append(IR_node.variable_name) def emit_Pool(self, IR_node): pooling_type = IR_node.get_attr('pooling_type') if IR_node.layer.attr['global_pooling'].b: if pooling_type == 'AVG': self.add_body( 1, "{:15} = helper.make_node('GlobalAveragePool', inputs=['{}'], outputs=['{}'])" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name)) self.nodes.append(IR_node.variable_name) else: print("OnnxEmitter has not supported Global Pool type [%s]." % (pooling_type)) self.emit_UNKNOWN(IR_node) else: if pooling_type in ['AVG', 'MAX']: if pooling_type == 'AVG': op_name = 'AveragePool' elif pooling_type == 'MAX': op_name = 'MaxPool' kernel_shape = list(IR_node.get_attr('kernel_shape')[1:-1]) pads = IR_node.get_attr('pads') pad_length = len(pads) pads = pads[1:pad_length // 2 - 1] + pads[pad_length // 2 + 1:pad_length - 1] strides = list(IR_node.get_attr('strides')[1:-1]) self.add_body( 1, "{:15} = helper.make_node('{}', inputs=['{}'],outputs=['{}'], kernel_shape={}, pads={}, strides={})" .format(IR_node.variable_name, op_name, self.parent_variable_name(IR_node), IR_node.variable_name, kernel_shape, pads, strides)) self.nodes.append(IR_node.variable_name) else: print("OnnxEmitter has not supported Pool type [%s]." % (pooling_type)) self.emit_UNKNOWN(IR_node) def emit_FullyConnected(self, IR_node): self.check_if_need_transpose(IR_node) self.add_body( 1, "{:15} = __weights_dict['{}']['weights']".format( IR_node.variable_name + '_weight_array', IR_node.name)) self.add_body( 1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float)))" .format(IR_node.variable_name + '_weight', IR_node.variable_name + '_weight', IR_node.variable_name + '_weight_array', IR_node.variable_name + '_weight_array', IR_node.variable_name + '_weight_array')) self.add_body( 1, "{:15} = __weights_dict['{}']['bias']".format( IR_node.variable_name + '_bias_array', IR_node.name)) self.add_body( 1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float)))" .format(IR_node.variable_name + '_bias', IR_node.variable_name + '_bias', IR_node.variable_name + '_bias_array', IR_node.variable_name + '_bias_array', IR_node.variable_name + '_bias_array')) self.add_body( 1, "{:15} = helper.make_node('Gemm', inputs=['{}', '{}', '{}'],outputs=['{}'])" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name + '_weight', IR_node.variable_name + '_bias', IR_node.variable_name)) self.nodes.append(IR_node.variable_name + '_weight') self.nodes.append(IR_node.variable_name + '_bias') self.nodes.append(IR_node.variable_name) def emit_Pad(self, IR_node): mode = IR_node.layer.attr['mode'].s.decode() pads = IR_node.get_attr('pads') pad_length = len(pads) pads = [0, 0] + pads[1:pad_length // 2 - 1] + [ 0, 0 ] + pads[pad_length // 
2 + 1:pad_length - 1] self.add_body( 1, "{:15} = helper.make_node('Pad', inputs=['{}'], outputs=['{}'], mode='{}', pads={})" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name, mode, pads)) self.nodes.append(IR_node.variable_name) def emit_Concat(self, IR_node): axis = IR_node.get_attr('axis') - 2 inputs = ', '.join("'" + self.IR_graph.get_node(i).real_variable_name + "'" for i in IR_node.in_edges) self.add_body( 1, "{:15} = helper.make_node('Concat', inputs=[{}], outputs=['{}'], axis={})" .format(IR_node.variable_name, inputs, IR_node.variable_name, axis)) self.nodes.append(IR_node.variable_name) def emit_Flatten(self, IR_node): self.add_body( 1, "{:15} = helper.make_node('Flatten', inputs=['{}'], outputs=['{}'])" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name)) self.nodes.append(IR_node.variable_name) def emit_Softmax(self, IR_node): self.add_body( 1, "{:15} = helper.make_node('Softmax', inputs=['{}'], outputs=['{}'])" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name)) self.nodes.append(IR_node.variable_name) def emit_Constant(self, IR_node): self.add_body( 1, "{:15} = __weights_dict['{}']['value']".format( IR_node.variable_name + '_value_array', IR_node.name)) self.add_body( 1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float)))" .format(IR_node.variable_name, IR_node.variable_name, IR_node.variable_name + '_value_array', IR_node.variable_name + '_value_array', IR_node.variable_name + '_value_array')) self.nodes.append(IR_node.variable_name) def emit_Sub(self, IR_node): inputs = ', '.join("'" + self.IR_graph.get_node(i).real_variable_name + "'" for i in IR_node.in_edges) self.add_body( 1, "{:15} = helper.make_node('Sub', inputs=[{}], outputs=['{}'], broadcast=1)" .format(IR_node.variable_name, inputs, IR_node.variable_name)) self.nodes.append(IR_node.variable_name) def emit_Mul(self, IR_node): inputs = ', '.join("'" + self.IR_graph.get_node(i).real_variable_name + "'" for i in IR_node.in_edges) self.add_body( 1, "{:15} = helper.make_node('Mul', inputs=[{}], outputs=['{}'], broadcast=1)" .format(IR_node.variable_name, inputs, IR_node.variable_name)) self.nodes.append(IR_node.variable_name) def emit_Dropout(self, IR_node): self.add_body( 1, "{:15} = helper.make_node('Dropout', inputs=['{}'], outputs=['{}'], is_test={}, ratio={})" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name, 0 if self.phase == 'train' else 1, 1 - IR_node.get_attr('keep_prob'))) self.nodes.append(IR_node.variable_name) def emit_Squeeze(self, IR_node): IR_node.real_name = self.IR_graph.get_node( IR_node.in_edges[0]).real_name def emit_ReduceMean(self, IR_node): axes = IR_node.layer.attr['axes'].list.i[:] axes = ','.join('%s' % OnnxEmitter.transpose_map[i] for i in axes) self.add_body( 1, "{:15} = helper.make_node('ReduceMean', inputs=['{}'], outputs=['{}'], axes=[{}], keepdims={})" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name, axes, 1 if IR_node.layer.attr['keepdims'].b else 0)) self.nodes.append(IR_node.variable_name) def emit_Reshape(self, IR_node): shape = [ item if item != -1 else 1 for item in IR_node.get_attr('shape') ] if len(shape) == 4: shape = [shape[i] for i in [0, 3, 1, 2]] shape_str = ', '.join('%s' % i for i in shape) self.add_body( 1, "{:15} = 
np.array([{}], dtype=np.int64)".format( IR_node.variable_name + '_shape_array', shape_str)) self.add_body( 1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))" .format(IR_node.variable_name + '_shape', IR_node.variable_name + '_shape', IR_node.variable_name + '_shape_array', IR_node.variable_name + '_shape_array', IR_node.variable_name + '_shape_array')) self.add_body( 1, "{:15} = helper.make_node('Reshape', inputs=['{}', '{}'], outputs=['{}'])" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name + '_shape', IR_node.variable_name)) self.nodes.append(IR_node.variable_name + '_shape') self.nodes.append(IR_node.variable_name) def emit_LRN(self, IR_node): alpha = IR_node.get_attr('alpha') beta = IR_node.get_attr('beta') bias = IR_node.get_attr('bias', 1.0) size = IR_node.get_attr('size') * 2 - 1 self.add_body( 1, "{:15} = helper.make_node('LRN', inputs=['{}'], outputs=['{}'], alpha={}, beta={}, bias={}, size={})" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name, alpha, beta, bias, size)) self.nodes.append(IR_node.variable_name) def emit_Relu6(self, IR_node): self.add_body( 1, "{:15} = helper.make_node('Clip', inputs=['{}'], outputs=['{}'], min=0.0, max=6.0)" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name)) self.nodes.append(IR_node.variable_name) def emit_DepthwiseConv(self, IR_node): self.emit_Conv(IR_node) def emit_Slice(self, IR_node): starts = IR_node.get_attr('starts') starts = [starts[0], starts[-1]] + starts[1:-1] ends = IR_node.get_attr('ends') ends = [ends[0], ends[-1]] + ends[1:-1] ends = [i if i != 0 else sys.maxsize for i in ends] self.add_body( 1, "{:15} = helper.make_node('Slice', inputs=['{}'], outputs=['{}'], starts={}, ends={})" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name, starts, ends)) self.nodes.append(IR_node.variable_name) def emit_LeakyRelu(self, IR_node): alpha = IR_node.get_attr('alpha') self.add_body( 1, "{:15} = helper.make_node('LeakyRelu', inputs=['{}'], outputs=['{}'], alpha={})" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name, alpha)) self.nodes.append(IR_node.variable_name) def emit_SpaceToDepth(self, IR_node): blocksize = IR_node.get_attr('blocksize') self.add_body( 1, "{:15} = helper.make_node('SpaceToDepth', inputs=['{}'], outputs=['{}'], blocksize={})" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name, blocksize)) self.nodes.append(IR_node.variable_name) def emit_UNKNOWN(self, IR_node): print(IR_node.IR_layer.name)
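# Illustrative check (not part of the emitter): OnnxEmitter stores IR pads in
# ONNX order ([begins..., ends...]) over a channels-last tensor. emit_Conv and
# emit_Pool strip the batch/channel entries to get the spatial pads, while
# emit_Pad re-inserts zero batch/channel pads at the channels-first positions.
# The example values below are made up for illustration.
def _spatial_pads(pads):
    half = len(pads) // 2
    return pads[1:half - 1] + pads[half + 1:len(pads) - 1]

def _nchw_pads(pads):
    half = len(pads) // 2
    return [0, 0] + pads[1:half - 1] + [0, 0] + pads[half + 1:len(pads) - 1]

# NHWC pads: H padded by (1, 2), W padded by (3, 4), batch/channel unpadded.
assert _spatial_pads([0, 1, 3, 0, 0, 2, 4, 0]) == [1, 3, 2, 4]
assert _nchw_pads([0, 1, 3, 0, 0, 2, 4, 0]) == [0, 0, 1, 3, 0, 0, 2, 4]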
class PytorchEmitter(Emitter): dtype_map = { graph_pb2.DT_FLOAT16: "torch.float16", graph_pb2.DT_FLOAT32: "torch.float32", graph_pb2.DT_FLOAT64: "torch.float64", graph_pb2.DT_INT16: "torch.int16", graph_pb2.DT_INT32: "torch.int32", graph_pb2.DT_INT64: "torch.int64", graph_pb2.DT_UINT8: "torch.uint8", graph_pb2.DT_UINT16: "torch.uint16" } # Base Functions def __init__(self, model): super(PytorchEmitter, self).__init__() if isinstance(model, _string_types): network_path = model else: network_path = model[0] weight_path = model[1] self.init_code = str() self.IR_graph = IRGraph(network_path) self.IR_graph.build() self._load_weights(weight_path) def run(self, dstNetworkPath, dstWeightPath=None, phase='test'): super(PytorchEmitter, self).run(dstNetworkPath, dstWeightPath, phase) if self.weight_loaded: self.save_weights(self.weights_dict, dstWeightPath) def add_init(self, indent, codes): if isinstance(codes, _string_types): codes = [codes] for code in codes: self.init_code += (" " * indent) + code + '\n' def parent_variable_name(self, IR_node, path=[0], weight_type='weights'): if not IR_node.in_edges and IR_node.name in self.weights_dict.keys(): return "torch.from_numpy(__weights_dict['{}']['{}'])".format( IR_node.name, weight_type) return super(PytorchEmitter, self).parent_variable_name(IR_node, path=path) @property def header_code(self): return """import numpy as np import torch import torch.nn as nn import torch.nn.functional as F __weights_dict = dict() def load_weights(weight_file): if weight_file == None: return try: weights_dict = np.load(weight_file).item() except: weights_dict = np.load(weight_file, encoding='bytes').item() return weights_dict class KitModel(nn.Module): """ def gen_code(self, phase): self.add_init( 1, """ def __init__(self, weight_file): super(KitModel, self).__init__() global __weights_dict __weights_dict = load_weights(weight_file) """) self.add_body(1, "def forward(self, x):") for layer in self.IR_graph.topological_sort: current_node = self.IR_graph.get_node(layer) node_type = current_node.type if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) line = func(current_node) else: print("Pytorch Emitter has not supported operator [%s]."
% (node_type)) self.emit_UNKNOWN(current_node) self.add_body( 2, "return {}".format(', '.join([ self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers if self.IR_graph.get_node(name).type != 'Pack' ]))) self.add_body(0, "") for i in self.used_layers: func = getattr(self, "_layer_" + i) func() return self.header_code + '\n' + self.init_code + '\n' + self.body_code def _defuse_padding(self, IR_node, extra_str=""): input_node = self.parent_variable_name(IR_node) if IR_node.get_attr('auto_pad') == 'VALID': return input_node if is_valid_padding(IR_node.get_attr("pads")) == True: return input_node padding = self._convert_padding(IR_node) input_node = IR_node.variable_name + '_pad' self.add_body( 2, "{:<15} = F.pad({}, {}{})".format( input_node, self.parent_variable_name(IR_node), padding, extra_str)) return input_node def emit_Conv(self, IR_node): self.used_layers.add('Conv') dim = len(IR_node.get_attr('strides')) - 2 in_channels = IR_node.get_attr('kernel_shape')[-2] filter = IR_node.get_attr('kernel_shape')[-1] kernel = IR_node.get_attr('kernel_shape')[:-2] strides = IR_node.get_attr('strides')[1:-1] if IR_node.type == 'DepthwiseConv': group = in_channels filter *= group else: group = IR_node.get_attr('group', 1) self.add_init( 2, "self.{} = self.__conv({}, name='{}', in_channels={}, out_channels={}, kernel_size={}, stride={}, groups={}, bias={})" .format( IR_node.variable_name, dim, IR_node.name, in_channels, filter, tuple(kernel), tuple(strides), # padding, group, IR_node.get_attr('use_bias'))) input_node = self._defuse_padding(IR_node) self.add_body( 2, "{:<15} = self.{}({})".format(IR_node.variable_name, IR_node.variable_name, input_node)) if self.weight_loaded: if IR_node.type == 'DepthwiseConv': self.weights_dict[IR_node.name]['weights'] = np.swapaxes( self.weights_dict[IR_node.name]['weights'], -1, -2) self.weights_dict[IR_node.name]['weights'] = np.transpose( self.weights_dict[IR_node.name]['weights'], [dim + 1, dim] + list(range(0, dim))) @staticmethod def is_ceil_mode(pads): lens = len(pads) for i in range(lens // 2 + 1, lens - 1): if pads[i] == pads[i - lens // 2]: return False else: return True def emit_Pool(self, IR_node): dim = len(IR_node.get_attr('strides')) - 2 if IR_node.get_attr('pooling_type') == "MAX": pool_name = "max_pool{}d".format(dim) # exstr = ", value=float('-Inf')" elif IR_node.get_attr('pooling_type') == "AVG": pool_name = "avg_pool{}d".format(dim) # exstr = "" else: raise ValueError() if IR_node.layer.attr['global_pooling'].b: self.add_body( 2, "{:<15} = F.{}(input = {}, kernel_size = {}.size()[2:])". 
format(IR_node.variable_name, pool_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node))) else: if IR_node.get_attr('pooling_type') == "MAX": # Change to padding defuse input_node = self._defuse_padding(IR_node, ", value=float('-inf')") for e in IR_node.get_attr('dilations', []): assert e == 1 pool_size = IR_node.get_attr('kernel_shape')[1:-1] strides = IR_node.get_attr('strides')[1:-1] self.add_body( 2, "{:<15} = F.{}({}, kernel_size={}, stride={}, padding={}, ceil_mode={})" .format(IR_node.variable_name, pool_name, input_node, tuple(pool_size), tuple(strides), 0, False)) elif IR_node.get_attr('pooling_type') == "AVG": for e in IR_node.get_attr('dilations', []): assert e == 1 pool_size = IR_node.get_attr('kernel_shape')[1:-1] strides = IR_node.get_attr('strides')[1:-1] padding = IR_node.get_attr('pads')[1:dim] ceil_mode = self.is_ceil_mode(IR_node.get_attr('pads')) # input_node = self._defuse_padding(IR_node, exstr) self.add_body( 2, "{:<15} = F.{}({}, kernel_size={}, stride={}, padding={}, ceil_mode={})" .format(IR_node.variable_name, pool_name, self.parent_variable_name(IR_node), tuple(pool_size), tuple(strides), tuple(padding), ceil_mode)) else: raise ValueError() def emit_UNKNOWN(self, IR_node): print(IR_node.name) def emit_DataInput(self, IR_node): # Ignore it in Pytorch IR_node.real_name = 'x' def emit_Dropout(self, IR_node): self.add_body( 2, "{:<15} = F.dropout(input = {}, p = {}, training = self.training, inplace = True)" .format(IR_node.variable_name, self.parent_variable_name(IR_node), 1 - IR_node.layer.attr["keep_prob"].f)) def check_if_need_transpose(self, IR_node): parent = self.IR_graph.get_parent(IR_node.name, [0]) while parent.type == 'Flatten' or parent.type == 'Dropout': parent = self.IR_graph.get_parent(parent.name, [0]) dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim) if dim > 2: original_dims = self.weights_dict[IR_node.name]['weights'].shape dims = [ i.size for i in parent.layer.attr['_output_shapes'].list.shape[0].dim[1:] ] + [-1] self.weights_dict[IR_node.name]['weights'] = np.reshape( self.weights_dict[IR_node.name]['weights'], dims) self.weights_dict[IR_node.name]['weights'] = np.transpose( self.weights_dict[IR_node.name]['weights'], [dim - 2] + list(range(0, dim - 2)) + [dim - 1]) self.weights_dict[IR_node.name]['weights'] = np.reshape( self.weights_dict[IR_node.name]['weights'], original_dims) def emit_FullyConnected(self, IR_node): self.used_layers.add(IR_node.type) in_features = 1 for i in self.IR_graph.get_parent( IR_node.name, [0]).layer.attr['_output_shapes'].list.shape[0].dim[1:]: in_features *= i.size self.add_init( 2, "self.{} = self.__dense(name = '{}', in_features = {}, out_features = {}, bias = {})" .format(IR_node.variable_name, IR_node.name, in_features, IR_node.layer.attr["units"].i, IR_node.IR_layer.attr["use_bias"].b)) input_node = self.parent_variable_name(IR_node) if len( self.IR_graph.get_parent( IR_node.name, [0]).get_attr('_output_shapes')[0].dim) > 2: input_node = "{}.view({}.size(0), -1)".format( input_node, input_node) self.add_body( 2, "{:<15} = self.{}({})".format(IR_node.variable_name, IR_node.variable_name, input_node)) if self.weight_loaded: self.check_if_need_transpose(IR_node) self.weights_dict[IR_node.name]['weights'] = np.transpose( self.weights_dict[IR_node.name]['weights'], (1, 0)) def emit_Flatten(self, IR_node): parent = self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name self.add_body( 2, "{:<15} = {}.view({}.size(0), -1)".format(IR_node.variable_name, parent, parent)) def
emit_Reshape(self, IR_node):
        shape_list = IR_node.get_attr('shape')
        shape_str = ','.join([str(int(i)) for i in shape_list])
        self.add_body(2, "{:<15} = torch.reshape(input = {}, shape = ({}))".format(
            IR_node.variable_name,
            self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name,
            shape_str))

    def emit_Tanh(self, IR_node):
        self.add_body(2, "{:<15} = F.tanh({})".format(
            IR_node.variable_name,
            self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name))

    def emit_Relu(self, IR_node):
        self.add_body(2, "{:<15} = F.relu({})".format(
            IR_node.variable_name,
            self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name))

    def emit_LeakyRelu(self, IR_node):
        self.add_body(2, "{:<15} = F.leaky_relu({}, negative_slope={})".format(
            IR_node.variable_name,
            self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name,
            IR_node.get_attr('alpha')))

    def emit_Relu6(self, IR_node):
        self.add_body(2, "{:<15} = F.relu6({})".format(
            IR_node.variable_name,
            self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name))

    def emit_Softmax(self, IR_node):
        self.add_body(2, "{:<15} = F.softmax({})".format(
            IR_node.variable_name,
            self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name))

    def emit_Sigmoid(self, IR_node):
        # emit a functional sigmoid like the other activations above
        self.add_body(2, "{:<15} = F.sigmoid({})".format(
            IR_node.variable_name,
            self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name))

    def emit_Embedding(self, IR_node):
        self.used_layers.add("Embedding")
        self.add_init(2, "self.{} = self.__embedding('{}', num_embeddings={}, embedding_dim={})".format(
            IR_node.variable_name,
            IR_node.name,
            IR_node.get_attr('input_dim'),  # 2-D
            IR_node.get_attr('output_dim')))

        self.add_body(2, "{:<15} = self.{}({})".format(
            IR_node.variable_name,
            IR_node.variable_name,
            "torch.LongTensor(np.array({}))".format(self.parent_variable_name(IR_node))))

    def emit_RNNs(self, IR_node, func):
        raise NotImplementedError()
        # for Keras
        if "dropout" in IR_node.IR_layer.attr:
            dropout_str = ",dropout = {}, recurrent_dropout = {}".format(
                IR_node.IR_layer.attr['dropout'].f,
                IR_node.IR_layer.attr['recurrent_dropout'].f)
        else:
            dropout_str = ""

        code = "{:<15} = {}(units = {}, use_bias = {} {})({})".format(
            IR_node.name,
            func,
            IR_node.IR_layer.attr['units'].i,
            IR_node.IR_layer.attr['use_bias'].b,
            dropout_str,
            IR_node.in_edges[0])

        return code

    def emit_LSTM(self, IR_node):
        return self.emit_RNNs(IR_node, "LSTM")

    def emit_GRU(self, IR_node):
        return self.emit_RNNs(IR_node, "GRU")

    def emit_Add(self, IR_node):
        self.add_body(2, "{:<15} = {}".format(
            IR_node.variable_name,
            ' + '.join('%s' % self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges)))

    def emit_Sub(self, IR_node):
        self.add_body(2, "{:<15} = {}".format(
            IR_node.variable_name,
            ' - '.join('%s' % self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges)))

    def emit_Mul(self, IR_node):
        self.add_body(2, "{:<15} = {}".format(
            IR_node.variable_name,
            ' * '.join('%s' % self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges)))

    def emit_MatMul(self, IR_node):
        self.add_body(2, "{:<15} = torch.matmul({})".format(
            IR_node.variable_name,
            ' , '.join('%s' % self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges)))

    def emit_Constant(self, IR_node):
        self.add_init(2, "self.{:<15} = torch.autograd.Variable(torch.Tensor(__weights_dict['{}']['value']), requires_grad=False)".format(
            IR_node.variable_name, IR_node.name))

        # self.add_init(2, "self.{:<15} = torch.from_numpy(__weights_dict['{}']['value'])".format( #
IR_node.variable_name, # IR_node.name)) IR_node.real_name = "self." + IR_node.variable_name def _convert_axis(self, IR_node, axis): ndim = len( self.IR_graph.get_parent(IR_node.name, [0]).get_attr('_output_shapes')[0].dim) if axis == 0: return 0 elif axis == ndim - 1: return 1 else: return axis + 1 def emit_Concat(self, IR_node): axis = self._convert_axis(IR_node, IR_node.get_attr('axis')) self.add_body( 2, "{:<15} = torch.cat(({}), {})".format( IR_node.variable_name, ', '.join( self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges), axis, )) def emit_BatchNorm(self, IR_node): self.used_layers.add(IR_node.type) dim = len(IR_node.layer.attr['_output_shapes'].list.shape[0].dim) - 2 output_shape = IR_node.layer.attr['_output_shapes'].list.shape[0] if IR_node.get_attr('data_format', "NHWC") == "NCHW": num_features = output_shape.dim[1].size else: num_features = output_shape.dim[-1].size self.add_init( 2, "self.{} = self.__batch_normalization({}, '{}', num_features={}, eps={}, momentum={})" .format( IR_node.variable_name, dim, IR_node.name, num_features, IR_node.layer.attr['epsilon'].f, IR_node.layer.attr['momentum'].f, )) self.add_body( 2, "{:<15} = self.{}({})".format(IR_node.variable_name, IR_node.variable_name, self.parent_variable_name(IR_node))) def emit_Scale(self, IR_node): self.used_layers.add(IR_node.type) dim = len(IR_node.layer.attr['_output_shapes'].list.shape[0].dim) - 2 self.add_init( 2, "self.{} = self.__scale({}, '{}', num_features={})".format( IR_node.variable_name, dim, IR_node.name, IR_node.layer. attr['_output_shapes'].list.shape[0].dim[-1].size)) self.add_body( 2, "{:<15} = self.{}({})".format(IR_node.variable_name, IR_node.variable_name, self.parent_variable_name(IR_node))) def emit_Squeeze(self, IR_node): self.add_body( 2, "{:<15} = torch.squeeze({})".format( IR_node.variable_name, self.parent_variable_name(IR_node))) @staticmethod def _convert_padding(IR_node): padding = IR_node.get_attr('pads') padding = convert_onnx_pad_to_tf(padding)[1:-1] new_padding = [] for pad in padding: new_padding.insert(0, pad) return tuple(np.array(new_padding).reshape(-1).tolist()) def emit_Pad(self, IR_node): if IR_node.get_attr('mode').lower() == 'constant': mode = "mode = 'constant', value = {}".format(0) elif IR_node.get_attr('mode').lower() == 'reflect': mode = "mode = 'reflect'" elif IR_node.get_attr('mode').upper() == 'SYMMETRIC': mode = "mode = 'replicate'" else: assert False padding = self._convert_padding(IR_node) self.add_body( 2, "{:<15} = F.pad({}, {}, {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), padding, mode)) def emit_ReduceMean(self, IR_node): axes = [ self._convert_axis(IR_node, x) for x in IR_node.get_attr('axes') ] input_node = self.parent_variable_name(IR_node) for axis in sorted(axes, reverse=True): self.add_body( 2, "{:<15} = torch.mean({}, {}, {})".format( IR_node.variable_name, input_node, axis, IR_node.get_attr("keepdims"))) input_node = IR_node.variable_name def emit_LRN(self, IR_node): self.add_body( 2, "{:<15} = F.local_response_norm({}, size={}, alpha={}, beta={}, k={})" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('size') * 2 - 1, IR_node.get_attr('alpha'), IR_node.get_attr('beta'), IR_node.get_attr('k', 1))) def emit_DepthwiseConv(self, IR_node): self.emit_Conv(IR_node) def emit_Const(self, IR_node): if 'dtype' in IR_node.layer.attr: dtype_str = "dtype={}".format( self.dtype_map[IR_node.layer.attr['dtype'].type]) if 'int' in dtype_str: self.add_body( 2, "{:<15} = 
torch.tensor({}, {})".format( IR_node.variable_name, IR_node.layer.attr['value'].i, dtype_str)) else: self.add_body( 2, "{:<15} = torch.tensor({}, {})".format( IR_node.variable_name, IR_node.layer.attr['value'].f, dtype_str)) else: dtype_str = "dtype=torch.float32" self.add_body( 2, "{:<15} = torch.tensor({}, {})".format( IR_node.variable_name, IR_node.layer.attr['value'].f, dtype_str)) def emit_Shape(self, IR_node): self.add_body( 2, "{:<15} = list({}.size())".format( IR_node.variable_name, self.parent_variable_name(IR_node))) def emit_Pack(self, IR_node): self.add_body( 2, "{:<15} = {}".format( IR_node.variable_name, '[' + ','.join('%s' % self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges) + ']', )) def emit_Slice(self, IR_node): starts = IR_node.get_attr('starts') if len(starts) > 1: starts = [starts[0], starts[-1]] + starts[1:-1] ends = IR_node.get_attr('ends') if len(ends) > 1: ends = [ends[0], ends[-1]] + ends[1:-1] extra_str = "" for idx, _ in enumerate(starts): if idx: extra_str += ", " extra_str += "{}:".format(starts[idx]) if ends[idx]: extra_str += "{}".format(ends[idx]) self.add_body( 2, "{:<15} = {}[{}]".format(IR_node.variable_name, self.parent_variable_name(IR_node), extra_str)) def emit_Split(self, IR_node): print(IR_node.layer) assert False def emit_Gather(self, IR_node): pass # self.used_layers.add("Embedding") # shape = tuple(IR_node.get_attr('shape')) # self.add_init(2, "self.{} = self.__embedding('{}', num_embeddings={}, embedding_dim={})".format( # IR_node.variable_name, # IR_node.name, # shape[0], #2-D # shape[1] # )) # self.add_body(2, "{:<15} = self.{}({})".format( # IR_node.variable_name, # IR_node.variable_name, # "torch.LongTensor(np.array({}))".format(self.parent_variable_name(IR_node)) # )) def emit_Transpose(self, IR_node): self.add_body( 2, "{:<15} = {}.permute({})".format( IR_node.variable_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node, [1]))) def _layer_Embedding(self): self.add_body( 0, """ @staticmethod def __embedding(name, **kwargs): layer = nn.Embedding(**kwargs) #shape layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights'])) return layer """) # def _layer_Sim_tfGather(self): # self.add_body(0, """ # @staticmethod # def __sim_tf_gather(params, indices, axis=0): # indices = np.array(indices) # output_shape = list(indices.shape)+list(torch.Tensor(params).shape)[1:] # output_tensor = torch.Tensor(size = output_shape) # from itertools import product # row_indices = [] # for s in list(indices.shape)[:len(list(indices.shape))-1]: # row_indices.append(tuple(range(s))) # row_indices = list(product(*tuple(row_indices))) # for row_index in row_indices: # index = torch.LongTensor(indices[row_index]) # output_tensor[tuple(row_index)] = torch.index_select(params, axis, index) # return output_tensor # """) def _layer_Conv(self): self.add_body( 0, """ @staticmethod def __conv(dim, name, **kwargs): if dim == 1: layer = nn.Conv1d(**kwargs) elif dim == 2: layer = nn.Conv2d(**kwargs) elif dim == 3: layer = nn.Conv3d(**kwargs) else: raise NotImplementedError() layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights'])) if 'bias' in __weights_dict[name]: layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias'])) return layer""") def _layer_FullyConnected(self): self.add_body( 0, """ @staticmethod def __dense(name, **kwargs): layer = nn.Linear(**kwargs) layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights'])) if 'bias' 
in __weights_dict[name]: layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias'])) return layer""") def _layer_BatchNorm(self): self.add_body( 0, """ @staticmethod def __batch_normalization(dim, name, **kwargs): if dim == 1: layer = nn.BatchNorm1d(**kwargs) elif dim == 2: layer = nn.BatchNorm2d(**kwargs) elif dim == 3: layer = nn.BatchNorm3d(**kwargs) else: raise NotImplementedError() if 'scale' in __weights_dict[name]: layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['scale'])) else: layer.weight.data.fill_(1) if 'bias' in __weights_dict[name]: layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias'])) else: layer.bias.data.fill_(0) layer.state_dict()['running_mean'].copy_(torch.from_numpy(__weights_dict[name]['mean'])) layer.state_dict()['running_var'].copy_(torch.from_numpy(__weights_dict[name]['var'])) return layer""") def _layer_Scale(self): self.add_body( 0, """ # from torch.nn.parameter import Parameter class _Scale(nn.Module): def __init__(self, num_features, affine=True): super(KitModel._Scale, self).__init__() self.num_features = num_features self.affine = affine self.running_mean = torch.zeros(num_features) self.running_var = torch.ones(num_features) self.training = False self.eps = 1e-5 if self.affine: self.weight = nn.Parameter(torch.Tensor(num_features)) self.bias = nn.Parameter(torch.Tensor(num_features)) else: self.register_parameter('weight', None) self.register_parameter('bias', None) self.reset_parameters() def reset_parameters(self): if self.affine: self.weight.data.uniform_() self.bias.data.zero_() def _check_input_dim(self, input): raise NotImplementedError def forward(self, input): self._check_input_dim(input) return F.batch_norm( input, self.running_mean, self.running_var, self.weight, self.bias, self.training, 0 , self.eps) class Scale1d(_Scale): def _check_input_dim(self, input): if input.dim() != 2 and input.dim() != 3: raise ValueError('expected 2D or 3D input (got {}D input)' .format(input.dim())) class Scale2d(_Scale): def _check_input_dim(self, input): if input.dim() != 4: raise ValueError('expected 4D input (got {}D input)' .format(input.dim())) class Scale3d(_Scale): def _check_input_dim(self, input): if input.dim() != 5: raise ValueError('expected 5D input (got {}D input)' .format(input.dim())) @staticmethod def __scale(dim, name, **kwargs): if dim == 1: layer = KitModel.Scale1d(**kwargs) elif dim == 2: layer = KitModel.Scale2d(**kwargs) elif dim == 3: layer = KitModel.Scale3d(**kwargs) else: raise NotImplementedError() if 'scale' in __weights_dict[name]: layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['scale'])) else: layer.weight.data.fill_(1) if 'bias' in __weights_dict[name]: layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias'])) else: layer.bias.data.fill_(0) return layer""")
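# A minimal usage sketch (not part of the emitter) for the PyTorch code the
# class above generates. The file and weight names ('kit_model.py',
# 'weights.npy') are illustrative only; the actual names depend on how the
# conversion was invoked.
#
# import torch
# from kit_model import KitModel
#
# model = KitModel('weights.npy')     # builds the module and loads __weights_dict
# model.eval()                        # disable dropout / use running BN statistics
# x = torch.randn(1, 3, 224, 224)     # NCHW input; shape depends on the source model
# with torch.no_grad():
#     out = model(x)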
class MXNetEmitter(Emitter):

    dtype_map = {
        graph_pb2.DT_FLOAT16 : "float16",
        graph_pb2.DT_FLOAT32 : "float32",
        graph_pb2.DT_FLOAT64 : "float64",
        graph_pb2.DT_INT32 : "int32",
        graph_pb2.DT_UINT8 : "uint8"
    }

    activation_map = {
        "relu" : "Relu",
        "sigmoid" : "Sigmoid",
        "tanh" : "Tanh",
        "elu" : "Elu"
    }

    transpose_map = {
        1 : 2,
        2 : 3,
        -1 : 1
    }

    channels_last = ['NDHWC', 'NHWC']

    def __init__(self, model):
        super(MXNetEmitter, self).__init__()
        from six import string_types as _string_types
        if isinstance(model, _string_types):
            network_path = model
            self.weight_loaded = False
        elif len(model) == 3:
            network_path = model[0]
            weight_path = model[1]
            self.output_weights_file = model[2]
            self.weights = np.load(weight_path).item()
            self.weight_loaded = True
            self.output_weights = dict()
        else:
            raise ValueError("the # of input arguments [{}] is not supported".format(len(model)))

        self.IR_graph = IRGraph(network_path)
        self.IR_graph.build()

    @property
    def header_code(self):
        return """import mxnet as mx
import numpy as np
import math

# mxnet-cpu only supports channel first; the model and weights are converted to channel first by default

def RefactorModel():
"""

    def gen_code(self, phase):
        self.IR_layer_map = dict()
        self.add_body(0, self.header_code)
        for layer in self.IR_graph.topological_sort:
            self.IR_layer_map[layer] = self.IR_graph.get_node(layer)

        shape = dict()
        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if len(current_node.in_edges) == 0:
                current_node.in_edges.append('data')

            if node_type.lower() in MXNetEmitter.activation_map:
                func = getattr(self, "emit_Activation")
                line = func(current_node, MXNetEmitter.activation_map[node_type.lower()].lower())
                self.add_body(1, line)
            elif hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                line = func(current_node)
                if line is not None:
                    self.add_body(1, line)
            else:
                print("MXNet Emitter has not supported operator [%s]." % (node_type))
                self.emit_UNKNOWN(current_node)

            if node_type == "DataInput":
                cur_shape = list()
                first = True
                for dim in current_node.IR_layer.attr["shape"].shape.dim:
                    if dim.size == -1 and first:
                        cur_shape.append(1)
                        print("Detected input layer [{}] with an inferred batch size; using default value [1]".format(current_node.name))
                    else:
                        if dim.size == -1:
                            print("Warning: please adjust the input size manually")
                        cur_shape.append(dim.size)
                    first = False

                # move the channel dimension to position 1 (channel first for MXNet)
                cur_shape.insert(1, cur_shape.pop())
                shape[current_node.name] = ', '.join('%s' % i for i in cur_shape)

        if self.weight_loaded:
            fullpath = os.path.abspath(self.output_weights_file)
            dirname = os.path.dirname(fullpath)
            if not os.path.exists(dirname):
                os.makedirs(dirname)
            with open(self.output_weights_file, 'wb') as outfile:
                np.save(outfile, self.output_weights)

        comment = "\n    # if a GPU is available, change mx.cpu() to mx.gpu()"
        last_line = "{:<15} = mx.mod.Module(symbol = {}, context = mx.cpu(), data_names = ['{}'])".format(
            "model",
            ', '.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers if self.IR_graph.get_node(name).type != 'Pack']),
            ', '.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.input_layers if self.IR_graph.get_node(name).type != 'Const']))

        self.add_body(1, comment)
        self.add_body(1, last_line)
        self.add_body(1, "return model")

        weight_code = ""
        if not self.weight_loaded:
            weight_code += "# the emitter did not detect any imported weights; you may need to generate the weights file manually\n"
        weight_code += self.gen_weight_code(shape, phase)

        main_code = "if __name__ == '__main__':\n    model = RefactorModel()\n"
        if self.weight_loaded:
            main_code += "    # remember to adjust params path\n    model = deploy_weight(model, '{}')\n".format(self.output_weights_file)

        if phase == 'train':
            train_code = """def train(model):
    import logging
    logging.getLogger().setLevel(logging.DEBUG)
    model.fit(train_iter,  # train data
              eval_data = val_iter,  # validation data
              optimizer = 'sgd',  # Defaults to 'sgd'
              optimizer_params = {'learning_rate':0.01},  # use fixed learning rate
              eval_metric = 'acc',  # report accuracy during training, other possible predefined metrics are: 'ce', 'f1', 'mae', 'mse', 'rmse', 'top_k_accuracy'
              batch_end_callback = mx.callback.Speedometer(batch_size, 100),  # output progress for each 100 data batches
              num_epoch = 10)  # train for at most 10 dataset passes\n\n"""
            code = self.body_code + weight_code + train_code + main_code
        else:
            test_code = """from collections import namedtuple
Batch = namedtuple('Batch', ['data'])

def get_image(url, show=False):
    import cv2
    # download and show the image
    fname = mx.test_utils.download(url)
    img = cv2.cvtColor(cv2.imread(fname), cv2.COLOR_BGR2RGB)
    if img is None:
        return None
    if show:
        import matplotlib.pyplot as plt
        plt.imshow(img)
        plt.axis('off')
    # convert into format (batch, RGB, width, height)
    img = cv2.resize(img, (224, 224))
    img = np.swapaxes(img, 0, 2)
    img = np.swapaxes(img, 1, 2)
    img = img[np.newaxis, :]
    return img

def predict(model, labels, url):
    # to show the image, set the argument show to True
    img = get_image(url, show = False)
    # compute the predict probabilities
    model.forward(Batch([mx.nd.array(img)]))
    prob = model.get_outputs()[0].asnumpy()
    # print the top-5
    prob = np.squeeze(prob)
    a = np.argsort(prob)[::-1]
    for i in a[0:5]:
        print('probability = %f, class = %s' % (prob[i], labels[i]))\n\n"""

            main_code += """
    # # call function predict
    # with open('synset.txt', 'r') as f:
    #     labels = [l.rstrip() for l in f]
    # predict(model, labels, 'http://writm.com/wp-content/uploads/2016/08/Cat-hd-wallpapers.jpg')
"""
            code = self.body_code + weight_code + test_code + main_code

        return code

    def gen_weight_code(self, shape, phase):
        code_str = "def deploy_weight(model, weight_file):\n"
        code_str += """
    if weight_file == None:
        return

    try:
        weights_dict = np.load(weight_file).item()
    except:
        weights_dict = np.load(weight_file, encoding='bytes').item()

    arg_params = dict()
    aux_params = dict()
    for weight_name, weight_data in weights_dict.items():
        weight_name = str(weight_name)
        if "moving" in weight_name:
            aux_params[weight_name] = mx.nd.array(weight_data)
        else:
            arg_params[weight_name] = mx.nd.array(weight_data)

"""
        if phase == 'train':
            code_str += "    model.bind(for_training = True, data_shapes = ["
        else:
            code_str += "    model.bind(for_training = False, data_shapes = ["
        first = True
        for k, v in shape.items():
            if not first:
                code_str += ", "
            code_str += "('" + k + "', " + "(" + v + "))"
            first = False
        code_str += "])\n"
        code_str += "    model.set_params(arg_params = arg_params, aux_params = aux_params, allow_missing = True)\n\n    return model\n\n\n"
        return code_str

    @staticmethod
    def calculate_same_pad(data_shape, kernel, stride):
        if (data_shape % stride == 0):
            pad = max(kernel - stride, 0)
        else:
            pad = max(kernel - (data_shape % stride), 0)
        if pad % 2 == 0:
            return False, pad
        else:
            return True, pad

    @staticmethod
    def transfer_pad(pad_list):
        defuse_pad = False
        pad = list()

        assert len(pad_list) % 2 == 0
        mid = int(len(pad_list) / 2)
        pad_first = pad_list[1:mid-1]
        pad_second = pad_list[mid+1:-1]

        for i in range(0, mid-2):
            if not pad_first[i] == pad_second[i]:
                defuse_pad = True

        if defuse_pad:
            pad.extend([0] * 4)
            for i in range(0, mid-2):
                pad.extend([pad_first[i], pad_second[i]])
        else:
            pad = pad_first

        return defuse_pad, pad

    @staticmethod
    def transpose(data, dim):
        if dim == 1:
            data = data.transpose((2, 1, 0))
        elif dim == 2:
            data = data.transpose((3, 2, 0, 1))
        elif dim == 3:
            data = data.transpose((4, 3, 0, 1, 2))
        else:
            raise ValueError("The weight of dim {} cannot transpose".format(dim))

        return data

    def set_pad(self, IR_node, code, pad, _max_pool):
        if _max_pool:
            constant_value = "float('-inf')"
        else:
            constant_value = "0.0"

        code = "{:<15} = mx.sym.pad(data = {}, mode = 'constant', pad_width={}, constant_value = {}, name = '{}')".format(
            IR_node.variable_name + "_pad",
            self.parent_variable_name(IR_node),
            tuple(pad),
            constant_value,
            IR_node.name + "_pad")

        for e in IR_node.in_edges:
            if e == 'data':
                continue
            self.IR_layer_map[e].out_edges = [x if not self.IR_layer_map[x].name == IR_node.variable_name else IR_node.variable_name + "_pad" for x in self.IR_layer_map[e].out_edges]

        return code

    def emit_UNKNOWN(self, IR_node):
        print(IR_node.name)

    def emit_FullyConnected(self, IR_node):
        if self.weight_loaded:
            weight_dict = self.weights[IR_node.name]
            parent = self.IR_graph.get_parent(IR_node.name, [0])
            while parent.type == "Flatten" or parent.type == 'Dropout':
                parent = self.IR_graph.get_parent(parent.name, [0])
            dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim)
            if dim > 2:
                original_dims = weight_dict['weights'].shape
                dims = [i.size for i in parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]] + [-1]
                weight_dict['weights'] = np.reshape(weight_dict['weights'], dims)
                weight_dict['weights'] = np.transpose(weight_dict['weights'], [dim - 2] + list(range(0, dim - 2)) + [dim - 1])
                weight_dict['weights'] = np.reshape(weight_dict['weights'], original_dims)
            self.output_weights[IR_node.name + "_weight"] = weight_dict['weights'].transpose((1, 0))

        num_hidden = IR_node.IR_layer.attr["units"].i
        no_bias = not
IR_node.IR_layer.attr["use_bias"].b if not no_bias and self.weight_loaded: self.output_weights[IR_node.name + "_bias"] = weight_dict['bias'] code = "{:<15} = mx.sym.FullyConnected(data = {}, num_hidden = {}, no_bias = {}, name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), num_hidden, no_bias, IR_node.name) return code def _emit_convolution(self, IR_node, pattern): if self.weight_loaded: weight_dict = self.weights[IR_node.name] weights = weight_dict['weights'] dim = len(IR_node.IR_layer.attr["kernel_shape"].list.i) - 2 kernel = list() for idx in range(0, dim): kernel.append(IR_node.IR_layer.attr["kernel_shape"].list.i[idx]) stride = list() for e in IR_node.IR_layer.attr["strides"].list.i[1:-1]: stride.append(e) dilate = list() for e in IR_node.IR_layer.attr["dilations"].list.i[1:-1]: dilate.append(e) if dilate == []: dilate = [1, 1] dilate = ', '.join('%s' % i for i in dilate) defuse_pad = False pad = list() if "pads" in IR_node.IR_layer.attr: output_shape = list() for e in IR_node.IR_layer.attr["_output_shapes"].list.shape[0].dim: output_shape.append(e.size) # print("Warning: MXNet Convolution Layer pad does not match IR Convolution Layer pad") defuse_pad, pad = MXNetEmitter.transfer_pad(IR_node.IR_layer.attr["pads"].list.i) num_filter = 0 if pattern == "Deconvolution": num_filter = IR_node.IR_layer.attr["kernel_shape"].list.i[-2] else: num_filter = IR_node.IR_layer.attr["kernel_shape"].list.i[-1] use_bias = IR_node.get_attr('use_bias', False) if use_bias and self.weight_loaded: self.output_weights[IR_node.name + "_bias"] = weight_dict['bias'] if pattern == "DepthwiseConv": num_group = IR_node.IR_layer.attr["kernel_shape"].list.i[-2] num_filter = num_filter * num_group pattern = "Convolution" if self.weight_loaded: weights = np.swapaxes(weights, -1, -2) else: num_group = IR_node.get_attr('group', 1) # layout = IR_node.IR_layer.attr["data_format"].s if dim == 1: layout = 'NCW' elif dim == 2: layout = 'NCHW' elif dim == 3: layout = 'NCDHW' if self.weight_loaded: # if layout not in MXNetEmitter.channels_last: weights = MXNetEmitter.transpose(weights, dim) self.output_weights[IR_node.name + "_weight"] = weights code = "" if not defuse_pad: code += "{:<15} = mx.sym.{}(data={}, kernel={}, stride={}, dilate = ({}), pad={}, num_filter = {}, num_group = {}, no_bias = {}, layout = '{}', name = '{}')".format( IR_node.variable_name, pattern, self.parent_variable_name(IR_node), tuple(kernel), tuple(stride), dilate, tuple(pad), num_filter, num_group, not use_bias, layout, IR_node.name) else: code += self.set_pad(IR_node, code, pad, False) code += "\n {:<15} = mx.sym.{}(data={}, kernel={}, stride={}, dilate = ({}), num_filter = {}, num_group = {}, no_bias = {}, layout = '{}', name = '{}')".format( IR_node.variable_name, pattern, IR_node.variable_name + "_pad", tuple(kernel), tuple(stride), dilate, num_filter, num_group, not use_bias, layout, IR_node.name) return code def emit_Conv(self, IR_node): return self._emit_convolution(IR_node, "Convolution") def emit_DepthwiseConv(self, IR_node): return self._emit_convolution(IR_node, "DepthwiseConv") def emit_ConvTranspose(self, IR_node): return self._emit_convolution(IR_node, "Deconvolution") def emit_DataInput(self, IR_node): shape = list() shape.extend(IR_node.IR_layer.attr["shape"].list.i) code = "{:<15} = mx.sym.var('{}')".format(IR_node.variable_name, IR_node.name) return code # Add LeakyReLU Elu(slope not support) def emit_Activation(self, IR_node, act_type): act_type = act_type func_name = "" if act_type == "elu": func_name 
= "LeakyReLU" else: func_name = "Activation" code = "{:<15} = mx.sym.{}(data = {}, act_type = '{}', name = '{}')".format( IR_node.variable_name, func_name, self.parent_variable_name(IR_node), act_type, IR_node.name) return code def emit_BatchNorm(self, IR_node): IR_node_after = self.IR_graph.get_son(IR_node.name, [0]) if IR_node_after.type == 'Scale': if self.weight_loaded: weight_dict = self.weights[IR_node.name] weight_dict_scale = self.weights[IR_node_after.name] # axis = IR_node.IR_layer.attr["axis"].i axis = 1 eps = IR_node.IR_layer.attr["epsilon"].f momentum = IR_node.IR_layer.attr["momentum"].f fix_gamma = not IR_node.IR_layer.attr["scale"].b if self.weight_loaded: if not fix_gamma: self.output_weights[IR_node.name + "_gamma"] = np.multiply(weight_dict['scale'], weight_dict_scale['scale']) self.output_weights[IR_node.name + "_beta"] = np.multiply(weight_dict['bias'], weight_dict_scale['scale']) + weight_dict_scale['bias'] # not supported yet use_global_stats = "False" if self.weight_loaded: self.output_weights[IR_node.name + "_moving_var"] = weight_dict['var'] self.output_weights[IR_node.name + "_moving_mean"] = weight_dict['mean'] code = "{:<15} = mx.sym.BatchNorm(data = {}, axis = {}, eps = {}, momentum = {}, fix_gamma = {}, use_global_stats = {}, name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), axis, eps, momentum, fix_gamma, use_global_stats, IR_node.name) return code else: if self.weight_loaded: weight_dict = self.weights[IR_node.name] # axis = IR_node.IR_layer.attr["axis"].i axis = 1 eps = IR_node.IR_layer.attr["epsilon"].f momentum = IR_node.IR_layer.attr["momentum"].f fix_gamma = not IR_node.IR_layer.attr["scale"].b if self.weight_loaded: if not fix_gamma: self.output_weights[IR_node.name + "_gamma"] = weight_dict['scale'] self.output_weights[IR_node.name + "_beta"] = weight_dict['bias'] # not supported yet use_global_stats = "False" if self.weight_loaded: self.output_weights[IR_node.name + "_moving_var"] = weight_dict['var'] self.output_weights[IR_node.name + "_moving_mean"] = weight_dict['mean'] code = "{:<15} = mx.sym.BatchNorm(data = {}, axis = {}, eps = {}, momentum = {}, fix_gamma = {}, use_global_stats = {}, name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), axis, eps, momentum, fix_gamma, use_global_stats, IR_node.name) return code def emit_Scale(self, IR_node): if self.weight_loaded: weight_dict = self.weights[IR_node.name] # axis = IR_node.IR_layer.attr["axis"].i axis = 1 eps = 0.0 momentum = 0.0 fix_gamma = not IR_node.IR_layer.attr["scale"].b if self.weight_loaded: if not fix_gamma: self.output_weights[IR_node.name + "_gamma"] = weight_dict['scale'] self.output_weights[IR_node.name + "_beta"] = weight_dict['bias'] # not supported yet use_global_stats = "False" if self.weight_loaded: self.output_weights[IR_node.name + "_moving_var"] = weight_dict['scale_var'] self.output_weights[IR_node.name + "_moving_mean"] = weight_dict['scale_mean'] code = "{:<15} = mx.sym.BatchNorm(data = {}, axis = {}, eps = {}, momentum = {}, fix_gamma = {}, use_global_stats = {}, name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), axis, eps, momentum, fix_gamma, use_global_stats, IR_node.name) return code def emit_Pool(self, IR_node): global_pool = IR_node.IR_layer.attr["global_pooling"].b kernel = list() if global_pool: kernel = [1] * (len(IR_node.IR_layer.attr["strides"].list.i) - 2) else: for e in IR_node.IR_layer.attr["kernel_shape"].list.i[1:-1]: kernel.append(e) pool_type = 
IR_node.get_attr('pooling_type').lower()

        stride = list()
        for e in IR_node.IR_layer.attr["strides"].list.i[1:-1]:
            stride.append(e)

        defuse_pad = False
        pad = list()
        if "pads" in IR_node.IR_layer.attr:
            output_shape = list()
            for e in IR_node.IR_layer.attr["_output_shapes"].list.shape[0].dim:
                output_shape.append(e.size)

            # print("Warning: MXNet Pooling Layer pad does not match IR Pooling Layer pad")
            defuse_pad, pad = MXNetEmitter.transfer_pad(IR_node.IR_layer.attr["pads"].list.i)

        code = ""
        if not defuse_pad:
            code += "{:<15} = mx.sym.Pooling(data = {}, global_pool = {}, kernel={}, pool_type = '{}', stride={}, pad={}, name = '{}')".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                global_pool,
                tuple(kernel),
                pool_type,
                tuple(stride),
                tuple(pad),
                IR_node.name)
        else:
            code += self.set_pad(IR_node, code, pad, pool_type == "max")
            code += "\n    {:<15} = mx.sym.Pooling(data = {}, global_pool = {}, kernel={}, pool_type = '{}', stride={}, name = '{}')".format(
                IR_node.variable_name,
                IR_node.variable_name + "_pad",
                global_pool,
                tuple(kernel),
                pool_type,
                tuple(stride),
                IR_node.name)

        return code

    def emit_SoftmaxOutput(self, IR_node):
        code = "{:<15} = mx.sym.SoftmaxOutput(data = {}, name = 'softmax')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node))
        return code

    def emit_Softmax(self, IR_node):
        code = ""
        if len(IR_node.out_edges) == 0:
            code = "{:<15} = mx.sym.SoftmaxOutput(data = {}, name = 'softmax')".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node))
        else:
            axis = IR_node.IR_layer.attr["dim"].i
            code = "{:<15} = mx.sym.softmax(data = {}, axis = {}, name = '{}')".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                axis,
                IR_node.name)
        return code

    def emit_Squeeze(self, IR_node):
        return self.emit_Flatten(IR_node)

    def emit_Embedding(self, IR_node):
        input_dim = IR_node.IR_layer.attr["input_dim"].i
        output_dim = IR_node.IR_layer.attr["output_dim"].i
        dtype = MXNetEmitter.dtype_map.get(IR_node.layer.attr["dtype"].type, "float32")

        code = "{:<15} = mx.sym.Embedding(data = {}, input_dim = {}, output_dim = {}, dtype = '{}', name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            input_dim,
            output_dim,
            dtype,
            IR_node.name)
        return code

    def emit_LeakyRelu(self, IR_node):
        alpha = IR_node.IR_layer.attr['alpha'].f
        code = "{:<15} = mx.sym.LeakyReLU(data = {}, slope = {}, name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            alpha,
            IR_node.name)
        return code

    def emit_Elu(self, IR_node):
        alpha = IR_node.IR_layer.attr['alpha'].f
        code = "{:<15} = mx.sym.LeakyReLU(data = {}, slope = {}, act_type = '{}', name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            alpha,
            'elu',
            IR_node.name)
        return code

    def emit_Dropout(self, IR_node):
        p = IR_node.IR_layer.attr["keep_prob"].f
        mode = IR_node.IR_layer.attr["mode"].s.lower().decode() if 'mode' in IR_node.layer.attr else 'training'
        code = "{:<15} = mx.sym.Dropout(data = {}, p = {}, mode = '{}', name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            p,
            mode,
            IR_node.name)
        return code

    # reverse cannot support yet
    def emit_Reshape(self, IR_node):
        shape = list()
        for e in IR_node.IR_layer.attr["shape"].list.i:
            shape.append(e)
        shape = ', '.join('%s' % i for i in shape)
        reverse = False
        code = "{:<15} = mx.sym.reshape(data = {}, shape = ({}), reverse = {}, name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            shape,
            reverse,
            IR_node.name)
        return code

    def emit_Flatten(self, IR_node):
        # code = "{:<15} = mx.sym.transpose(data = {}, axes = (0, 2, 3, 1))\n".format("trans", self.parent_variable_name(IR_node))
        code = "{:<15} = mx.sym.flatten(data = {}, name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.name)
        return code

    @staticmethod
    def _convert_axis(IR_node, axis):
        ndim = len(IR_node.layer.attr['_output_shapes'].list.shape[0].dim)
        if axis == 0:
            return 0
        elif axis == ndim - 1:
            return 1
        else:
            return axis + 1

    def emit_Concat(self, IR_node):
        dim = MXNetEmitter._convert_axis(IR_node, IR_node.IR_layer.attr["axis"].i)
        code = "{:<15} = mx.sym.concat({}, dim = {}, name = '{}')".format(
            IR_node.variable_name,
            ', '.join(self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges),
            dim,
            IR_node.name)
        return code

    def emit_Cast(self, IR_node):
        # map the IR dtype enum to an MXNet dtype string
        dtype = MXNetEmitter.dtype_map.get(IR_node.IR_layer.attr["dtype"].type, "float32")
        code = "{:<15} = mx.sym.cast(data = {}, dtype = '{}', name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            dtype,
            IR_node.name)
        return code

    def emit_Expand_dims(self, IR_node):
        axis = IR_node.IR_layer.attr["axis"].i
        code
= "{:<15} = mx.sym.expand_dims(data = {}, axis = {}, name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), axis, IR_node.name) return code def emit_Pad(self, IR_node): mode = IR_node.IR_layer.attr["mode"].s.lower().decode() pad_width = list() pad_width.extend([0]*4) padding = convert_onnx_pad_to_tf(IR_node.get_attr("pads"))[1:-1] for padding_pair in padding: pad_width.extend(padding_pair) pad_width = ', '.join('%s' % i for i in pad_width) code = "{:<15} = mx.sym.pad(data = {}, mode = '{}', pad_width = ({}), name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), mode, pad_width, IR_node.name) return code def emit_Add(self, IR_node): code = "{:<15} = mx.sym.broadcast_add({}, {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node, [1])) return code def emit_Mul(self, IR_node): code = "{:<15} = mx.sym.broadcast_mul({}, {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node, [1])) return code def emit_ReduceMean(self, IR_node): axes = IR_node.layer.attr['axes'].list.i[:] axes = ','.join('%s' % MXNetEmitter.transpose_map[i] for i in axes) code = "{:<15} = mx.sym.mean(data = {}, axis = ({}), keepdims = {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), axes, IR_node.layer.attr['keepdims'].b) return code def emit_LRN(self, IR_node): code = "{:<15} = mx.sym.LRN(data = {}, alpha = {}, beta = {}, knorm = {}, nsize = {}, name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.layer.attr['alpha'].f, IR_node.layer.attr['beta'].f, IR_node.layer.attr['k'].f, IR_node.layer.attr['size'].i * 2 - 1, IR_node.name) return code def emit_Constant(self, IR_node): raise NotImplementedError() code = "{:<15} = mx.sym.identity(name='{}')".format(IR_node.variable_name, IR_node.name) self.output_weights[IR_node.name + '_data'] = self.weights[IR_node.name]['value'] return code def emit_Sub(self, IR_node): code = "{:<15} = mx.sym.broadcast_sub({}, {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node, [1])) return code def emit_Relu6(self, IR_node): self.add_body(1, self.emit_Activation(IR_node, 'relu')) old_name = IR_node.variable_name IR_node.real_name = IR_node.real_name + "_clip" self.add_body(1, "{:<15} = mx.sym.clip({}, a_min=0, a_max=6, name='{}')".format( IR_node.real_variable_name, old_name, IR_node.real_name)) return "" # def emit_Slice(self, IR_node): # starts = IR_node.get_attr('starts') # starts = [starts[0], starts[-1]] + starts[1:-1] # ends = IR_node.get_attr('ends') # ends = [ends[0], ends[-1]] + ends[1:-1] # ends = [i if i else None for i in ends] # strides = IR_node.get_attr('strides') # if strides: # strides = [strides[0], strides[-1]] + strides[1:-1] # self.add_body(1, "{:<15} = mx.sym.slice({}, begin={}, end={}, step={}, name='{}')".format( # IR_node.real_variable_name, # self.parent_variable_name(IR_node), # starts, # ends, # strides, # IR_node.name # )) # return "" def emit_Slice(self, IR_node): pass def emit_Const(self, IR_node): pass def emit_Shape(self, IR_node): pass def emit_Pack(self, IR_node): pass
class OnnxEmitter(Emitter): dtype_map = {graph_pb2.DT_FLOAT32: "TensorProto.FLOAT"} def __init__(self, architecture, weight): super(OnnxEmitter, self).__init__() if os.path.exists(architecture) == False: raise ValueError( "IR architecture file [{}] is not found.".format(architecture)) else: self.IR_graph = IRGraph(architecture) self.IR_graph.build() if os.path.exists(weight) == False: raise ValueError( "IR weight file [{}] is not found.".format(weight)) else: self._load_weights(weight) @property def header_code(self): return """import numpy as np from onnx import helper, TensorProto import onnx __weights_dict = dict() def load_weights(weight_file): if weight_file == None: return try: weights_dict = np.load(weight_file).item() except: weights_dict = np.load(weight_file, encoding='bytes').item() return weights_dict def KitModel(weight_file = None): global __weights_dict __weights_dict = load_weights(weight_file) """ def gen_code(self, phase): self.phase = phase self.add_body(0, self.header_code) self.inputs = [] self.outputs = [] self.nodes = [] self.initializer = [] for layer in self.IR_graph.topological_sort: current_node = self.IR_graph.get_node(layer) node_type = current_node.type if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) func(current_node) else: print("OnnxEmitter has not supported operator [%s]." % (node_type)) self.emit_UNKNOWN(current_node) self._process_output_layers() self.add_body( 1, "graph = helper.make_graph([{}], 'mmdnn', [{}], [{}], [{}])". format(', '.join(self.nodes), ', '.join(self.inputs), ', '.join(self.outputs), ', '.join(self.initializer))) self.add_body(1, "return helper.make_model(graph)") return self.body_code def _process_output_layers(self): for name in self.IR_graph.output_layers: IR_node = self.IR_graph.get_node(name) shape_str = IRGraph.shapeToStr( IR_node.layer.attr["_output_shapes"].list.shape[0]) if IR_node.layer.attr['dtype'].type == graph_pb2.DT_UNDEFINED: IR_node.layer.attr['dtype'].type = graph_pb2.DT_FLOAT32 dtype_str = self.dtype_map[IR_node.layer.attr['dtype'].type] self.add_body( 1, "{:<15} = helper.make_tensor_value_info('{}', {}, ({},))". 
format(IR_node.variable_name + '_out', IR_node.variable_name, dtype_str, shape_str)) self.outputs.append(IR_node.variable_name + '_out') def emit_DataInput(self, IR_node): shape = [ dim.size if dim.size != -1 else 1 for dim in IR_node.IR_layer.attr["shape"].shape.dim ] shape_str = ', '.join('%s' % i for i in shape) dtype_str = self.dtype_map[IR_node.layer.attr['dtype'].type] self.add_body( 1, "{:<15} = helper.make_tensor_value_info('{}', {}, ({},))".format( IR_node.variable_name + '_orig', IR_node.variable_name + '_orig', dtype_str, shape_str)) self.add_body( 1, "{:15} = helper.make_node('Transpose', inputs=['{}'], outputs=['{}'], perm=[0, 3, 1, 2])" .format(IR_node.variable_name, IR_node.variable_name + '_orig', IR_node.variable_name)) self.inputs.append(IR_node.variable_name + '_orig') self.nodes.append(IR_node.variable_name) def emit_Conv(self, IR_node): dilations = list(IR_node.get_attr('dilations'))[1:-1] group = IR_node.get_attr('group', 1) kernel_shape = list(IR_node.get_attr('kernel_shape'))[:2] pads = IR_node.get_attr('pads') pad_length = len(pads) pads = pads[1:pad_length // 2 - 1] + pads[pad_length // 2 + 1:pad_length - 1] strides = list(IR_node.get_attr('strides'))[1:-1] self.add_body( 1, "{:15} = __weights_dict['{}']['weights']".format( IR_node.variable_name + '_weight_array', IR_node.name)) self.add_body( 1, "{} = {}.transpose([3,2,0,1])".format( IR_node.variable_name + '_weight_array', IR_node.variable_name + '_weight_array')) self.add_body( 1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float)))" .format(IR_node.variable_name + '_weight', IR_node.variable_name + '_weight', IR_node.variable_name + '_weight_array', IR_node.variable_name + '_weight_array', IR_node.variable_name + '_weight_array')) self.add_body( 1, "{:15} = helper.make_node('Conv', inputs=['{}', '{}'],outputs=['{}'], dilations={}, group={}, kernel_shape={}, pads={}, strides={})" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name + '_weight', IR_node.variable_name, dilations, group, kernel_shape, pads, strides)) self.nodes.append(IR_node.variable_name + '_weight') self.nodes.append(IR_node.variable_name) def emit_BatchNorm(self, IR_node): epsilon = IR_node.get_attr('epsilon') self.add_body( 1, "{:15} = __weights_dict['{}']['scale']".format( IR_node.variable_name + '_scale_array', IR_node.name)) self.add_body( 1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))" .format(IR_node.variable_name + '_scale', IR_node.variable_name + '_scale', IR_node.variable_name + '_scale_array', IR_node.variable_name + '_scale_array', IR_node.variable_name + '_scale_array')) self.add_body( 1, "{:15} = __weights_dict['{}']['bias']".format( IR_node.variable_name + '_bias_array', IR_node.name)) self.add_body( 1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))" .format(IR_node.variable_name + '_bias', IR_node.variable_name + '_bias', IR_node.variable_name + '_bias_array', IR_node.variable_name + '_bias_array', IR_node.variable_name + '_bias_array')) self.add_body( 1, "{:15} = __weights_dict['{}']['mean']".format( IR_node.variable_name + '_mean_array', 
IR_node.name)) self.add_body( 1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))" .format(IR_node.variable_name + '_mean', IR_node.variable_name + '_mean', IR_node.variable_name + '_mean_array', IR_node.variable_name + '_mean_array', IR_node.variable_name + '_mean_array')) self.add_body( 1, "{:15} = __weights_dict['{}']['var']".format( IR_node.variable_name + '_var_array', IR_node.name)) self.add_body( 1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))" .format(IR_node.variable_name + '_var', IR_node.variable_name + '_var', IR_node.variable_name + '_var_array', IR_node.variable_name + '_var_array', IR_node.variable_name + '_var_array')) self.add_body( 1, "{:15} = helper.make_node('BatchNormalization', inputs=['{}', '{}', '{}', '{}', '{}'],outputs=['{}'], epsilon={}, is_test={})" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name + '_scale', IR_node.variable_name + '_bias', IR_node.variable_name + '_mean', IR_node.variable_name + '_var', IR_node.variable_name, epsilon, 0 if self.phase == 'train' else 1)) self.nodes.append(IR_node.variable_name + '_scale') self.nodes.append(IR_node.variable_name + '_bias') self.nodes.append(IR_node.variable_name + '_mean') self.nodes.append(IR_node.variable_name + '_var') self.nodes.append(IR_node.variable_name) def emit_Relu(self, IR_node): self.add_body( 1, "{:15} = helper.make_node('Relu', inputs=['{}'], outputs=['{}'])". format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name)) self.nodes.append(IR_node.variable_name) def emit_Add(self, IR_node): input_layers = ', '.join(("'" + self.IR_graph.get_parent( IR_node.variable_name, [num]).real_variable_name) + "'" for num in range(0, len(IR_node.in_edges))) self.add_body( 1, "{:15} = helper.make_node('Add', inputs=[{}], outputs=['{}'])". format(IR_node.variable_name, input_layers, IR_node.variable_name)) self.nodes.append(IR_node.variable_name) def emit_Pool(self, IR_node): pooling_type = IR_node.get_attr('pooling_type') if IR_node.layer.attr['global_pooling'].b: if pooling_type == 'AVG': self.add_body( 1, "{:15} = helper.make_node('GlobalAveragePool', inputs=['{}'], outputs=['{}'])" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name)) self.nodes.append(IR_node.variable_name) else: print("OnnxEmitter has not supported Global Pool type [%s]." % (pooling_type)) self.emit_UNKNOWN(IR_node) else: if pooling_type in ['AVG', 'MAX']: if pooling_type == 'AVG': op_name = 'AveragePool' elif pooling_type == 'MAX': op_name = 'MaxPool' kernel_shape = list(IR_node.get_attr('kernel_shape')[1:-1]) pads = IR_node.get_attr('pads') pad_length = len(pads) pads = pads[1:pad_length // 2 - 1] + pads[pad_length // 2 + 1:pad_length - 1] strides = list(IR_node.get_attr('strides')[1:-1]) self.add_body( 1, "{:15} = helper.make_node('{}', inputs=['{}'],outputs=['{}'], kernel_shape={}, pads={}, strides={})" .format(IR_node.variable_name, op_name, self.parent_variable_name(IR_node), IR_node.variable_name, kernel_shape, pads, strides)) self.nodes.append(IR_node.variable_name) else: print("OnnxEmitter has not supported Pool type [%s]." 
% (pooling_type)) self.emit_UNKNOWN(IR_node) def emit_FullyConnected(self, IR_node): self.add_body( 1, "{:15} = __weights_dict['{}']['weights']".format( IR_node.variable_name + '_weight_array', IR_node.name)) self.add_body( 1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float)))" .format(IR_node.variable_name + '_weight', IR_node.variable_name + '_weight', IR_node.variable_name + '_weight_array', IR_node.variable_name + '_weight_array', IR_node.variable_name + '_weight_array')) self.add_body( 1, "{:15} = __weights_dict['{}']['bias']".format( IR_node.variable_name + '_bias_array', IR_node.name)) self.add_body( 1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float)))" .format(IR_node.variable_name + '_bias', IR_node.variable_name + '_bias', IR_node.variable_name + '_bias_array', IR_node.variable_name + '_bias_array', IR_node.variable_name + '_bias_array')) self.add_body( 1, "{:15} = helper.make_node('Gemm', inputs=['{}', '{}', '{}'],outputs=['{}'])" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name + '_weight', IR_node.variable_name + '_bias', IR_node.variable_name)) self.nodes.append(IR_node.variable_name + '_weight') self.nodes.append(IR_node.variable_name + '_bias') self.nodes.append(IR_node.variable_name) def emit_Pad(self, IR_node): mode = IR_node.layer.attr['mode'].s.decode() pads = IR_node.get_attr('pads') pad_length = len(pads) pads = [0, 0] + pads[1:pad_length // 2 - 1] + [ 0, 0 ] + pads[pad_length // 2 + 1:pad_length - 1] self.add_body( 1, "{:15} = helper.make_node('Pad', inputs=['{}'], outputs=['{}'], mode='{}', pads={})" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name, mode, pads)) self.nodes.append(IR_node.variable_name) def emit_Concat(self, IR_node): axis = IR_node.get_attr('axis') - 2 inputs = ', '.join("'" + self.IR_graph.get_node(i).real_variable_name + "'" for i in IR_node.in_edges) self.add_body( 1, "{:15} = helper.make_node('Concat', inputs=[{}], outputs=['{}'], axis={})" .format(IR_node.variable_name, inputs, IR_node.variable_name, axis)) self.nodes.append(IR_node.variable_name) def emit_Flatten(self, IR_node): self.add_body( 1, "{:15} = helper.make_node('Flatten', inputs=['{}'], outputs=['{}'])" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name)) self.nodes.append(IR_node.variable_name) def emit_Softmax(self, IR_node): self.add_body( 1, "{:15} = helper.make_node('Softmax', inputs=['{}'], outputs=['{}'])" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.variable_name)) self.nodes.append(IR_node.variable_name) def emit_UNKNOWN(self, IR_node): print(IR_node.IR_layer.name)
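# ---------------------------------------------------------------------------
# Illustration (a sketch using the same onnx.helper API the emitter targets):
# OnnxEmitter emits source whose body is a chain of helper.make_node calls
# collected into self.nodes / self.inputs / self.outputs and assembled by
# helper.make_graph. A minimal, self-contained version of that pattern, with
# placeholder names and shapes:
#
#     from onnx import helper, TensorProto
#     input0 = helper.make_tensor_value_info('input0', TensorProto.FLOAT, (1, 3, 224, 224))
#     relu1 = helper.make_node('Relu', inputs=['input0'], outputs=['relu1'])
#     relu1_out = helper.make_tensor_value_info('relu1', TensorProto.FLOAT, (1, 3, 224, 224))
#     graph = helper.make_graph([relu1], 'mmdnn', [input0], [relu1_out], [])
#     model = helper.make_model(graph)
# ---------------------------------------------------------------------------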
class CntkEmitter(Emitter): dtype_map = { graph_pb2.DT_FLOAT16: "np.float16", graph_pb2.DT_FLOAT32: "np.float32", graph_pb2.DT_FLOAT64: "np.float64", graph_pb2.DT_INT16: "np.float16", # Cntk does not support Int. graph_pb2.DT_INT32: "np.float32", # Cntk does not support Int. graph_pb2.DT_INT64: "np.float64", # Cntk does not support Int. graph_pb2.DT_UINT8: "np.uint8", graph_pb2.DT_UINT16: "np.uint16" } naive_scope_pattern = ['gru_cell', 'lstm_cell'] def __init__(self, model): from six import string_types as _string_types super(CntkEmitter, self).__init__() if isinstance(model, _string_types): network_path = model else: network_path = model[0] self._load_weights(model[1]) self.IR_graph = IRGraph(network_path) super(CntkEmitter, self)._build() self.yolo_parameter = [] folder = Folder(self.IR_graph, self.weights_dict) folder.fold() @property def header_code(self): return """import numpy as np import cntk from cntk import ops, layers from cntk.contrib.crosstalkcaffe.unimodel.cntkinstance import BlockApiSetup __weights_dict = dict() def load_weights(weight_file): if weight_file == None: return try: weights_dict = np.load(weight_file, allow_pickle=True).item() except: weights_dict = np.load(weight_file, allow_pickle=True, encoding='bytes').item() return weights_dict def KitModel(weight_file = None): global __weights_dict __weights_dict = load_weights(weight_file) """ def gen_code(self, phase='test'): self.phase = phase self.add_body(0, self.header_code) for layer in self.IR_graph.topological_sort: current_node = self.IR_graph.get_node(layer) node_type = current_node.type if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) line = func(current_node) if line: self.add_body(1, line) else: print("CntkEmitter has not supported operator [%s]." 
                      % (node_type))
                self.emit_UNKNOWN(current_node)

        self.add_body(1, "return {}".format(','.join([
            self.IR_graph.get_node(name).real_variable_name
            for name in self.IR_graph.output_layers
        ])))

        self.add_body(0, "")
        for i in self.used_layers:
            func = getattr(self, "_layer_" + i)
            func()

        self.add_body(0, "")
        for code in self.layers_codes.values():
            self.add_body(0, code)

        return self.body_code

    @staticmethod
    def _shapeToStr(shapes):
        new_shape = filter(lambda x: x > -1, [dim.size for dim in shapes.dim])
        return ', '.join('%s' % i for i in new_shape)

    @staticmethod
    def is_valid_padding(auto_pad, pads):
        """ different from utils.is_valid_padding """
        if auto_pad:
            if auto_pad == 'VALID':
                return True
            elif auto_pad.startswith('SAME'):
                return False
            else:
                raise ValueError("Unknown padding type [{}].".format(auto_pad))
        else:
            lens = len(pads)
            assert lens % 2 == 0
            for i in range(0, lens // 2):
                if pads[i] != 0:
                    return False
            return True

    @staticmethod
    def is_ceil_mode(pads):
        # NOTE: as written this inspects only the first trailing pad pair;
        # the loop returns on its first iteration either way.
        lens = len(pads)
        for i in range(lens // 2 + 1, lens - 1):
            if pads[i] == pads[i - lens // 2]:
                return False
            else:
                return True

    def _defuse_padding(self, IR_node):
        auto_pad = IR_node.get_attr('auto_pad')
        if auto_pad:
            input_node = self.parent_variable_name(IR_node)
            if auto_pad == 'VALID':
                padding = False
            elif auto_pad.startswith("SAME"):
                padding = True
            else:
                raise ValueError("Unknown padding type [{}].".format(auto_pad))
            return input_node, padding
        else:
            padding = IR_node.get_attr('pads')
            if not is_valid_padding(padding):
                dim = len(padding) // 2
                padding_str = list()
                for i in range(1, dim):  # range, not the Python-2-only xrange
                    padding_str.append((padding[i], padding[i + dim]))
                input_node = IR_node.variable_name + '_pad'
                self.add_body(1, "{:<15} = cntk.pad({}, pattern={})".format(
                    input_node,
                    self.parent_variable_name(IR_node),
                    padding_str))
            else:
                input_node = self.parent_variable_name(IR_node)
            return input_node, False

    def emit_Conv(self, IR_node):
        codes = list()
        if self.weight_loaded:
            self.used_layers.add('Conv')
            input_node, padding = self._defuse_padding(IR_node)
            dim = len(IR_node.get_attr('strides')) - 2
            padding = [False] + [padding] * dim
            if IR_node.type == 'DepthwiseConv':
                groups = IR_node.get_attr('kernel_shape')[-2]
                codes.append(
                    "__weights_dict['{}']['weights'] = np.swapaxes(__weights_dict['{}']['weights'], -1, -2)"
                    .format(IR_node.real_name, IR_node.real_name))
            else:
                groups = IR_node.get_attr('group', 1)
            codes.append(
                "{:<15} = convolution({}, is_transpose={}, strides={}, auto_padding={}, dilation={}, groups={}, name='{}')"
                .format(IR_node.variable_name,
                        input_node,
                        IR_node.type == 'ConvTranspose',
                        tuple(IR_node.get_attr('strides')[1:-1]),
                        padding,
                        tuple(IR_node.get_attr('dilations', [1])),
                        groups,
                        IR_node.name))
        else:
            codes.append(
                "{:<15} = Convolution(name = '{}', num_filters = {}, filter_shape = ({}), strides = ({},), pad = {}, bias = {})({})\n"
                .format(IR_node.variable_name,
                        IR_node.name,
                        IR_node.get_attr('kernel_shape')[-1],
                        ', '.join('%s' % i for i in IR_node.layer.attr["kernel_shape"].list.i[:-2]),
                        ', '.join('%s' % i for i in IR_node.layer.attr['strides'].list.i[1:-1]),
                        IR_node.get_attr('auto_pad') != 'VALID',
                        IR_node.get_attr('use_bias'),
                        self.parent_variable_name(IR_node)))
        return codes

    def emit_Pool(self, IR_node):
        input_node = self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name
        if IR_node.layer.attr['global_pooling'].b:
            self.used_layers.add('GlobalPooling')
            code = "{:<15} = global_pooling({}, '{}', name = '{}')".format(
                IR_node.variable_name,
                input_node,
                IR_node.get_attr('pooling_type'),
                IR_node.name)
        else:
            for e in IR_node.get_attr('dilations', []):
                assert e
== 1 dim = len(IR_node.get_attr('kernel_shape')) - 2 padding = not self.is_valid_padding(IR_node.get_attr('auto_pad'), IR_node.get_attr('pads')) padding = [False] + [padding] * dim ceil_out_dim = self.is_ceil_mode(IR_node.get_attr('pads')) pooling_type = IR_node.get_attr('pooling_type') if pooling_type == 'MAX': pooling_type = cntk.MAX_POOLING elif pooling_type == 'AVG': pooling_type = cntk.AVG_POOLING else: raise ValueError if self.weight_loaded: self.used_layers.add(IR_node.type) code = "{:<15} = pooling({}, pooling_type={}, pooling_window_shape={}, strides={}, auto_padding={}, ceil_out_dim={})".format( IR_node.variable_name, input_node, pooling_type, tuple(IR_node.get_attr('kernel_shape')[1:-1]), tuple(IR_node.get_attr('strides')[1:-1]), padding, ceil_out_dim) else: raise NotImplementedError return code def emit_UNKNOWN(self, IR_node): print(IR_node.IR_layer.name) def emit_DataInput(self, IR_node): shape_str = self._shapeToStr(IR_node.IR_layer.attr["shape"].shape) dtype_str = ", dtype = {}".format( self.dtype_map[IR_node.layer.attr['dtype']. type]) if 'dtype' in IR_node.layer.attr else "" code = "{:<15} = cntk.sequence.input_variable(({},) {}, name='{}')".format( IR_node.variable_name, shape_str, dtype_str, IR_node.name) return code def emit_Dropout(self, IR_node): parent = self.IR_graph.get_parent(IR_node.name, [0]) if self.phase == 'train': code = "{:<15} = Dropout({}, name = '{}')({})".format( IR_node.variable_name, 1 - IR_node.get_attr('keep_prob'), IR_node.name, parent.real_variable_name) return code else: IR_node.real_name = parent.real_name def emit_FullyConnected(self, IR_node): input_node = self.parent_variable_name(IR_node) if self.weight_loaded: self.used_layers.add(IR_node.type) code = "{:<15} = dense({}, name = '{}')".format( IR_node.variable_name, input_node, IR_node.name) else: code = "{:<15} = Dense({}, bias = {}, name = '{}')({})".format( IR_node.variable_name, IR_node.layer.attr["units"].i, IR_node.layer.attr['use_bias'].b, IR_node.name, input_node) return code def emit_Flatten(self, IR_node): code = "{:<15} = ops.reshape({}, (-1,), name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.name) return code def emit_Reshape(self, IR_node): code = "{:<15} = cntk.reshape({}, shape={}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), tuple(IR_node.get_attr('shape')), IR_node.name) return code def _emit_activation(self, IR_node, op_name): code = "{:<15} = layers.Activation(activation = {}, name = '{}')({})".format( IR_node.variable_name, op_name, IR_node.name, self.parent_variable_name(IR_node)) return code def emit_Tanh(self, IR_node): return self._emit_activation(IR_node, 'ops.tanh') def emit_Relu(self, IR_node): return self._emit_activation(IR_node, 'ops.relu') def emit_Softmax(self, IR_node): return self._emit_activation(IR_node, 'ops.softmax') def emit_Sigmoid(self, IR_node): return self._emit_activation(IR_node, 'ops.sigmoid') def emit_RNNs(self, IR_node, func): assert False def emit_LSTM(self, IR_node): return self.emit_RNNs(IR_node, "LSTM") def emit_GRU(self, IR_node): return self.emit_RNNs(IR_node, "GRU") def emit_Add(self, IR_node): if len(IR_node.in_edges) > 1: inputs = ' + '.join( self.parent_variable_name(IR_node, i) for i in IR_node.in_edges) code = "{:<15} = {}".format(IR_node.variable_name, inputs) return code def emit_Sub(self, IR_node): if len(IR_node.in_edges) > 1: inputs = ' - '.join( self.parent_variable_name(IR_node, i) for i in IR_node.in_edges) code = "{:<15} = 
{}".format(IR_node.variable_name, inputs) return code def emit_Mul(self, IR_node): if len(IR_node.in_edges) > 1: inputs = ' * '.join( self.parent_variable_name(IR_node, i) for i in IR_node.in_edges) code = "{:<15} = {}".format(IR_node.variable_name, inputs) return code def emit_Constant(self, IR_node): if IR_node.get_attr('value'): code = "{:<15} = cntk.Constant(value={})".format( IR_node.variable_name, IR_node.get_attr('value')) else: code = "{:<15} = cntk.Constant(value=__weights_dict['{}']['value'])".format( IR_node.variable_name, IR_node.name) return code def emit_Concat(self, IR_node): inputs = ', '.join( self.parent_variable_name(IR_node, i) for i in IR_node.in_edges) for s in IR_node.in_edges: node = self.IR_graph.get_node(s) code = "{:<15} = cntk.splice({}, axis={}, name='{}')".format( IR_node.variable_name, inputs, IR_node.get_attr('axis') - 1, # why -1 ? IR_node.name) return code def emit_BatchNorm(self, IR_node): self.used_layers.add(IR_node.type) code = "{:<15} = batch_normalization({}, epsilon={}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('epsilon'), IR_node.name) return code def emit_Pad(self, IR_node): if IR_node.get_attr('mode') == 'constant': mode = 'mode = ops.CONSTANT_PAD, constant_value = {}'.format( IR_node.get_attr('constant_values', 0.0)) elif IR_node.get_attr('mode') == 'reflect': mode = 'mode = ops.REFLECT_PAD' elif IR_node.get_attr('mode') == 'SYMMETRIC': mode = 'mode = ops.SYMMETRIC_PAD' else: assert False padding = IR_node.get_attr('pads') padding = convert_onnx_pad_to_tf(padding)[1:] code = "{:<15} = ops.pad({}, pattern={}, {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), padding, mode) return code def emit_Squeeze(self, IR_node): IR_node.real_name = self.IR_graph.get_node( IR_node.in_edges[0]).real_name def emit_Log(self, IR_node): code = "{:<15} = _cntk.log({}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.name) return code def emit_Exp(self, IR_node): code = "{:<15} = _cntk.exp({}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.name) return code def emit_Embedding(self, IR_node): codes = list() codes.append( "{}_P = cntk.one_hot({}, __weights_dict['{}']['weights'].shape[0])" .format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.name)) codes.append( "{:<15} = layers.Embedding(weights=__weights_dict['{}']['weights'])({}_P)" .format( IR_node.variable_name, # IR_node.get_attr('output_dim'), IR_node.name, IR_node.variable_name)) return codes def emit_Reciprocal(self, IR_node): code = "{:<15} = _cntk.reciprocal({}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.name) return code def emit_ReduceMean(self, IR_node): code = "{:<15} = ops.reduce_mean({}, axis = ({}), name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), ', '.join('%s' % (i - 1) for i in IR_node.get_attr('axes')), IR_node.name) return code def emit_LRN(self, IR_node): self.used_layers.add(IR_node.type) code = "{:<15} = lrn({}, k=1, n={}, alpha={}, beta={}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.layer.attr['size'].i, IR_node.layer.attr['alpha'].f, IR_node.layer.attr['beta'].f, IR_node.name) return code # ?? 
def emit_LeakRelu(self, IR_node): code = "{:<15} = _cntk.relu({}) - {} * _cntk.relu(-{})".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('alpha'), self.parent_variable_name(IR_node)) return code def emit_LeakyRelu(self, IR_node): self.used_layers.add(IR_node.type) code = "{:<15} = _leaky_relu({}, {}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('alpha'), IR_node.name) return code def emit_UpSampling2D(self, IR_node): self.used_layers.add(IR_node.type) code = "{:<15} = Upsampling2D({}, stride = {}, name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('scales')[0], IR_node.name) return code def emit_ConvTranspose(self, IR_node): return self.emit_Conv(IR_node) def emit_yolo(self, IR_node): self.used_layers.add(IR_node.type) code = "{:<15} = {}".format(IR_node.variable_name, self.parent_variable_name(IR_node)) # print(IR_node.layer) self.yolo_parameter = [ IR_node.get_attr('anchors'), IR_node.get_attr('classes'), IR_node.get_attr("ignore_thresh"), IR_node.get_attr("jitter") ] # assert False return code def emit_Crop(self, IR_node): self.used_layers.add(IR_node.type) output_shape = IR_node.get_attr('_output_shapes')[0] output_shape = shape_to_list(output_shape)[1:] code = "{:<15} = _crop({}, {}, {}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('border')[:2], output_shape, IR_node.real_name) return code def emit_Relu6(self, IR_node): codes = list() codes.append(self.emit_Relu(IR_node)) codes.append("{:<15} = cntk.clip({}, 0, 6, name='{}_clip')".format( IR_node.variable_name + "_clip", IR_node.variable_name, IR_node.name)) IR_node.real_name = IR_node.name + '_clip' return codes def emit_DepthwiseConv(self, IR_node): return self.emit_Conv(IR_node) # def emit_Unstack(self, IR_node): # num_str = "{}.shape[{}]".format(self.parent_variable_name(IR_node), IR_node.get_attr('axis')) # axis = IR_node.get_attr('axis') # parent_variable_shape = "list({}.shape)".format(self.parent_variable_name(IR_node) # if self.IR_graph.get_parent(IR_node.name, [0]).type != 'Embedding' # else self.parent_variable_name(IR_node)+'.E') # if axis==1: # shape_str = "tuple([{}[0]*{}[{}], 1].extend({}[{}+1:]))".format( # parent_variable_shape, # parent_variable_shape, # str(axis), # parent_variable_shape, # str(axis)) # else: # shape_str = "tuple([{}[0]*{}[{}]].extend({}[1:{}]).append(1).extend({}[{}+1:]))".format( # parent_variable_shape, # parent_variable_shape, # str(axis), # parent_variable_shape, # str(axis), # parent_variable_shape, # str(axis)) # code = "{:<15} = cntk.reshape({}, {}, name='{}')".format( # IR_node.variable_name, # self.parent_variable_name(IR_node), # shape_str, # IR_node.variable_name) # code = "{: <15} = cntk.reshape({}, {}.shape, name='{}')".format( # IR_node.variable_name, # self.parent_variable_name(IR_node), # self.parent_variable_name(IR_node), # IR_node.name # ) # return code def emit_Shape(self, IR_node): parent_node = self.IR_graph.get_parent(IR_node.name, [0]) code = "{:<15} = {}.shape".format( IR_node.variable_name, self.parent_variable_name(IR_node) if parent_node.type != 'Embedding' else self.parent_variable_name(IR_node) + ".E") return code def emit_Slice(self, IR_node): starts = IR_node.get_attr('starts') if len(starts) > 1: starts = [starts[0], starts[-1]] + starts[1:-1] ends = IR_node.get_attr('ends') if len(ends) > 1: ends = [ends[0], ends[-1]] + ends[1:-1] extra_str = "" for idx, _ in enumerate(starts): if 
idx: extra_str += ", " extra_str += "{}:".format(starts[idx]) if ends[idx]: extra_str += "{}".format(ends[idx]) code = "{:<15} = {}[{}]".format(IR_node.variable_name, self.parent_variable_name(IR_node), extra_str) return code def emit_Split(self, IR_node): self.used_layers.add(IR_node.type) axis = IR_node.get_attr('axis') split_num = IR_node.get_attr('split') code = "{:<15} = split(input={}, axis={}, split_num={})".format( IR_node.variable_name, self.parent_variable_name(IR_node), str(axis), str(split_num)) return code # def emit_Fill(self, IR_node): # code = "{:<15} = cntk.Constant({}, {}, name='{}')".format( # IR_node.variable_name, # IR_node.get_attr('value'), # self.parent_variable_name(IR_node), # IR_node.name) # return code def emit_Unsqueeze(self, IR_node): code = "{:<15} = cntk.expand_dims({}, axis={}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('axes')[0], IR_node.name) return code def emit_Scope(self, IR_node): pattern = IR_node.pattern if pattern not in self.naive_scope_pattern and re.sub( r'(_\d+)*$', '', IR_node.pattern) not in self.naive_scope_pattern: func = getattr(self, "_emit_" + pattern) code = func(IR_node) else: code = "{:<15} = __{}({})".format( IR_node.real_variable_name, IR_node.pattern, ', '.join( self.parent_variable_name(IR_node, s) for s in IR_node.in_edges)) self._gen_scope_code(IR_node) return code def _gen_scope_code(self, scope_node): def _scope_func(scope_name, params, code, return_var): code = """ def __{}({}): {} return {} """.format(scope_name, params, code, ', '.join(return_var)) return code if not self.layers_codes.get(scope_node.pattern, None): body_code = str() for node_name in scope_node.topology_list: node = self.IR_graph.get_node(node_name) node_type = node.type if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) line = func(node) if line != None: body_code += " " + line + '\n' else: print("CntkEmitter has not supported operator [%s]." % (node_type)) self.emit_UNKNOWN(node) # param_code does not need parameter slice. 
input_params = scope_node.input_params param_code = ', '.join(input_params) function_code = _scope_func(scope_node.pattern, param_code, body_code, scope_node.return_variables) self.layers_codes[scope_node.pattern] = function_code def _emit_h_zero(self, IR_node): code = "{:<15} = cntk.Constant({}, (1, {}))".format( IR_node.variable_name, IR_node.get_attr('fill_value'), IR_node.get_attr('fill_size')) return code def _layer_Crop(self): self.add_body( 0, ''' def _crop(input, border, output_shape, **kwargs): dim = len(output_shape) output_shape = [output_shape[-1]] + output_shape[:-1] ref_tensor = np.zeros(shape=output_shape, dtype=np.float32) input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1))) layer = cntk.crop_manual(node_input=input, node_referent=ref_tensor, offset_x=border[0], offset_y=border[1]) layer = cntk.transpose(layer, list(range(1, dim)) + [0]) return layer ''') def _layer_LeakyRelu(self): self.add_body( 0, ''' def _leaky_relu(x, leak, name): return cntk.param_relu(cntk.constant((np.ones(x.shape)*leak).astype(np.float32)), x, name = name) ''') def _layer_yolo(self): self.add_body( 0, ''' def yolo_parameter(): return {} '''.format(self.yolo_parameter)) def _layer_UpSampling2D(self): self.add_body( 0, ''' def Upsampling2D(x, stride, name): assert stride == 2 xr = cntk.reshape(x, (x.shape[0], 1, x.shape[1], 1, x.shape[2])) xx = cntk.splice(xr, xr, axis = -2) xy = cntk.splice(xx, xx, axis = -4) r = cntk.reshape(xy, (x.shape[0] * 2, x.shape[1] * 2, x.shape[2]), name = name) return r ''') def _layer_LRN(self): self.add_body( 0, """ def lrn(input, **kwargs): dim = len(input.output.shape) input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1))) layer = BlockApiSetup.lrn(**kwargs)(input) layer = cntk.transpose(layer, list(range(1, dim)) + [0]) return layer """) def _layer_FullyConnected(self): self.add_body( 0, """ def dense(input, name, **kwargs): w = __weights_dict[name]['weights'] b = __weights_dict[name]['bias'] if 'bias' in __weights_dict[name] else None return BlockApiSetup.linear(output_shape=w.shape[1], input_shape=w.shape[0], scale_init=w, bias_init=b, name=name, **kwargs)(input) """) def _layer_Conv(self): self.add_body( 0, """ def convolution(input, is_transpose, name, **kwargs): dim = __weights_dict[name]['weights'].ndim if is_transpose: weight = np.transpose(__weights_dict[name]['weights'], [dim - 2, dim - 1] + list(range(0, dim - 2))) kwargs.pop('groups', None) else: weight = np.transpose(__weights_dict[name]['weights'], [dim - 1, dim - 2] + list(range(0, dim - 2))) w = cntk.Parameter(init=weight, name=name + '_weight') input = cntk.transpose(input, [dim - 2] + list(range(0, dim - 2))) if is_transpose: layer = ops.convolution_transpose(w, input, **kwargs) else: layer = ops.convolution(w, input, **kwargs) if 'bias' in __weights_dict[name]: bias = np.reshape(__weights_dict[name]['bias'], [-1] + [1] * (dim - 2)) b = cntk.Parameter(init=bias, name=name + '_bias') layer = layer + b layer = cntk.transpose(layer, list(range(1, dim - 1)) + [0]) return layer """) def _layer_Pool(self): self.add_body( 0, """ def pooling(input, **kwargs): dim = len(input.output.shape) input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1))) layer = ops.pooling(input, **kwargs) layer = cntk.transpose(layer, list(range(1, dim)) + [0]) return layer """) def _layer_GlobalPooling(self): self.add_body( 0, """ def global_pooling(input, type, **kwargs): dim = len(input.output.shape) input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1))) layer = 
layers.GlobalMaxPooling(**kwargs)(input) if type == 'MAX' else layers.GlobalAveragePooling(**kwargs)(input) layer = cntk.transpose(layer, list(range(1, dim)) + [0]) return layer """) def _layer_BatchNorm(self): self.add_body( 0, """ def batch_normalization(input, name, epsilon, **kwargs): mean = cntk.Parameter(init = __weights_dict[name]['mean'], name = name + "_mean") var = cntk.Parameter(init = __weights_dict[name]['var'], name = name + "_var") layer = (input - mean) / cntk.sqrt(var + epsilon) if 'scale' in __weights_dict[name]: scale = cntk.Parameter(init = __weights_dict[name]['scale'], name = name + "_scale") layer = scale * layer if 'bias' in __weights_dict[name]: bias = cntk.Parameter(init = __weights_dict[name]['bias'], name = name + "_bias") layer = layer + bias return layer """) def _layer_Split(self): self.add_body( 0, """ def split(input, axis, split_num): split_len = input.shape[axis] res = [] st = 0 for i in range(split_num): ed = st + split_len//split_num res.append(cntk.slice(input, axis, st, ed)) st += split_len//split_num return res """)
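# ---------------------------------------------------------------------------
# Note on the _layer_* helpers above (a sketch, assuming a rank-3 HWC input):
# the IR keeps tensors channels-last while CNTK computes channels-first, so
# each helper brackets the real op with a pair of transposes:
#
#     import cntk
#     x = cntk.input_variable((224, 224, 3))   # H, W, C
#     y = cntk.transpose(x, [2, 0, 1])         # -> C, H, W for the op
#     y = cntk.transpose(y, [1, 2, 0])         # back to H, W, C
#
# The [dim - 1] + list(range(0, dim - 1)) and list(range(1, dim)) + [0]
# permutations in convolution(), pooling(), lrn(), etc. are these two
# transposes generalized to arbitrary rank.
# ---------------------------------------------------------------------------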
class CaffeEmitter(Emitter): def __init__(self, model): from six import string_types as _string_types super(CaffeEmitter, self).__init__() if isinstance(model, _string_types): network_path = model else: network_path = model[0] self._load_weights(model[1]) self.IR_graph = IRGraph(network_path) super(CaffeEmitter, self)._build() @property def header_code(self): return """import numpy as np import sys, argparse import caffe from caffe import layers as L from caffe import params as P from caffe import to_proto from six import text_type as _text_type __weights_dict = dict() def load_weights(weight_file): if weight_file == None: return try: weights_dict = np.load(weight_file).item() except: weights_dict = np.load(weight_file, encoding='bytes').item() return weights_dict def KitModel(weight_file = None): n = caffe.NetSpec() """ @property def end_code(self): return """ return n def make_net(prototxt): n = KitModel() with open(prototxt, 'w') as fpb: print(n.to_proto(), file=fpb) def gen_weight(weight_file, model, prototxt): global __weights_dict __weights_dict = load_weights(weight_file) net = caffe.Net(prototxt, caffe.TRAIN) for key in __weights_dict: if 'weights' in __weights_dict[key]: net.params[key][0].data.flat = __weights_dict[key]['weights'] elif 'mean' in __weights_dict[key]: net.params[key][0].data.flat = __weights_dict[key]['mean'] net.params[key][1].data.flat = __weights_dict[key]['var'] if 'scale' in __weights_dict[key]: net.params[key][2].data.flat = __weights_dict[key]['scale'] elif 'scale' in __weights_dict[key]: net.params[key][0].data.flat = __weights_dict[key]['scale'] if 'bias' in __weights_dict[key]: net.params[key][1].data.flat = __weights_dict[key]['bias'] net.save(model) return net if __name__=='__main__': parser = argparse.ArgumentParser(description='Generate caffe model and prototxt') parser.add_argument('--weight_file', '-w', type=_text_type, default='IR weight file') parser.add_argument('--prototxt', '-p', type=_text_type, default='caffe_converted.prototxt') parser.add_argument('--model', '-m', type=_text_type, default='caffe_converted.caffemodel') args = parser.parse_args() make_net(args.prototxt) gen_weight(args.weight_file, args.model, args.prototxt) """ def gen_code(self, phase = 'test'): self.phase = phase self.add_body(0, self.header_code) # for test # with open("graph.txt", 'w') as f: # for layer in self.IR_graph.topological_sort: # current_node = self.IR_graph.get_node(layer) # print("========current_node=========\n{}".format(current_node.layer), file=f) # test end for layer in self.IR_graph.topological_sort: current_node = self.IR_graph.get_node(layer) node_type = current_node.type #print("========current_node={}".format(current_node.layer)) if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) func(current_node) else: print("CaffeEmitter has not supported operator [%s]." 
% (node_type)) self.emit_UNKNOWN(current_node) self.add_body(0, "") self.add_body(0,self.end_code) return self.body_code def run(self, dstNetworkPath, dstWeightPath = None, phase = 'test'): super(CaffeEmitter, self).run(dstNetworkPath, dstWeightPath, phase) if self.weight_loaded: self.save_weights(self.weights_dict, dstWeightPath) @staticmethod def _shapeToStr(shapes): return [dim.size if dim.size > 0 else 1 for dim in shapes.dim] def check_if_need_transpose(self, IR_node): parent = self.IR_graph.get_parent(IR_node.name, [0]) while parent.type == 'Flatten': parent = self.IR_graph.get_parent(parent.name, [0]) dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim) if dim > 2: original_dims = self.weights_dict[IR_node.name]['weights'].shape dims = [i.size for i in parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]] + [-1] self.weights_dict[IR_node.name]['weights'] = np.reshape(self.weights_dict[IR_node.name]['weights'], dims) self.weights_dict[IR_node.name]['weights'] = np.transpose(self.weights_dict[IR_node.name]['weights'], [dim - 2] + list(range(0, dim - 2)) + [dim - 1]) self.weights_dict[IR_node.name]['weights'] = np.reshape(self.weights_dict[IR_node.name]['weights'], original_dims) def emit_Conv(self, IR_node): self.add_body(1, "n.{:<15} = L.Convolution(n.{}, kernel_size={}, stride={}, num_output={}, pad={}, group={}, \ bias_term={}, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('kernel_shape')[0], IR_node.get_attr('strides')[1], IR_node.get_attr('kernel_shape')[-1], IR_node.get_attr('pads')[1], IR_node.get_attr('group', 1), IR_node.get_attr('use_bias', False))) dim = len(IR_node.get_attr('strides')) - 2 if self.weight_loaded: self.weights_dict[IR_node.name]['weights'] = np.transpose(self.weights_dict[IR_node.name]['weights'], [dim + 1, dim] + list(range(0, dim))) self.weights_dict[IR_node.variable_name] = self.weights_dict.pop(IR_node.name) # keys = [] # for key in self.weights_dict[IR_node.name].keys(): # keys.append(key) # print("=======Layer: {}, keys: {}".format(IR_node.name, keys)) def emit_Pool(self, IR_node): pooling_type = IR_node.get_attr('pooling_type') if pooling_type == 'MAX': pooling_type = P.Pooling.MAX elif pooling_type == 'AVG': pooling_type = P.Pooling.AVE elif pooling_type == 'STOCHASTIC': pooling_type = P.Pooling.STOCHASTIC else: raise ValueError if IR_node.layer.attr['global_pooling'].b: self.used_layers.add('GlobalPooling') self.add_body(1, "n.{:<15} = L.Pooling(n.{}, pool={}, stride={}, global_pooling=True, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node), pooling_type, IR_node.get_attr('strides')[1])) else: self.add_body(1, "n.{:<15} = L.Pooling(n.{}, pool={}, kernel_size={}, pad_h={}, pad_w={}, stride={}, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node), pooling_type, IR_node.get_attr('kernel_shape')[1], IR_node.get_attr('pads')[1], IR_node.get_attr('pads')[2], IR_node.get_attr('strides')[1])) def emit_UNKNOWN(self, IR_node): print(IR_node.IR_layer.name) def emit_DataInput(self, IR_node): shape = self._shapeToStr(IR_node.get_attr('shape')) shape = [shape[0], shape[-1]] + shape[1:-1] self.add_body(1, "n.{:<15} = L.Input(shape=[dict(dim={})], ntop=1)".format( IR_node.variable_name, shape)) def emit_Dropout(self, IR_node): in_place = True self.add_body(1, "n.{:<15} = L.Dropout(n.{}, dropout_ratio={} , in_place={}, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node), 1 - IR_node.get_attr('keep_prob'), in_place)) def 
emit_FullyConnected(self, IR_node): self.add_body(1, "n.{:<15} = L.InnerProduct(n.{}, num_output={}, bias_term={}, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.layer.attr["units"].i, IR_node.get_attr('use_bias', False))) if self.weight_loaded: self.check_if_need_transpose(IR_node) self.weights_dict[IR_node.name]['weights'] = np.transpose(self.weights_dict[IR_node.name]['weights'], (1, 0)) self.weights_dict[IR_node.variable_name] = self.weights_dict.pop(IR_node.name) def emit_BatchNorm(self, IR_node): self.add_body(1, "n.{:<15} = L.BatchNorm(n.{}, eps={}, use_global_stats={}, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('epsilon'), self.phase == 'test' )) scale_layer_var_name = IR_node.variable_name + "_scale" self.add_body(1, "n.{:<15} = L.Scale(n.{}, bias_term={}, ntop=1)".format( scale_layer_var_name, IR_node.variable_name, IR_node.get_attr('bias', False) )) IR_node.real_name = IR_node.name + "_scale" if self.weight_loaded: self.weights_dict[scale_layer_var_name] = dict() if 'scale' in self.weights_dict[IR_node.name]: self.weights_dict[scale_layer_var_name]['scale'] = self.weights_dict[IR_node.name]['scale'] #self.weights_dict[IR_node.name].pop('scale', None) self.weights_dict[IR_node.name]['scale'] = 1 self.weights_dict[scale_layer_var_name]['bias'] = self.weights_dict[IR_node.name]['bias'] self.weights_dict[IR_node.name].pop('bias', None) self.weights_dict[IR_node.variable_name] = self.weights_dict.pop(IR_node.name) def emit_LRN(self, IR_node): self.add_body(1, "n.{:<15} = L.LRN(n.{}, local_size={}, alpha={}, beta={}, k={})".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('size') * 2 - 1, IR_node.get_attr('alpha'), IR_node.get_attr('beta'), IR_node.get_attr('k') )) def emit_Add(self, IR_node): input_layers = ', '.join(('n.' + self.IR_graph.get_parent(IR_node.name, [num]).real_variable_name) for num in range(0, len(IR_node.in_edges))) self.add_body(1, "n.{:<15} = L.Eltwise({}, operation=1, ntop=1)".format( IR_node.variable_name, input_layers, )) def emit_Flatten(self, IR_node): IR_node.real_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name def emit_Concat(self, IR_node): axis_array = (2, 3, 1, 0) axis = axis_array.index(IR_node.get_attr('axis')) input_layers = ', '.join(('n.' + self.IR_graph.get_node(edge).real_variable_name) for edge in IR_node.in_edges) self.add_body(1, "n.{:<15} = L.Concat({}, axis={})".format( IR_node.variable_name, input_layers, axis )) # def emit_Tanh(self, IR_node): # self._emit_activation(IR_node, 'ops.tanh') def emit_Relu(self, IR_node): in_place = True self.add_body(1, "n.{:<15} = L.ReLU(n.{}, in_place={}, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node), in_place)) def emit_Softmax(self, IR_node): self.add_body(1, "n.{:<15} = L.Softmax(n.{}, ntop=1)".format( IR_node.variable_name, self.parent_variable_name(IR_node)))
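# ---------------------------------------------------------------------------
# Usage sketch (illustrative; file names are placeholders): CaffeEmitter
# writes a script whose __main__ block is the end_code above, so a typical
# conversion is two steps -- emit the script, then run it under pycaffe:
#
#     emitter = CaffeEmitter(('converted.pb', 'converted.npy'))
#     emitter.run('caffe_kit_model.py', 'caffe_converted.npy', phase='test')
#     # then:
#     #   python caffe_kit_model.py -w caffe_converted.npy \
#     #       -p caffe_converted.prototxt -m caffe_converted.caffemodel
# ---------------------------------------------------------------------------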
class TensorflowEmitter(Emitter): dtype_map = { graph_pb2.DT_FLOAT16: "tf.float16", graph_pb2.DT_FLOAT32: "tf.float32", graph_pb2.DT_FLOAT64: "tf.float64", graph_pb2.DT_INT16: "tf.int16", graph_pb2.DT_INT32: "tf.int32", graph_pb2.DT_INT64: "tf.int64", graph_pb2.DT_UINT8: "tf.uint8", graph_pb2.DT_UINT16: "tf.uint16" } @property def header_code(self): return """import tensorflow as tf __weights_dict = dict() is_train = {} def load_weights(weight_file): import numpy as np if weight_file == None: return try: weights_dict = np.load(weight_file).item() except: weights_dict = np.load(weight_file, encoding='bytes').item() return weights_dict def KitModel(weight_file = None): global __weights_dict __weights_dict = load_weights(weight_file) """.format(self.trainable) def __init__(self, model): super(TensorflowEmitter, self).__init__() from six import string_types as _string_types if isinstance(model, _string_types): network_path = model else: network_path = model[0] self._load_weights(model[1]) self.IR_graph = IRGraph(network_path) super(TensorflowEmitter, self)._build() def gen_code(self, phase): self.trainable = (phase == 'train') self.add_body(0, self.header_code) for layer in self.IR_graph.topological_sort: current_node = self.IR_graph.get_node(layer) node_type = current_node.type if hasattr(self, "emit_" + node_type): func = getattr(self, "emit_" + node_type) func(current_node) else: print("TensorflowEmitter has not supported operator [%s]." % (node_type)) self.emit_UNKNOWN(current_node) self.add_body( 1, "return {}, {}".format( ', '.join([ self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.input_layers ]), ', '.join([ self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers ]))) self.add_body(0, "") for i in self.used_layers: func = getattr(self, "_layer_" + i) func() return self.body_code @staticmethod def _shapeToStr(shapes): ret = [dim.size if dim.size != -1 else 'None' for dim in shapes.dim] return ', '.join('%s' % i for i in ret) def emit_Conv(self, IR_node): self.used_layers.add(IR_node.type) strides_str = ', '.join('%s' % i for i in IR_node.get_attr('strides')[1:-1]) input_node, padding = self._defuse_padding(IR_node) self.add_body( 1, "{:<15} = convolution({}, group={}, strides=[{}], padding='{}', name='{}')" .format(IR_node.variable_name, input_node, IR_node.get_attr('group', 1), strides_str, padding, IR_node.name)) def _defuse_padding(self, IR_node, extra_str=""): auto_pad = IR_node.get_attr('auto_pad') if auto_pad: input_node = self.parent_variable_name(IR_node) if auto_pad == 'VALID': padding = 'VALID' elif auto_pad.startswith("SAME"): padding = 'SAME' else: raise ValueError("Unknown padding type [{}].".format(auto_pad)) return input_node, padding else: padding = IR_node.get_attr("pads") padding = convert_onnx_pad_to_tf(padding) if is_valid_padding(padding) == False: input_node = IR_node.variable_name + '_pad' self.add_body( 1, "{:<15} = tf.pad({}, paddings = {}{})".format( input_node, self.parent_variable_name(IR_node), padding, extra_str)) else: input_node = self.parent_variable_name(IR_node) return input_node, 'VALID' def emit_Constant(self, IR_node): self.add_body( 1, "{:<15} = tf.constant(__weights_dict['{}']['value'], name='{}')". 
format(IR_node.variable_name, IR_node.name, IR_node.name)) def emit_Pool(self, IR_node): pooling_type = IR_node.get_attr('pooling_type') if pooling_type == 'MAX': op = 'max_pool' padding_const = ", constant_values=float('-Inf')" elif pooling_type == 'AVG': op = 'avg_pool' padding_const = "" else: raise ValueError("unknown pooling type [{}].".format(pooling_type)) arrlen = len(IR_node.get_attr('strides')) dim_str = '3d' if arrlen == 5 else "" if IR_node.layer.attr['global_pooling'].b: self.add_body( 1, "{:<15} = tf.nn.{}{}({}, [1] + {}.get_shape().as_list()[1:-1] + [1], strides = [1] * {}, padding = 'VALID', name = '{}')" .format(IR_node.variable_name, op, dim_str, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node), arrlen, IR_node.name)) else: kernel_shape_str = ', '.join( '%s' % i for i in IR_node.get_attr('kernel_shape')) strides_str = ', '.join('%s' % i for i in IR_node.get_attr('strides')) input_node, padding = self._defuse_padding(IR_node, padding_const) self.add_body( 1, "{:<15} = tf.nn.{}{}({}, [{}], [{}], padding='{}', name='{}')". format(IR_node.variable_name, op, dim_str, input_node, kernel_shape_str, strides_str, padding, IR_node.name)) def emit_UNKNOWN(self, IR_node): print(IR_node.name) def emit_Add(self, IR_node): self.add_body( 1, "{:<15} = {}".format( IR_node.variable_name, ' + '.join('%s' % self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges))) def emit_DataInput(self, IR_node): assert not IR_node.in_edges shape_str = self._shapeToStr(IR_node.layer.attr["shape"].shape) if 'dtype' in IR_node.layer.attr: dtype_str = "{}, ".format( self.dtype_map[IR_node.layer.attr['dtype'].type]) else: dtype_str = "tf.float32," code = "{:<15} = tf.placeholder({} shape = ({}), name = '{}')".format( IR_node.variable_name, dtype_str, shape_str, IR_node.name) self.add_body(1, code) def emit_Dropout(self, IR_node): parent = self.IR_graph.get_parent(IR_node.name, [0]) if self.trainable: self.add_body( 1, "{:<15} = Dropout(name = '{}', dropout_rate = {})({})".format( IR_node.variable_name, IR_node.name, 1 - IR_node.IR_layer.attr["keep_prob"].f, parent.real_variable_name)) else: IR_node.real_name = parent.real_name def emit_FullyConnected(self, IR_node): if IR_node.name in self.weights_dict and 'weights' in self.weights_dict[ IR_node.name]: kernel_str = "kernel_initializer = tf.constant_initializer(__weights_dict['{}']['weights']), ".format( IR_node.name) else: kernel_str = "" if IR_node.name in self.weights_dict and 'bias' in self.weights_dict[ IR_node.name]: bias_str = "bias_initializer = tf.constant_initializer(__weights_dict['{}']['bias']), ".format( IR_node.name) else: bias_str = "" code = "{:<15} = tf.layers.dense({}, {}, {}{}use_bias = {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.layer.attr['units'].i, kernel_str, bias_str, IR_node.layer.attr['use_bias'].b) self.add_body(1, code) def emit_Flatten(self, IR_node): #self._emit_unary_operation(IR_node, "contrib.layers.flatten") self.add_body( 1, "{:<15} = tf.contrib.layers.flatten({})".format( IR_node.variable_name, self.parent_variable_name(IR_node))) def emit_Mul(self, IR_node): self.add_body( 1, "{:<15} = {}".format( IR_node.variable_name, ' * '.join('%s' % self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges))) def emit_Reshape(self, IR_node): self.add_body( 1, "{:<15} = tf.reshape({}, [{}], '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), ', '.join('%s' % i for i in IR_node.get_attr('shape')), IR_node.name)) def emit_Sub(self, 
IR_node): self.add_body( 1, "{:<15} = {}".format( IR_node.variable_name, ' - '.join('%s' % self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges))) def _emit_unary_operation(self, IR_node, op_name): self.add_body( 1, "{:<15} = tf.{}({}, name = '{}')".format( IR_node.variable_name, op_name, self.parent_variable_name(IR_node), IR_node.name)) def emit_Tanh(self, IR_node): self._emit_unary_operation(IR_node, 'tanh') def emit_Elu(self, IR_node): self._emit_unary_operation(IR_node, 'nn.elu') def emit_Relu(self, IR_node): self._emit_unary_operation(IR_node, 'nn.relu') def emit_Relu6(self, IR_node): self._emit_unary_operation(IR_node, 'nn.relu6') def emit_CRelu(self, IR_node): self._emit_unary_operation(IR_node, 'nn.crelu') def emit_PRelu(self, IR_node): self.used_layers.add(IR_node.type) self.add_body( 1, "{:<15} = prelu({}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.name)) def emit_Softmax(self, IR_node): self._emit_unary_operation(IR_node, 'nn.softmax') def emit_Sigmoid(self, IR_node): self._emit_unary_operation(IR_node, 'sigmoid') def emit_Embedding(self, IR_node): raise NotImplementedError() ret = "{:<15} = Embedding(input_dim = {}, output_dim = {}, mask_zero = {})({})".format( IR_node.name, IR_node.IR_layer.attr['input_dim'].i, IR_node.IR_layer.attr['output_dim'].i, IR_node.IR_layer.attr['mask_zero'].b, IR_node.in_edges[0]) return ret assert False def emit_LSTM(self, IR_node): return self.emit_RNNs(IR_node, "LSTM") def emit_GRU(self, IR_node): return self.emit_RNNs(IR_node, "GRU") def emit_Concat(self, IR_node): self.add_body( 1, "{:<15} = tf.concat([{}], {}, name = '{}')".format( IR_node.variable_name, ', '.join( self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges), IR_node.layer.attr['axis'].i, IR_node.name)) def emit_BatchNorm(self, IR_node): self.used_layers.add(IR_node.type) self.add_body( 1, "{:<15} = batch_normalization({}, variance_epsilon={}, name='{}')". format(IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.get_attr('epsilon'), IR_node.name)) def emit_Pad(self, IR_node): padding = IR_node.get_attr('pads') padding = convert_onnx_pad_to_tf(padding) mode = IR_node.get_attr('mode', 'constant') if mode == 'constant' or mode == 'reflect': mode = mode.upper() elif mode == 'edge': mode = 'SYMMETRIC' else: raise NotImplementedError( "Not support padding mode {}.".format(mode)) self.add_body( 1, "{:<15} = tf.pad({}, {}, '{}', name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), padding, mode, IR_node.variable_name)) def emit_Squeeze(self, IR_node): self.add_body( 1, "{:<15} = tf.squeeze({}, [{}], name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), ', '.join('%s' % axis for axis in IR_node.layer.attr['axes'].list.i), IR_node.name)) def emit_ReduceMean(self, IR_node): self.add_body( 1, "{:<15} = tf.reduce_mean({}, [{}], {}, name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), ','.join('%s' % i for i in IR_node.get_attr('axes')), IR_node.get_attr('keepdims'), IR_node.name)) def emit_LRN(self, IR_node): self.add_body( 1, "{:<15} = tf.nn.lrn({}, {}, alpha = {}, beta = {}, name = '{}')". 
    def emit_LRN(self, IR_node):
        self.add_body(1, "{:<15} = tf.nn.lrn({}, {}, alpha = {}, beta = {}, name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.get_attr('size') - 1,
            IR_node.layer.attr['alpha'].f / (IR_node.layer.attr['size'].i * 2 - 1),
            IR_node.get_attr('beta'),
            IR_node.name))

    def emit_SeparableConv(self, IR_node):
        self.used_layers.add(IR_node.type)
        strides_str = ', '.join('%s' % i for i in IR_node.get_attr('strides'))
        input_node, padding = self._defuse_padding(IR_node)
        self.add_body(1, "{:<15} = separable_convolution({}, strides = [{}], padding = '{}', name = '{}')".format(
            IR_node.variable_name, input_node, strides_str, padding, IR_node.name))

    def emit_DepthwiseConv(self, IR_node):
        self.used_layers.add(IR_node.type)
        strides_str = ', '.join('%s' % i for i in IR_node.layer.attr['strides'].list.i)
        input_node, padding = self._defuse_padding(IR_node)
        self.add_body(1, "{:<15} = depthwise_convolution({}, strides = [{}], padding = '{}', name = '{}')".format(
            IR_node.variable_name, input_node, strides_str, padding, IR_node.name))

    def emit_Crop(self, IR_node):
        border = IR_node.get_attr('border')
        assert len(border) == 4

        output_shape = IR_node.get_attr('_output_shapes')[0]
        output_shape = shape_to_list(output_shape)

        self.add_body(1, "{:<15} = tf.image.crop_to_bounding_box({}, offset_height={}, offset_width={}, target_height={}, target_width={})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node),
            border[0], border[2], output_shape[1], output_shape[2]))

    def _layer_Conv(self):
        self.add_body(0, """
def convolution(input, name, group, **kwargs):
    w = tf.Variable(__weights_dict[name]['weights'], trainable=is_train, name=name + "_weight")
    if group == 1:
        layer = tf.nn.convolution(input, w, **kwargs)
    else:
        weight_groups = tf.split(w, num_or_size_splits=group, axis=-1)
        xs = tf.split(input, num_or_size_splits=group, axis=-1)
        convolved = [tf.nn.convolution(x, weight, **kwargs) for
                     (x, weight) in zip(xs, weight_groups)]
        layer = tf.concat(convolved, axis=-1)

    if 'bias' in __weights_dict[name]:
        b = tf.Variable(__weights_dict[name]['bias'], trainable=is_train, name=name + "_bias")
        layer = layer + b
    return layer""")

    def _layer_PRelu(self):
        self.add_body(0, """
def prelu(input, name):
    gamma = tf.Variable(__weights_dict[name]['gamma'], name=name + "_gamma", trainable=is_train)
    return tf.maximum(0.0, input) + gamma * tf.minimum(0.0, input)
""")

    def _layer_BatchNorm(self):
        self.add_body(0, """
def batch_normalization(input, name, **kwargs):
    mean = tf.Variable(__weights_dict[name]['mean'], name = name + "_mean", trainable = is_train)
    variance = tf.Variable(__weights_dict[name]['var'], name = name + "_var", trainable = is_train)
    offset = tf.Variable(__weights_dict[name]['bias'], name = name + "_bias", trainable = is_train) if 'bias' in __weights_dict[name] else None
    scale = tf.Variable(__weights_dict[name]['scale'], name = name + "_scale", trainable = is_train) if 'scale' in __weights_dict[name] else None
    return tf.nn.batch_normalization(input, mean, variance, offset, scale, name = name, **kwargs)
""")

    def _layer_SeparableConv(self):
        self.add_body(0, """
def separable_convolution(input, name, **kwargs):
    depthwise = tf.Variable(__weights_dict[name]['depthwise_filter'], trainable = is_train, name = name + "_df")
    pointwise = tf.Variable(__weights_dict[name]['pointwise_filter'], trainable = is_train, name = name + "_pf")
    layer = tf.nn.separable_conv2d(input, depthwise, pointwise, **kwargs)
    if 'bias' in __weights_dict[name]:
        b = tf.Variable(__weights_dict[name]['bias'], trainable = is_train, name = name + "_bias")
        layer = layer + b
    return layer""")
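    # The grouped path in _layer_Conv above implements group convolution by
    # splitting both the kernel and the input into `group` channel slices,
    # convolving each pair, and concatenating the results. Illustrative shapes
    # (hypothetical values): an NHWC input (1, 8, 8, 4) with group=2 becomes
    # two (1, 8, 8, 2) slices, each convolved with its own (k, k, 2, f/2)
    # kernel slice, then re-joined on the channel axis, e.g.:
    #
    #   xs = tf.split(input, num_or_size_splits=2, axis=-1)
    #   ws = tf.split(w, num_or_size_splits=2, axis=-1)
    #   out = tf.concat([tf.nn.convolution(x, wg, padding='SAME')
    #                    for x, wg in zip(xs, ws)], axis=-1)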
    def _layer_DepthwiseConv(self):
        self.add_body(0, """
def depthwise_convolution(input, name, **kwargs):
    depthwise = tf.Variable(__weights_dict[name]['weights'], trainable = is_train, name = name + "_df")
    layer = tf.nn.depthwise_conv2d(input, depthwise, **kwargs)
    if 'bias' in __weights_dict[name]:
        b = tf.Variable(__weights_dict[name]['bias'], trainable = is_train, name = name + "_bias")
        layer = layer + b
    return layer""")
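# Illustrative sketch, not used by the emitters: the batch_normalization
# helper emitted by _layer_BatchNorm above delegates to
# tf.nn.batch_normalization, which applies the transform below. A numpy
# stand-in (all names hypothetical) makes the semantics easy to verify:
def _bn_reference(x, mean, var, offset=None, scale=None, variance_epsilon=1e-5):
    import numpy as np
    inv = 1.0 / np.sqrt(var + variance_epsilon)        # normalize by running stats
    if scale is not None:
        inv = inv * scale                              # optional learned gamma
    shift = (offset if offset is not None else 0.0) - mean * inv
    return x * inv + shift                             # y = gamma*(x-mean)/sqrt(var+eps)+beta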
class PytorchEmitter(Emitter):

    dtype_map = {
        graph_pb2.DT_FLOAT16: "float16",
        graph_pb2.DT_FLOAT32: "float32",
        graph_pb2.DT_FLOAT64: "float64",
        graph_pb2.DT_INT16: "int16",
        graph_pb2.DT_INT32: "int32",
        graph_pb2.DT_INT64: "int64",
        graph_pb2.DT_UINT8: "uint8",
        graph_pb2.DT_UINT16: "uint16"
    }

    # Base Functions
    def __init__(self, model):
        super(PytorchEmitter, self).__init__()
        if isinstance(model, _string_types):
            network_path = model
        else:
            network_path = model[0]
            weight_path = model[1]

        self.init_code = str()
        self.IR_graph = IRGraph(network_path)
        self.IR_graph.build()
        self._load_weights(weight_path)

    def run(self, dstNetworkPath, dstWeightPath=None, phase='test'):
        super(PytorchEmitter, self).run(dstNetworkPath, dstWeightPath, phase)
        if self.weight_loaded:
            self.save_weights(self.weights_dict, dstWeightPath)

    def add_init(self, indent, codes):
        if isinstance(codes, _string_types):
            codes = [codes]
        for code in codes:
            self.init_code += ("    " * indent) + code + '\n'

    @property
    def header_code(self):
        return """import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

__weights_dict = dict()

def load_weights(weight_file):
    if weight_file == None:
        return

    try:
        weights_dict = np.load(weight_file).item()
    except:
        weights_dict = np.load(weight_file, encoding='bytes').item()

    return weights_dict

class KitModel(nn.Module):
"""

    def gen_code(self, phase):
        self.add_init(1, """
    def __init__(self, weight_file):
        super(KitModel, self).__init__()
        global __weights_dict
        __weights_dict = load_weights(weight_file)
""")

        self.add_body(1, "def forward(self, x):")

        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                line = func(current_node)
            else:
                print("Pytorch Emitter has not supported operator [%s]." % (node_type))
                self.emit_UNKNOWN(current_node)

        self.add_body(2, "return {}".format(
            ','.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers])))

        self.add_body(0, "")
        for i in self.used_layers:
            func = getattr(self, "_layer_" + i)
            func()

        return self.header_code + '\n' + self.init_code + '\n' + self.body_code

    def _defuse_padding(self, IR_node, extra_str=""):
        input_node = self.parent_variable_name(IR_node)
        if IR_node.get_attr('auto_pad') == 'VALID':
            return input_node

        if is_valid_padding(IR_node.get_attr("pads")):
            return input_node

        padding = self._convert_padding(IR_node)
        input_node = IR_node.variable_name + '_pad'
        self.add_body(2, "{:<15} = F.pad({}, {}{})".format(
            input_node, self.parent_variable_name(IR_node), padding, extra_str))

        return input_node

    def emit_Conv(self, IR_node):
        self.used_layers.add(IR_node.type)

        dim = len(IR_node.get_attr('strides')) - 2

        in_channels = IR_node.get_attr('kernel_shape')[-2]
        filter = IR_node.get_attr('kernel_shape')[-1]
        kernel = IR_node.get_attr('kernel_shape')[:-2]
        strides = IR_node.get_attr('strides')[1:-1]

        self.add_init(2, "self.{} = self.__conv({}, name='{}', in_channels={}, out_channels={}, kernel_size={}, stride={}, groups={}, bias={})".format(
            IR_node.variable_name,
            dim,
            IR_node.name,
            in_channels,
            filter,
            tuple(kernel),
            tuple(strides),
            # padding,
            IR_node.get_attr('group', 1),
            IR_node.get_attr('use_bias')))

        input_node = self._defuse_padding(IR_node)
        self.add_body(2, "{:<15} = self.{}({})".format(
            IR_node.variable_name, IR_node.variable_name, input_node))

        if self.weight_loaded:
            self.weights_dict[IR_node.name]['weights'] = np.transpose(
                self.weights_dict[IR_node.name]['weights'],
                [dim + 1, dim] + list(range(0, dim)))

    @staticmethod
    def is_ceil_mode(pads):
        lens = len(pads)
        for i in range(lens // 2 + 1, lens - 1):
            if pads[i] == pads[i - lens // 2]:
                return False
            else:
                return True
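    # Quick illustration of the heuristic above (values are hypothetical): the
    # loop returns on its very first comparison, so only the leading spatial
    # dimension is actually inspected.
    #
    #   >>> PytorchEmitter.is_ceil_mode([0, 1, 1, 0, 0, 1, 1, 0])
    #   False     # end pads mirror begin pads -> ordinary floor mode
    #   >>> PytorchEmitter.is_ceil_mode([0, 1, 1, 0, 0, 2, 1, 0])
    #   True      # one extra cell on the trailing edge -> ceil_mode=True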
    def emit_Pool(self, IR_node):
        dim = len(IR_node.get_attr('strides')) - 2

        if IR_node.get_attr('pooling_type') == "MAX":
            pool_name = "max_pool{}d".format(dim)
            # exstr = ", value=float('-Inf')"
        elif IR_node.get_attr('pooling_type') == "AVG":
            pool_name = "avg_pool{}d".format(dim)
            # exstr = ""
        else:
            raise ValueError()

        if IR_node.layer.attr['global_pooling'].b:
            self.add_body(2, "{:<15} = F.{}(input = {}, kernel_size = {}.size()[2:])".format(
                IR_node.variable_name, pool_name,
                self.parent_variable_name(IR_node),
                self.parent_variable_name(IR_node)))
        else:
            for e in IR_node.get_attr('dilations', []):
                assert e == 1

            pool_size = IR_node.get_attr('kernel_shape')[1:-1]
            strides = IR_node.get_attr('strides')[1:-1]
            padding = IR_node.get_attr('pads')[1:dim]
            ceil_mode = self.is_ceil_mode(IR_node.get_attr('pads'))

            # input_node = self._defuse_padding(IR_node, exstr)
            self.add_body(2, "{:<15} = F.{}({}, kernel_size={}, stride={}, padding={}, ceil_mode={})".format(
                IR_node.variable_name, pool_name,
                self.parent_variable_name(IR_node),
                tuple(pool_size), tuple(strides), tuple(padding), ceil_mode))

    def emit_UNKNOWN(self, IR_node):
        print(IR_node.name)

    def emit_DataInput(self, IR_node):
        # Ignore it in Pytorch
        IR_node.real_name = 'x'

    def emit_Dropout(self, IR_node):
        self.add_body(2, "{:<15} = F.dropout(input = {}, p = {}, training = self.training, inplace = True)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.layer.attr["keep_prob"].f))

    def check_if_need_transpose(self, IR_node):
        parent = self.IR_graph.get_parent(IR_node.name, [0])
        while parent.type == 'Flatten':
            parent = self.IR_graph.get_parent(parent.name, [0])
        dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim)
        if dim > 2:
            original_dims = self.weights_dict[IR_node.name]['weights'].shape
            dims = [i.size for i in parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]] + [-1]
            self.weights_dict[IR_node.name]['weights'] = np.reshape(self.weights_dict[IR_node.name]['weights'], dims)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(self.weights_dict[IR_node.name]['weights'], [dim - 2] + list(range(0, dim - 2)) + [dim - 1])
            self.weights_dict[IR_node.name]['weights'] = np.reshape(self.weights_dict[IR_node.name]['weights'], original_dims)

    def emit_FullyConnected(self, IR_node):
        self.used_layers.add(IR_node.type)
        in_features = 1
        for i in self.IR_graph.get_parent(IR_node.name, [0]).layer.attr['_output_shapes'].list.shape[0].dim[1:]:
            in_features *= i.size

        self.add_init(2, "self.{} = self.__dense(name = '{}', in_features = {}, out_features = {}, bias = {})".format(
            IR_node.variable_name, IR_node.name, in_features,
            IR_node.layer.attr["units"].i,
            IR_node.IR_layer.attr["use_bias"].b))

        input_node = self.parent_variable_name(IR_node)
        if len(self.IR_graph.get_parent(IR_node.name, [0]).get_attr('_output_shapes')[0].dim) > 2:
            input_node = "{}.view({}.size(0), -1)".format(input_node, input_node)
        self.add_body(2, "{:<15} = self.{}({})".format(
            IR_node.variable_name, IR_node.variable_name, input_node))

        if self.weight_loaded:
            self.check_if_need_transpose(IR_node)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(
                self.weights_dict[IR_node.name]['weights'], (1, 0))

    def emit_Flatten(self, IR_node):
        parent = self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name
        self.add_body(2, "{:<15} = {}.view({}.size(0), -1)".format(
            IR_node.variable_name, parent, parent))

    def emit_Reshape(self, IR_node):
        raise NotImplementedError
        shape_str = IRGraph.shapeToStr(IR_node.IR_layer.attr["shape"].shape, True)
        self.add_body(1, "{:<15} = Reshape(name = \"{}\", target_shape = ({}))({})".format(
            IR_node.variable_name, IR_node.name, shape_str,
            self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name))

    def emit_Tanh(self, IR_node):
        raise NotImplementedError()
        code = "{:<15} = Activation(name = '{}', activation = 'tanh')({})".format(
            IR_node.replace_scope(IR_node.name), IR_node.name,
            IR_node.replace_scope(IR_node.in_edges[0]))
        return code
    def emit_Relu(self, IR_node):
        self.add_body(2, "{:<15} = F.relu({})".format(
            IR_node.variable_name,
            self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name))

    def emit_Softmax(self, IR_node):
        self.add_body(2, "{:<15} = F.softmax({})".format(
            IR_node.variable_name,
            self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name))

    def emit_Sigmoid(self, IR_node):
        code = "{:<15} = Activation(name = '{}', activation = 'sigmoid')({})".format(
            IR_node.variable_name, IR_node.name,
            self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name)
        return code

    def emit_Embedding(self, IR_node):
        raise NotImplementedError()
        ret = "{:<15} = Embedding(input_dim = {}, output_dim = {}, mask_zero = {})({})".format(
            IR_node.name,
            IR_node.IR_layer.attr['input_dim'].i,
            IR_node.IR_layer.attr['output_dim'].i,
            IR_node.IR_layer.attr['mask_zero'].b,
            IR_node.in_edges[0])
        return ret

    def emit_RNNs(self, IR_node, func):
        raise NotImplementedError()
        # for Keras
        if "dropout" in IR_node.IR_layer.attr:
            dropout_str = ",dropout = {}, recurrent_dropout = {}".format(
                IR_node.IR_layer.attr['dropout'].f,
                IR_node.IR_layer.attr['recurrent_dropout'].f)
        else:
            dropout_str = ""

        code = "{:<15} = {}(units = {}, use_bias = {} {})({})".format(
            IR_node.name, func,
            IR_node.IR_layer.attr['units'].i,
            IR_node.IR_layer.attr['use_bias'].b,
            dropout_str,
            IR_node.in_edges[0])
        return code

    def emit_LSTM(self, IR_node):
        return self.emit_RNNs(IR_node, "LSTM")

    def emit_GRU(self, IR_node):
        return self.emit_RNNs(IR_node, "GRU")

    def emit_Add(self, IR_node):
        self.add_body(2, "{:<15} = {}".format(
            IR_node.variable_name,
            ' + '.join('%s' % self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges)))

    def emit_Sub(self, IR_node):
        self.add_body(2, "{:<15} = {}".format(
            IR_node.variable_name,
            ' - '.join('%s' % self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges)))

    def emit_Mul(self, IR_node):
        self.add_body(2, "{:<15} = {}".format(
            IR_node.variable_name,
            ' * '.join('%s' % self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges)))

    def emit_Constant(self, IR_node):
        self.add_init(2, "self.{:<15} = torch.autograd.Variable(torch.Tensor(__weights_dict['{}']['value']), requires_grad=False)".format(
            IR_node.variable_name, IR_node.name))
        # self.add_init(2, "self.{:<15} = torch.from_numpy(__weights_dict['{}']['value'])".format(
        #     IR_node.variable_name,
        #     IR_node.name))
        IR_node.real_name = "self." + IR_node.variable_name

    @staticmethod
    def _convert_axis(IR_node, axis):
        ndim = len(IR_node.get_attr('_output_shapes')[0].dim)
        if axis == 0:
            return 0
        elif axis == ndim - 1:
            return 1
        else:
            return axis + 1

    def emit_Concat(self, IR_node):
        axis = self._convert_axis(IR_node, IR_node.get_attr('axis'))
        self.add_body(2, "{:<15} = torch.cat(({}), {})".format(
            IR_node.variable_name,
            ', '.join(self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges),
            axis,
        ))
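    # _convert_axis above maps an axis in the IR's channels-last (NHWC) layout
    # to PyTorch's channels-first (NCHW) layout. For a 4-D tensor
    # (hypothetical values):
    #
    #   IR axis 0 (batch)    -> 0
    #   IR axis 3 (channels) -> 1
    #   IR axis 1 (height)   -> 2
    #   IR axis 2 (width)    -> 3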
    def emit_BatchNorm(self, IR_node):
        self.used_layers.add(IR_node.type)
        dim = len(IR_node.layer.attr['_output_shapes'].list.shape[0].dim) - 2

        self.add_init(2, "self.{} = self.__batch_normalization({}, '{}', num_features={}, eps={}, momentum={})".format(
            IR_node.variable_name,
            dim,
            IR_node.name,
            IR_node.layer.attr['_output_shapes'].list.shape[0].dim[-1].size,
            IR_node.layer.attr['epsilon'].f,
            IR_node.layer.attr['momentum'].f,
        ))

        self.add_body(2, "{:<15} = self.{}({})".format(
            IR_node.variable_name, IR_node.variable_name,
            self.parent_variable_name(IR_node)))

    def emit_Squeeze(self, IR_node):
        self.add_body(2, "{:<15} = torch.squeeze({})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node)))

    @staticmethod
    def _convert_padding(IR_node):
        padding = IR_node.get_attr('pads')
        padding = convert_onnx_pad_to_tf(padding)[1:-1]
        new_padding = []
        for pad in padding:
            new_padding.insert(0, pad)
        return tuple(np.array(new_padding).reshape(-1).tolist())

    def emit_Pad(self, IR_node):
        if IR_node.get_attr('mode') == 'constant':
            mode = "mode = 'constant', value = {}".format(0)
        elif IR_node.get_attr('mode') == 'reflect':
            mode = "mode = 'reflect'"
        elif IR_node.get_attr('mode') == 'SYMMETRIC':
            mode = "mode = 'replicate'"
        else:
            assert False

        padding = self._convert_padding(IR_node)
        self.add_body(2, "{:<15} = F.pad({}, {}, {})".format(
            IR_node.variable_name, self.parent_variable_name(IR_node), padding, mode))

    def emit_ReduceMean(self, IR_node):
        axes = [self._convert_axis(IR_node, x) for x in IR_node.get_attr('axes')]
        input_node = self.parent_variable_name(IR_node)
        for axis in sorted(axes, reverse=True):
            self.add_body(2, "{:<15} = torch.mean({}, {}, {})".format(
                IR_node.variable_name, input_node, axis,
                IR_node.get_attr("keepdims")))
            input_node = IR_node.variable_name

    def emit_LRN(self, IR_node):
        self.used_layers.add(IR_node.type)
        self.add_body(2, "{:<15} = self.LRN(size = {}, alpha = {}, beta = {})({})".format(
            IR_node.variable_name,
            IR_node.layer.attr['size'].i * 2 - 1,
            IR_node.layer.attr['alpha'].f,
            IR_node.layer.attr['beta'].f,
            self.parent_variable_name(IR_node)))

    def _layer_Conv(self):
        self.add_body(0, """
    @staticmethod
    def __conv(dim, name, **kwargs):
        if dim == 1:
            layer = nn.Conv1d(**kwargs)
        elif dim == 2:
            layer = nn.Conv2d(**kwargs)
        elif dim == 3:
            layer = nn.Conv3d(**kwargs)
        else:
            raise NotImplementedError()

        layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights']))
        if 'bias' in __weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias']))
        return layer""")

    def _layer_FullyConnected(self):
        self.add_body(0, """
    @staticmethod
    def __dense(name, **kwargs):
        layer = nn.Linear(**kwargs)
        layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights']))
        if 'bias' in __weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias']))
        return layer""")

    def _layer_BatchNorm(self):
        self.add_body(0, """
    @staticmethod
    def __batch_normalization(dim, name, **kwargs):
        if dim == 1:
            layer = nn.BatchNorm1d(**kwargs)
        elif dim == 2:
            layer = nn.BatchNorm2d(**kwargs)
        elif dim == 3:
            layer = nn.BatchNorm3d(**kwargs)
        else:
            raise NotImplementedError()

        if 'scale' in __weights_dict[name]:
            layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['scale']))
        else:
            layer.weight.data.fill_(1)

        if 'bias' in __weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias']))
        else:
            layer.bias.data.fill_(0)

        layer.state_dict()['running_mean'].copy_(torch.from_numpy(__weights_dict[name]['mean']))
        layer.state_dict()['running_var'].copy_(torch.from_numpy(__weights_dict[name]['var']))
        return layer""")
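    # Illustration of _convert_padding above (hypothetical pads list): assuming
    # convert_onnx_pad_to_tf regroups the IR 'pads' into per-dimension
    # [begin, end] pairs, the batch and channel pairs are stripped and the
    # remaining spatial pairs are reversed, because F.pad expects padding for
    # the *last* dimension first. E.g. (H, W) pairs [[1, 1], [2, 2]] become
    # the F.pad argument (2, 2, 1, 1).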
    def _layer_LRN(self):
        self.add_body(0, """
    class LRN(nn.Module):
        def __init__(self, size=1, alpha=1.0, beta=0.75, ACROSS_CHANNELS=False):
            super(KitModel.LRN, self).__init__()
            self.ACROSS_CHANNELS = ACROSS_CHANNELS
            if self.ACROSS_CHANNELS:
                self.average = nn.AvgPool3d(kernel_size=(size, 1, 1),
                                            stride=1,
                                            padding=(int((size - 1.0) / 2), 0, 0))
            else:
                self.average = nn.AvgPool2d(kernel_size=size,
                                            stride=1,
                                            padding=int((size - 1.0) / 2))
            self.alpha = alpha
            self.beta = beta

        def forward(self, x):
            if self.ACROSS_CHANNELS:
                div = x.pow(2).unsqueeze(1)
                div = self.average(div).squeeze(1)
                div = div.mul(self.alpha).add(1.0).pow(self.beta)
            else:
                div = x.pow(2)
                div = self.average(div)
                div = div.mul(self.alpha).add(1.0).pow(self.beta)
            x = x.div(div)
            return x""")
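# Illustrative usage of the file this emitter writes (hypothetical paths; the
# generated module defines load_weights and KitModel as shown in header_code
# and gen_code above):
def _demo_generated_pytorch_model(code_path='kit_model.py', weight_path='kit_weights.npy'):
    import importlib.util
    import torch

    spec = importlib.util.spec_from_file_location("kit_model", code_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    model = module.KitModel(weight_path)    # __init__ populates __weights_dict
    model.eval()
    x = torch.randn(1, 3, 224, 224)         # emit_DataInput binds the input to 'x'
    with torch.no_grad():
        return model(x)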
class MXNetEmitter(Emitter):

    dtype_map = {
        graph_pb2.DT_FLOAT16 : "float16",
        graph_pb2.DT_FLOAT32 : "float32",
        graph_pb2.DT_FLOAT64 : "float64",
        graph_pb2.DT_INT32 : "int32",
        graph_pb2.DT_UINT8 : "uint8"
    }

    activation_map = {
        "relu"    : "Relu",
        "sigmoid" : "Sigmoid",
        "tanh"    : "Tanh",
        "elu"     : "Elu"
        # Not support yet
        # "softrelu" : "SoftReLU"
    }

    transpose_map = {
        1 : 2,
        2 : 3,
       -1 : 1
    }

    channels_last = ['NDHWC', 'NHWC']

    def __init__(self, model):
        from six import string_types as _string_types
        if isinstance(model, _string_types):
            network_path = model
            self.weight_loaded = False
        elif len(model) == 4:
            network_path = model[0]
            weight_path = model[1]
            self.input_shape = model[2]
            self.output_weights_file = model[3]
            self.weights = np.load(weight_path).item()
            self.weight_loaded = True
            self.output_weights = dict()
        else:
            raise ValueError("the # of input arguments [{}] is not supported".format(len(model)))

        self.IR_graph = IRGraph(network_path)
        self.IR_graph.build()

    def _gen_header(self):
        header_str = """import mxnet as mx
import numpy as np

# mxnet-cpu only support channel first, default convert the model and weight as channel first
"""
        return header_str

    def gen_codes(self, phase):
        self.IR_layer_map = dict()
        for layer in self.IR_graph.topological_sort:
            self.IR_layer_map[layer] = self.IR_graph.get_node(layer)

        header = self._gen_header()
        network_code = header + "def RefactorModel():\n"
        shape = dict()

        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if len(current_node.in_edges) == 0:
                current_node.in_edges.append('data')

            if node_type.lower() in MXNetEmitter.activation_map:
                func = getattr(self, "emit_Activation")
                line = func(current_node, node_type.lower())
                network_code += "    " + line + "\n"
            elif hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                line = func(current_node)
                network_code += "    " + line + "\n"
            else:
                print("MXNet Emitter has not supported operator [%s]." % (node_type))
                self.emit_UNKNOWN(current_node)

            if node_type == "DataInput":
                cur_shape = list()
                first = True
                for dim in current_node.IR_layer.attr["shape"].shape.dim:
                    if dim.size == -1 and first:
                        cur_shape.append(1)
                        print("Detect input layer [{}] using infer batch size, set it as default value [1]".format(current_node.name))
                    else:
                        if dim.size == -1:
                            print("Warning: user should change input size manually")
                        cur_shape.append(dim.size)
                    first = False

                cur_shape.insert(1, cur_shape.pop())
                shape[current_node.name] = ', '.join('%s' % i for i in cur_shape)

        # output_weights_file = raw_input("Please type the path you want to save your MXNet model weights: ")
        if self.weight_loaded:
            dirname = os.path.dirname(self.output_weights_file)
            if not os.path.exists(dirname):
                os.makedirs(dirname)
            with open(self.output_weights_file, 'wb') as outfile:
                np.save(outfile, self.output_weights)

        comment = "\n    # if a GPU is available, change mx.cpu() to mx.gpu()"
        last_line = "{:<15} = mx.mod.Module(symbol = {}, context = mx.cpu(), data_names = ['{}'])".format(
            "model",
            ', '.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers]),
            ', '.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.input_layers]))

        network_code += "    " + comment + "\n"
        network_code += "    " + last_line + "\n"
        network_code += "    return model\n\n\n"

        weight_code = ""
        if not self.weight_loaded:
            weight_code += "# emitter does not detect any import weights, you may generate weights file manually\n"
        weight_code += self.gen_weight_code(shape, phase)

        main_code = "if __name__ == '__main__':\n    model = RefactorModel()\n"
        if self.weight_loaded:
            main_code += "    # remember to adjust params path\n    model = deploy_weight(model, '{}')\n".format(self.output_weights_file)

        if phase == 'train':
            train_code = """def train(model):
    import logging
    logging.getLogger().setLevel(logging.DEBUG)
    model.fit(train_iter,  # train data
              eval_data = val_iter,  # validation data
              optimizer = 'sgd',  # Defaults to 'sgd'
              optimizer_params = {'learning_rate':0.01},  # use fixed learning rate
              eval_metric = 'acc',  # report accuracy during training, other possible predefined metrics are: 'ce', 'f1', 'mae', 'mse', 'rmse', 'top_k_accuracy'
              batch_end_callback = mx.callback.Speedometer(batch_size, 100),  # output progress for each 100 data batches
              num_epoch = 10)  # train for at most 10 dataset passes\n\n
"""
            code = network_code + weight_code + train_code + main_code
        else:
            test_code = """import matplotlib.pyplot as plt
from collections import namedtuple
Batch = namedtuple('Batch', ['data'])

def get_image(url, show = False):
    import cv2
    # download and show the image
    fname = mx.test_utils.download(url)
    img = cv2.cvtColor(cv2.imread(fname), cv2.COLOR_BGR2RGB)
    if img is None:
        return None
    if show:
        plt.imshow(img)
        plt.axis('off')
    # convert into format (batch, RGB, width, height)
    img = cv2.resize(img, (224, 224))
    img = np.swapaxes(img, 0, 2)
    img = np.swapaxes(img, 1, 2)
    img = img[np.newaxis, :]
    return img

def predict(model, labels, url):
    # to show the image, change the argument show into True
    img = get_image(url, show = False)
    # compute the predict probabilities
    model.forward(Batch([mx.nd.array(img)]))
    prob = model.get_outputs()[0].asnumpy()
    # print the top-5
    prob = np.squeeze(prob)
    a = np.argsort(prob)[::-1]
    for i in a[0:5]:
        print('probability = %f, class = %s' % (prob[i], labels[i]))\n\n
"""
            main_code += """
    # # call function predict
    # with open('synset.txt', 'r') as f:
    #     labels = [l.rstrip() for l in f]
    # predict(model, labels, 'http://writm.com/wp-content/uploads/2016/08/Cat-hd-wallpapers.jpg')
"""
            code = network_code + weight_code + test_code + main_code

        return code

    def gen_weight_code(self, shape, phase):
        if len(shape) == 0:
            # var = raw_input("Input layer not detected, please type data shape manually(i.e. X, X, X, X): ")
            shape['data'] = ', '.join('%s' % i for i in self.input_shape)
        weight_str = "def deploy_weight(model, weight_file):\n"
        weight_str += """
    if weight_file == None:
        return

    try:
        weights_dict = np.load(weight_file).item()
    except:
        weights_dict = np.load(weight_file, encoding='bytes').item()

    arg_params = dict()
    aux_params = dict()
    for weight_name, weight_data in weights_dict.items():
        weight_name = str(weight_name)
        if "moving" in weight_name:
            aux_params[weight_name] = mx.nd.array(weight_data)
        else:
            arg_params[weight_name] = mx.nd.array(weight_data)

"""
        if phase == 'train':
            weight_str += "    model.bind(for_training = True, data_shapes = ["
        else:
            weight_str += "    model.bind(for_training = False, data_shapes = ["
        first = True
        for k, v in shape.items():
            if not first:
                weight_str += ", "
            weight_str += "('" + k + "', " + "(" + v + "))"
            first = False
        weight_str += "])\n"
        weight_str += "    model.set_params(arg_params = arg_params, aux_params = aux_params, allow_missing = True)\n\n    return model\n\n\n"

        return weight_str
        # raise NotImplementedError

    @staticmethod
    def calculate_same_pad(data_shape, kernel, stride):
        # same_pad = int(math.ceil(float(data_shape) / float(stride)))
        # valid_pad = int(math.ceil(float(data_shape - kernel + 1) / float(stride)))
        # # if (same_pad - valid_pad) % 2 == 0:
        # #     return True, (same_pad - valid_pad)
        # # else:
        # #     return False, (same_pad - valid_pad)
        # return (same_pad - valid_pad)
        # # raise NotImplementedError
        if (data_shape % stride == 0):
            pad = max(kernel - stride, 0)
        else:
            pad = max(kernel - (data_shape % stride), 0)
        if pad % 2 == 0:
            return False, pad
        else:
            return True, pad
        # raise NotImplementedError

    @staticmethod
    def transfer_pad(mode, data_shape, kernel, stride):
        if len(stride) == 0:
            stride = list([1] * len(kernel))

        if mode == b'SAME':
            # print(data_shape, kernel, stride)
            defuse_pad = False
            ret = list()
            for i in range(len(kernel)):
                defuse_pad, same_pad = MXNetEmitter.calculate_same_pad(data_shape[i + 1], kernel[i], stride[i])
                ret.append(same_pad)
            if defuse_pad:
                tmp = list([0, 0, 0, 0])
                for e in ret:
                    tmp.extend([int(e / 2), int(e / 2 + 1)])
                ret = tmp
            else:
                ret = [int(e / 2) for e in ret]
            return defuse_pad, ret
        elif mode == b'VALID':
            return False, list([0] * len(kernel))
        else:
            raise ValueError("Padding algorithm [{}] is not supported".format(mode))
        # raise NotImplementedError

    @staticmethod
    def transpose(data, dim):
        if dim == 1:
            data = data.transpose((2, 1, 0))
        elif dim == 2:
            data = data.transpose((3, 2, 0, 1))
        elif dim == 3:
            data = data.transpose((4, 3, 0, 1, 2))
        else:
            raise ValueError("The weight of dim {} cannot transpose".format(dim))
        return data

    def set_pad(self, IR_node, code, pad):
        code = "{:<15} = mx.sym.pad(data = {}, mode = 'constant', pad_width = ({}), constant_value = 0, name = '{}')".format(
            IR_node.variable_name + "_pad",
            self.parent_variable_name(IR_node),
            pad,
            IR_node.name + "_pad")

        for e in IR_node.in_edges:
            if e == 'data':
                continue
            self.IR_layer_map[e].out_edges = [x if not self.IR_layer_map[x].name == IR_node.variable_name else IR_node.variable_name + "_pad" for x in self.IR_layer_map[e].out_edges]

        return code

    def emit_UNKNOWN(self, IR_node):
        print(IR_node.IR_layer.name)
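    # Worked example of the SAME-padding arithmetic above (hypothetical
    # shapes; data_shape is the NHWC output shape, so spatial sizes start at
    # index 1):
    #
    #   >>> MXNetEmitter.transfer_pad(b'SAME', [1, 7, 7, 3], [3, 3], [2, 2])
    #   (False, [1, 1])                      # symmetric: pad // 2 per side
    #   >>> MXNetEmitter.transfer_pad(b'SAME', [1, 7, 7, 3], [4, 4], [2, 2])
    #   (True, [0, 0, 0, 0, 1, 2, 1, 2])     # odd total pad -> defused into
    #                                        # an explicit mx.sym.pad call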
    def emit_FullyConnected(self, IR_node):
        if self.weight_loaded:
            weight_dict = self.weights[IR_node.name]
            self.output_weights[IR_node.name + "_weight"] = weight_dict['weights'].transpose((1, 0))

        num_hidden = IR_node.IR_layer.attr["units"].i
        no_bias = not IR_node.IR_layer.attr["use_bias"].b
        if not no_bias and self.weight_loaded:
            self.output_weights[IR_node.name + "_bias"] = weight_dict['bias']

        code = "{:<15} = mx.sym.FullyConnected(data = {}, num_hidden = {}, no_bias = {}, name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            num_hidden,
            no_bias,
            IR_node.name)
        return code

    def emit_Convolution(self, IR_node):
        if self.weight_loaded:
            weight_dict = self.weights[IR_node.name]
            weights = weight_dict['weights']

        dim = len(IR_node.IR_layer.attr["filter"].list.i) - 2

        kernel = list()
        for idx in range(0, dim):
            kernel.append(IR_node.IR_layer.attr["filter"].list.i[idx])

        stride = list()
        for e in IR_node.IR_layer.attr["strides"].list.i[1:-1]:
            stride.append(e)

        dilate = list()
        for e in IR_node.IR_layer.attr["dilation_rate"].list.i[1:-1]:
            dilate.append(e)
        dilate = ', '.join('%s' % i for i in dilate)

        defuse_pad = False
        pad = list()
        if "padding" in IR_node.IR_layer.attr:
            output_shape = list()
            for e in IR_node.IR_layer.attr["_output_shapes"].list.shape[0].dim:
                output_shape.append(e.size)

            # print("Warning: MXNet Convolution Layer pad does not match IR Convolution Layer pad")
            defuse_pad, pad = MXNetEmitter.transfer_pad(IR_node.IR_layer.attr["padding"].s, output_shape, kernel, stride)
        pad = ', '.join('%s' % i for i in pad)

        kernel = ', '.join('%s' % i for i in kernel)
        stride = ', '.join('%s' % i for i in stride)

        num_filter = IR_node.IR_layer.attr["filter"].list.i[-1]
        no_bias = not IR_node.IR_layer.attr["use_bias"].b
        if not no_bias and self.weight_loaded:
            self.output_weights[IR_node.name + "_bias"] = weight_dict['bias']

        layout = IR_node.IR_layer.attr["data_format"].s
        # if layout == '':
        #     if dim == 1:
        #         layout = 'NCW'
        #     elif dim == 2:
        #         layout = 'NHWC'
        #     elif dim == 3:
        #         layout = 'NDHWC'
        layout = 'NCHW'

        if self.weight_loaded:
            # if layout not in MXNetEmitter.channels_last:
            weights = MXNetEmitter.transpose(weights, dim)
            self.output_weights[IR_node.name + "_weight"] = weights

        code = ""
        if not defuse_pad:
            # code = "{:<15} = mx.sym.transpose(data = {}, axes = (0, 3, 1, 2))\n".format(IR_node.replace_scope(IR_node.name) + "_input", IR_node.replace_scope(IR_node.in_edges[0]))
            code += "{:<15} = mx.sym.Convolution(data = {}, kernel = ({}), stride = ({}), dilate = ({}), pad = ({}), num_filter = {}, no_bias = {}, layout = '{}', name = '{}')".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                kernel, stride, dilate, pad, num_filter, no_bias, layout,
                IR_node.name)
            # code += "    {:<15} = mx.sym.transpose(data = {}, axes = (0, 2, 3, 1))\n".format(IR_node.replace_scope(IR_node.name), IR_node.replace_scope(IR_node.name))
        else:
            # code = "{:<15} = mx.sym.transpose(data = {}, axes = (0, 3, 1, 2))\n".format(IR_node.replace_scope(IR_node.name) + "_input", IR_node.replace_scope(IR_node.in_edges[0]))
            code += self.set_pad(IR_node, code, pad)
            code += "\n    {:<15} = mx.sym.Convolution(data = {}, kernel = ({}), stride = ({}), dilate = ({}), num_filter = {}, no_bias = {}, layout = '{}', name = '{}')".format(
                IR_node.variable_name,
                IR_node.variable_name + "_pad",
                kernel, stride, dilate, num_filter, no_bias, layout,
                IR_node.name)
            # code += "    {:<15} = mx.sym.transpose(data = {}, axes = (0, 2, 3, 1))\n".format(IR_node.replace_scope(IR_node.name), IR_node.replace_scope(IR_node.name))

        return code
    def emit_DataInput(self, IR_node):
        shape = list()
        shape.extend(IR_node.IR_layer.attr["shape"].list.i)

        code = "{:<15} = mx.sym.var('{}')".format(IR_node.variable_name, IR_node.name)
        return code

    # Add LeakyReLU Elu(slope not support)
    def emit_Activation(self, IR_node, act_type):
        act_type = act_type
        func_name = ""
        if act_type == "elu":
            func_name = "LeakyReLU"
        else:
            func_name = "Activation"

        code = "{:<15} = mx.sym.{}(data = {}, act_type = '{}', name = '{}')".format(
            IR_node.variable_name,
            func_name,
            self.parent_variable_name(IR_node),
            act_type,
            IR_node.name)
        return code

    def emit_BatchNorm(self, IR_node):
        if self.weight_loaded:
            weight_dict = self.weights[IR_node.name]

        # axis = IR_node.IR_layer.attr["axis"].i
        axis = 1
        eps = IR_node.IR_layer.attr["epsilon"].f
        momentum = IR_node.IR_layer.attr["momentum"].f

        fix_gamma = not IR_node.IR_layer.attr["scale"].b

        if self.weight_loaded:
            if not fix_gamma:
                self.output_weights[IR_node.name + "_gamma"] = weight_dict['scale']
            self.output_weights[IR_node.name + "_beta"] = weight_dict['bias']

        # not supported yet
        use_global_stats = "False"
        if self.weight_loaded:
            self.output_weights[IR_node.name + "_moving_var"] = weight_dict['var']
            self.output_weights[IR_node.name + "_moving_mean"] = weight_dict['mean']

        code = "{:<15} = mx.sym.BatchNorm(data = {}, axis = {}, eps = {}, momentum = {}, fix_gamma = {}, use_global_stats = {}, name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            axis, eps, momentum, fix_gamma, use_global_stats,
            IR_node.name)
        return code

    def emit_Pool(self, IR_node):
        global_pool = IR_node.IR_layer.attr["global_pooling"].b

        kernel = list()
        if global_pool:
            kernel = [1] * (len(IR_node.IR_layer.attr["strides"].list.i) - 2)
        else:
            for e in IR_node.IR_layer.attr["window_shape"].list.i[1:-1]:
                kernel.append(e)

        pool_type = IR_node.IR_layer.attr["pooling_type"].s.lower().decode()

        stride = list()
        for e in IR_node.IR_layer.attr["strides"].list.i[1:-1]:
            stride.append(e)

        defuse_pad = False
        pad = list()
        if "padding" in IR_node.IR_layer.attr:
            output_shape = list()
            for e in IR_node.IR_layer.attr["_output_shapes"].list.shape[0].dim:
                output_shape.append(e.size)

            # print("Warning: MXNet Pooling Layer pad does not match IR Pooling Layer pad")
            defuse_pad, pad = MXNetEmitter.transfer_pad(IR_node.IR_layer.attr["padding"].s, output_shape, kernel, stride)
        pad = ', '.join('%s' % i for i in pad)

        kernel = ', '.join('%s' % i for i in kernel)
        stride = ', '.join('%s' % i for i in stride)

        code = ""
        if not defuse_pad:
            # code = "{:<15} = mx.sym.transpose(data = {}, axes = (0, 3, 1, 2))\n".format(IR_node.replace_scope(IR_node.name) + "_input", IR_node.replace_scope(IR_node.in_edges[0]))
            code += "{:<15} = mx.sym.Pooling(data = {}, global_pool = {}, kernel = ({}), pool_type = '{}', stride = ({}), pad = ({}), name = '{}')".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                global_pool, kernel, pool_type, stride, pad,
                IR_node.name)
            # code += "    {:<15} = mx.sym.transpose(data = {}, axes = (0, 2, 3, 1))\n".format(IR_node.replace_scope(IR_node.name), IR_node.replace_scope(IR_node.name))
        else:
            # code = "{:<15} = mx.sym.transpose(data = {}, axes = (0, 3, 1, 2))\n".format(IR_node.replace_scope(IR_node.name) + "_input", IR_node.replace_scope(IR_node.in_edges[0]))
            code += self.set_pad(IR_node, code, pad)
            code += "\n    {:<15} = mx.sym.Pooling(data = {}, global_pool = {}, kernel = ({}), pool_type = '{}', stride = ({}), name = '{}')".format(
                IR_node.variable_name,
                IR_node.variable_name + "_pad",
                global_pool, kernel, pool_type, stride,
                IR_node.name)
            # code += "    {:<15} = mx.sym.transpose(data = {}, axes = (0, 2, 3, 1))\n".format(IR_node.replace_scope(IR_node.name), IR_node.replace_scope(IR_node.name))

        return code
    def emit_SoftmaxOutput(self, IR_node):
        code = "{:<15} = mx.sym.SoftmaxOutput(data = {}, name = 'softmax')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node))
        return code

    def emit_Softmax(self, IR_node):
        code = ""
        if len(IR_node.out_edges) == 0:
            code = "{:<15} = mx.sym.SoftmaxOutput(data = {}, name = 'softmax')".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node))
        else:
            axis = IR_node.IR_layer.attr["dim"].i
            code = "{:<15} = mx.sym.softmax(data = {}, axis = {}, name = '{}')".format(
                IR_node.variable_name,
                self.parent_variable_name(IR_node),
                axis,
                IR_node.name)
        return code

    def emit_Squeeze(self, IR_node):
        return self.emit_Flatten(IR_node)

    def emit_Deconvolution(self, IR_node):
        if self.weight_loaded:
            weight_dict = self.weights[IR_node.name]
            weights = weight_dict['weights']

        dim = len(IR_node.IR_layer.attr["filter"].list.i) - 2

        kernel = list()
        for idx in range(0, dim):
            kernel.append(IR_node.IR_layer.attr["filter"].list.i[idx])

        stride = list()
        for e in IR_node.IR_layer.attr["strides"].list.i[1:-1]:
            stride.append(e)

        dilate = list()
        for e in IR_node.IR_layer.attr["dilation_rate"].list.i[1:-1]:
            dilate.append(e)
        dilate = ', '.join('%s' % i for i in dilate)

        defuse_pad = False
        pad = list()
        if "padding" in IR_node.IR_layer.attr:
            output_shape = list()
            for e in IR_node.IR_layer.attr["_output_shapes"].list.shape[0].dim:
                output_shape.append(e.size)

            # print("Warning: MXNet Deconvolution Layer pad does not match IR Deconvolution Layer pad")
            defuse_pad, pad = MXNetEmitter.transfer_pad(IR_node.IR_layer.attr["padding"].s, output_shape, kernel, stride)
        pad = ', '.join('%s' % i for i in pad)

        kernel = ', '.join('%s' % i for i in kernel)
        stride = ', '.join('%s' % i for i in stride)

        num_filter = IR_node.IR_layer.attr["filter"].list.i[-2]
        no_bias = not IR_node.IR_layer.attr["use_bias"].b
        if not no_bias and self.weight_loaded:
            self.output_weights[IR_node.replace_scope(IR_node.name) + "_bias"] = weight_dict['bias']

        layout = IR_node.IR_layer.attr["data_format"].s
        # if layout == '':
        #     if dim == 1:
        #         layout = 'NCW'
        #     elif dim == 2:
        #         layout = 'NHWC'
        #     elif dim == 3:
        #         layout = 'NDHWC'
        layout = 'NCHW'

        if self.weight_loaded:
            # if layout not in MXNetEmitter.channels_last:
            weights = MXNetEmitter.transpose(weights, dim)
            self.output_weights[IR_node.replace_scope(IR_node.name) + "_weight"] = weights

        code = ""
        if not defuse_pad:
            code = "{:<15} = mx.sym.Deconvolution(data = {}, kernel = ({}), stride = ({}), dilate = ({}), pad = ({}), num_filter = {}, no_bias = {}, layout = '{}', name = '{}')".format(
                IR_node.replace_scope(IR_node.name),
                IR_node.replace_scope(IR_node.in_edges[0]),
                kernel, stride, dilate, pad, num_filter, no_bias, layout,
                IR_node.replace_scope(IR_node.name))
        else:
            code = self.set_pad(IR_node, code, pad)
            code += "\n    {:<15} = mx.sym.Deconvolution(data = {}, kernel = ({}), stride = ({}), dilate = ({}), num_filter = {}, no_bias = {}, layout = '{}', name = '{}')".format(
                IR_node.replace_scope(IR_node.name),
                IR_node.replace_scope(IR_node.name) + "_pad",
                kernel, stride, dilate, num_filter, no_bias, layout,
                IR_node.replace_scope(IR_node.name))

        return code
    def emit_Embedding(self, IR_node):
        input_dim = IR_node.IR_layer.attr["input_dim"].i
        output_dim = IR_node.IR_layer.attr["output_dim"].i
        dtype = MXNetEmitter.dtype_map.get(IR_node.layer.attr["dtype"].type, "float32")

        code = "{:<15} = mx.sym.Embedding(data = {}, input_dim = {}, output_dim = {}, dtype = {}, name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            input_dim, output_dim, dtype,
            IR_node.name)
        return code

    # def emit_LeakyReLU(self, IR_node):
    #     # IR only support Elu, the same problem with func emit_Activation
    #     code = "{:<15} = mx.sym.LeakyReLU(data = {}, )".format()
    #     return code
    #     raise NotImplementedError

    def emit_Dropout(self, IR_node):
        p = IR_node.IR_layer.attr["keep_prob"].f
        mode = IR_node.IR_layer.attr["mode"].s.lower().decode() if 'mode' in IR_node.layer.attr else 'training'
        code = "{:<15} = mx.sym.Dropout(data = {}, p = {}, mode = '{}', name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            p, mode,
            IR_node.name)
        return code

    # reverse cannot support yet
    def emit_Reshape(self, IR_node):
        shape = list()
        for e in IR_node.IR_layer.attr["shape"].list.i:
            shape.append(e)
        shape = ', '.join('%s' % i for i in shape)
        reverse = False

        code = "{:<15} = mx.sym.reshape(data = {}, shape = ({}), reverse = {}, name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            shape, reverse,
            IR_node.name)
        return code

    def emit_Flatten(self, IR_node):
        # if "data_format" in IR_node.IR_layer.attr:
        #     data_format = IR_node.IR_layer.attr["data_format"].s
        # else:
        #     data_format = "NHWC"
        #     print("set the conv format before flatten as default value NHWC")
        # if data_format in MXNetEmitter.channels_last:
        code = "{:<15} = mx.sym.transpose(data = {}, axes = (0, 2, 3, 1))\n".format("trans", self.parent_variable_name(IR_node))
        code += "    {:<15} = mx.sym.flatten(data = {}, name = '{}')".format(IR_node.variable_name, "trans", IR_node.name)
        # else:
        #     code += "{:<15} = mx.sym.flatten(data = {}, name = '{}')".format(
        #         IR_node.replace_scope(IR_node.name),
        #         IR_node.replace_scope(IR_node.in_edges[0]),
        #         IR_node.replace_scope(IR_node.name))
        return code

    @staticmethod
    def _convert_axis(IR_node, axis):
        ndim = len(IR_node.layer.attr['_output_shapes'].list.shape[0].dim)
        if axis == 0:
            return 0
        elif axis == ndim - 1:
            return 1
        else:
            return axis + 1

    def emit_Concat(self, IR_node):
        dim = MXNetEmitter._convert_axis(IR_node, IR_node.IR_layer.attr["axis"].i)
        code = "{:<15} = mx.sym.concat({}, dim = {}, name = '{}')".format(
            IR_node.variable_name,
            ', '.join(self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges),
            dim,
            IR_node.name)
        return code

    def emit_Cast(self, IR_node):
        dtype = IR_node.IR_layer.attr["dtype"].type
        code = "{:<15} = mx.sym.cast(data = {}, dtype = {}, name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            dtype,
            IR_node.name)
        return code

    def emit_Expand_dims(self, IR_node):
        axis = IR_node.IR_layer.attr["axis"].i
        code = "{:<15} = mx.sym.expand_dims(data = {}, axis = {}, name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            axis,
            IR_node.name)
        return code

    def emit_Pad(self, IR_node):
        mode = IR_node.IR_layer.attr["mode"].s.lower().decode()
        pad_width = list()
        pad_width.extend([0, 0, 0, 0])
        for e in IR_node.IR_layer.attr["paddings"].list.i[2:-2]:
            pad_width.append(e)
        # if not pad_width[2] == 0 or not pad_width[3] == 0:
        #     print("Warning: please check padding layer manually")
        pad_width = ', '.join('%s' % i for i in pad_width)

        code = "{:<15} = mx.sym.pad(data = {}, mode = '{}', pad_width = ({}), name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            mode, pad_width,
            IR_node.name)
        return code
    def emit_Add(self, IR_node):
        code = "{:<15} = mx.sym.broadcast_add({}, {})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            self.parent_variable_name(IR_node, [1]))
        return code

    def emit_Mul(self, IR_node):
        code = "{:<15} = mx.sym.broadcast_mul({}, {})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            self.parent_variable_name(IR_node, [1]))
        return code

    def emit_ReduceMean(self, IR_node):
        axes = IR_node.layer.attr['axes'].list.i[:]
        axes = ','.join('%s' % MXNetEmitter.transpose_map[i] for i in axes)

        code = "{:<15} = mx.sym.mean(data = {}, axis = ({}), keepdims = {})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            axes,
            IR_node.layer.attr['keepdims'].b)
        return code

    def emit_LRN(self, IR_node):
        code = "{:<15} = mx.sym.LRN(data = {}, alpha = {}, beta = {}, knorm = {}, nsize = {}, name = '{}')".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.layer.attr['alpha'].f,
            IR_node.layer.attr['beta'].f,
            IR_node.layer.attr['k'].f,
            IR_node.layer.attr['size'].i * 2 - 1,
            IR_node.name)
        return code
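# Illustrative sketch, not called by the emitter: emit_ReduceMean above uses
# transpose_map to translate reduction axes from the IR's NHWC layout to the
# NCHW layout this emitter forces, e.g. spatial axes [1, 2] become (2, 3):
def _demo_axis_remap(axes=(1, 2)):
    return tuple(MXNetEmitter.transpose_map[i] for i in axes)   # -> (2, 3)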
class CoreMLEmitter(Emitter):

    def __init__(self, architecture, weight):
        super(CoreMLEmitter, self).__init__()
        if not os.path.exists(architecture):
            raise ValueError("IR architecture file [{}] is not found.".format(architecture))
        else:
            self.IR_graph = IRGraph(architecture)
            self.IR_graph.build()

        if not os.path.exists(weight):
            raise ValueError("IR weight file [{}] is not found.".format(weight))
        else:
            self._load_weights(weight)

    def _get_inout(self):
        input_features = []
        output_features = []
        for input_node in self.IR_graph.input_layers:
            shape = shape_to_list(self.IR_graph.get_node(input_node).get_attr('shape'))
            shape = _infer_coreml_input_shape(shape)
            input_features.append((str(input_node), shape))
            print("CoreML Model Input Layer: [{}] {}".format(input_node, shape))

        for output_node in self.IR_graph.output_layers:
            node = self.IR_graph.get_node(output_node)
            node.out_edges.append(node.name)
            shape = node.get_attr('_output_shapes')
            if shape:
                shape = shape_to_list(shape[0])
            else:
                shape = [1]

            shape = _infer_coreml_input_shape(shape)
            output_features.append((str(output_node), shape))
            print("CoreML Model Output Layer: [{}] {}".format(output_node, shape))

        return list(input_features), list(output_features)

    def _connect_coreml_layers(self):
        for layer in self.builder.nn_spec.layers:
            for i, out_node in enumerate(layer.output):
                layer.output[i] = self.IR_graph.get_node(out_node).real_name

    def gen_model(self,
                  input_names=None,
                  output_names=None,
                  image_input_names=None,
                  is_bgr=False,
                  red_bias=0.0,
                  green_bias=0.0,
                  blue_bias=0.0,
                  gray_bias=0.0,
                  image_scale=1.0,
                  class_labels=None,
                  predicted_feature_name=None,
                  predicted_probabilities_output=''):

        input_features, output_features = self._get_inout()
        is_classifier = class_labels is not None
        mode = 'classifier' if is_classifier else None
        self.builder = _NeuralNetworkBuilder(input_features, output_features, mode=mode)

        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            print("Converting layer {}({})".format(current_node.name, current_node.type))
            node_type = current_node.type
            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                func(current_node)
            else:
                print("CoreMLEmitter has not supported operator [%s]." % (node_type))
                self.emit_UNKNOWN(current_node)
                assert False

        # Add classifier classes (if applicable)
        if is_classifier:
            classes_in = class_labels
            if isinstance(classes_in, _string_types):
                if not os.path.isfile(classes_in):
                    raise ValueError("Path to class labels [{}] does not exist.".format(classes_in))
                with open(classes_in, 'r') as f:
                    classes = f.read()
                classes = classes.splitlines()
            elif type(classes_in) is list:  # list[int or str]
                classes = classes_in
            else:
                raise ValueError('Class labels must be a list of integers / strings, or a file path')

            if predicted_feature_name is not None:
                self.builder.set_class_labels(classes,
                                              predicted_feature_name=predicted_feature_name,
                                              prediction_blob=predicted_probabilities_output)
            else:
                self.builder.set_class_labels(classes)

        # Set pre-processing parameters
        self.builder.set_pre_processing_parameters(
            image_input_names=[input_features[0][0]],  # image_input_names,
            is_bgr=is_bgr,
            red_bias=red_bias,
            green_bias=green_bias,
            blue_bias=blue_bias,
            gray_bias=gray_bias,
            image_scale=image_scale)

        # Return the protobuf spec
        # model = _MLModel(self.builder.spec)
        print(self.builder.spec.description)
        return self.builder.spec, input_features, output_features

    @staticmethod
    def _get_padding(IR_node):
        auto_pad = IR_node.get_attr('auto_pad')
        if auto_pad is not None:
            if auto_pad == 'VALID':
                pass
            else:
                return 'SAME'

        pads = IR_node.get_attr('pads', [0, 0, 0, 0, 0, 0, 0, 0])
        return pads

    def _emit_merge(self, IR_node, func):
        """ Convert concat layer to coreml. """
        # Get input and output names
        input_names = [self.IR_graph.get_node(inp).real_name for inp in IR_node.in_edges]

        self.builder.add_elementwise(name=IR_node.name,
                                     input_names=input_names,
                                     output_name=IR_node.name,
                                     mode=func)
    def emit_Conv(self, IR_node):
        """ Convert convolution layer to coreml. """
        has_bias = IR_node.get_attr('use_bias', False)
        is_deconv = False  # TODO: Deconv

        # Get the weights.
        output_channels = IR_node.get_attr('kernel_shape')[-1]

        # Dimensions and weights
        if is_deconv:
            raise NotImplementedError()
            height, width, n_filters, channels = weightList[0].shape
            W = weightList[0].transpose([0, 1, 3, 2])
            output_shape = output_blob_shape[:-1]
        else:
            W = self.weights_dict[IR_node.name]['weights']
            height, width, channels, n_filters = W.shape
            output_shape = None
        b = self.weights_dict[IR_node.name]['bias'] if has_bias else None

        stride_height, stride_width = IR_node.get_attr('strides')[1], IR_node.get_attr('strides')[2]

        # Dilations
        dilations = IR_node.get_attr('dilations', [1, 1])
        if is_deconv and not dilations == [1, 1]:
            raise ValueError("Unsupported non-unity dilation for Deconvolution layer")

        groups = IR_node.get_attr('groups', 1)

        kernel_channels = channels

        padding = self._get_padding(IR_node)
        if isinstance(padding, list):
            border_mode = "valid"  # see protobuf
            padding_top, padding_left, padding_bottom, padding_right = padding[1], padding[2], padding[5], padding[6]
        else:
            border_mode = "same"
            padding_top, padding_left, padding_bottom, padding_right = 0, 0, 0, 0

        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name
        self.builder.add_convolution(name=IR_node.real_name,
                                     kernel_channels=kernel_channels,
                                     output_channels=output_channels,
                                     height=height,
                                     width=width,
                                     stride_height=stride_height,
                                     stride_width=stride_width,
                                     border_mode=border_mode,
                                     groups=groups,
                                     W=W,
                                     b=b,
                                     has_bias=has_bias,
                                     is_deconv=is_deconv,
                                     output_shape=output_shape,
                                     input_name=input_name,
                                     padding_top=padding_top,
                                     padding_left=padding_left,
                                     padding_bottom=padding_bottom,
                                     padding_right=padding_right,
                                     output_name=IR_node.real_name,
                                     dilation_factors=dilations)

    def emit_DepthwiseConv(self, IR_node):
        # depth-wise convolution
        input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name
        kernel_channels = 1
        is_deconv = False
        has_bias = IR_node.get_attr('use_bias', False)

        depth_multiplier = IR_node.get_attr('kernel_shape')[-1]
        W = self.weights_dict[IR_node.name]['weights']
        height, width, channels, n_filters = W.shape
        output_shape = None
        W = np.reshape(W, (height, width, 1, channels * depth_multiplier))
        b = self.weights_dict[IR_node.name]['bias'] if has_bias else None

        # Dilations
        dilations = IR_node.get_attr('dilations', [1, 1])

        padding = self._get_padding(IR_node)
        if isinstance(padding, list):
            border_mode = "valid"  # see protobuf
            padding_top, padding_left, padding_bottom, padding_right = padding[1], padding[2], padding[5], padding[6]
        else:
            border_mode = "same"
            padding_top, padding_left, padding_bottom, padding_right = 0, 0, 0, 0

        output_channels = W.shape[-1]
        groups = W.shape[-1]
        stride_height, stride_width = IR_node.get_attr('strides')[1], IR_node.get_attr('strides')[2]
        self.builder.add_convolution(name=IR_node.real_name,
                                     kernel_channels=kernel_channels,
                                     output_channels=output_channels,
                                     height=height,
                                     width=width,
                                     stride_height=stride_height,
                                     stride_width=stride_width,
                                     border_mode=border_mode,
                                     groups=groups,
                                     W=W,
                                     b=b,
                                     has_bias=has_bias,
                                     is_deconv=is_deconv,
                                     output_shape=output_shape,
                                     padding_top=padding_top,
                                     padding_left=padding_left,
                                     padding_bottom=padding_bottom,
                                     padding_right=padding_right,
                                     input_name=input_name,
                                     output_name=IR_node.real_name,
                                     dilation_factors=dilations)
""" # Get input and output names input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name # Pooling layer type pooling_type = IR_node.get_attr('pooling_type') if pooling_type == 'MAX': layer_type_str = 'MAX' elif pooling_type == 'AVG': layer_type_str = 'AVERAGE' else: raise TypeError("Pooling type %s not supported" % pooling_type) # if it's global, set the global flag global_pooling = IR_node.get_attr('global_pooling', False) dim = len(IR_node.get_attr('strides')) - 2 if global_pooling: if dim == 2: stride_height, stride_width = tuple(IR_node.get_attr('strides')[1:-1]) height, width = 1, 1 # TODO global pooling modification # Padding padding = self._get_padding(IR_node) if isinstance(padding, list): padding_type = "VALID" # see protobuf padding_top, padding_left, padding_bottom, padding_right = padding[1], padding[2], padding[5], padding[6] else: padding_type = "SAME" padding_top, padding_left, padding_bottom, padding_right = 0, 0, 0, 0 elif dim == 1: raise NotImplementedError() global_pooling = False _, width, channels = keras_layer.input_shape height = 1 stride_height, stride_width = height, width padding_type = 'VALID' else: raise NotImplementedError() else: height, width = tuple(IR_node.get_attr('kernel_shape')[1:-1]) stride_height, stride_width = tuple(IR_node.get_attr('strides')[1:-1]) # Padding padding = self._get_padding(IR_node) if isinstance(padding, list): padding_type = "VALID" # see protobuf padding_top, padding_left, padding_bottom, padding_right = padding[1], padding [2], padding[5], padding [6] else: padding_type = "SAME" padding_top, padding_left, padding_bottom, padding_right = 0, 0, 0, 0 self.builder.add_pooling(name=IR_node.name, height=height, width=width, stride_height=stride_height, stride_width=stride_width, layer_type=layer_type_str, padding_type=padding_type, padding_top= padding_top, padding_left= padding_left, padding_bottom= padding_bottom, padding_right= padding_right, input_name=input_name, output_name=IR_node.name, exclude_pad_area=True, is_global=global_pooling) def emit_Scale(self, IR_node): # Get input and output names input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name weights = IR_node.get_attr('scale', False) weights = self.weights_dict[IR_node.name]['scale'] has_bias = IR_node.get_attr('use_bias', False) if has_bias: bias = self.weights_dict[IR_node.name]['bias'] shape_scale = self.weights_dict[IR_node.name]['shapeScale'] if has_bias: shape_bias = self.weights_dict[IR_node.name]['shapeBias'] self.builder.add_scale(name = IR_node.real_name, W = weights, b = bias, has_bias = has_bias, input_name = input_name, output_name =IR_node.name, shape_scale= [shape_scale], shape_bias= [shape_bias]) def emit_UNKNOWN(self, IR_node): print(IR_node.name) def emit_Crop(self, IR_node): input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name output_name=IR_node.real_name is_1d = False border = IR_node.get_attr('border') if is_1d: raise ValueError("Unrecognized padding option: %s" % (str(border))) else: if type(border) is int: top = left = bottom = right = border elif type(border) is list: top, left = border[1], border [0] bottom, right = border[2], border [3] else: raise ValueError("Unrecognized padding option: %s" % (str(border))) # Now add the layer self.builder.add_crop(name = IR_node.name, left = left, right=right, top=top, bottom=bottom, offset = [0,0], input_names = [input_name], output_name=output_name ) def emit_DataInput(self, IR_node): """ Layers that can be skipped. 
""" return def emit_Dropout(self, IR_node): """ Layers that can be skipped (because they are train time only. """ IR_node.real_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name def emit_FullyConnected(self, IR_node): """ Convert a dense layer to coreml. """ # Get input and output names input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name output_name = IR_node.out_edges[0] has_bias = IR_node.get_attr('use_bias') # Get the weights from keras W = self.weights_dict[IR_node.name]['weights'].T Wb = self.weights_dict[IR_node.name]['bias'].T if has_bias else None output_channels, input_channels = W.shape self.builder.add_inner_product(name=IR_node.name, W=W, b=Wb, input_channels=input_channels, output_channels=output_channels, has_bias=has_bias, input_name=input_name, output_name=IR_node.name) def emit_Flatten(self, IR_node): """ Convert a flatten layer from keras to coreml. """ # Get input and output names input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name output_name = IR_node.out_edges[0] """ # blob_order == 0 if the input blob needs not be rearranged # blob_order == 1 if the input blob needs to be rearranged blob_order = 0 # using keras_layer.input.shape have a "?" (Dimension[None] at the front), # making a 3D tensor with unknown batch size 4D if len(keras_layer.input.shape) == 4: blob_order = 1 """ self.builder.add_flatten(name=IR_node.name, mode=1, input_name=input_name, output_name=IR_node.name) def emit_Reshape(self, IR_node): def ShapetrToTuple(string, batch_none = False): if batch_none == True: ls = [int(item) for item in string.split(', ')] ls.insert(0,None) return tuple(ls) else: ls = [int(item) for item in string.split(', ')] return tuple(ls) last_node = self.IR_graph.get_node(IR_node.in_edges[0]).layer input_shape_dims = last_node.attr["_output_shapes"].list.shape target_shape_dims = IR_node.IR_layer.attr["_output_shapes"].list.shape input_shape = ShapetrToTuple(IRGraph.shapeToStr(input_shape_dims[0]),True) target_shape = ShapetrToTuple(IRGraph.shapeToStr(target_shape_dims[0])) def get_coreml_target_shape(target_shape): if len(target_shape) == 1: #(D,) coreml_shape = (1,target_shape[0],1,1) elif len(target_shape) == 2: #(S,D) coreml_shape = target_shape + (1,1) elif len(target_shape) == 3: #(H,W,C) coreml_shape = (1, target_shape[2], target_shape[0], target_shape[1]) else: coreml_shape = None return coreml_shape def get_mode(input_shape, target_shape): in_shape = input_shape[1:] if len(in_shape) == 3 or len(target_shape) == 3: return 1 else: return 0 input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name new_shape = get_coreml_target_shape(target_shape) mode = get_mode(input_shape, target_shape) self.builder.add_reshape( name=IR_node.real_name, input_name=input_name, output_name=IR_node.real_name, target_shape=new_shape, mode=mode) def emit_Tanh(self, IR_node): assert False code = "{:<15} = Activation(name = '{}', activation = tanh)({})".format( IR_node.replace_scope(IR_node.name), IR_node.name, IR_node.replace_scope(IR_node.in_edges[0])) return code def _emit_activation(self, IR_node, act, params=None): # Get input and output names input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name output_name = IR_node.real_name if not isinstance(params, list): params = [params] self.builder.add_activation(name=IR_node.real_name, non_linearity=act, input_name=input_name, output_name=output_name, params=params) # activation emit def emit_Relu(self, IR_node): self._emit_activation(IR_node, 'RELU') def emit_PRelu(self, IR_node): 
    def emit_PRelu(self, IR_node):
        self._emit_activation(IR_node, 'PRELU', IR_node.get_attr('gamma', 0))

    def emit_LeakyRelu(self, IR_node):
        self._emit_activation(IR_node, 'LEAKYRELU', IR_node.get_attr('alpha', 0))

    def emit_Elu(self, IR_node):
        self._emit_activation(IR_node, 'ELU', IR_node.get_attr('alpha', 0))

    def emit_ThresholdedRelu(self, IR_node):
        self._emit_activation(IR_node, 'THRESHOLDEDRELU', IR_node.get_attr('alpha', 0))

    def emit_ScaledTanh(self, IR_node):
        self._emit_activation(IR_node, 'SCALED_TANH',
                              [IR_node.get_attr('alpha', 0), IR_node.get_attr('beta', 0)])

    def emit_linear(self, IR_node):
        self._emit_activation(IR_node, 'LINEAR',
                              [IR_node.get_attr('alpha', 0), IR_node.get_attr('beta', 0)])

    def emit_SigmoidHard(self, IR_node):
        self._emit_activation(IR_node, 'SIGMOID_HARD',
                              [IR_node.get_attr('alpha', 0), IR_node.get_attr('beta', 0)])

    def emit_ParametricSoftplus(self, IR_node):
        self._emit_activation(IR_node, 'PARAMETRICSOFTPLUS',
                              [IR_node.get_attr('alpha', 0), IR_node.get_attr('beta', 0)])

    def emit_Softmax(self, IR_node):
        # Get input and output names
        input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name
        output_name = IR_node.out_edges[0]
        self.builder.add_softmax(name=IR_node.name,
                                 input_name=input_name,
                                 output_name=IR_node.name)

    def emit_Sigmoid(self, IR_node):
        # Vestigial Keras-style code path; not supported by this emitter.
        assert False
        code = "{:<15} = Activation(name = '{}', activation = 'sigmoid')({})".format(
            IR_node.replace_scope(IR_node.name),
            IR_node.name,
            IR_node.replace_scope(IR_node.in_edges[0]))
        return code

    def emit_Relu6(self, IR_node):
        # relu6(x) = min(relu(x), 6) is assembled from the primitives CoreML
        # provides: relu, then negate, clip from below at -6, and negate back.
        layer = IR_node.real_name
        input_name, output_name = (IR_node.IR_layer.input[0], IR_node.IR_layer.name)

        relu_output_name = output_name + '_relu'
        self.builder.add_activation(layer, 'RELU', input_name, relu_output_name)
        # negate it
        neg_output_name = relu_output_name + '_neg'
        self.builder.add_activation(layer + '__neg__', 'LINEAR',
                                    relu_output_name, neg_output_name, [-1.0, 0])
        # apply threshold
        clip_output_name = relu_output_name + '_clip'
        self.builder.add_unary(layer + '__clip__', neg_output_name,
                               clip_output_name, 'threshold', alpha=-6.0)
        # negate it back
        self.builder.add_activation(layer + '_neg2', 'LINEAR',
                                    clip_output_name, output_name, [-1.0, 0])

    def emit_Gather(self, IR_node):
        raise NotImplementedError()
        # Unreachable legacy code: 'builder' and 'input_name' are undefined here.
        W = self.weights_dict[IR_node.name]['weights']
        if W.ndim == 2:
            vocab_size = W.shape[0]
            output_channels = W.shape[1]
            builder.add_embedding(name=IR_node.real_name,
                                  W=W,
                                  b=None,
                                  input_dim=vocab_size,
                                  output_channels=output_channels,
                                  has_bias=False,
                                  input_name=input_name,
                                  output_name=IR_node.real_name)
        else:
            raise NotImplementedError()

    def emit_RNNs(self, IR_node, func):
        # Vestigial Keras-style code path; not supported by this emitter.
        assert False
        if "dropout" in IR_node.IR_layer.attr:
            dropout_str = ", dropout = {}, recurrent_dropout = {}".format(
                IR_node.IR_layer.attr['dropout'].f,
                IR_node.IR_layer.attr['recurrent_dropout'].f)
        else:
            dropout_str = ""
        code = "{:<15} = {}(units = {}, use_bias = {} {})({})".format(
            IR_node.name,
            func,
            IR_node.IR_layer.attr['units'].i,
            IR_node.IR_layer.attr['use_bias'].b,
            dropout_str,
            IR_node.in_edges[0])
        return code

    def emit_LSTM(self, IR_node):
        return self.emit_RNNs(IR_node, "LSTM")

    def emit_GRU(self, IR_node):
        return self.emit_RNNs(IR_node, "GRU")

    def emit_Add(self, IR_node):
        self._emit_merge(IR_node, 'ADD')

    def emit_Concat(self, IR_node):
        self._emit_merge(IR_node, "CONCAT")
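    # Hedged note for emit_BatchNorm below: CoreML's batchnorm computes
    #   y = gamma * (x - mean) / sqrt(variance + eps) + beta,
    # so the emitter folds the stored statistics into the scale and bias,
    #   gamma1 = gamma / sqrt(var + eps)
    #   beta1  = beta - gamma * mean / sqrt(var + eps)
    # and then hands CoreML identity statistics (mean = 0, variance ~= 1),
    # which leaves y = gamma1 * x + beta1 numerically unchanged.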
""" # Get input and output names input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name axis = IR_node.get_attr('axis', -1) nb_channels = IR_node.get_attr('_output_shapes')[0].dim[axis].size # Set parameters # Parameter arrangement in Keras: gamma, beta, mean, variance weights = self.weights_dict[IR_node.name] mean = weights['mean'] std = weights['var'] gamma = weights.get('scale', np.ones(mean.shape)) beta = weights.get('bias', np.zeros(mean.shape)) # compute adjusted parameters # Reference: parameter transformation https://github.com/apple/coremltools/issues/153 variance = std * std f = 1.0 / np.sqrt(std + IR_node.get_attr('epsilon')) gamma1 = gamma*f beta1 = beta - gamma*mean*f mean[:] = 0.0 #mean variance[:] = 1.0 - .00001 #stddev self.builder.add_batchnorm( name=IR_node.real_name, channels = nb_channels, gamma = gamma1, beta = beta1, mean = mean, variance = variance, input_name = input_name, output_name=IR_node.real_name) def emit_Pad(self, IR_node): input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name output_name=IR_node.real_name is_1d = False padding = IR_node.get_attr('pads') if is_1d: raise ValueError("Unrecognized padding option: %s" % (str(padding))) else: if type(padding) is int: top = left = bottom = right = padding elif type(padding) is list: top, left = padding[1], padding [2] bottom, right = padding[5], padding [6] else: raise ValueError("Unrecognized padding option: %s" % (str(padding))) # padding type TODO # Type of the padding. Can be one of 'constant', 'reflection' or 'replication padding_type = IR_node.get_attr('mode', 'CONSTANT') if padding_type == 'CONSTANT': padding_type = 'constant' elif padding_type == 'REFLECT': padding_type = 'reflection' elif padding_type == 'SYMMETRIC': padding_type = 'replication' # Now add the layer self.builder.add_padding(name = IR_node.name, left = left, right=right, top=top, bottom=bottom, value = 0, input_name = input_name, output_name=output_name ) def emit_Squeeze(self, IR_node): self.emit_Flatten(IR_node) def emit_LRN(self, IR_node): input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name output_name = IR_node.real_name C = IR_node.get_attr('size') alpha = IR_node.get_attr('alpha') beta = IR_node.get_attr('beta') k = IR_node.get_attr('k') depth_radius = int(IR_node.get_attr('size')) self.builder.add_lrn(output_name, input_name, output_name, alpha=alpha * C, beta=beta, local_size=depth_radius, k=k) def emit_SeparableConv(self, IR_node): input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name output_name = IR_node.real_name strides = IR_node.get_attr('strides') stride_height, stride_width = (strides[1], strides[2]) # Get the weights W0 = self.weights_dict[IR_node.name]['depthwise_filter'] W1 = self.weights_dict[IR_node.name]['pointwise_filter'] padding = IR_node.get_attr('auto_pad').split('_')[0].lower() has_bias = IR_node.get_attr('use_bias') b = self.weights_dict[IR_node.name]['bias'] if has_bias else None output_blob_shape = IR_node.get_attr('_output_shapes') shape = shape_to_list(output_blob_shape[0]) output_channels = shape[-1] height, width, input_channels, depth_mult = W0.shape W0 = np.reshape(W0, (height, width, 1, input_channels * depth_mult)) intermediate_name = input_name + '_intermin_' self.builder.add_convolution(name = IR_node.name + '_step_1', kernel_channels = 1, output_channels = input_channels * depth_mult, height = height, width = width, stride_height = stride_height, stride_width = stride_width, border_mode = padding, groups = input_channels, W = W0, b = None, 
        # Depthwise step: grouped convolution with one filter per input channel
        self.builder.add_convolution(name=IR_node.name + '_step_1',
                                     kernel_channels=1,
                                     output_channels=input_channels * depth_mult,
                                     height=height,
                                     width=width,
                                     stride_height=stride_height,
                                     stride_width=stride_width,
                                     border_mode=padding,
                                     groups=input_channels,
                                     W=W0,
                                     b=None,
                                     has_bias=False,
                                     is_deconv=False,
                                     output_shape=None,
                                     input_name=input_name,
                                     output_name=intermediate_name,
                                     dilation_factors=[1, 1])

        # Pointwise step: 1x1 convolution mixing the expanded channels
        self.builder.add_convolution(name=IR_node.name + '_step_2',
                                     kernel_channels=input_channels * depth_mult,
                                     output_channels=output_channels,
                                     height=1,
                                     width=1,
                                     stride_height=1,
                                     stride_width=1,
                                     border_mode=padding,
                                     groups=1,
                                     W=W1,
                                     b=b,
                                     has_bias=has_bias,
                                     is_deconv=False,
                                     output_shape=None,
                                     input_name=intermediate_name,
                                     output_name=output_name,
                                     dilation_factors=[1, 1])
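# Minimal numpy sanity check (illustrative only; not used by the emitters) for
# the decomposition emitted by emit_Relu6 above.  It assumes CoreML's
# 'threshold' unary computes max(x, alpha), so relu6(x) = -max(-relu(x), -6).
def _check_relu6_decomposition():
    x = np.linspace(-3.0, 9.0, 13)
    step1 = np.maximum(x, 0.0)       # RELU
    step2 = -1.0 * step1             # LINEAR activation, alpha = -1, beta = 0
    step3 = np.maximum(step2, -6.0)  # 'threshold' unary with alpha = -6
    step4 = -1.0 * step3             # negate back
    assert np.allclose(step4, np.clip(x, 0.0, 6.0))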
class PytorchEmitter(Emitter):

    dtype_map = {
        graph_pb2.DT_FLOAT16 : "float16",
        graph_pb2.DT_FLOAT32 : "float32",
        graph_pb2.DT_FLOAT64 : "float64",
        graph_pb2.DT_INT16 : "int16",
        graph_pb2.DT_INT32 : "int32",
        graph_pb2.DT_INT64 : "int64",
        graph_pb2.DT_UINT8 : "uint8",
        graph_pb2.DT_UINT16 : "uint16"
    }

    # Base Functions
    def __init__(self, model):
        super(PytorchEmitter, self).__init__()
        if isinstance(model, _string_types):
            network_path = model
        else:
            network_path = model[0]
            weight_path = model[1]

        self.init_code = str()
        self.IR_graph = IRGraph(network_path)
        self.IR_graph.build()
        self._load_weights(weight_path)

    def run(self, dstNetworkPath, dstWeightPath=None, phase='test'):
        super(PytorchEmitter, self).run(dstNetworkPath, dstWeightPath, phase)
        if self.weight_loaded:
            self.save_weights(self.weights_dict, dstWeightPath)

    def add_init(self, indent, codes):
        if isinstance(codes, _string_types):
            codes = [codes]
        for code in codes:
            self.init_code += ("    " * indent) + code + '\n'

    @property
    def header_code(self):
        return """import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

__weights_dict = dict()

def load_weights(weight_file):
    if weight_file == None:
        return

    try:
        weights_dict = np.load(weight_file).item()
    except:
        weights_dict = np.load(weight_file, encoding='bytes').item()

    return weights_dict

class KitModel(nn.Module):
"""

    def gen_code(self, phase):
        self.add_init(1, """
    def __init__(self, weight_file):
        super(KitModel, self).__init__()
        global __weights_dict
        __weights_dict = load_weights(weight_file)
""")

        self.add_body(1, "def forward(self, x):")

        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type

            if hasattr(self, "emit_" + node_type):
                func = getattr(self, "emit_" + node_type)
                line = func(current_node)
            else:
                print("Pytorch Emitter does not support operator [%s]." % (node_type))
                self.emit_UNKNOWN(current_node)

        self.add_body(2, "return {}".format(
            ','.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers])))

        self.add_body(0, "")
        for i in self.used_layers:
            func = getattr(self, "_layer_" + i)
            func()

        return self.header_code + '\n' + self.init_code + '\n' + self.body_code

    def _defuse_padding(self, IR_node, extra_str=""):
        input_node = self.parent_variable_name(IR_node)
        if IR_node.get_attr('auto_pad') == 'VALID':
            return input_node

        if is_valid_padding(IR_node.get_attr("pads")):
            return input_node

        padding = self._convert_padding(IR_node)
        input_node = IR_node.variable_name + '_pad'
        self.add_body(2, "{:<15} = F.pad({}, {}{})".format(
            input_node,
            self.parent_variable_name(IR_node),
            padding,
            extra_str))

        return input_node

    def emit_Conv(self, IR_node):
        self.used_layers.add(IR_node.type)

        dim = len(IR_node.get_attr('strides')) - 2

        in_channels = IR_node.get_attr('kernel_shape')[-2]
        filter = IR_node.get_attr('kernel_shape')[-1]
        kernel = IR_node.get_attr('kernel_shape')[:-2]
        strides = IR_node.get_attr('strides')[1:-1]

        self.add_init(2, "self.{} = self.__conv({}, name='{}', in_channels={}, out_channels={}, kernel_size={}, stride={}, groups={}, bias={})".format(
            IR_node.variable_name,
            dim,
            IR_node.name,
            in_channels,
            filter,
            tuple(kernel),
            tuple(strides),
            # padding,
            IR_node.get_attr('group', 1),
            IR_node.get_attr('use_bias')))

        input_node = self._defuse_padding(IR_node)
        self.add_body(2, "{:<15} = self.{}({})".format(
            IR_node.variable_name,
            IR_node.variable_name,
            input_node))

        if self.weight_loaded:
            # IR stores filters as (spatial..., in, out); PyTorch expects
            # (out, in, spatial...), hence the transpose.
            self.weights_dict[IR_node.name]['weights'] = np.transpose(
                self.weights_dict[IR_node.name]['weights'],
                [dim + 1, dim] + list(range(0, dim)))

    def emit_Pool(self, IR_node):
        dim = len(IR_node.get_attr('strides')) - 2

        if IR_node.get_attr('pooling_type') == "MAX":
            pool_name = "max_pool{}d".format(dim)
            # pad with -inf so padded cells never win the max
            exstr = ", value=float('-Inf')"
        elif IR_node.get_attr('pooling_type') == "AVG":
            pool_name = "avg_pool{}d".format(dim)
            exstr = ""
        else:
            assert False

        if IR_node.layer.attr['global_pooling'].b:
            self.add_body(2, "{:<15} = F.{}(input = {}, kernel_size = {}.size()[2:])".format(
                IR_node.variable_name,
                pool_name,
                self.parent_variable_name(IR_node),
                self.parent_variable_name(IR_node)))
        else:
            for e in IR_node.get_attr('dilations', []):
                assert e == 1

            pool_size = IR_node.get_attr('kernel_shape')[1:-1]
            strides = IR_node.get_attr('strides')[1:-1]

            input_node = self._defuse_padding(IR_node, exstr)
            self.add_body(2, "{:<15} = F.{}({}, kernel_size={}, stride={})".format(
                IR_node.variable_name,
                pool_name,
                input_node,
                tuple(pool_size),
                tuple(strides)))

    def emit_UNKNOWN(self, IR_node):
        print(IR_node.name)

    def emit_DataInput(self, IR_node):
        # Ignore it in Pytorch
        IR_node.real_name = 'x'

    def emit_Dropout(self, IR_node):
        self.add_body(2, "{:<15} = F.dropout(input = {}, p = {}, training = self.training, inplace = True)".format(
            IR_node.variable_name,
            self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name,
            IR_node.layer.attr["keep_prob"].f))
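    # Hedged note for check_if_need_transpose below: the IR flattens
    # channels-last (NHWC) activations, while the generated PyTorch model
    # flattens channels-first (NCHW) tensors.  When the node feeding a dense
    # layer is more than 2-D, the dense weights are therefore un-flattened
    # back to the producer's shape, permuted so the channel axis leads, and
    # flattened again, so both flatten orders feed the same matmul.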
    def check_if_need_transpose(self, IR_node):
        parent = self.IR_graph.get_parent(IR_node.name, [0])
        while parent.type == 'Flatten':
            parent = self.IR_graph.get_parent(parent.name, [0])
        dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim)
        if dim > 2:
            original_dims = self.weights_dict[IR_node.name]['weights'].shape
            dims = [i.size for i in parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]] + [-1]
            self.weights_dict[IR_node.name]['weights'] = np.reshape(
                self.weights_dict[IR_node.name]['weights'], dims)
            self.weights_dict[IR_node.name]['weights'] = np.transpose(
                self.weights_dict[IR_node.name]['weights'],
                [dim - 2] + list(range(0, dim - 2)) + [dim - 1])
            self.weights_dict[IR_node.name]['weights'] = np.reshape(
                self.weights_dict[IR_node.name]['weights'], original_dims)

    def emit_FullyConnected(self, IR_node):
        self.used_layers.add(IR_node.type)

        in_features = 1
        for i in self.IR_graph.get_parent(IR_node.name, [0]).layer.attr['_output_shapes'].list.shape[0].dim[1:]:
            in_features *= i.size

        self.add_init(2, "self.{} = self.__dense(name = '{}', in_features = {}, out_features = {}, bias = {})".format(
            IR_node.variable_name,
            IR_node.name,
            in_features,
            IR_node.layer.attr["units"].i,
            IR_node.IR_layer.attr["use_bias"].b))

        input_node = self.parent_variable_name(IR_node)
        if len(self.IR_graph.get_parent(IR_node.name, [0]).get_attr('_output_shapes')[0].dim) > 2:
            input_node = "{}.view({}.size(0), -1)".format(input_node, input_node)
        self.add_body(2, "{:<15} = self.{}({})".format(
            IR_node.variable_name,
            IR_node.variable_name,
            input_node))

        if self.weight_loaded:
            self.check_if_need_transpose(IR_node)
            # IR stores dense weights as (in, out); nn.Linear expects (out, in).
            self.weights_dict[IR_node.name]['weights'] = np.transpose(
                self.weights_dict[IR_node.name]['weights'], (1, 0))

    def emit_Flatten(self, IR_node):
        parent = self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name
        self.add_body(2, "{:<15} = {}.view({}.size(0), -1)".format(
            IR_node.variable_name,
            parent,
            parent))

    def emit_Reshape(self, IR_node):
        raise NotImplementedError
        # Unreachable legacy Keras-style code.
        shape_str = IRGraph.shapeToStr(IR_node.IR_layer.attr["shape"].shape, True)
        self.add_body(1, "{:<15} = Reshape(name = \"{}\", target_shape = ({}))({})".format(
            IR_node.variable_name,
            IR_node.name,
            shape_str,
            self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name))

    def emit_Tanh(self, IR_node):
        raise NotImplementedError()
        # Unreachable legacy Keras-style code.
        code = "{:<15} = Activation(name = '{}', activation = 'tanh')({})".format(
            IR_node.replace_scope(IR_node.name),
            IR_node.name,
            IR_node.replace_scope(IR_node.in_edges[0]))
        return code

    def emit_Relu(self, IR_node):
        self.add_body(2, "{:<15} = F.relu({})".format(
            IR_node.variable_name,
            self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name))

    def emit_Softmax(self, IR_node):
        self.add_body(2, "{:<15} = F.softmax({})".format(
            IR_node.variable_name,
            self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name))

    def emit_Sigmoid(self, IR_node):
        # Vestigial Keras-style code: the returned string is never added to
        # the body, so this operator currently emits nothing.
        code = "{:<15} = Activation(name = '{}', activation = 'sigmoid')({})".format(
            IR_node.replace_scope(IR_node.name),
            IR_node.name,
            IR_node.replace_scope(IR_node.in_edges[0]))
        return code

    def emit_Embedding(self, IR_node):
        raise NotImplementedError()
        ret = "{:<15} = Embedding(input_dim = {}, output_dim = {}, mask_zero = {})({})".format(
            IR_node.name,
            IR_node.IR_layer.attr['input_dim'].i,
            IR_node.IR_layer.attr['output_dim'].i,
            IR_node.IR_layer.attr['mask_zero'].b,
            IR_node.in_edges[0])
        return ret

    def emit_RNNs(self, IR_node, func):
        raise NotImplementedError()
        # for Keras
        if "dropout" in IR_node.IR_layer.attr:
            dropout_str = ", dropout = {}, recurrent_dropout = {}".format(
                IR_node.IR_layer.attr['dropout'].f,
                IR_node.IR_layer.attr['recurrent_dropout'].f)
        else:
            dropout_str = ""
        code = "{:<15} = {}(units = {}, use_bias = {} {})({})".format(
            IR_node.name,
            func,
            IR_node.IR_layer.attr['units'].i,
            IR_node.IR_layer.attr['use_bias'].b,
            dropout_str,
            IR_node.in_edges[0])
        return code

    def emit_LSTM(self, IR_node):
        return self.emit_RNNs(IR_node, "LSTM")

    def emit_GRU(self, IR_node):
        return self.emit_RNNs(IR_node, "GRU")
    def emit_Add(self, IR_node):
        self.add_body(2, "{:<15} = {}".format(
            IR_node.variable_name,
            ' + '.join('%s' % self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges)))

    @staticmethod
    def _convert_axis(IR_node, axis):
        ndim = len(IR_node.get_attr('_output_shapes')[0].dim)
        if axis == 0:
            return 0
        elif axis == ndim - 1:
            return 1
        else:
            return axis + 1

    def emit_Concat(self, IR_node):
        axis = self._convert_axis(IR_node, IR_node.get_attr('axis'))
        self.add_body(2, "{:<15} = torch.cat(({}), {})".format(
            IR_node.variable_name,
            ', '.join(self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges),
            axis))

    def emit_BatchNorm(self, IR_node):
        self.used_layers.add(IR_node.type)
        dim = len(IR_node.layer.attr['_output_shapes'].list.shape[0].dim) - 2

        self.add_init(2, "self.{} = self.__batch_normalization({}, '{}', num_features={}, eps={}, momentum={})".format(
            IR_node.variable_name,
            dim,
            IR_node.name,
            IR_node.layer.attr['_output_shapes'].list.shape[0].dim[-1].size,
            IR_node.layer.attr['epsilon'].f,
            IR_node.layer.attr['momentum'].f))

        self.add_body(2, "{:<15} = self.{}({})".format(
            IR_node.variable_name,
            IR_node.variable_name,
            self.parent_variable_name(IR_node)))

    def emit_Squeeze(self, IR_node):
        self.add_body(2, "{:<15} = torch.squeeze({})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node)))

    @staticmethod
    def _convert_padding(IR_node):
        padding = IR_node.get_attr('pads')
        padding = convert_onnx_pad_to_tf(padding)[1:-1]
        # F.pad expects pairs for the trailing dimension first, so reverse the
        # per-dimension order before flattening.
        new_padding = []
        for pad in padding:
            new_padding.insert(0, pad)
        return tuple(np.array(new_padding).reshape(-1).tolist())

    def emit_Pad(self, IR_node):
        if IR_node.get_attr('mode') == 'constant':
            mode = "mode = 'constant', value = {}".format(0)
        elif IR_node.get_attr('mode') == 'reflect':
            mode = "mode = 'reflect'"
        elif IR_node.get_attr('mode') == 'SYMMETRIC':
            mode = "mode = 'replicate'"
        else:
            assert False

        padding = self._convert_padding(IR_node)
        self.add_body(2, "{:<15} = F.pad({}, {}, {})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            padding,
            mode))

    def emit_ReduceMean(self, IR_node):
        axes = [self._convert_axis(IR_node, x) for x in IR_node.get_attr('axes')]
        input_node = self.parent_variable_name(IR_node)
        for axis in sorted(axes, reverse=True):
            self.add_body(2, "{:<15} = torch.mean({}, {}, {})".format(
                IR_node.variable_name,
                input_node,
                axis,
                IR_node.get_attr("keepdims")))
            input_node = IR_node.variable_name

    def emit_LRN(self, IR_node):
        self.used_layers.add(IR_node.type)
        # The IR 'size' is the half-window including the center; the LRN
        # module expects the full window, hence size * 2 - 1.
        self.add_body(2, "{:<15} = self.LRN(size = {}, alpha = {}, beta = {})({})".format(
            IR_node.variable_name,
            IR_node.layer.attr['size'].i * 2 - 1,
            IR_node.layer.attr['alpha'].f,
            IR_node.layer.attr['beta'].f,
            self.parent_variable_name(IR_node)))

    # The _layer_* helpers below run once per layer type in self.used_layers
    # (see gen_code) and append static factory methods to the generated
    # module; each factory builds the nn module and copies its weights in
    # from __weights_dict.
    def _layer_Conv(self):
        self.add_body(0, """
    @staticmethod
    def __conv(dim, name, **kwargs):
        if dim == 1:
            layer = nn.Conv1d(**kwargs)
        elif dim == 2:
            layer = nn.Conv2d(**kwargs)
        elif dim == 3:
            layer = nn.Conv3d(**kwargs)
        else:
            raise NotImplementedError()

        layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights']))
        if 'bias' in __weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias']))
        return layer""")

    def _layer_FullyConnected(self):
        self.add_body(0, """
    @staticmethod
    def __dense(name, **kwargs):
        layer = nn.Linear(**kwargs)
        layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights']))
        if 'bias' in __weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias']))
        return layer""")
    def _layer_BatchNorm(self):
        self.add_body(0, """
    @staticmethod
    def __batch_normalization(dim, name, **kwargs):
        if dim == 1:
            layer = nn.BatchNorm1d(**kwargs)
        elif dim == 2:
            layer = nn.BatchNorm2d(**kwargs)
        elif dim == 3:
            layer = nn.BatchNorm3d(**kwargs)
        else:
            raise NotImplementedError()

        if 'scale' in __weights_dict[name]:
            layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['scale']))
        else:
            layer.weight.data.fill_(1)

        if 'bias' in __weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias']))
        else:
            layer.bias.data.fill_(0)

        layer.state_dict()['running_mean'].copy_(torch.from_numpy(__weights_dict[name]['mean']))
        layer.state_dict()['running_var'].copy_(torch.from_numpy(__weights_dict[name]['var']))
        return layer""")

    def _layer_LRN(self):
        self.add_body(0, """
    class LRN(nn.Module):
        def __init__(self, size=1, alpha=1.0, beta=0.75, ACROSS_CHANNELS=False):
            super(KitModel.LRN, self).__init__()
            self.ACROSS_CHANNELS = ACROSS_CHANNELS
            if self.ACROSS_CHANNELS:
                self.average = nn.AvgPool3d(kernel_size=(size, 1, 1),
                                            stride=1,
                                            padding=(int((size - 1.0) / 2), 0, 0))
            else:
                self.average = nn.AvgPool2d(kernel_size=size,
                                            stride=1,
                                            padding=int((size - 1.0) / 2))
            self.alpha = alpha
            self.beta = beta

        def forward(self, x):
            if self.ACROSS_CHANNELS:
                div = x.pow(2).unsqueeze(1)
                div = self.average(div).squeeze(1)
                div = div.mul(self.alpha).add(1.0).pow(self.beta)
            else:
                div = x.pow(2)
                div = self.average(div)
                div = div.mul(self.alpha).add(1.0).pow(self.beta)
            x = x.div(div)
            return x""")
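# Hedged usage sketch for the emitter above (illustrative only; the file
# names are placeholders, while the tuple argument and run() signature
# mirror PytorchEmitter.__init__ and PytorchEmitter.run):
#
#     emitter = PytorchEmitter(("ir_graph.pb", "ir_weights.npy"))
#     emitter.run("kit_model.py", "kit_model.npy", phase='test')
#     # kit_model.py then exposes KitModel(weight_file) as an nn.Module.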
class MMGraph:
    """MMDNN Graph Class

    Args:
        graphfile (str): MMDNN graphfile
        weightfile (str, optional): MMDNN weightfile; loaded but not
            otherwise used for the conversion

    Attributes:
        IR_graph: MMDNN Intermediate Representation graph
    """

    def __init__(self, graphfile, weightfile=None):
        print("Initializing network...")
        self.graphfile = graphfile
        self.weightfile = weightfile
        self.IR_graph = IRGraph(self.graphfile)
        self.IR_graph.build()
        self.IR_graph.model = 1
        if self.weightfile is None:
            logging.info("No weights file loaded\n")
        else:
            logging.info("Load weights...\n")
            try:
                self.weights_dict = np.load(self.weightfile, allow_pickle=True).item()
            except:
                self.weights_dict = np.load(self.weightfile, encoding='bytes', allow_pickle=True).item()
        self.analyze_net()
        print("Network analyzed successfully...\n")

    def analyze_net(self):
        """Walk through the net and compute attributes"""
        # TODO: look for a DataInput layer and add one if necessary
        """
        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            node_type = current_node.type
            # find input layers
            if not current_node.in_edges and not (current_node.type in ['DataInput']):
                print(current_node.type)
        """
        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            self.fix_shape_names(current_node)

    def fix_shape_names(self, layer):
        """Fix shape names and attach input/output shape attributes

        Arguments:
            layer (obj): layer to fix names and shapes for
        """
        if not (layer.type in ['yolo']):
            output_shape = layer.get_attr('_output_shape')
            # For tensorflow models it is called _output_shapes
            if output_shape is None:
                output_shape = layer.get_attr('_output_shapes')
                output_shape = shape_to_list(output_shape[0])
            layer.set_attrs({'output_shape': output_shape})
        if not (layer.type in ['DataInput']):
            if layer.in_edges:
                innode = self.IR_graph.get_node(layer.in_edges[0])
                input_shape = innode.get_attr('_output_shape')
                # For tensorflow models it is called _output_shapes
                if input_shape is None:
                    input_shape = innode.get_attr('_output_shapes')
                    input_shape = shape_to_list(input_shape[0])
                layer.set_attrs({'input_shape': input_shape})

    def fix_depthwise(self, layer):
        """Detect depthwise convolutions

        Arguments:
            layer (obj): layer to classify

        Returns 'DepthwiseConv' for grouped Conv layers whose group count
        equals the number of output channels, otherwise the layer type.
        """
        if layer.type in ['Conv']:
            output_shape = layer.get_attr('_output_shape')
            # For tensorflow models it is called _output_shapes
            if output_shape is None:
                output_shape = layer.get_attr('_output_shapes')
                output_shape = shape_to_list(output_shape[0])
            group = layer.get_attr('group')
            if not (group is None):
                logging.debug(layer.name)
                logging.debug(group)
                logging.debug(output_shape)
                if group == output_shape[3]:
                    return 'DepthwiseConv'
        return layer.type

    def convert_to_annette(self, name):
        """Convert MMDNN to Annette graph

        Arguments:
            name (str): network name

        Return:
            annette_graph (obj)
        """
        annette_graph = AnnetteGraph(name)
        # TODO
        for layer in self.IR_graph.topological_sort:
            current_node = self.IR_graph.get_node(layer)
            logging.debug(current_node.type)
            node_type = self.fix_depthwise(current_node)
            layer_dict = {'type': node_type}
            layer_name = current_node.name
            logging.debug(current_node.in_edges)
            logging.debug(current_node.out_edges)
            layer_dict['parents'] = current_node.in_edges
            layer_dict['children'] = current_node.out_edges
            attributes = ['output_shape', 'input_shape', 'kernel_shape',
                          'strides', 'pads', 'pooling_type', 'global_pooling',
                          'dilations', 'axis']
            for attr in attributes:
                tmp = current_node.get_attr(attr)
                if tmp is not None:
                    layer_dict[attr] = tmp
                    if layer_dict['type'] in ['DepthwiseConv'] and attr == 'kernel_shape':
                        # report a depth multiplier of 1 for depthwise kernels
                        tmp[3] = 1
                        layer_dict[attr] = tmp

            annette_graph.add_layer(layer_name, layer_dict)

        return annette_graph
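# Hedged usage sketch (illustrative only; the file and network names below
# are placeholders, not paths shipped with this project):
#
#     graph = MMGraph("converted_ir.pb", "converted_ir.npy")
#     annette_graph = graph.convert_to_annette("my_network")
#     # annette_graph now holds per-layer type, parents/children and shape
#     # attributes for further processing.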