def convert_datasets_to_tensor(name, obj):
    # Callback for h5py's Group.visititems(): collects weight datasets into a
    # `tensors` dict keyed by the op's argument index.
    # NOTE(review): `klass` and `tensors` are free variables here — this def is
    # only meaningful nested inside a layer-processing loop (see the identical
    # copy embedded in process_model later in this file); at top level these
    # names are unresolved. Looks like a stray duplicate — confirm.
    if type(obj) == h5py.Dataset:
        name = os.path.basename(
            name
        )  # leave only last chunk of the tensor name, such as 'kernel:0'
        try:
            index = klass.in_args[name]
            tensors[index] = Struct(name=obj.name, shape=obj.shape, data=obj[:])
            if index == 0 or -1 not in tensors:
                tensors[-1] = tensors[
                    index]  # use '-1' as 'default' tensor
        except KeyError:
            # dataset name does not match any declared argument of this op
            print('SKIP: unknown tensor', name)
def convert(source_file,
            target_file,
            trim_unused_by_output="",
            verbose=False,
            compress_f16=False):
    """
    Converts a Keras model into a Barracuda model.
    :param source_file: The Keras Model
    :param target_file: The name of the file the converted model will be saved to
    :param trim_unused_by_output: The regexp to match output nodes to remain in the model. All other unconnected nodes will be removed.
    :param verbose: If True, will display debug messages
    :param compress_f16: If true, the float values will be converted to f16
    :return:
    """
    if isinstance(verbose, bool):
        args = Struct()
        args.verbose = verbose
        args.print_layers = verbose
        args.print_source_json = verbose
        args.print_barracuda_json = verbose
        args.print_layer_links = verbose
        args.print_patterns = verbose
        args.print_tensors = verbose
        # BUGFIX: this flag is read below but was never set on the freshly
        # built Struct, raising AttributeError whenever `verbose` is a plain
        # bool.
        args.print_supported_ops = False
    else:
        args = verbose  # caller supplied a fully populated args Struct

    if args.print_supported_ops:
        barracuda.print_known_operations(known_classes, known_activations)

    # Load Keras model
    print("Converting %s to %s" % (source_file, target_file))
    # BUGFIX: close the HDF5 file once the weights have been copied out
    # (process_model materializes every dataset via obj[:], so nothing below
    # the `with` block touches the file handle).
    with h5py.File(source_file, 'r') as i_model:
        configJSON = json.loads(i_model.attrs['model_config'].decode('utf-8'))
        layers = configJSON['config']
        model_tensors = i_model['model_weights']

        if args.print_source_json or args.verbose:
            pprint(configJSON)

        # Convert
        o_model = barracuda.Model()
        o_model.layers, o_input_shapes, o_model.memories = \
            process_model(layers, model_tensors, args)

    # Gather patched model tensors
    for l in o_model.layers:
        for x in l.tensors:
            o_model.tensors[x.name] = x

    # Trim
    if trim_unused_by_output:
        o_model.layers = barracuda.trim(o_model.layers, trim_unused_by_output,
                                        args.verbose)

    # Find model inputs & outputs
    all_layers = {l.name for l in o_model.layers}
    all_inputs = {i for l in o_model.layers for i in l.inputs}
    # global inputs - are inputs that are NOT connected to any layer in the network
    # global outputs - are outputs that are NOT feeding any layer in the network
    o_model.inputs = {
        i: o_input_shapes[i]
        for l in o_model.layers for i in l.inputs if i not in all_layers
    }
    o_model.outputs = [
        l.name for l in o_model.layers if l.name not in all_inputs
    ]

    # Compress
    if compress_f16:
        o_model = barracuda.compress(o_model)

    # Summary
    barracuda.summary(o_model,
                      print_layer_links=args.print_layer_links or args.verbose,
                      print_barracuda_json=args.print_barracuda_json
                      or args.verbose,
                      print_tensors=args.print_tensors or args.verbose)

    # Write to file
    barracuda.write(o_model, target_file)
    print('DONE: wrote', target_file, 'file.')
def process_model(layers,
                  model_tensors,
                  args,
                  map_ignored_layer_to_its_input=None,
                  o_context=None):
    """Convert Keras layer configs plus their weight tensors into Barracuda layers.

    :param layers: Keras model 'config' JSON (a dict with a 'layers' key, or
        the list of layer configs itself).
    :param model_tensors: h5py group holding the model weights.
    :param args: Struct of verbosity/debug flags (print_layers, verbose, ...).
    :param map_ignored_layer_to_its_input: mapping used to re-route inputs
        around skipped layers; mutated in place.
    :param o_context: ModelBuilderContext accumulating layers, input shapes
        and memories; transient classes may recurse with the same context.
    :return: (o_context.layers, o_context.input_shapes, o_context.model_memories)
    """
    # BUGFIX: the original signature used mutable defaults ({} and
    # ModelBuilderContext()), which are created once at def-time and shared
    # across calls — a second convert() in the same process would accumulate
    # layers and ignored-layer mappings from the first.
    if map_ignored_layer_to_its_input is None:
        map_ignored_layer_to_its_input = {}
    if o_context is None:
        o_context = ModelBuilderContext()

    prev_layer_name = ''
    if 'get' in dir(layers):
        layers = layers.get('layers')

    # special handling for Sequential model case in Keras
    # when the 1st layer can define the shape of the input
    if layers:
        o_context.input_shapes[prev_layer_name] = get_input_layer_shape(
            layers[0]['config'])

    for layer in layers:
        name = layer['config']['name']
        class_name = layer['class_name']
        # Sequential models carry no inbound_nodes; chain onto the previous layer
        inputs = extract_strings(
            layer.get('inbound_nodes')) or [prev_layer_name]
        inputs = replace_strings_in_list(inputs,
                                         map_ignored_layer_to_its_input)

        if args.print_layers or args.verbose:
            print("'%s' %s %s" % (name, class_name, inputs))

        # Transient classes (handled by dedicated converters) expand
        # themselves directly into o_context instead of yielding one layer.
        if class_name in transient_classes:
            transient_classes[class_name](layer['config'], name, inputs,
                                          model_tensors, args, o_context)
            continue

        if class_name not in known_classes:
            if class_name in requires_runtime_flag:
                print('SKIP:', class_name, 'layer is used only for training')
            else:
                print('IGNORED:', class_name, 'unknown layer')
            map_ignored_layer_to_its_input[name] = inputs
            continue

        klass = known_classes[class_name]
        if isinstance(klass, int):  # plain-id shorthand in known_classes
            klass = Struct(id=klass)

        o_l = Struct()
        o_l.type = klass.id
        o_l.class_name = class_name
        o_l.name = name
        o_l.inputs = inputs

        # Keras layer attributes (None when absent from the config)
        activation = layer['config'].get('activation')
        axis = layer['config'].get('axis')
        padding = layer['config'].get('padding')
        strides = layer['config'].get('strides')
        pool_size = layer['config'].get('pool_size')
        size = layer['config'].get('size')
        use_bias = layer['config'].get('use_bias')
        data_frmt = layer['config'].get('data_format')
        alpha = layer['config'].get('alpha')
        beta = layer['config'].get('beta')

        if activation and activation not in known_activations:
            print('IGNORED: unknown activation', activation)
        if padding and padding not in known_paddings:
            print('IGNORED: unknown padding', padding)
        if data_frmt and data_frmt not in supported_data_formats:
            print('UNSUPPORTED: data format', data_frmt)

        o_l.activation = known_activations.get(activation) or 0
        # BUGFIX: the original expression
        #   known_paddings.get(padding) if padding else [0,0,0,0] or [0,0,0,0]
        # carried a dead 'or [0,0,0,0]' tail and yielded None for an unknown
        # (already warned-about) padding; fall back to no padding instead.
        o_l.pads = known_paddings.get(
            padding, [0, 0, 0, 0]) if padding else [0, 0, 0, 0]
        o_l.strides = strides or []
        o_l.pool_size = pool_size or size or []
        o_l.use_bias = embody(use_bias, default=True)
        o_l.axis = embody(axis, default=-1)
        o_l.alpha = embody(alpha, default=1)
        o_l.beta = beta or 0

        tensors = {}

        # Process input arguments
        if hasattr(klass, 'in_args'):
            # Lazily turn ['kernel:0', 'bias:0'] style lists into
            # {arg_name: index}; caching the converted form on the shared
            # known_classes entry is intentional.
            if isinstance(klass.in_args, list):
                klass.in_args = {
                    arg_name: idx
                    for idx, arg_name in enumerate(klass.in_args)
                }

            def convert_datasets_to_tensor(name, obj):
                # h5py visititems() callback: pick up weight datasets by
                # their trailing name and slot them by argument index.
                if isinstance(obj, h5py.Dataset):
                    name = os.path.basename(
                        name
                    )  # leave only last chunk of the tensor name, such as 'kernel:0'
                    try:
                        index = klass.in_args[name]
                        tensors[index] = Struct(name=obj.name,
                                                shape=obj.shape,
                                                data=obj[:])
                        if index == 0 or -1 not in tensors:
                            tensors[-1] = tensors[
                                index]  # use '-1' as 'default' tensor
                    except KeyError:
                        print('SKIP: unknown tensor', name)

            try:
                layer_tensors = model_tensors[o_l.name]
                layer_tensors.visititems(convert_datasets_to_tensor)
            except KeyError:
                # no tensors with specified name, op does not require tensor args
                pass

        # Set defaults for missing argument tensors
        if hasattr(klass, 'defaults'):
            assert (hasattr(klass, 'in_args'))
            index_to_arg_name = {v: k for k, v in klass.in_args.items()}
            default_shape = tensors[-1].shape
            for index, default in enumerate(klass.defaults):
                if index not in tensors and default is not None:
                    data = default(default_shape)
                    if args.verbose:
                        print(name + ':' + index_to_arg_name[index],
                              'default to', data[0])
                    tensors[index] = Struct(
                        name=('/model_weights/default/%s/%s') %
                        (name, index_to_arg_name[index]),
                        shape=np.shape(data),
                        data=data)

        # Patch tensor data
        if hasattr(klass, 'patch_data'):
            data = {i: x.data for i, x in tensors.items()}
            patch_data_fn = klass.patch_data
            # patch_data lambdas optionally take the raw layer as 2nd argument
            patch_data_expected_arg_count = patch_data_fn.__code__.co_argcount
            patch_data_args = (
                data,
                layer) if patch_data_expected_arg_count > 1 else (data, )
            tensor_data = patch_data_fn(*patch_data_args)
            for i, data in enumerate(tensor_data):
                tensors[i].data = data

        # Force all tensors to float32
        for x in tensors.values():
            x.data = x.data.astype(np.float32)

        # Patch shapes and write out tensors
        o_l.tensors = []
        if hasattr(klass, 'out_shapes'):
            shapes = klass.out_shapes({i: x.shape for i, x in tensors.items()})
            for i, shape in enumerate(shapes):
                tensors[i].shape = shape
                o_l.tensors.append(tensors[i])
        else:
            # no 'out_shapes' lambda was specified, op does not require tensor args
            pass

        # Layer is ready
        o_context.layers.append(o_l)
        prev_layer_name = o_l.name

    return o_context.layers, o_context.input_shapes, o_context.model_memories
def get_epsilon(layer):
    """Return the layer's epsilon from its config, falling back to Keras' default."""
    # default epsilon taken from https://keras.io/layers/normalization/
    return layer['config'].get('epsilon') or 0.001


# Shared `defaults` entry: argument 0 (kernel) has no default (None); argument 1
# (bias) defaults to a zero vector sized from the last dim of the default shape.
add_zero_bias_as_2nd_arg = [
    None, lambda default_shape: np.zeros(default_shape[-1])
]

# Keras class name -> Barracuda op descriptor (id, tensor-argument layout,
# output shapes). NOTE(review): this dict is truncated in this chunk; it
# continues past the visible source.
known_classes = {
    'Dense':
    Struct(
        id=1,
        in_args=['kernel:0', 'bias:0'],
        defaults=add_zero_bias_as_2nd_arg,
        out_shapes=lambda shapes: [
            [shapes[0][0], 1, 1, shapes[0][1]],  # W
            [1, 1, 1, shapes[1][0]]  # B
        ]),
    'Conv2D':
    Struct(
        id=20,
        in_args=['kernel:0', 'bias:0'],
        defaults=add_zero_bias_as_2nd_arg,
        out_shapes=lambda shapes: [
            shapes[0],  # K
            [1, 1, 1, shapes[1][0]]  # B
        ]),
    'SeparableConv2D':
    Struct(
        id=21,
def convert(source_file,
            target_file,
            trim_unused_by_output="",
            verbose=False,
            compress_f16=False):
    """
    Converts a ONNX model into a Barracuda model.
    :param source_file: The ONNX Model
    :param target_file: The name of the file the converted model will be saved to
    :param trim_unused_by_output: The regexp to match output nodes to remain in the model. All other unconnected nodes will be removed.
    :param verbose: If True, will display debug messages
    :param compress_f16: If true, the float values will be converted to f16
    :return:
    """
    if isinstance(verbose, bool):
        args = Struct()
        args.verbose = verbose
        args.print_layers = verbose
        args.print_source_json = verbose
        args.print_barracuda_json = verbose
        args.print_layer_links = verbose
        args.print_patterns = verbose
        args.print_tensors = verbose
    else:
        args = verbose  # caller supplied a fully populated args Struct

    # Load ONNX model
    print("Converting %s to %s" % (source_file, target_file))
    i_model = onnx.load(source_file)

    if args.print_source_json or args.verbose:
        for layer in i_model.graph.node:
            print(MessageToJson(layer) + ",")

    # Convert
    o_model = barracuda.Model()
    o_model.layers, o_input_shapes, o_model.tensors, o_model.memories = \
        process_model(i_model, args)

    # Trim
    if trim_unused_by_output:
        o_model.layers = barracuda.trim(o_model.layers, trim_unused_by_output,
                                        args.verbose)

    # Find model inputs & outputs
    all_inputs = {i for l in o_model.layers for i in l.inputs}

    # Create load layers for constants
    const_tensors = [i for i in all_inputs if i in o_model.tensors]
    # NOTE(review): process_model above returns 4 values without globals, so
    # o_model.globals is assumed to be initialized by barracuda.Model() —
    # confirm.
    const_tensors += o_model.globals
    for x in const_tensors:
        tensor = o_model.tensors[x]  # hoist the repeated dict lookup
        if hasattr(tensor, 'dims') and len(tensor.dims) > 0:
            shape = adapt_input_shape(tensor.dims)
        else:
            shape = [1, 1, 1, 1]
        o_l = Struct(
            type=255,  # Load
            class_name="Const",
            name=x,
            pads=[0, 0, 0, 0],
            strides=[],
            pool_size=[],
            axis=-1,
            alpha=1,
            beta=0,
            activation=0,
            inputs=[],
            tensors=[
                Struct(name=x,
                       shape=shape,
                       data=np.reshape(get_tensor_data(tensor),
                                       shape).astype(np.float32))
            ])
        o_model.layers.insert(0, o_l)

    all_layers = {l.name for l in o_model.layers}
    # global inputs - are inputs that are NOT connected to any layer in the network
    # global outputs - are outputs that are NOT feeding any layer in the network
    o_model.inputs = {
        i: o_input_shapes[i]
        for l in o_model.layers for i in l.inputs if i not in all_layers
    }
    o_model.outputs = [
        l.name for l in o_model.layers if l.name not in all_inputs
    ]

    # Compress
    if compress_f16:
        o_model = barracuda.compress(o_model)

    # Summary
    barracuda.summary(o_model,
                      print_layer_links=args.print_layer_links or args.verbose,
                      print_barracuda_json=args.print_barracuda_json
                      or args.verbose,
                      print_tensors=args.print_tensors or args.verbose)

    # Write to file
    barracuda.write(o_model, target_file)
    print('DONE: wrote', target_file, 'file.')
def process_layer(layer, context, args):
    """Convert a single ONNX node into a Barracuda layer Struct on context.layers.

    :param layer: the ONNX NodeProto being converted.
    :param context: conversion context providing model_tensors (initializers /
        constants seen so far) and map_ignored_layer_to_its_input; both are
        mutated in place, and the produced layer is appended to context.layers.
    :param args: Struct of verbosity/debug flags.
    """
    model_tensors = context.model_tensors
    map_ignored_layer_to_its_input = context.map_ignored_layer_to_its_input

    name = layer.output[0] if len(
        layer.output
    ) > 0 else layer.name  # prefer node.output over the node.name
    class_name = layer.op_type
    inputs = layer.input  # ONNX inputs are always explicit, but in case of Keras we had 'inputs = layer.input or [prev_layer_name]'
    inputs = replace_strings_in_list(inputs, map_ignored_layer_to_its_input)

    # 'Constant' nodes are folded into model_tensors instead of emitting a layer
    if class_name == 'Constant':
        model_tensors[name] = get_attr(layer, 'value')
        model_tensors[name].name = name
        #print('CONST:', name, model_tensors[name].dims, struct.unpack('<'+str(np.prod(model_tensors[name].dims))+'f', model_tensors[name].raw_data))
        return

    if args.print_layers or args.verbose:
        print("'%s' %s %s" % (name, class_name, inputs))

    # Stand-alone activation nodes become a generic 'Activation' layer
    if class_name in known_activations:
        activation = class_name
        class_name = 'Activation'
    else:
        activation = 'Linear'

    if not class_name in known_classes:
        if class_name in requires_runtime_flag:
            print('SKIP:', class_name, 'layer is used only for training')
        else:
            print('IGNORED:', class_name, 'unknown layer')
        map_ignored_layer_to_its_input[name] = inputs
        return

    klass = known_classes[class_name]
    if type(klass) == int:  # plain-id shorthand in known_classes
        klass = Struct(id=klass)

    o_l = Struct()
    # some ops pick their Barracuda id based on the node itself
    o_l.type = klass.id(layer) if callable(klass.id) else klass.id
    o_l.class_name = class_name
    o_l.name = name

    # Gather ONNX node attributes (None when absent from the node)
    axis = axis_to_NHWC(get_attr(layer, 'axis', -1))
    auto_pad = get_attr(layer, 'auto_pad')
    pads = get_attr(layer, 'pads')
    strides = get_attr(layer, 'strides')
    pool_size = get_attr(layer, 'kernel_shape')
    shape = get_attr(layer, 'shape')
    starts = get_attr(layer, 'starts')
    ends = get_attr(layer, 'ends')
    slice_strides = [1, 1, 1, 1] if starts and ends else []
    #TODO properly extract scale from const Tensor for Upsample layers
    size = [get_attr(layer, 'height_scale'),
            get_attr(layer, 'width_scale')] if get_attr(
                layer, 'width_scale') and class_name == 'Upsample' else [2, 2]
    alpha = get_attr(layer, 'alpha') or get_attr(layer, 'ratio') or get_attr(
        layer, 'value')
    beta = get_attr(layer, 'beta') or get_attr(layer, 'epsilon')
    # TODO: decide what to do with 'is_test' attribute

    if auto_pad and not auto_pad in known_paddings:
        print('IGNORED: unknown padding', auto_pad)

    if size == [None, None]:
        size = None
    if size:
        size = np.array(size).astype(int).tolist()

    # The layer slots are overloaded across op types (e.g. pool_size also
    # carries Reshape's target shape and Slice's 'ends').
    o_l.activation = known_activations.get(activation) or 0
    o_l.pads = known_paddings.get(
        auto_pad) if auto_pad else pads or starts or [0, 0, 0, 0]
    o_l.strides = strides or slice_strides or []
    o_l.pool_size = pool_size or size or shape or ends or []
    o_l.axis = embody(axis, default=-1)
    o_l.alpha = embody(alpha, default=1)
    o_l.beta = beta or 0

    # Patch shapes & data
    try:
        # split node inputs into tensor (initializer) args vs. real layer inputs
        tensor_names = [i for i in inputs if i in model_tensors]
        o_l.tensors = [
            Struct(name=model_tensors[x].name,
                   shape=model_tensors[x].dims,
                   data=get_tensor_data(model_tensors[x]))
            for x in tensor_names
        ]
        o_l.inputs = [i for i in inputs if i not in model_tensors]

        # NOTE(review): if klass has no 'patch_shapes', this attribute access
        # raises AttributeError and control jumps to the handler below — and
        # an AttributeError raised *inside* a patch lambda is swallowed the
        # same way; keep in mind when debugging missing tensors.
        shapes = klass.patch_shapes([x.shape for x in o_l.tensors])

        # if we have more shapes than actual tensors,
        # then create & fill missing tensors with zeros
        in_tensor_num = len(o_l.tensors)
        for index, new_shape in enumerate(shapes):
            if index >= in_tensor_num:
                new_tensor = Struct(name=('/model_weights/%s/%s/patch:%i') %
                                    (name, name, index - in_tensor_num),
                                    shape=new_shape,
                                    data=np.zeros(new_shape))
                o_l.tensors.append(new_tensor)
        assert (len(shapes) <= len(o_l.tensors))

        if hasattr(klass, 'patch_data'):
            data = [x.data for x in o_l.tensors]

            patch_data_fn = klass.patch_data
            # patch_data lambdas optionally take the raw node as 2nd argument
            patch_data_expected_arg_count = patch_data_fn.__code__.co_argcount
            patch_data_args = (
                data, layer) if patch_data_expected_arg_count > 1 else (data, )
            tensor_data = patch_data_fn(*patch_data_args)
            o_l.tensors = o_l.tensors[:len(
                tensor_data
            )]  # resize tensor array to match patched data - patching might reduce number of tensors
            for x, data in zip(o_l.tensors, tensor_data):
                x.data = data

        # after this point we should have equal amount of shapes and tensors
        assert (len(o_l.tensors) == len(shapes))

        for x, shape in zip(o_l.tensors, shapes):
            x.shape = shape
    except AttributeError:
        # no 'patch_data' lambda was specified, op does not require tensor args
        o_l.tensors = []
        o_l.inputs = inputs

    try:
        attrs = klass.patch_attrs(o_l, layer)
        for k, v in attrs.items():
            o_l.__dict__[k] = v
    except AttributeError:
        pass  # no 'patch_attrs' lambda was specified

    # Force all tensors to float32
    for x in o_l.tensors:
        x.data = x.data.astype(np.float32)

    # Layer is ready
    context.layers.append(o_l)
# ONNX format: # See: https://github.com/onnx/onnx/blob/master/onnx/onnx.proto # ONNX schema: https://github.com/onnx/onnx/blob/master/docs/Operators.md # ONNX conventions: https://github.com/onnx/onnx/blob/master/docs/OpConventions.md def get_epsilon(layer): # default epsilon taken from https://github.com/onnx/onnx/blob/master/docs/Operators.md#BatchNormalization return get_attr(layer, 'epsilon', default=1e-05) known_classes = { 'Gemm': Struct( id=1, patch_shapes=lambda shapes: [ shape_to_HW(shapes[0]), # W bias(shape_to_HW(shapes[-1])), # B ], patch_data=lambda data: [data_to_HW(data[0]), data[1]]), 'MatMul': Struct( id=1, patch_shapes=lambda shapes: [ shape_to_HW(shapes[0]), # W bias(shape_to_HW(shapes[-1])), # ZERO ], patch_data=lambda data: [data_to_HW(data[0]), np.zeros(np.shape(data[1]))]), 'Conv': Struct( id=lambda layer: 21
def convert(source_file,
            target_file,
            trim_unused_by_output="",
            verbose=False,
            compress_f16=False):
    """
    Converts a ONNX model into a Barracuda model.
    :param source_file: The ONNX Model
    :param target_file: The name of the file the converted model will be saved to
    :param trim_unused_by_output: The regexp to match output nodes to remain in the model. All other unconnected nodes will be removed.
    :param verbose: If True, will display debug messages
    :param compress_f16: If true, the float values will be converted to f16
    :return:
    """
    if isinstance(verbose, bool):
        args = Struct()
        args.verbose = verbose
        args.print_layers = verbose
        args.print_source_json = verbose
        args.print_barracuda_json = verbose
        args.print_layer_links = verbose
        args.print_patterns = verbose
        args.print_tensors = verbose
        # BUGFIX: this flag is read below but was never set on the freshly
        # built Struct, raising AttributeError whenever `verbose` is a plain
        # bool.
        args.print_supported_ops = False
    else:
        args = verbose  # caller supplied a fully populated args Struct

    if args.print_supported_ops:
        barracuda.print_known_operations(known_classes, known_activations)

    # Load ONNX model
    print("Converting %s to %s" % (source_file, target_file))
    i_model = onnx.load(source_file)

    if args.print_source_json or args.verbose:
        for layer in i_model.graph.node:
            print(MessageToJson(layer) + ",")

    # Convert
    o_model = barracuda.Model()
    o_model.layers, o_input_shapes, o_model.tensors, o_model.memories, o_model.globals = \
        process_model(i_model, args)

    # Trim
    if trim_unused_by_output:
        o_model.layers = barracuda.trim(o_model.layers, trim_unused_by_output,
                                        args.verbose)

    # Create load layers for constants
    def dims_to_barracuda_shape(tensor):
        # 4D Barracuda shape for a constant tensor; tensors without dims
        # become [1, 1, 1, 1]
        if hasattr(tensor, 'dims') and len(tensor.dims) > 0:
            return adapt_input_shape(tensor.dims)
        return [1, 1, 1, 1]

    # IDIOM: pass the callables directly instead of wrapping each one in an
    # identity lambda.
    barracuda.setup_constants(o_model, dims_to_barracuda_shape,
                              get_tensor_data)

    # Find model inputs & outputs
    all_inputs = {i for l in o_model.layers for i in l.inputs}
    all_layers = {l.name for l in o_model.layers}

    # global inputs - are inputs that are NOT connected to any layer in the network
    # global outputs - are outputs that are NOT feeding any layer in the network
    o_model.inputs = {
        i: o_input_shapes[i]
        for l in o_model.layers for i in l.inputs if i not in all_layers
    }

    def is_output_layer(layer):
        if layer.name in all_inputs:  # Only layers that do not input to other layers can count as global output
            return False
        if layer.name in o_model.globals:
            return False
        return True

    o_model.outputs = [l.name for l in o_model.layers if is_output_layer(l)]

    # Compress
    if compress_f16:
        o_model = barracuda.compress(o_model)

    # Sort model so that layer inputs are always ready upfront
    o_model.layers = barracuda.sort(o_model.layers, o_model.inputs,
                                    o_model.memories, args.verbose)
    o_model.layers = barracuda.fuse(o_model.layers, args.verbose)

    # Summary
    barracuda.summary(o_model,
                      print_layer_links=args.print_layer_links or args.verbose,
                      print_barracuda_json=args.print_barracuda_json
                      or args.verbose,
                      print_tensors=args.print_tensors or args.verbose)

    # Write to file
    barracuda.write(o_model, target_file)
    print('DONE: wrote', target_file, 'file.')