def get_concat_quantize_layers(bottom_Xs, X, top_Xs, mse_opt_num, bitwidth,
                               new_xlayer_names):
    # type: (List[XLayer], XLayer, List[XLayer], int, int, dict) -> List[XLayer]
    assert bitwidth == 8

    new_Xs = []

    X = X._replace(bottoms=[new_xlayer_names[bottom] for bottom in X.bottoms])
    new_Xs.append(X)

    # Threshold layer
    th_out_var_name = X.name + '_th_out'
    th_out_var_attrs = {
        # 'init_value': np.array(1.),
        'dtype': 'float32'
    }
    th_out_var_layer = defaultXLayer()
    th_out_var_layer = th_out_var_layer._replace(
        type=['Variable'],
        name=th_out_var_name,
        shapes=[1],
        attrs=th_out_var_attrs,
        bottoms=[],
        data=[np.array(1.)],
        tops=[])
    new_Xs.append(th_out_var_layer)

    # Quantize layer
    quant_attrs = {
        'quant_bitwidth': bitwidth,
        'dtype': 'float32',  # TODO??
        'axis': 0,  # TODO NCHW
        'mse_opt_num': mse_opt_num
    }
    quant_X = defaultXLayer()
    quant_X = quant_X._replace(
        type=['MSEQuantize'],
        name=X.name + "_quantize",
        shapes=X.shapes[:],
        attrs=quant_attrs,
        bottoms=[X.name, th_out_var_name],
        tops=[])
    new_Xs.append(quant_X)

    return new_Xs
def test_xlayer_factory(self):
    X1 = defaultXLayer()
    assert X1.layer == lpx.StrVector([])
    assert X1.tops == lpx.StrVector([])
    assert X1.bottoms == lpx.StrVector([])
    assert X1.targets == lpx.StrVector([])
    assert X1.target == 'cpu'

    X1 = X1._replace(name='test', tops=['test'])
    assert X1.name == 'test'
    assert X1.tops == lpx.StrVector(['test'])

    X2 = defaultXLayer()
    assert X2.layer == lpx.StrVector([])
    assert X2.tops == lpx.StrVector([])
    assert X2.bottoms == lpx.StrVector([])
    assert X2.targets == lpx.StrVector([])
def replace_func(bottom_Xs, X, top_Xs):
    """ Replace Convolution with Pooling operation """
    new_Xs = []

    if X.type[0] in ['Convolution']:
        new_X = defaultXLayer()
        new_X = new_X._replace(
            type=['Pooling'],
            name=X.name,
            shapes=X.shapes,
            sizes=X.sizes,
            bottoms=X.bottoms,
            tops=X.tops)
        new_Xs.append(new_X)
    else:
        new_Xs.append(X)

    return new_Xs
def xgraph_build_func(xgraph: XGraph,
                      target: str,
                      xtype,
                      layout='NCHW',
                      **kwargs) -> XGraph:

    fancy_logger.banner("Subgraph build func, target: {}, layout: {}".format(
        target, layout))

    compiler_output = xgraph.get_compiler_output() if xgraph.is_compiled() \
        else None
    compiler_output_keys = list(compiler_output.keys()) \
        if compiler_output else []
    logger.debug("Compiler output keys: {}".format(compiler_output_keys))

    if layout not in ['NCHW', 'NHWC']:
        raise ValueError(
            "Supported layouts are [NCHW, NHWC] but got: {}".format(layout))

    layout_transform_pass = \
        XGraphLayoutTransformationPass(layout, target=target)
    xgraph = layout_transform_pass.execute(xgraph, subgraphs_only=False)

    xgraph_factory = XGraphFactory()
    xgraph_partitioner = XGraphPartitioner()

    subgraphs = {
        xp.name: xp for xp in xgraph_partitioner.get_subgraphs(xgraph)
    }

    # Retrieve CompilerOutput if available
    # compiler_output = xgraph.get_compiler_output() if xgraph.is_compiled() \
    #     else None
    # compiler_output_keys = list(compiler_output.keys()) \
    #     if compiler_output else []
    # logger.debug("Compiler output keys: {}".format(compiler_output_keys))

    # Keep track of the visited partitions/subgraphs and the layers
    # inside the partition
    visited_xps = {}

    # Keep track of the subgraph output tensors and the corresponding
    # new layers (TupleGetItem or Transpose)
    xp_out_tensors_2_layers = {}

    name_changes = {}
    net_map = {}
    net = []
    for X in xgraph.get_layers():

        if X.subgraph is not None and X.subgraph not in visited_xps:
            Xp = subgraphs[X.subgraph]

            if 'target' in Xp.attrs and Xp.attrs['target'] == target:
                visited_xps[Xp.name] = set([X.name])

                logger.debug("XSHAPES: {}".format(X.shapes))

                bottoms = Xp.bottoms

                # Keep track of subgraph input and output names
                sub_xgraph = xgraph_factory.build_from_xlayer(Xp.subgraph_data)

                input_names = Xp.attrs['input_names'][:]
                output_names = Xp.attrs['output_names'][:]
                input_layers = \
                    [sub_xgraph.get(in_name) for in_name in input_names]
                output_layers = \
                    [sub_xgraph.get(out_name) for out_name in output_names]

                attrs = {
                    'input_names': input_names,
                    'output_names': output_names,
                    'input_layers':
                        {il.name: il.layer[:] for il in input_layers},
                    'output_layers':
                        {ol.name: ol.layer[:] for ol in output_layers}
                }
                for k, v in kwargs.items():
                    if k in attrs:
                        raise ValueError("Provided kwargs key: {} clashes with"
                                         " a reserved subgraph layer attribute"
                                         .format(k))
                    attrs[k] = v

                if Xp.name in compiler_output_keys:
                    attrs['rt_in_map'] = compiler_output.get_in_map(Xp.name)
                    for in_name in input_names:
                        for merged_layer in attrs['input_layers'][in_name]:
                            attrs['rt_in_map'][merged_layer] = \
                                attrs['rt_in_map'][in_name]
                    attrs['rt_out_map'] = compiler_output.get_out_map(Xp.name)
                    for out_name in output_names:
                        for merged_layer in attrs['output_layers'][out_name]:
                            attrs['rt_out_map'][merged_layer] = \
                                attrs['rt_out_map'][out_name]

                Xp.attrs.update(attrs)

                shapes = Xp.shapes[:]

                subgraph_X = Xp._replace(
                    # name=X.name,
                    type=[xtype],
                    shapes=shapes,
                    bottoms=bottoms,
                    # Fill tops later
                    tops=[],
                    subgraph_data=[])
                net.append(subgraph_X.name)
                net_map[Xp.name] = subgraph_X

                # Subgraph layers have multiple outputs (Tuple) so we
                # retrieve the different subgraph outputs
                # (see output_names variable) using a TupleGetItem
                # layer
                top_tensors = Xp.attrs['__top_tensors']

                for i, output_name in enumerate(output_names):
                    # Handle merged layers
                    out_tensor = Xp.attrs['output_layers'][output_name][-1]
                    tgi_name = out_tensor
                    # tgi_name = subgraph_X.name + '_tgi' + str(i)

                    top_tensor = top_tensors[output_name]

                    shapes = subgraph_X.shapes[i][:]
                    X_tgi = defaultXLayer()
                    X_tgi = X_tgi._replace(
                        name=tgi_name,
                        type=['TupleGetItem'],
                        shapes=shapes,
                        sizes=shapes.get_size(),
                        layer=[tgi_name],
                        tops=top_tensor[:],
                        bottoms=[subgraph_X.name],
                        internal=1,
                        attrs={'index': i})
                    net.append(X_tgi.name)
                    # Keep track of TGI layer for both last merged layer
                    # and output name
                    net_map[tgi_name] = X_tgi
                    net_map[output_name] = X_tgi

                    subgraph_X.tops.append(tgi_name)

                    xp_out_tensors_2_layers[output_name] = tgi_name
            else:
                net.append(X.name)
                net_map[X.name] = X

        elif X.subgraph is not None and X.subgraph in visited_xps:
            # Remove layer
            visited_xps[X.subgraph].add(X.name)

        elif 'Transpose' in X.type:
            # Possibly merge transpose in TupleGetItem layer
            bX = net_map[X.bottoms[0]]
            new_tops = []
            for t in bX.tops:
                if t != X.name:
                    new_tops.append(t)
                elif len(X.tops) > 0:
                    new_tops.append(X.tops[0])

            if 'TupleGetItem' in bX.type:
                new_X = bX._replace(tops=new_tops)
                new_X.attrs['transpose'] = True
                new_X.attrs['axes'] = X.attrs['axes']
                new_X.shapes[:] = TensorShape(X.shapes[:])
                net_map[new_X.name] = new_X
                name_changes[X.name] = bX.name
            else:
                net.append(X.name)
                net_map[X.name] = X

        else:
            net.append(X.name)
            net_map[X.name] = X

        # Reflect possibly merged layers
        new_bottoms = [b if b not in name_changes else name_changes[b]
                       for b in X.bottoms]
        if new_bottoms != X.bottoms:
            new_X = X._replace(bottoms=new_bottoms)
            net_map[X.name] = new_X

    # Set tops and bottoms & enforce topological sequence
    for xp in visited_xps.keys():
        Xp = subgraphs[xp]

        for b in Xp.bottoms:
            top_name = Xp.name
            bX = xgraph.get(b)
            bX.tops = [(bXt if bXt not in visited_xps[Xp.name] else top_name)
                       for bXt in bX.tops]

        for t in Xp.tops:
            tX = xgraph.get(t)
            tX.bottoms = [(tXb if tXb not in visited_xps[Xp.name]
                           else xp_out_tensors_2_layers[tXb])
                          for tXb in tX.bottoms]

    # Topological sorting
    X_net = [net_map[e] for e in net]
    top_net = sort_topologically(X_net)

    sub_xgraph = xgraph_factory.build_from_xlayer(top_net)

    # Merge transposes if they are cancelling out
    # optimizer = XGraphTransposesOptimizer(sub_xgraph)
    # optimizer.optimize()

    return sub_xgraph
def get_convolution_quantization_layers(bottom_Ps, P, top_Ps, quant_params):
    # type: (Dict[str, XLayer], XLayer,
    #        Dict[str, XLayer], Dict[str, dict]) -> List[XLayer]
    """ TODO: Make more modular """
    new_Ps = []

    W, B = P.data.weights, P.data.biases
    kernel_name = P.name + "_kernel"
    bias_name = P.name + "_biases"

    # KERNEL
    k_in_attrs = {
        'dtype': 'float32',
        'layout': 'None'
    }
    k_in_P = defaultXLayer()
    k_in_P = k_in_P._replace(
        type=['Input'],
        name=kernel_name,
        shapes=list(W.shape)
    )
    new_Ps.append(k_in_P)

    if P.name in quant_params:
        k_quant_attrs = {
            # 'quant_params': {
            #     'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            #     'quant_threshold': quant_params[P.name]['th_params']
            # },
            'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            'quant_threshold': quant_params[P.name]['th_params'],
            'dtype': 'int8',
            'input_types': ['float32'],
            'axis': 0  # TODO: OIHW
        }
        k_quant_P = defaultXLayer()
        k_quant_P = k_quant_P._replace(
            type=['Quantize'],
            name=kernel_name + "_quantize",
            shapes=list(W.shape),
            attrs=k_quant_attrs,
            bottoms=[kernel_name]
        )
        new_Ps.append(k_quant_P)

    # BIAS
    b_in_attrs = {
        'dtype': 'float32',
        'layout': 'None'
    }
    b_in_P = defaultXLayer()
    b_in_P = b_in_P._replace(
        type=['Input'],
        name=bias_name,
        shapes=list(B.shape)
    )
    new_Ps.append(b_in_P)

    if P.name in quant_params:
        b_quant_attrs = {
            # 'quant_params': {
            #     'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            #     'quant_threshold': quant_params[P.name]['th_layer_in'],
            #     'th_params': quant_params[P.name]['th_params']
            # },
            'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            'quant_threshold': quant_params[P.name]['th_layer_in'],
            'quant_th_params': quant_params[P.name]['th_params'],
            'dtype': 'int32',
            'input_types': ['float32']
        }
        b_quant_P = defaultXLayer()
        b_quant_P = b_quant_P._replace(
            type=['QuantizeBias'],
            name=bias_name + "_quantize",
            shapes=list(B.shape),
            attrs=b_quant_attrs,
            bottoms=[bias_name]
        )
        new_Ps.append(b_quant_P)

    # CONVOLUTION
    if P.name in quant_params:
        # INPUT
        # Quantize bottoms
        attrs = {
            # 'quant_params': {
            #     'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            #     'quant_threshold': [quant_params[P.name]['th_layer_in']]
            # },
            'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            'quant_threshold': [quant_params[P.name]['th_layer_in']],
            'dtype': 'int8',
            'input_types': ['float32'],
            'axis': 1  # TODO: NCHW
        }
        assert len(P.bottoms) == 1
        quant_bottom = defaultXLayer()
        quant_bottom = quant_bottom._replace(
            type=['Quantize'],
            name=P.bottoms[0] + "_quantize",
            shapes=bottom_Ps[0].shapes,
            attrs=attrs,
            bottoms=[P.bottoms[0]]
        )
        new_Ps.append(quant_bottom)

        P = P._replace(
            bottoms=[bottom + "_quantize" for bottom in P.bottoms] +
                    [kernel_name + '_quantize', bias_name + '_quantize'],
            tops=[]
        )
    else:
        P = P._replace(
            bottoms=P.bottoms + [kernel_name, bias_name],
            tops=[]
        )
    new_Ps.append(P)

    # QUANTIZE AFTER CONVOLUTION
    if P.name in quant_params:
        qi_attrs = {
            # 'quant_params': {
            #     'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            #     'scale': quant_params[P.name]['scale'],
            #     'postscale_shift': quant_params[P.name]['postscale_shift'],
            #     'prescale_shift': quant_params[P.name]['prescale_shift']
            # },
            'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            'quant_scale': quant_params[P.name]['scale'],
            'quant_postscale_shift': quant_params[P.name]['postscale_shift'],
            'quant_prescale_shift': quant_params[P.name]['prescale_shift'],
            'dtype': 'int8',  # TODO??
            'input_types': ['float32'],
            'axis': 1  # NCHW
        }
        quant_inter_P = defaultXLayer()
        quant_inter_P = quant_inter_P._replace(
            # type=['QuantizeInter'],
            type=['QuantizeInter12MSBits'],
            name=P.name + "_quantize_inter",
            shapes=P.shapes,
            attrs=qi_attrs,
            bottoms=[P.name]
        )
        new_Ps.append(quant_inter_P)

        # UNQUANTIZE LAYER
        attrs = {
            # 'quant_params': {
            #     'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            #     'quant_threshold': [quant_params[P.name]['th_layer_out']]
            # },
            'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            'quant_threshold': [quant_params[P.name]['th_layer_out']],
            'dtype': 'float32',
            'input_types': ['int8'],
            'axis': 1  # TODO: NCHW
        }
        unquant_P = defaultXLayer()
        unquant_P = unquant_P._replace(
            type=['UnQuantize'],
            name=P.name + "_unquantize",
            shapes=P.shapes,
            attrs=attrs,
            bottoms=[P.name + "_quantize_inter"]
        )
        new_Ps.append(unquant_P)

    return new_Ps
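# The Quantize/UnQuantize pairs constructed above only carry 'quant_bitwidth'
# and 'quant_threshold' attributes; the actual arithmetic lives in the runtime.
# The following is an illustrative, self-contained sketch of what a symmetric,
# threshold-based int8 quantize/unquantize round trip could look like. The
# helper names (quantize, unquantize, q_max) are hypothetical and the exact
# rounding/saturation used by the runtime may differ.

import numpy as np

def quantize(x, threshold, bitwidth=8):
    # Map [-threshold, threshold] onto the signed integer grid
    q_max = 2 ** (bitwidth - 1) - 1  # 127 for int8
    return np.clip(np.round(x * q_max / threshold), -q_max, q_max).astype(np.int8)

def unquantize(q, threshold, bitwidth=8):
    # Map integer values back to approximate float values
    q_max = 2 ** (bitwidth - 1) - 1
    return q.astype(np.float32) * threshold / q_max

x = np.array([-1.5, -0.2, 0.0, 0.7, 1.5], dtype=np.float32)
print(unquantize(quantize(x, threshold=1.5), threshold=1.5))  # ~x up to rounding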
def transform_layers(bottom_Xs, X, top_Xs):
    # type: (List[XLayer], XLayer, List[XLayer]) -> List[XLayer]
    """ Transform the layers with a specific layout """
    new_Xs = []

    layout_transform_ops = XGraphLayoutTransformationPass.xop_registry\
        .get_xops_with_layout_transform()

    # TODO: make more extensible
    if X.type[0] in layout_transform_ops and\
            (self.target is None or X.target == self.target):
        data_layout = X.attrs['data_layout']

        if data_layout == self.target_data_layout:
            new_Xs.append(X)
        else:
            # Bottom transpose
            axes_b = [data_layout.index(e) for e in self.target_data_layout]
            tb_name = "{}_bottom_{}>{}".format(X.name, data_layout,
                                               self.target_data_layout)
            # tb_name = "{}_bottom_{}>{}-{}".format(
            #     X.name.split("-")[0],
            #     data_layout,
            #     self.target_data_layout,
            #     X.name.split("-")[-1]
            # )

            input_shapes = bottom_Xs[0].shapes[:]
            input_sizes = bottom_Xs[0].sizes[:]
            tb_shape = [input_shapes[i] for i in axes_b]

            Tb = defaultXLayer()
            Tb = Tb._replace(
                name=tb_name,
                type=['Transpose'],
                shapes=tb_shape,
                sizes=input_sizes,
                layer=[tb_name],
                # tops !
                tops=[],
                bottoms=[bottom_Xs[0].name],
                internal=1,
                attrs={'axes': axes_b})

            logger.debug("Insert bottom transpose: {}, axes: {}"
                         .format(tb_name, axes_b))

            # Top transpose
            axes_t = [self.target_data_layout.index(e) for e in data_layout]
            # tt_name = "{}_top_{}>{}-{}".format(
            #     X.name.split("-")[0],
            #     self.target_data_layout,
            #     data_layout,
            #     X.name.split("-")[-1]
            # )
            tt_name = "{}_top_{}>{}".format(X.name, self.target_data_layout,
                                            data_layout)

            input_sizes = X.sizes[:]
            tt_shape = X.shapes[:]

            Tt = defaultXLayer()
            Tt = Tt._replace(
                name=tt_name,
                type=['Transpose'],
                shapes=tt_shape,
                sizes=input_sizes,
                layer=[tt_name],
                # Tops !
                tops=[],
                bottoms=[X.name],
                internal=1,
                attrs={'axes': axes_t})

            logger.debug("Insert top transpose: {}, axes: {}"
                         .format(tt_name, axes_t))

            # X
            new_bottoms = [(b if b != bottom_Xs[0].name else tb_name)
                           for b in X.bottoms]

            # Call operation layout transformation function
            layout_transform_func = \
                XGraphLayoutTransformationPass.xop_registry\
                .get_xop_layout_transform(X.type[0])
            layout_transform_func(X, self.target_data_layout)

            X = X._replace(
                bottoms=new_bottoms,
                # tops are filled later TODO
                tops=[],
            )
            X.attrs['data_layout'] = self.target_data_layout

            new_Xs.append(Tb)
            new_Xs.append(X)
            new_Xs.append(Tt)
    else:
        new_Xs.append(X)

    return new_Xs
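# The bottom/top transposes above are driven purely by index arithmetic on the
# layout strings. An illustrative, self-contained check (not part of this code
# base) that axes_b converts NCHW to NHWC and axes_t is its inverse:

import numpy as np

data_layout, target_data_layout = 'NCHW', 'NHWC'
axes_b = [data_layout.index(e) for e in target_data_layout]  # [0, 2, 3, 1]
axes_t = [target_data_layout.index(e) for e in data_layout]  # [0, 3, 1, 2]

x = np.random.rand(1, 3, 224, 224)     # NCHW input
x_nhwc = np.transpose(x, axes_b)       # shape (1, 224, 224, 3)
x_back = np.transpose(x_nhwc, axes_t)  # back to (1, 3, 224, 224)
assert np.array_equal(x, x_back)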
def get_scale_quantize_layers(bottom_Xs, X, top_Xs, mse_opt_num, bitwidth,
                              new_xlayer_names):
    # type: (List[XLayer], XLayer, List[XLayer], int, int, dict) -> List[XLayer]
    """ TODO: Make more modular """
    new_Xs = []

    # TODO train beta if threshold scaling layer
    G, B = X.data.gamma, X.data.beta
    gamma_name, beta_name = X.name + "_gamma", X.name + "_beta"

    # Scaling is executed as an elementwise layer in combination with
    # quantization scaling
    # ! Ignore gamma scaling values (they are already incorporated in
    #   quantization parameters)

    # INPUT
    th_in_var_name = X.name + '_th_in'
    var_attrs = {
        # 'init_value': np.array(1.),
        'dtype': 'float32'
    }
    var_layer = defaultXLayer()
    var_layer = var_layer._replace(
        type=['Variable'],
        name=th_in_var_name,
        shapes=[1],
        attrs=var_attrs,
        bottoms=[],
        data=[np.array(1.)],
        tops=[])
    new_Xs.append(var_layer)

    quant_in_attrs = {
        'quant_bitwidth': bitwidth,
        'dtype': 'float32',  # TODO: input types?
        'axis': 0,  # TODO: NCHW
        'mse_opt_num': mse_opt_num
    }
    quant_in_layer = defaultXLayer()
    quant_in_layer = quant_in_layer._replace(
        type=['MSEQuantize'],
        name=X.name + '_quantize_in',
        shapes=bottom_Xs[0].shapes,
        attrs=quant_in_attrs,
        bottoms=[new_xlayer_names[X.bottoms[0]], th_in_var_name],
        tops=[])
    new_Xs.append(quant_in_layer)

    # GAMMA
    g_in_attrs = {'dtype': 'float32', 'layout': 'None'}
    g_in_X = defaultXLayer()
    g_in_X = g_in_X._replace(
        type=['Input'],
        name=gamma_name,
        shapes=list(G.shape),
        bottoms=[],
        tops=[],
        attrs=g_in_attrs)
    new_Xs.append(g_in_X)

    # BETA
    b_in_attrs = {'dtype': 'float32', 'layout': 'None'}
    b_in_X = defaultXLayer()
    b_in_X = b_in_X._replace(
        type=['Input'],
        name=beta_name,
        shapes=list(B.shape),
        bottoms=[],
        tops=[],
        attrs=b_in_attrs)
    new_Xs.append(b_in_X)

    b_quant_attrs = {
        'quant_bitwidth': bitwidth,
        'dtype': 'float32',  # TODO: input types?
        'axis': 0  # TODO: NCHW
    }
    b_quant_X = defaultXLayer()
    b_quant_X = b_quant_X._replace(
        type=['MSEQuantizeBias'],
        name=beta_name + '_quantize',
        shapes=list(B.shape),
        attrs=b_quant_attrs,
        bottoms=[beta_name, th_in_var_name, gamma_name],
        tops=[])
    new_Xs.append(b_quant_X)

    X = X._replace(bottoms=[X.name + '_quantize_in'] +
                           [gamma_name, beta_name + '_quantize'])
    new_Xs.append(X)

    # Threshold layer
    th_out_var_name = X.name + '_th_out'
    th_out_var_attrs = {
        # 'init_value': np.array(1.),
        'dtype': 'float32'
    }
    th_out_var_layer = defaultXLayer()
    th_out_var_layer = th_out_var_layer._replace(
        type=['Variable'],
        name=th_out_var_name,
        shapes=[1],
        attrs=th_out_var_attrs,
        bottoms=[],
        data=[np.array(1.)],
        tops=[])
    new_Xs.append(th_out_var_layer)

    # Quantize layer
    quant_attrs = {
        'quant_bitwidth': bitwidth,
        'dtype': 'float32',  # TODO??
        'axis': 0,  # TODO NCHW
        'mse_opt_num': mse_opt_num
    }
    quant_X = defaultXLayer()
    quant_X = quant_X._replace(
        type=['MSEQuantize'],
        name=X.name + "_quantize",
        shapes=X.shapes[:],
        attrs=quant_attrs,
        bottoms=[X.name, th_out_var_name],
        tops=[])
    new_Xs.append(quant_X)

    return new_Xs
def get_convolution_quantize_layers(bottom_Xs, X, top_Xs, mse_opt_num,
                                    bitwidth, new_xlayer_names):
    # type: (List[XLayer], XLayer, List[XLayer], int, int, dict)
    #   -> List[XLayer]
    """ TODO: Make more modular """
    new_Xs = []

    W, B = X.data.weights, X.data.biases
    kernel_name = X.name + "_kernel"
    bias_name = X.name + "_biases"

    # INPUT
    th_in_var_name = X.name + '_th_in'
    var_attrs = {
        # 'init_value': np.array(1.),
        'dtype': 'float32'
    }
    var_layer = defaultXLayer()
    var_layer = var_layer._replace(
        type=['Variable'],
        name=th_in_var_name,
        shapes=[1],
        attrs=var_attrs,
        bottoms=[],
        tops=[],
        data=[np.array(1.)],
        subgraph=X.subgraph)
    new_Xs.append(var_layer)

    quant_in_attrs = {
        'quant_bitwidth': bitwidth,
        'dtype': 'float32',
        'axis': 1 if X.attrs['data_layout'] == 'NCHW' else 3,
        'mse_opt_num': mse_opt_num
    }
    quant_in_layer = defaultXLayer()
    quant_in_layer = quant_in_layer._replace(
        type=['MSEQuantize'],
        name=X.name + '_quantize_in',
        shapes=bottom_Xs[0].shapes[:],
        attrs=quant_in_attrs,
        bottoms=[new_xlayer_names[X.bottoms[0]], th_in_var_name],
        tops=[],
        subgraph=X.subgraph)
    new_Xs.append(quant_in_layer)

    # KERNEL
    k_in_attrs = {'dtype': 'float32', 'layout': 'None'}
    k_in_X = defaultXLayer()
    k_in_X = k_in_X._replace(
        type=['Input'],
        name=kernel_name,
        shapes=list(W.shape),
        bottoms=[],
        tops=[],
        subgraph=X.subgraph
        # TODO attrs
    )
    new_Xs.append(k_in_X)

    th_params_var_name = X.name + '_th_params'
    th_params_var_attrs = {
        # 'init_value': np.ones(W.shape[0]),  # OIHW
        'dtype': 'float32'
    }
    th_params_var_layer = defaultXLayer()
    th_params_var_layer = th_params_var_layer._replace(
        type=['Variable'],
        name=th_params_var_name,
        shapes=[list(W.shape)[0]],  # NCHW
        attrs=th_params_var_attrs,
        bottoms=[],
        tops=[],
        data=[np.ones(W.shape[0])],
        subgraph=X.subgraph)
    new_Xs.append(th_params_var_layer)

    k_quant_attrs = {
        'quant_bitwidth': bitwidth,
        'dtype': 'float32',  # TODO: input types?
        'axis': 0,  # TODO: OIHW
        'mse_opt_num': mse_opt_num
    }
    k_quant_X = defaultXLayer()
    k_quant_X = k_quant_X._replace(
        type=['MSEQuantize'],
        name=kernel_name + '_quantize',
        shapes=list(W.shape),
        attrs=k_quant_attrs,
        bottoms=[kernel_name, th_params_var_name],
        tops=[],
        subgraph=X.subgraph)
    new_Xs.append(k_quant_X)

    # BIAS
    b_in_attrs = {'dtype': 'float32', 'layout': 'None'}
    b_in_X = defaultXLayer()
    b_in_X = b_in_X._replace(
        type=['Input'],
        name=bias_name,
        shapes=list(B.shape),
        bottoms=[],
        tops=[],
        subgraph=X.subgraph)
    new_Xs.append(b_in_X)

    b_quant_attrs = {
        'quant_bitwidth': bitwidth,
        'dtype': 'float32',  # TODO: input types?
        'axis': 0  # TODO: NCHW
    }
    b_quant_X = defaultXLayer()
    b_quant_X = b_quant_X._replace(
        type=['MSEQuantizeBias'],
        name=bias_name + '_quantize',
        shapes=list(B.shape),
        attrs=b_quant_attrs,
        bottoms=[bias_name, th_in_var_name, th_params_var_name],
        tops=[],
        subgraph=X.subgraph)
    new_Xs.append(b_quant_X)

    X = X._replace(bottoms=[X.name + '_quantize_in'] +
                           [kernel_name + '_quantize',
                            bias_name + '_quantize'])
    new_Xs.append(X)

    # Threshold layer
    th_out_var_name = X.name + '_th_out'
    var_attrs = {
        # 'init_value': np.array(1.),
        'dtype': 'float32'
    }
    var_layer = defaultXLayer()
    var_layer = var_layer._replace(
        type=['Variable'],
        name=th_out_var_name,
        shapes=[1],
        attrs=var_attrs,
        bottoms=[],
        tops=[],
        data=[np.array(1.)],
        subgraph=X.subgraph)
    # variable_layers.add(th_in_var_name)
    new_Xs.append(var_layer)

    # Quantize layer
    quant_attrs = {
        'quant_bitwidth': bitwidth,
        'dtype': 'float32',  # TODO??
        'axis': 1 if X.attrs['data_layout'] == 'NCHW' else 3,
        'mse_opt_num': mse_opt_num
    }
    quant_X = defaultXLayer()
    quant_X = quant_X._replace(
        type=['MSEQuantize'],
        name=X.name + "_quantize",
        shapes=X.shapes,
        attrs=quant_attrs,
        bottoms=[X.name, th_out_var_name],
        tops=[],
        subgraph=X.subgraph)
    new_Xs.append(quant_X)

    return new_Xs
def get_eltwise_quantize_layers(bottom_Xs, X, top_Xs, mse_opt_num, bitwidth,
                                new_xlayer_names):
    # type: (List[XLayer], XLayer, List[XLayer], int, int, dict) -> List[XLayer]
    """ TODO: Make more modular """
    new_Xs = []

    assert len(bottom_Xs) == 2

    # Input 0
    th_in_var_name = X.name + '_th_in_0'
    var_attrs = {
        # 'init_value': np.array(1.),
        'dtype': 'float32'
    }
    var_layer = defaultXLayer()
    var_layer = var_layer._replace(
        type=['Variable'],
        name=th_in_var_name,
        shapes=[1],
        attrs=var_attrs,
        bottoms=[],
        tops=[],
        data=[np.array(1.)],
        subgraph=X.subgraph)
    new_Xs.append(var_layer)

    quant_in_attrs = {
        'quant_bitwidth': bitwidth,
        'dtype': 'float32',
        'axis': 0,
        'mse_opt_num': mse_opt_num
    }
    quant_in_layer = defaultXLayer()
    quant_in_layer = quant_in_layer._replace(
        type=['MSEQuantize'],
        name=X.name + '_quantize_in_0',
        shapes=bottom_Xs[0].shapes[:],
        attrs=quant_in_attrs,
        bottoms=[new_xlayer_names[X.bottoms[0]], th_in_var_name],
        tops=[],
        subgraph=X.subgraph)
    new_Xs.append(quant_in_layer)

    # Input 1
    th_in_var_name = X.name + '_th_in_1'
    var_attrs = {
        # 'init_value': np.array(1.),
        'dtype': 'float32'
    }
    var_layer = defaultXLayer()
    var_layer = var_layer._replace(
        type=['Variable'],
        name=th_in_var_name,
        shapes=[1],
        attrs=var_attrs,
        bottoms=[],
        tops=[],
        data=[np.array(1.)],
        subgraph=X.subgraph)
    new_Xs.append(var_layer)

    quant_in_attrs = {
        'quant_bitwidth': bitwidth,
        'dtype': 'float32',
        'axis': 0,
        'mse_opt_num': mse_opt_num
    }
    quant_in_layer = defaultXLayer()
    quant_in_layer = quant_in_layer._replace(
        type=['MSEQuantize'],
        name=X.name + '_quantize_in_1',
        shapes=bottom_Xs[1].shapes[:],
        attrs=quant_in_attrs,
        bottoms=[new_xlayer_names[X.bottoms[1]], th_in_var_name],
        tops=[],
        subgraph=X.subgraph)
    new_Xs.append(quant_in_layer)

    # Threshold in layer
    th_in_var_name = X.name + '_th_in'
    th_in_var_attrs = {
        # 'init_value': np.array(1.),
        'dtype': 'float32'
    }
    th_in_var_layer = defaultXLayer()
    th_in_var_layer = th_in_var_layer._replace(
        type=['Variable'],
        name=th_in_var_name,
        shapes=[1],
        attrs=th_in_var_attrs,
        bottoms=[],
        data=[np.array(1.)],
        tops=[])
    new_Xs.append(th_in_var_layer)

    quant_eltwise_attrs = {
        'quant_bitwidth': bitwidth,
        'dtype': 'float32',  # TODO??
        'axis': 1,  # TODO NCHW
        'mse_opt_num': mse_opt_num
    }
    X = X._replace(
        type=['MSEQuantizeEltwise'],
        bottoms=[X.name + "_quantize_in_0", X.name + "_quantize_in_0"] +
                [X.name + "_quantize_in_1", X.name + "_quantize_in_1"] +
                [th_in_var_name],
        # bottoms=[X.bottoms[0], X.name + "_quantize_in_0"]
        #     + [X.bottoms[1], X.name + "_quantize_in_1"]
        #     + [th_in_var_name],
        attrs=quant_eltwise_attrs)
    new_Xs.append(X)

    # Threshold layer
    th_out_var_name = X.name + '_th_out'
    var_attrs = {
        # 'init_value': np.array(1.),
        'dtype': 'float32'
    }
    var_layer = defaultXLayer()
    var_layer = var_layer._replace(
        type=['Variable'],
        name=th_out_var_name,
        shapes=[1],
        attrs=var_attrs,
        bottoms=[],
        data=[np.array(1.)],
        tops=[])
    new_Xs.append(var_layer)

    # Quantize layer
    quant_attrs = {
        'quant_bitwidth': bitwidth,
        'dtype': 'float32',  # TODO??
        'axis': 1,  # TODO NCHW
        'mse_opt_num': mse_opt_num
    }
    quant_X = defaultXLayer()
    quant_X = quant_X._replace(
        type=['MSEQuantize'],
        name=X.name + "_quantize",
        shapes=X.shapes,
        attrs=quant_attrs,
        bottoms=[X.name, th_out_var_name],  # eltwise_scale_name, beta_var_name
        tops=[])
    new_Xs.append(quant_X)

    return new_Xs
def get_pooling_quantize_layers(bottom_Xs, X, top_Xs, mse_opt_num, bitwidth,
                                new_xlayer_names):
    # type: (List[XLayer], XLayer, List[XLayer], int, int, dict)
    #   -> List[XLayer]
    """ TODO: Make more modular """
    new_Xs = []

    assert len(bottom_Xs) == 1

    # Input
    th_in_var_name = X.name + '_th_in'
    var_attrs = {
        # 'init_value': np.array(1.),
        'dtype': 'float32'
    }
    var_layer = defaultXLayer()
    var_layer = var_layer._replace(
        type=['Variable'],
        name=th_in_var_name,
        shapes=[1],
        attrs=var_attrs,
        bottoms=[],
        tops=[],
        data=[np.array(1.)],
        subgraph=X.subgraph)
    new_Xs.append(var_layer)

    quant_in_attrs = {
        'quant_bitwidth': bitwidth,
        'dtype': 'float32',
        'axis': 1 if X.attrs['data_layout'] == 'NCHW' else 3,
        'mse_opt_num': mse_opt_num
    }
    quant_in_layer = defaultXLayer()
    quant_in_layer = quant_in_layer._replace(
        type=['MSEQuantize'],
        name=X.name + '_quantize_in',
        shapes=bottom_Xs[0].shapes[:],
        attrs=quant_in_attrs,
        bottoms=[new_xlayer_names[X.bottoms[0]], th_in_var_name],
        tops=[],
        subgraph=X.subgraph)
    new_Xs.append(quant_in_layer)

    # Pooling layer
    X = X._replace(bottoms=[X.name + '_quantize_in'])
    new_Xs.append(X)

    # Threshold layer
    th_out_var_name = X.name + '_th_out'
    var_attrs = {
        # 'init_value': np.array(1.),
        'dtype': 'float32'
    }
    var_layer = defaultXLayer()
    var_layer = var_layer._replace(
        type=['Variable'],
        name=th_out_var_name,
        shapes=[1],
        attrs=var_attrs,
        bottoms=[],
        data=[np.array(1.)],
        tops=[])
    # variable_layers.add(th_in_var_name)
    new_Xs.append(var_layer)

    # Quantize layer
    # Mock quantization for max pooling layers because for max pooling
    # input and output threshold should be equal
    quant_type = 'MSEMockQuantize' if X.attrs['pool_type'] == 'Max' \
        else 'MSEQuantize'
    quant_attrs = {
        'quant_bitwidth': bitwidth,
        'dtype': 'float32',  # TODO??
        'axis': 1 if X.attrs['data_layout'] == 'NCHW' else 3,
        'mse_opt_num': mse_opt_num
    }
    quant_X = defaultXLayer()
    quant_X = quant_X._replace(
        type=[quant_type],
        name=X.name + "_quantize",
        shapes=X.shapes,
        attrs=quant_attrs,
        bottoms=[X.name, th_out_var_name],
        tops=[])
    new_Xs.append(quant_X)

    return new_Xs
def merge_transposes(xgraph, bottom_Xs, X, top_Xs, **kwargs):
    # type: (XGraph, List[XLayer], XLayer, List[XLayer]) -> bool
    """
    Try to merge transpose layers in the XGraph. This is used for supporting
    networks with operations in multiple layouts.

    TRANSFORMATIONS

    Transform
        X --> T
         `--> T
    in
        T --> X
    if X is a valid layer and the transposes are the same

    Transform
        X --> T
    in
        T --> X
    if X is a valid layer
    """
    # Used for getting information on operations
    xop_registry = XOpRegistry()

    # tX_all_transposes = all([tX.type[0] == 'Transpose' for tX in top_Xs])
    tX_all_eq_axes = \
        all([tX.attrs['axes'] == top_Xs[0].attrs['axes'] for tX in top_Xs])

    changes = False

    logger.debug("-- Merge transposes: {}, {}, {}".format(
        [bX.name for bX in bottom_Xs], X.name, [tX.name for tX in top_Xs]))

    # assert len(top_Xs) <= 1 or len(bottom_Xs) <= 1,\
    #     " top_Xs: {}, bottom_Xs: {}".format(top_Xs, bottom_Xs)

    if len(top_Xs) > 0 and tX_all_eq_axes and X.type[0] in ['Transpose']:
        # Check if we have two transposes that cancel each other out
        tX = top_Xs[0]
        axes = X.attrs['axes']
        tX_axes = tX.attrs['axes']

        if [axes[i] for i in tX_axes] == [0, 1, 2, 3]:
            logger.debug("-- -- Merge transposes: bX: {}, X: {}, tX: {}"
                         .format([bX.name for bX in bottom_Xs], X.name,
                                 [tX.name for tX in top_Xs]))
            changes = True
            xgraph.remove(X.name)
            for tX in top_Xs:
                xgraph.remove(tX.name)

    elif len(top_Xs) > 0 and tX_all_eq_axes \
            and X.type[0] in xop_registry.get_xops_with_transpose_transform():
        # ['ReLU', 'BiasAdd', 'Concat', 'Eltwise',
        #  'BatchNorm', 'Scale', 'Pad']
        changes = True

        logger.debug("-- -- Move transpose: bX: {}, X: {}, tX: {}".format(
            [bX.name for bX in bottom_Xs], X.name,
            [tX.name for tX in top_Xs]))

        tX = top_Xs[0]
        tX_name = tX.name
        ttXs = [xgraph.get(tt_name) for tX in top_Xs for tt_name in tX.tops]
        axes = tX.attrs['axes'][:]

        top_names = [tX.name for tX in top_Xs]
        for tX in top_Xs:
            xgraph.remove(tX.name)

        for i, bX in enumerate(bottom_Xs):
            if len(bottom_Xs) > 1:
                t_name = "{}_split_{}".format(i, "_".join(top_names))
            elif len(top_Xs) > 1:
                t_name = "merge_{}".format("_".join(top_names))
            else:
                t_name = tX_name

            t_shape = [bX.shapes[i] for i in axes]
            logger.debug("-- -- t_shape: {}".format(t_shape))
            attrs = {'axes': axes}

            T = xlayer.defaultXLayer()
            T = T._replace(
                name=t_name,
                type=['Transpose'],
                shapes=t_shape,
                sizes=bX.sizes[:],
                layer=[t_name],
                tops=[X.name],
                bottoms=[bX.name],
                internal=1,
                attrs=attrs)
            logger.debug("-- -- insert: {}".format(T))
            xgraph.insert(T)

        # TODO: test this functionality more thoroughly, lots of edge cases
        if len(ttXs) > 0 and\
                all([ttX.subgraph == ttXs[0].subgraph for ttX in ttXs]) and\
                ttXs[0].subgraph is not None:
            logger.debug("-- -- update subgraph of {} from {} to: {}".format(
                X.name, X.subgraph, ttXs[0].subgraph))
            X.subgraph = ttXs[0].subgraph
            # xgraph.update(X.name)
            # xgraph.update(X._replace(
            #     subgraph=ttXs[0].subgraph
            # ))

        # TRANSFORM X
        old_shape = X.shapes[:]
        transpose_transform_func = \
            xop_registry.get_xop_transpose_transform(X.type[0])
        transpose_transform_func(X, axes)
        logger.debug("-- -- X old shapes: {}, axes: {}, new shapes: {}"
                     .format(old_shape, axes, X.shapes))

    return changes
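# The cancellation test above, [axes[i] for i in tX_axes] == [0, 1, 2, 3],
# checks that the composition of the two permutations is the identity. A small
# self-contained illustration (transposes_cancel is a hypothetical helper, not
# part of this code base):

import numpy as np

def transposes_cancel(axes_first, axes_second):
    # Applying axes_first and then axes_second leaves a 4D tensor unchanged
    # exactly when their composition is the identity permutation
    return [axes_first[i] for i in axes_second] == [0, 1, 2, 3]

x = np.random.rand(1, 3, 4, 5)
a1, a2 = [0, 2, 3, 1], [0, 3, 1, 2]  # NCHW -> NHWC and NHWC -> NCHW
assert transposes_cancel(a1, a2)
assert np.array_equal(np.transpose(np.transpose(x, a1), a2), x)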
def sweep_transposes_flow(xgraph, bottom_Xs, X, top_Xs, target=None, **kwargs):
    # type: (XGraph, List[XLayer], XLayer, List[XLayer], str, dict) -> bool
    """
    Sweep transpose layers in the XGraph following the flow of the directed
    graph. If target is specified, only sweep transposes from inside to
    outside target subgraphs. This functionality is used for supporting
    layout transformation for models with subgraphs.

    TRANSFORMATIONS

    Transform
        T --> X
        T ----^
    in
        X --> T
    if X is a valid layer and the transposes are the same

    Transform
        T --> X
    in
        X --> T
    if X is a valid layer
    """
    # Used for getting information on operations
    xop_registry = XOpRegistry()

    bX_all_transposes = all([bX.type[0] == 'Transpose' for bX in bottom_Xs])
    bX_all_eq_axes = all(
        [bX.attrs['axes'] == bottom_Xs[0].attrs['axes'] for bX in bottom_Xs])

    changes = False

    if len(bottom_Xs) > 0 and bX_all_transposes and\
            bX_all_eq_axes and X.type[0] in ['Transpose']:
        # Check if we have two transposes that cancel each other out
        bX = bottom_Xs[0]
        axes = X.attrs['axes']
        bX_axes = bX.attrs['axes']

        if [axes[i] for i in bX_axes] == [0, 1, 2, 3]:
            logger.debug("-- -- Merge transposes: bX: {}, X: {}, tX: {}"
                         .format([bX.name for bX in bottom_Xs], X.name,
                                 [tX.name for tX in top_Xs]))
            changes = True
            xgraph.remove(X.name)
            [xgraph.remove(bX.name) for bX in bottom_Xs]

    elif bX_all_transposes and bX_all_eq_axes and\
            X.type[0] in xop_registry.get_xops_with_transpose_transform():

        logger.debug("-- -- Sweep transpose: bX: {}, X: {}, tX: {}".format(
            [bX.name for bX in bottom_Xs], X.name,
            [tX.name for tX in top_Xs]))

        axes = bottom_Xs[0].attrs['axes'][:]

        # Transposes can have only one input
        assert all([len(bX.bottoms) == 1 for bX in bottom_Xs])
        bbXs = [xgraph.get(bX.bottoms[0]) for bX in bottom_Xs]

        # Sweep transposes outside the subgraph
        if target is None or\
                (X.target == target and
                 all([(bbX.target == target and bbX.subgraph == X.subgraph)
                      for bbX in bbXs])):
            changes = True

            bottom_names = [bX.name for bX in bottom_Xs]
            for bX, bbX in zip(bottom_Xs, bbXs):
                new_tops = [b for b in bX.tops if b != X.name]
                if new_tops == []:
                    xgraph.remove(bX.name)
                    # Important to not touch bX after removal
                    continue
                else:
                    bX.tops = new_tops

                bbX.tops.append(X.name)
                X.bottoms = [b if b != bX.name else bbX.name
                             for b in X.bottoms]

            if len(top_Xs) > 0:
                # Insert transposes
                for i, tX in enumerate(top_Xs):
                    if len(top_Xs) > 1:
                        t_name = "{}_split_{}".format(
                            i, "_".join(bottom_names))
                    elif len(bottom_Xs) > 1:
                        t_name = "merge_{}".format("_".join(bottom_names))
                    else:
                        t_name = "moved_" + bX.name

                    t_shape = X.shapes[:]
                    logger.debug("-- -- t_shape: {}".format(t_shape))
                    attrs = {'axes': axes}

                    T = xlayer.defaultXLayer()
                    T = T._replace(
                        name=t_name,
                        type=['Transpose'],
                        shapes=t_shape,
                        sizes=X.sizes[:],
                        layer=[t_name],
                        tops=[tX.name],
                        bottoms=[X.name],
                        internal=1,
                        attrs=attrs)
                    # logger.debug("-- -- insert: {}".format(T))
                    xgraph.insert(T)
            else:
                # No top layers: Insert 1 transpose
                if len(bottom_Xs) > 1:
                    t_name = "merge_{}".format("_".join(bottom_names))
                else:
                    t_name = "moved_" + bX.name

                t_shape = X.shapes[:]
                logger.debug("-- -- t_shape: {}".format(t_shape))
                attrs = {'axes': axes}

                T = xlayer.defaultXLayer()
                T = T._replace(
                    name=t_name,
                    type=['Transpose'],
                    shapes=t_shape,
                    sizes=X.sizes[:],
                    layer=[t_name],
                    tops=[],
                    bottoms=[X.name],
                    internal=1,
                    attrs=attrs)
                xgraph.add(T)

            # TRANSFORM X
            axes_t = [axes[i] for i in axes]
            old_shape = X.shapes[:]
            transpose_transform_func = \
                xop_registry.get_xop_transpose_transform(X.type[0])
            transpose_transform_func(X, axes_t)
            logger.debug("-- -- X old shapes: {}, axes: {}, new shapes: {}"
                         .format(old_shape, axes_t, X.shapes))

    return changes
def get_concat_quantization_layers(bottom_Ps, P, top_Ps, quant_params):
    # type: (Dict[str, XLayer], XLayer, Dict[str, XLayer], Dict[str, dict])
    #   -> List[XLayer]
    new_Ps = []

    if P.name in quant_params:
        # Quantize bottoms
        for idx, bottom in enumerate(P.bottoms):
            attrs = {
                # 'quant_params': {
                #     'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
                #     'quant_threshold': [quant_params[P.name]['th_layer_in']]
                # },
                'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
                'quant_threshold': [quant_params[P.name]['th_layer_in']],
                'dtype': 'int8',
                'input_types': ['float32'],
                'axis': 1  # TODO: NCHW
            }
            quant_bottom = defaultXLayer()
            quant_bottom = quant_bottom._replace(
                type=['Quantize'],
                name=bottom + "_quantize",
                shapes=bottom_Ps[idx].shapes,
                attrs=attrs,
                bottoms=[bottom],
                tops=[],
                targets=[])
            new_Ps.append(quant_bottom)

        P = P._replace(
            bottoms=[bottom + "_quantize" for bottom in P.bottoms],
            tops=[])
    else:
        P = P._replace(tops=[])

    new_Ps.append(P)

    if P.name in quant_params:
        # NO QuantizeInter layer
        # TODO How to handle quantization for concat layers after concat
        #      layers? See DenseNet kind of architectures.

        # UNQUANTIZE LAYER
        attrs = {
            # 'quant_params': {
            #     'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            #     'quant_threshold': [quant_params[P.name]['th_layer_out']]
            # },
            'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            'quant_threshold': [quant_params[P.name]['th_layer_out']],
            'dtype': 'float32',
            'input_types': ['int8'],
            'axis': 1  # TODO: NCHW
        }
        unquant_P = defaultXLayer()
        unquant_P = unquant_P._replace(
            type=['UnQuantize'],
            name=P.name + "_unquantize",
            shapes=P.shapes,
            attrs=attrs,
            bottoms=[P.name],
            tops=[],
            targets=[])
        new_Ps.append(unquant_P)

    return new_Ps
def get_eltwise_quantization_layers(bottom_Ps, P, top_Ps, quant_params):
    # type: (Dict[str, XLayer], XLayer,
    #        Dict[str, XLayer], Dict[str, dict]) -> List[XLayer]
    """ TODO: Make more modular """
    new_Ps = []

    assert len(bottom_Ps) == 2

    if P.name in quant_params:
        # Quantize bottoms
        for idx, bottom in enumerate(P.bottoms):
            attrs = {
                # 'quant_params': {
                #     'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
                #     'quant_threshold': [quant_params[P.name]['th_layer_in']]
                # },
                'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
                'quant_threshold': [quant_params[P.name]['th_layer_in']],
                'dtype': 'int8',
                'input_types': ['float32'],
                'axis': 1  # NCHW
                # TODO: should elemwise be broadcastable??
            }
            quant_bottom = defaultXLayer()
            quant_bottom = quant_bottom._replace(
                type=['Quantize'],
                name=bottom + "_quantize",
                shapes=bottom_Ps[idx].shapes,
                attrs=attrs,
                bottoms=[bottom])
            new_Ps.append(quant_bottom)

        P = P._replace(
            bottoms=[bottom + "_quantize" for bottom in P.bottoms],
            tops=[])
    else:
        P = P._replace(tops=[])

    new_Ps.append(P)

    if P.name in quant_params:
        # Quantize inter layers
        attrs = {
            # 'quant_params': {
            #     'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            #     'scale': quant_params[P.name]['scale'],
            #     'postscale_shift': quant_params[P.name]['postscale_shift'],
            #     'prescale_shift': quant_params[P.name]['prescale_shift']
            # },
            'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            'quant_scale': quant_params[P.name]['scale'],
            'quant_postscale_shift': quant_params[P.name]['postscale_shift'],
            'quant_prescale_shift': quant_params[P.name]['prescale_shift'],
            'dtype': 'int8',  # TODO??
            'input_types': ['int32'],
            'axis': 1  # TODO NCHW
        }
        quant_inter_P = defaultXLayer()
        quant_inter_P = quant_inter_P._replace(
            type=['QuantizeInter'],
            name=P.name + "_quantize_inter",
            shapes=P.shapes,
            attrs=attrs,
            bottoms=[P.name])
        new_Ps.append(quant_inter_P)

        # UNQUANTIZE LAYER
        attrs = {
            # 'quant_params': {
            #     'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            #     'quant_threshold': [quant_params[P.name]['th_layer_out']]
            # },
            'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            'quant_threshold': [quant_params[P.name]['th_layer_out']],
            'dtype': 'float32',
            'input_types': ['int8'],
            'axis': 1  # TODO: NCHW
        }
        unquant_P = defaultXLayer()
        unquant_P = unquant_P._replace(
            type=['UnQuantize'],
            name=P.name + "_unquantize",
            shapes=P.shapes,
            attrs=attrs,
            bottoms=[P.name + "_quantize_inter"])
        new_Ps.append(unquant_P)

    return new_Ps
def get_pooling_quantization_layers(bottom_Ps, P, top_Ps, quant_params):
    # type: (List[XLayer], XLayer, List[XLayer], QuantParams)
    #   -> List[XLayer]
    """ TODO: Make more modular """
    new_Ps = []

    # TODO: Can we do better?
    # Maxpool layers quant params are stored in the quantization file under
    # another name, otherwise it messes up maxpool computation on FPGA
    quant_name = P.name if P.name in quant_params else P.name + "_QUANT_UTIL"

    if quant_name in quant_params:
        # Quantize bottoms
        attrs = {
            # 'quant_params': {
            #     'quant_bitwidth': quant_params[quant_name]['bw_layer_in'],
            #     'quant_threshold': [quant_params[quant_name]['th_layer_in']]
            # },
            'quant_bitwidth': quant_params[quant_name]['bw_layer_in'],
            'quant_threshold': [quant_params[quant_name]['th_layer_in']],
            'dtype': 'int8',
            'input_types': ['float32'],
            'axis': 1  # TODO: NCHW
        }
        assert len(P.bottoms) == 1
        quant_bottom = defaultXLayer()
        quant_bottom = quant_bottom._replace(
            type=['Quantize'],
            name=P.bottoms[0] + "_quantize",
            shapes=bottom_Ps[0].shapes,
            attrs=attrs,
            bottoms=[P.bottoms[0]],
            tops=[P.name])
        new_Ps.append(quant_bottom)

        # NOTE: quantization parameters include the division part of the
        # average pooling operation. Therefore we want to use an AvgPool
        # layer without the division part
        # (instead just the sum of the elements).
        P = P._replace(
            type=['PoolingNoDivision'],
            bottoms=[bottom + "_quantize" for bottom in P.bottoms],
            tops=[])
    else:
        P = P._replace(tops=[])

    new_Ps.append(P)

    if quant_name in quant_params:
        # Quantize inter layers
        attrs = {
            # 'quant_params': {
            #     'quant_bitwidth': quant_params[quant_name]['bw_layer_in'],
            #     'scale': quant_params[quant_name]['scale'],
            #     'postscale_shift':
            #         quant_params[quant_name]['postscale_shift'],
            #     'prescale_shift': quant_params[quant_name]['prescale_shift']
            # },
            'quant_bitwidth': quant_params[quant_name]['bw_layer_in'],
            'quant_scale': quant_params[quant_name]['scale'],
            'quant_postscale_shift':
                quant_params[quant_name]['postscale_shift'],
            'quant_prescale_shift':
                quant_params[quant_name]['prescale_shift'],
            'dtype': 'int8',  # TODO??
            'input_types': ['int8'],
            'axis': 1  # TODO NCHW
        }
        quant_inter_P = defaultXLayer()
        quant_inter_P = quant_inter_P._replace(
            type=['QuantizeInter'],
            name=P.name + "_quantize_inter",
            shapes=P.shapes,
            attrs=attrs,
            bottoms=[P.name])
        new_Ps.append(quant_inter_P)

        # UNQUANTIZE LAYER
        attrs = {
            # 'quant_params': {
            #     'quant_bitwidth': quant_params[quant_name]['bw_layer_in'],
            #     'quant_threshold': [quant_params[quant_name]['th_layer_out']]
            # },
            'quant_bitwidth': quant_params[quant_name]['bw_layer_in'],
            'quant_threshold': [quant_params[quant_name]['th_layer_out']],
            'dtype': 'float32',
            'input_types': ['int8'],
            'axis': 1  # TODO: NCHW
        }
        unquant_P = defaultXLayer()
        unquant_P = unquant_P._replace(
            type=['UnQuantize'],
            name=P.name + "_unquantize",
            shapes=P.shapes,
            attrs=attrs,
            bottoms=[P.name + "_quantize_inter"])
        new_Ps.append(unquant_P)

    return new_Ps
def get_scale_quantization_layers(bottom_Ps, P, top_Ps, quant_params):
    # type: (Dict[str, XLayer], XLayer, Dict[str, XLayer], Dict[str, dict])
    #   -> List[XLayer]
    """ TODO: Make more modular """
    new_Ps = []

    G, B = P.data.gamma, P.data.beta
    gamma_name, beta_name = P.name + "_gamma", P.name + "_beta"

    # Scaling is executed as an elementwise layer in combination with
    # quantization scaling
    # ! Ignore gamma scaling values (they are already incorporated in
    #   quantization parameters)

    # BETA
    b_in_attrs = {'dtype': 'float32', 'layout': 'None'}
    b_in_P = defaultXLayer()
    b_in_P = b_in_P._replace(
        type=['Constant'],
        name=beta_name,
        shapes=list(B.shape),
        bottoms=[],
        tops=[],
        attrs=b_in_attrs,
        targets=[],
        layer=[],
        data=[B])
    new_Ps.append(b_in_P)

    if P.name in quant_params:
        b_quant_attrs = {
            # 'quant_params': {
            #     'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            #     # 'quant_threshold': [quant_params[P.name]['th_layer_in']],
            #     # 'th_params': quant_params[P.name]['th_params'],
            #     'th_out': [quant_params[P.name]['th_layer_out']],
            #     'scale': [quant_params[P.name]['scale']],
            #     'postscale_shift': [quant_params[P.name]['postscale_shift']]
            #     # [th_param * 127 for th_param in quant_params[P.name]
            #     #  ['th_params']]
            #     # TODO Add quant beta layer to avoid multiplication by 127
            # },
            'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            'quant_th_out': [quant_params[P.name]['th_layer_out']],
            'quant_scale': quant_params[P.name]['scale'],
            'quant_postscale_shift': quant_params[P.name]['postscale_shift'],
            'dtype': 'int32',
            'input_types': ['float32'],
            'axis': 0  # TODO: C
        }
        b_quant_P = defaultXLayer()
        b_quant_P = b_quant_P._replace(
            type=['QuantizeScaleBias'],
            name=beta_name + "_quantize",
            shapes=list(B.shape),
            attrs=b_quant_attrs,
            bottoms=[beta_name],
            tops=[],
            targets=[])
        new_Ps.append(b_quant_P)

    if P.name in quant_params:
        # INPUT
        # Quantize bottoms
        attrs = {
            # 'quant_params': {
            #     'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            #     'quant_threshold': [quant_params[P.name]['th_layer_in']]
            # },
            'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            'quant_threshold': [quant_params[P.name]['th_layer_in']],
            'dtype': 'int8',
            'input_types': ['float32'],
            'axis': 0  # TODO: NCHW
        }
        assert len(P.bottoms) == 1
        quant_bottom = defaultXLayer()
        quant_bottom = quant_bottom._replace(
            type=['Quantize'],
            name=P.bottoms[0] + "_quantize",
            shapes=bottom_Ps[0].shapes,
            attrs=attrs,
            bottoms=[P.bottoms[0]],
            tops=[],
            targets=[])
        new_Ps.append(quant_bottom)

        P = P._replace(
            bottoms=[bottom + "_quantize" for bottom in P.bottoms] +
                    [beta_name + '_quantize'],
            tops=[])
    else:
        P = P._replace(bottoms=P.bottoms + [beta_name], tops=[])

    # Move relu from eltwise layer to QuantizeInter layer if applicable
    # because of negative scaling case
    is_relu = 'activation' in P.attrs and P.attrs['activation'] == 'ReLU'

    # P = P._replace(
    #     type=['Eltwise'],
    #     data=P.data.beta,
    #     targets=[],
    #     layer=[]
    # )
    P.attrs['dtype'] = 'int32'
    P = P._replace(
        type=['BiasAdd'],
        data=[P.data.beta],
        targets=[],
        layer=[])
    new_Ps.append(P)

    if P.name in quant_params:
        # Quantize inter layers
        attrs = {
            # 'quant_params': {
            #     'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            #     'scale': quant_params[P.name]['scale'],
            #     'postscale_shift': quant_params[P.name]['postscale_shift'],
            #     'prescale_shift': quant_params[P.name]['prescale_shift']
            # },
            'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            'quant_scale': quant_params[P.name]['scale'],
            'quant_postscale_shift': quant_params[P.name]['postscale_shift'],
            'quant_prescale_shift': quant_params[P.name]['prescale_shift'],
            'dtype': 'int8',  # TODO??
            'input_types': ['int32'],
            'axis': 1  # TODO NCHW
        }
        if is_relu:
            attrs['activation'] = 'ReLU'

        quant_inter_P = defaultXLayer()
        quant_inter_P = quant_inter_P._replace(
            type=['QuantizeInter'],
            name=P.name + "_quantize_inter",
            shapes=P.shapes,
            attrs=attrs,
            bottoms=[P.name],
            tops=[])
        new_Ps.append(quant_inter_P)

        # UNQUANTIZE LAYER
        attrs = {
            # 'quant_params': {
            #     'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            #     'quant_threshold': [quant_params[P.name]['th_layer_out']]
            # },
            'quant_bitwidth': quant_params[P.name]['bw_layer_in'],
            'quant_threshold': [quant_params[P.name]['th_layer_out']],
            'dtype': 'float32',
            'input_types': ['int8'],
            'axis': 0  # TODO: NCHW
        }
        unquant_P = defaultXLayer()
        unquant_P = unquant_P._replace(
            type=['UnQuantize'],
            name=P.name + "_unquantize",
            shapes=P.shapes[:],
            attrs=attrs,
            bottoms=[P.name + "_quantize_inter"])
        new_Ps.append(unquant_P)

    # new_Ps.extend(get_eltwise_quantization_layers(bottom_Ps, P, top_Ps,
    #                                               quant_params))

    return new_Ps
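# The QuantizeInter layers above carry quant_scale, quant_prescale_shift and
# quant_postscale_shift attributes. As an illustration only, assuming the
# common integer "shift, multiply, shift" requantization scheme, an int32
# accumulator could be brought back to int8 roughly as follows (requantize is
# a hypothetical helper, not part of this code base; the exact rounding and
# saturation used by QuantizeInter may differ):

import numpy as np

def requantize(acc, scale, prescale_shift, postscale_shift, bitwidth=8):
    # Scale an int32 accumulator down to the int8 range with shifts and an
    # integer multiplier
    q_max = 2 ** (bitwidth - 1) - 1
    y = (acc.astype(np.int64) >> prescale_shift) * scale
    y = y >> postscale_shift
    return np.clip(y, -q_max - 1, q_max).astype(np.int8)

acc = np.array([-70000, -1024, 0, 1024, 70000], dtype=np.int32)
print(requantize(acc, scale=77, prescale_shift=0, postscale_shift=10))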