def add_copy(self, src_name, src_batch_stride, src_stride, src_offset,
             dst_name, dst_batch_stride, dst_stride, dst_offset,
             length, output_name, src_index_name=None, dst_index_name=None):
    src_index = None
    dst_index = None
    if src_index_name is None:
        layer = caffe_net.LayerParameter(name=output_name, type='Copy',
                                         bottom=[src_name, dst_name],
                                         top=[output_name])
    else:
        layer = caffe_net.LayerParameter(
            name=output_name, type='Copy',
            bottom=[src_name, dst_name, src_index_name, dst_index_name],
            top=[output_name])
        src_index = self.data_dict[src_index_name]
        dst_index = self.data_dict[dst_index_name]
    layer.copy_param(src_batch_stride, src_stride, src_offset,
                     dst_batch_stride, dst_stride, dst_offset, length)
    self.caffe_model.add_layer(layer)
    self.data_dict[dst_name] = Operators.copy(
        self.data_dict[src_name], src_batch_stride, src_stride, src_offset,
        self.data_dict[dst_name], dst_batch_stride, dst_stride, dst_offset,
        length, dst_name, src_index, dst_index)
    return dst_name
def add_reshape(self, input_name, output_name, shape):
    layer = caffe_net.LayerParameter(name=output_name, type='Reshape',
                                     bottom=[input_name], top=[output_name])
    layer.reshape_param(shape)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.reshape(
        self.data_dict[input_name], shape, output_name)
    return output_name
def add_transpose(self, input_name, output_name, dim):
    layer = caffe_net.LayerParameter(name=output_name, type='Permute',
                                     bottom=[input_name], top=[output_name])
    layer.permute_param(dim)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.transpose(
        self.data_dict[input_name], dim, output_name)
    return output_name
def add_tile(self, input_name, loops, axis, output_name):
    layer = caffe_net.LayerParameter(name=output_name, type='Tile',
                                     bottom=[input_name], top=[output_name])
    layer.tile_param(axis, loops)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.tile(
        self.data_dict[input_name], loops, axis, output_name)
    return output_name
def generate(self, input=None):
    # batch_seq_length = 16
    input_text_name = "input_text"
    input_text_shape = [self.batch, 16]
    self.add_input(input_text_name, input_text_shape)
    self.set_input(input)

    # embedding
    x = self.extract_embedding(input_text_name, 0, "emb_table", "embedding_lookup")

    # bilstm
    x = self.extract_lstm(x, None, "BiLSTM", 0, steps=-2,
                          scope_name=["BiLSTM/fw/lstm_cell", "BiLSTM/bw/lstm_cell"])

    # FC
    weight = self.get_weight("W")
    bias = self.get_weight("b")
    layer = caffe_net.LayerParameter("wb_fc_output", type='InnerProduct',
                                     bottom=[x], top=["wb_fc_output"])
    num_output = len(weight[0])
    weight = weight.transpose((1, 0))
    layer.inner_product_param(num_output, bias_term=bias is not None)
    if bias is not None and len(bias) != num_output:
        print("[ERROR] generate: fully-connected bias size mismatch")
        exit(1)
    layer.add_data(weight, bias)
    self.caffe_model.add_layer(layer)
    self.data_dict["wb_fc_output"] = Operators.fully_connect(
        self.data_dict[x], weight.transpose((1, 0)), bias, "wb_fc_output")
    x = "wb_fc_output"

    # softmax
    x = self.add_softmax(x, "softmax_output", -1)

    # argmax
    x = self.add_argmax(x, -1, "output")
    self.save_caffe_model()
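# A numpy reference (a sketch, not part of the converter) for the FC -> softmax ->
# argmax tail that generate() emits. It assumes Operators.fully_connect computes
# x @ W + b with W in TensorFlow's [in, out] layout, which is consistent with the
# double transpose above (the layer itself stores the Caffe [out, in] copy).
import numpy as np

def fc_softmax_argmax_reference(x, W, b):
    # x: [batch, seq, in_dim], W: [in_dim, out_dim], b: [out_dim]
    logits = x @ W + b
    e = np.exp(logits - logits.max(axis=-1, keepdims=True))  # numerically stable softmax
    probs = e / e.sum(axis=-1, keepdims=True)
    return probs.argmax(axis=-1)  # predicted label per token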
def add_relu6(self, input_name, output_name):
    layer = caffe_net.LayerParameter(name=output_name, type='ReLU6',
                                     bottom=[input_name], top=[output_name])
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.relu(
        self.data_dict[input_name], output_name, max_value=6)
    return output_name
def add_sigmoid(self, input_name, output_name):
    layer = caffe_net.LayerParameter(name=output_name, type='Sigmoid',
                                     bottom=[input_name], top=[output_name])
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.sigmoid(
        self.data_dict[input_name], output_name)
    return output_name
def extract_batch_norm(self, input_name, output_name, scope_id,
                       data_format="NCHW", axis=1, eps=1e-3,
                       layer_names=["bn", "moving_mean", "moving_variance"]):
    assert data_format == "NCHW"
    mean, var = self.get_weights(scope_id, layer_names)
    layer = caffe_net.LayerParameter(name=output_name, type='BatchNorm',
                                     bottom=[input_name], top=[output_name])
    layer.batch_norm_param(axis=axis, eps=eps)
    layer.add_data(mean, var)
    self.caffe_model.add_layer(layer)
    if self.data_dict[input_name] is not None:
        input_data, input_shape, inv_transpose_dims = \
            self.preprocess_nchwc8_nchw_input(input_name, axis)
        output_data = Operators.batch_norm(input_data, mean, var, eps, output_name)
        self.data_dict[output_name] = self.postprocess_nchwc8_nchw_output(
            output_data, input_shape, inv_transpose_dims)
    else:
        self.data_dict[output_name] = None
    # fold an optional affine (gamma/beta) that follows the batch norm
    gamma = self.get_weights(scope_id, [layer_names[0], "gamma"])
    if gamma is not None:
        scale_name = self.extract_scale(output_name, output_name + "_s", scope_id,
                                        data_format, axis,
                                        [layer_names[0], "gamma", "beta"])
        self.data_dict[output_name] = self.data_dict[scale_name]
        output_name = scale_name
    return output_name
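# A numpy sketch of the inference-time normalization extract_batch_norm serializes,
# assuming Operators.batch_norm follows the standard formula with the stored moving
# statistics (no batch statistics at inference).
import numpy as np

def batch_norm_reference(x_nchw, mean, var, eps=1e-3):
    return (x_nchw - mean[None, :, None, None]) / np.sqrt(var[None, :, None, None] + eps)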
def extract_dense(self, input_name, output_name, scope_id, scope_name="dense"):
    if isinstance(scope_name, str):
        layer_names = [scope_name, "kernel", "bias"]
    elif isinstance(scope_name, list):
        layer_names = scope_name
    else:
        print("[ERROR] unsupported dense scope_name")
        exit(1)
    kernel, bias = self.get_weights(scope_id, layer_names)
    layer = caffe_net.LayerParameter(name=output_name, type='InnerProduct',
                                     bottom=[input_name], top=[output_name])
    num_output = len(kernel[0])
    kernel = kernel.transpose((1, 0))
    layer.inner_product_param(num_output, bias_term=bias is not None)
    if bias is not None:
        if len(bias) != num_output:
            print("[ERROR] extract_dense failed")
            exit(1)
        layer.add_data(kernel, bias)
    else:
        layer.add_data(kernel)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.fully_connect(
        self.data_dict[input_name], kernel.transpose((1, 0)), bias, output_name)
    return output_name
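# Why extract_dense transposes the kernel: TensorFlow's dense kernel is stored
# [in_dim, out_dim], while Caffe's InnerProduct expects [out_dim, in_dim] and
# computes y = x @ W.T. A quick numpy check of that layout relationship:
import numpy as np

x = np.random.rand(2, 8)           # [batch, in_dim]
k_tf = np.random.rand(8, 4)        # TensorFlow layout [in, out]
k_caffe = k_tf.transpose((1, 0))   # Caffe layout [out, in]
assert np.allclose(x @ k_tf, x @ k_caffe.T)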
def add_l2norm(self, input_name, output_name):
    layer = caffe_net.LayerParameter(name=output_name, type='L2Norm',
                                     bottom=[input_name], top=[output_name])
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.l2_norm(
        self.data_dict[input_name], output_name)
    return output_name
def add_swish(self, input_name, output_name, beta=1.0):
    layer = caffe_net.LayerParameter(name=output_name, type='Swish',
                                     bottom=[input_name], top=[output_name])
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.swish(
        self.data_dict[input_name], beta, output_name)
    return output_name
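# The standard definition behind add_swish, as a one-line numpy sketch
# (assuming Operators.swish implements x * sigmoid(beta * x)):
import numpy as np

def swish_reference(x, beta=1.0):
    return x / (1.0 + np.exp(-beta * x))  # equals x * sigmoid(beta * x)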
def add_expand_dims(self, input_name, axis, output_name):
    layer = caffe_net.LayerParameter(name=output_name, type='Unsqueeze',
                                     bottom=[input_name], top=[output_name])
    layer.unsqueeze_param(axis)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.expand_dims(
        self.data_dict[input_name], axis, output_name)
    return output_name
def add_input(self, output_name, input_shape):
    layer = caffe_net.LayerParameter(name=output_name, type='Input',
                                     top=[output_name])
    layer.input_param(input_shape)
    self.caffe_model.add_layer(layer)
    return output_name
def extract_lstm(self, input_name, state_name, output_name, scope_id,
                 steps=-1, scope_name="basic_lstm_cell"):
    self.scopes[scope_id] = scope_name
    self.scopes[scope_id + 1] = "kernel"
    kernel_name = self.generate_name(self.scopes, scope_id + 2)
    kernel = self.get_tensor(kernel_name)
    self.scopes[scope_id + 1] = "bias"
    bias_name = self.generate_name(self.scopes, scope_id + 2)
    bias = self.get_tensor(bias_name)
    layer = caffe_net.LayerParameter(name=output_name, type='LSTM',
                                     bottom=[input_name, state_name],
                                     top=[output_name])
    # the TF kernel packs all four gates, so its width is 4 * hidden size
    num_output_4 = len(kernel[0])
    if bias is not None and len(bias) != num_output_4:
        print("[ERROR] extract_lstm failed")
        exit(1)
    num_output = num_output_4 // 4
    layer.lstm_param(num_output, steps)
    layer.add_data(kernel.transpose([1, 0]), bias)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name], self.data_dict[state_name] = Operators.lstm(
        self.data_dict[input_name], self.data_dict[state_name],
        kernel, bias, output_name, state_name)
    return output_name
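# A single-step numpy reference for the cell extract_lstm serializes, assuming
# TensorFlow BasicLSTMCell conventions: kernel is [in_dim + hidden, 4 * hidden]
# with gates concatenated in (i, j, f, o) order. Whether Operators.lstm also adds
# TF's forget_bias is an assumption; it is omitted here.
import numpy as np

def lstm_step_reference(x, h, c, kernel, bias):
    sigmoid = lambda v: 1.0 / (1.0 + np.exp(-v))
    z = np.concatenate([x, h]) @ kernel + bias
    i, j, f, o = np.split(z, 4)
    c_new = sigmoid(f) * c + sigmoid(i) * np.tanh(j)
    h_new = sigmoid(o) * np.tanh(c_new)
    return h_new, c_new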
def extract_group_norm(self, input_name, groups, output_name, scope_id,
                       data_format="NCHW", axis=1,
                       layer_names=["GroupNorm", "gamma", "beta"]):
    assert data_format == "NCHW"
    gamma, beta = self.get_weights(scope_id, layer_names)
    layer = caffe_net.LayerParameter(name=output_name, type='GroupNorm',
                                     bottom=[input_name], top=[output_name])
    layer.add_data(gamma, beta)
    layer.group_norm_param(groups)
    self.caffe_model.add_layer(layer)
    if self.data_dict[input_name] is not None:
        input_data, input_shape, inv_transpose_dims = \
            self.preprocess_nchwc8_nchw_input(input_name, axis)
        output_data = Operators.group_norm(input_data, groups, gamma, beta, output_name)
        self.data_dict[output_name] = self.postprocess_nchwc8_nchw_output(
            output_data, input_shape, inv_transpose_dims)
    else:
        self.data_dict[output_name] = None
    return output_name
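# A numpy sketch of the standard group normalization extract_group_norm serializes;
# the eps value is an assumption (Operators.group_norm may use a different default).
import numpy as np

def group_norm_reference(x_nchw, groups, gamma, beta, eps=1e-5):
    n, c, h, w = x_nchw.shape
    g = x_nchw.reshape(n, groups, c // groups, h, w)
    mean = g.mean(axis=(2, 3, 4), keepdims=True)
    var = g.var(axis=(2, 3, 4), keepdims=True)
    g = (g - mean) / np.sqrt(var + eps)
    return g.reshape(n, c, h, w) * gamma[None, :, None, None] + beta[None, :, None, None]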
def add_relative_shift(self, input_name, output_name, axis, shift_length):
    layer = caffe_net.LayerParameter(name=output_name, type='RelativeShift',
                                     bottom=[input_name], top=[output_name])
    layer.relative_shift_param(axis, shift_length)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.relative_shift(
        self.data_dict[input_name], axis, shift_length, output_name)
    return output_name
def extract_dense(self, input_name, dense_name, scope_id, scope_name="dense"):
    self.scopes[scope_id] = scope_name
    self.scopes[scope_id + 1] = "kernel"
    kernel_name = self.generate_name(self.scopes, scope_id + 2)
    kernel = self.get_tensor(kernel_name)
    self.scopes[scope_id + 1] = "bias"
    bias_name = self.generate_name(self.scopes, scope_id + 2)
    bias = self.get_tensor(bias_name)
    layer = caffe_net.LayerParameter(name=dense_name, type='InnerProduct',
                                     bottom=[input_name], top=[dense_name])
    num_output = len(kernel[0])
    kernel = kernel.transpose((1, 0))
    layer.inner_product_param(num_output, bias_term=bias is not None)
    if bias is not None:
        if len(bias) != num_output:
            print("[ERROR] extract_dense failed")
            exit(1)
        layer.add_data(kernel, bias)
    else:
        layer.add_data(kernel)
    self.caffe_model.add_layer(layer)
    self.data_dict[dense_name] = Operators.fully_connect(
        self.data_dict[input_name], kernel.transpose((1, 0)), bias, dense_name)
    return dense_name
def add_memory(self, memory_name, memory_shapes, data_type):
    layer = caffe_net.LayerParameter(name=memory_name + "_mem",
                                     type='PreAllocatedMemory',
                                     top=[memory_name])
    layer.memory_param(memory_shapes, data_type)
    self.caffe_model.add_layer(layer)
    self.data_dict[memory_name] = Operators.zeros(memory_shapes, memory_name)
    return memory_name
def add_pad(self, input_name, output_name, padding_shapes, padding_values=None):
    layer = caffe_net.LayerParameter(name=output_name, type='Pad',
                                     bottom=[input_name], top=[output_name])
    layer.padding_param(padding_shapes, padding_values)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.pad(
        self.data_dict[input_name], padding_shapes, padding_values, output_name)
    return output_name
def add_power(self, input_name, output_name, scale=1, shift=0, power=1):
    layer = caffe_net.LayerParameter(name=output_name, type='Power',
                                     bottom=[input_name], top=[output_name])
    layer.power_param(scale, shift, power)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.power(
        self.data_dict[input_name], scale, shift, power, output_name)
    return output_name
def add_argmax(self, input_name, axis, output_name):
    layer = caffe_net.LayerParameter(name=output_name, type='ArgMax',
                                     bottom=[input_name], top=[output_name])
    layer.argmax_param(axis)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.argmax(
        self.data_dict[input_name], axis, output_name)
    return output_name
def extract_scale(self, input_name, output_name, scope_id,
                  data_format="NCHW", axis=1,
                  layer_names=["scale", "gamma", "beta"]):
    assert data_format == "NCHW"
    gamma, beta = self.get_weights(scope_id, layer_names)
    layer = caffe_net.LayerParameter(name=output_name, type='Scale',
                                     bottom=[input_name], top=[output_name])
    if beta is not None:
        layer.scale_param(axis=axis, bias_term=True)
        layer.add_data(gamma, beta)
    else:
        layer.scale_param(axis=axis, bias_term=False)
        layer.add_data(gamma)
    self.caffe_model.add_layer(layer)
    if self.data_dict[input_name] is not None:
        input_data, input_shape, inv_transpose_dims = \
            self.preprocess_nchwc8_nchw_input(input_name, axis)
        output_data = Operators.scale(input_data, gamma, beta, output_name)
        self.data_dict[output_name] = self.postprocess_nchwc8_nchw_output(
            output_data, input_shape, inv_transpose_dims)
    else:
        self.data_dict[output_name] = None
    return output_name
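# A numpy sketch of the per-channel affine that extract_scale emits (Caffe Scale
# along axis=1 of an NCHW tensor): x[:, c] * gamma[c] (+ beta[c] when bias_term).
import numpy as np

def scale_reference(x_nchw, gamma, beta=None):
    y = x_nchw * gamma[None, :, None, None]
    if beta is not None:
        y = y + beta[None, :, None, None]
    return y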
def extract_layer_norm(self, input_name, layer_norm_name, scope_id, layer_names=None):
    if layer_names is None:
        self.scopes[scope_id] = "LayerNorm"
        self.scopes[scope_id + 1] = "gamma"
        gamma_name = self.generate_name(self.scopes, scope_id + 2)
        self.scopes[scope_id + 1] = "beta"
        beta_name = self.generate_name(self.scopes, scope_id + 2)
    else:
        self.scopes[scope_id] = layer_names[0]
        self.scopes[scope_id + 1] = layer_names[1]
        gamma_name = self.generate_name(self.scopes, scope_id + 2)
        self.scopes[scope_id + 1] = layer_names[2]
        beta_name = self.generate_name(self.scopes, scope_id + 2)
    gamma = self.get_tensor(gamma_name)
    beta = self.get_tensor(beta_name)
    layer = caffe_net.LayerParameter(name=layer_norm_name, type='LayerNorm',
                                     bottom=[input_name], top=[layer_norm_name])
    layer.add_data(gamma, beta)
    self.caffe_model.add_layer(layer)
    self.data_dict[layer_norm_name] = Operators.layer_norm(
        self.data_dict[input_name], gamma, beta, layer_norm_name)
    return layer_norm_name
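# A numpy sketch of the normalization extract_layer_norm serializes, assuming the
# usual convention of normalizing over the last axis of a [batch, seq, hidden]
# tensor; the eps default is an assumption.
import numpy as np

def layer_norm_reference(x, gamma, beta, eps=1e-6):
    mean = x.mean(axis=-1, keepdims=True)
    var = x.var(axis=-1, keepdims=True)
    return (x - mean) / np.sqrt(var + eps) * gamma + beta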
def add_reduce_mean(self, input_name, axis, keep_dim, output_name):
    operation = 4  # MEAN (Caffe ReductionParameter: SUM=1, ASUM=2, SUMSQ=3, MEAN=4)
    layer = caffe_net.LayerParameter(name=output_name, type='Reduction',
                                     bottom=[input_name], top=[output_name])
    layer.reduction_param(operation, axis, keep_dim)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.reduction(
        self.data_dict[input_name], None, operation, axis, output_name)
    return output_name
def add_clip(self, input_name, output_name, min_value, max_value):
    layer = caffe_net.LayerParameter(name=output_name, type='Clip',
                                     bottom=[input_name], top=[output_name])
    layer.clip_param(min_value, max_value)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.clip(
        self.data_dict[input_name], min_value, max_value, output_name)
    return output_name
def add_input(self, input_name, input_shape):
    layer = caffe_net.LayerParameter(name=input_name, type='Input',
                                     top=[input_name])
    layer.input_param(input_shape)
    self.caffe_model.add_layer(layer)
    if input_name not in self.data_dict.keys():
        self.data_dict[input_name] = None
    return input_name
def add_matmul(self, input_a_name, input_b_name, output_name,
               transpose_a=False, transpose_b=False):
    layer = caffe_net.LayerParameter(name=output_name, type='MatMul',
                                     bottom=[input_a_name, input_b_name],
                                     top=[output_name])
    layer.matmul_param(transpose_a, transpose_b)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.matmul(
        self.data_dict[input_a_name], transpose_a,
        self.data_dict[input_b_name], transpose_b, output_name)
    return output_name
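# A numpy reference for the assumed MatMul semantics behind add_matmul: optionally
# swap the two innermost dims of either operand, then batch-matmul.
import numpy as np

def matmul_reference(a, b, transpose_a=False, transpose_b=False):
    if transpose_a:
        a = np.swapaxes(a, -1, -2)
    if transpose_b:
        b = np.swapaxes(b, -1, -2)
    return a @ b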
def add_attention(self, input_name, attention_num, from_seq_length,
                  to_seq_length, output_name):
    layer = caffe_net.LayerParameter(name=output_name, type='Attention',
                                     bottom=[input_name], top=[output_name])
    layer.attention_param(attention_num, from_seq_length, to_seq_length)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.attention(
        self.data_dict[input_name], attention_num,
        from_seq_length, to_seq_length, output_name)
    return output_name
def add_attention_mask(self, input_name, output_name, attn_trunc_len,
                       same_length, mask):
    layer = caffe_net.LayerParameter(name=output_name, type='AttentionMask',
                                     bottom=[input_name], top=[output_name])
    layer.attention_mask_param(attn_trunc_len, same_length, mask)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.attention_mask(
        self.data_dict[input_name], attn_trunc_len, same_length, mask, output_name)
    return output_name
def add_multiply(self, input_name, output_name, scale=1, bias=0):
    layer = caffe_net.LayerParameter(name=output_name, type='Multiply',
                                     bottom=[input_name], top=[output_name])
    layer.multiply_param(scale, bias)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.multiply(
        self.data_dict[input_name], scale, bias, output_name)
    return output_name