def add_prod(self, input_names, output_name):
    layer = caffe_net.LayerParameter(name=output_name, type='Eltwise',
                bottom=input_names, top=[output_name])
    layer.eltwise_param(0)  # PROD
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = self.data_dict[input_names[0]]
    for i in range(1, len(input_names)):
        self.data_dict[output_name] = Operators.matmultiply(self.data_dict[output_name],
                                          self.data_dict[input_names[i]], output_name)
    return output_name

def add_l2norm(self, input_name, output_name):
    layer = caffe_net.LayerParameter(name=output_name, type='L2Norm',
                bottom=[input_name], top=[output_name])
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.l2_norm(self.data_dict[input_name], output_name)
    return output_name

def add_slice(self, input_name, output_names, axis, slice_point):
    layer = caffe_net.LayerParameter(name=output_names[0], type='Slice',
                bottom=[input_name], top=output_names)
    layer.slice_param(axis, slice_point)
    self.caffe_model.add_layer(layer)
    result = Operators.slice(self.data_dict[input_name], axis, slice_point, output_names)
    for i in range(len(output_names)):
        if (result is not None):
            self.data_dict[output_names[i]] = result[i]
        else:
            self.data_dict[output_names[i]] = None
    return output_names

def add_attention(self, input_name, attention_num, from_seq_length, to_seq_length, output_name):
    layer = caffe_net.LayerParameter(name=output_name, type='Attention',
                bottom=[input_name], top=[output_name])
    layer.attention_param(attention_num, from_seq_length, to_seq_length)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.attention(self.data_dict[input_name],
                                      attention_num, from_seq_length, to_seq_length, output_name)
    return output_name

def add_attention_mask(self, input_name, output_name, attn_trunc_len, same_length, mask):
    layer = caffe_net.LayerParameter(name=output_name, type='AttentionMask',
                bottom=[input_name], top=[output_name])
    layer.attention_mask_param(attn_trunc_len, same_length, mask)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.attention_mask(self.data_dict[input_name],
                                      attn_trunc_len, same_length, mask, output_name)
    return output_name

def add_sum(self, input_names, output_name):
    layer = caffe_net.LayerParameter(name=output_name, type='Eltwise',
                bottom=input_names, top=[output_name])
    layer.eltwise_param(1)  # SUM
    self.caffe_model.add_layer(layer)
    data = []
    for name in input_names:
        data.append(self.data_dict[name])
    self.data_dict[output_name] = Operators.sum(data, output_name)
    return output_name

def add_softmax(self, input_name, output_name, axis):
    layer = caffe_net.LayerParameter(name=output_name, type='Softmax',
                bottom=[input_name], top=[output_name])
    layer.softmax_param(axis)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.softmax(self.data_dict[input_name], axis, output_name)
    return output_name

def add_gelu(self, input_name, output_name):
    layer = caffe_net.LayerParameter(name=output_name, type='Gelu',
                bottom=[input_name], top=[output_name])
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.gelu(self.data_dict[input_name], output_name)
    return output_name

def add_tanh(self, input_name, output_name):
    layer = caffe_net.LayerParameter(name=output_name, type='TanH',
                bottom=[input_name], top=[output_name])
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.tanh(self.data_dict[input_name], output_name)
    return output_name
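# Note on add_prod/add_sum above: the eltwise_param codes 0 (PROD) and 1 (SUM)
# are assumed to match the Eltwise layer's operation enum. A usage sketch with
# hypothetical tensor names on a converter instance `conv`:
#
#   conv.add_prod(["a", "b"], "p")   # Eltwise product of tensors a and b
#   conv.add_sum(["a", "b"], "s")    # Eltwise sum, s = a + b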
def add_relu(self, input_name, output_name):
    layer = caffe_net.LayerParameter(name=output_name, type='ReLU',
                bottom=[input_name], top=[output_name])
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.relu(self.data_dict[input_name], output_name)
    return output_name

def add_relu6(self, input_name, output_name):
    layer = caffe_net.LayerParameter(name=output_name, type='ReLU6',
                bottom=[input_name], top=[output_name])
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.relu(self.data_dict[input_name], output_name, max_value=6)
    return output_name

def add_sigmoid(self, input_name, output_name):
    layer = caffe_net.LayerParameter(name=output_name, type='Sigmoid',
                bottom=[input_name], top=[output_name])
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.sigmoid(self.data_dict[input_name], output_name)
    return output_name

def add_swish(self, input_name, output_name, beta=1.0):
    layer = caffe_net.LayerParameter(name=output_name, type='Swish',
                bottom=[input_name], top=[output_name])
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.swish(self.data_dict[input_name], beta, output_name)
    return output_name

def add_weight(self, output_name, scope_id=None, weight_name=None, weight=None,
               transpose=None, data_type="FLOAT32"):
    if scope_id is not None:
        weight_name = self.generate_name(self.scopes, scope_id)
    if weight_name is not None:
        weight = self.get_weight(weight_name)
    if weight is None:
        print("[ERROR] cannot add null weight layer")
        exit(0)
    layer = caffe_net.LayerParameter(name=output_name+"_weight", type='SharedWeight',
                top=[output_name])
    if (transpose is not None):
        weight = weight.transpose(transpose)
    layer.weight_param(weight.shape, data_type)
    layer.add_data(weight)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.weight(weight, output_name)
    return output_name

def add_embedding(self, input_name, weight_name, output_name, transpose=False):
    layer = caffe_net.LayerParameter(name=output_name, type='Embed',
                bottom=[input_name, weight_name], top=[output_name])
    weight = self.data_dict[weight_name]
    if transpose:
        input_dim = weight.shape[-1]
        embedding_dim = weight.shape[-2]
    else:
        input_dim = weight.shape[-2]
        embedding_dim = weight.shape[-1]
    layer.embed_param(input_dim, embedding_dim, transpose)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.embedding(self.data_dict[input_name],
                                      weight, transpose, output_name)
    return output_name

def extract_embedding(self, input_name, scope_id, tensorflow_weight_name, output_name):
    self.scopes[scope_id] = tensorflow_weight_name
    weight_name = self.generate_name(self.scopes, scope_id+1)
    weight = self.get_weight(weight_name)
    layer = caffe_net.LayerParameter(name=output_name, type='Embed',
                bottom=[input_name], top=[output_name])
    layer.add_data(weight)
    embedding_dim = len(weight[0])
    layer.embed_param(len(weight), embedding_dim, False)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.embedding(self.data_dict[input_name],
                                      weight, False, output_name)
    return output_name

def add_relative_position_embedding(self, input_name, weight_name, axis, output_name, transpose=False):
    layer = caffe_net.LayerParameter(name=output_name, type='RelativePositionEmbed',
                bottom=[input_name, weight_name], top=[output_name])
    weight = self.data_dict[weight_name]
    if transpose:
        input_dim = len(weight[0])
        embedding_dim = len(weight)
    else:
        input_dim = len(weight)
        embedding_dim = len(weight[0])
    layer.relative_position_embed_param(input_dim, embedding_dim, transpose, axis)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.relative_position_embedding(self.data_dict[input_name],
                                      weight, axis, output_name)
    return output_name
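# Note on add_embedding/extract_embedding above: the Embed layer is a table
# lookup. With transpose=False the weight is read as [input_dim, embedding_dim],
# i.e. one row per vocabulary id; with transpose=True the two dimensions are
# swapped. A rough numpy sketch of the expected lookup (hypothetical shapes):
#
#   weight = np.random.rand(vocab_size, embedding_dim)
#   ids = np.array([3, 1])
#   out = weight[ids]                # shape [2, embedding_dim]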
def add_reduce_mean(self, input_name, axis, keep_dim, output_name):
    operation = 4  # MEAN
    layer = caffe_net.LayerParameter(name=output_name, type='Reduction',
                bottom=[input_name], top=[output_name])
    layer.reduction_param(operation, axis, keep_dim)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.reduction(self.data_dict[input_name],
                                      None, operation, axis, output_name)
    return output_name

def add_reduce_sum(self, input_name, axis, keep_dim, output_name, mask_input_name=None):
    operation = 1  # SUM
    bottom = [input_name]
    if (mask_input_name is not None):
        bottom.append(mask_input_name)
    layer = caffe_net.LayerParameter(name=output_name, type='Reduction',
                bottom=bottom, top=[output_name])
    layer.reduction_param(operation, axis, keep_dim)
    self.caffe_model.add_layer(layer)
    if (mask_input_name is None):
        mask = None
    else:
        mask = self.data_dict[mask_input_name]
    self.data_dict[output_name] = Operators.reduction(self.data_dict[input_name],
                                      mask, operation, axis, output_name)
    return output_name

def add_expand_dims(self, input_name, axis, output_name):
    layer = caffe_net.LayerParameter(name=output_name, type='Unsqueeze',
                bottom=[input_name], top=[output_name])
    layer.unsqueeze_param(axis)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.expand_dims(self.data_dict[input_name], axis, output_name)
    return output_name

def add_tile(self, input_name, loops, axis, output_name):
    layer = caffe_net.LayerParameter(name=output_name, type='Tile',
                bottom=[input_name], top=[output_name])
    layer.tile_param(axis, loops)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.tile(self.data_dict[input_name], loops, axis, output_name)
    return output_name

def add_argmax(self, input_name, axis, output_name):
    layer = caffe_net.LayerParameter(name=output_name, type='ArgMax',
                bottom=[input_name], top=[output_name])
    layer.argmax_param(axis)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.argmax(self.data_dict[input_name], axis, output_name)
    return output_name
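# Note on add_reduce_mean/add_reduce_sum above: the operation codes 4 (MEAN)
# and 1 (SUM) are assumed to match the Reduction layer's enum. In numpy terms
# the simulated results should behave like (hypothetical input x):
#
#   np.mean(x, axis=axis)            # add_reduce_mean
#   np.sum(x, axis=axis)             # add_reduce_sum without a mask
#
# When mask_input_name is given, the mask tensor is forwarded to
# Operators.reduction so masked-out positions can be excluded from the sum;
# the exact masking convention is defined by Operators.reduction.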
def extract_lstm(self, input_name, state_name, output_name, scope_id, steps=-1,
                 scope_name="basic_lstm_cell", use_proj=False, zoneoutCell=0, zoneoutOutput=0):
    if (isinstance(scope_name, str)):
        scope_name = [scope_name]
    kernels = []
    biases = []
    projections = []
    projection_biases = []
    for i in range(len(scope_name)):
        kernel, bias = self.get_weights(scope_id, [scope_name[i], "kernel", "bias"])
        projection_size = 0
        projection = None
        if (use_proj):
            self.scopes[scope_id] = scope_name[i]
            projection, projection_bias = self.get_weights(scope_id+1, ["projection", "kernel", "bias"])
            projection_size = projection.shape[0]
        num_output_4 = len(kernel[0])
        if (bias is not None):
            if (len(bias) != num_output_4):
                print("[ERROR] extract_lstm failed")
                exit(0)
        if (use_proj):
            num_output = projection.shape[1]
        else:
            num_output = num_output_4 // 4
        if (len(kernel) != self.get_tensor_shape(input_name)[-1] + num_output):
            # Keras-style checkpoints store the input and recurrent kernels
            # separately; concatenate them into the fused layout.
            kernel_2, bias_2 = self.get_weights(scope_id, [scope_name[i], "recurrent_kernel", "bias"])
            kernel = np.concatenate([kernel, kernel_2], axis=0)
        kernels.append(kernel.transpose([1, 0]))
        if (bias is None):
            # zero-fill to the length checked against num_output_4 above
            bias = np.zeros([num_output_4])
        biases.append(bias)
        if (use_proj):
            projections.append(projection.transpose([1, 0]))
            if (projection_bias is None):
                # fall back to zeros when the projection has no bias
                projection_bias = np.zeros(num_output)
            projection_biases.append(projection_bias)
        else:
            projections.append(None)
            projection_biases.append(None)
    bottom = [input_name]
    if (state_name is not None):
        bottom.append(state_name)
    layer = caffe_net.LayerParameter(name=output_name, type='LSTM',
                bottom=bottom, top=[output_name])
    layer.lstm_param(num_output, steps, projection_size, zoneoutCell, zoneoutOutput)
    if (use_proj):
        if (projection_biases[0] is not None):
            layer.add_data(np.concatenate(kernels, axis=0), np.concatenate(biases, axis=0),
                           np.concatenate(projections, axis=0), np.concatenate(projection_biases, axis=0))
        else:
            layer.add_data(np.concatenate(kernels, axis=0), np.concatenate(biases, axis=0),
                           np.concatenate(projections, axis=0))
    else:
        layer.add_data(np.concatenate(kernels, axis=0), np.concatenate(biases, axis=0))
    self.caffe_model.add_layer(layer)
    # if (len(scope_name) == 1):
    if (steps >= 0):
        self.data_dict[output_name] = Operators.fw_lstm(self.data_dict[input_name],
                                          kernels[0], biases[0], projections[0], projection_biases[0],
                                          zoneoutCell, zoneoutOutput, output_name)
    elif (steps == -1):
        self.data_dict[output_name], self.data_dict[state_name] = Operators.lstm(
            self.data_dict[input_name], self.data_dict[state_name],
            kernels[0], biases[0], projections[0], projection_biases[0],
            zoneoutCell, zoneoutOutput, output_name, state_name)
    # elif (len(scope_name) == 2):
    elif (steps == -2):
        self.data_dict[output_name] = Operators.bi_lstm(self.data_dict[input_name],
                                          kernels, biases, projections, projection_biases,
                                          zoneoutCell, zoneoutOutput, output_name)
    return output_name

def add_check(self, left_name, right_name, condition, status_name):
    layer = caffe_net.LayerParameter(name=status_name, type='Check',
                bottom=[left_name, right_name], top=[status_name])
    layer.check_param(condition)
    self.caffe_model.add_layer(layer)
    self.data_dict[status_name] = Operators.check(self.data_dict[left_name],
                                      self.data_dict[right_name], condition, status_name)
    return status_name

def add_jump(self, jump_start_name, output_name="jump", status_name=None):
    if (status_name is not None):
        bottom = [jump_start_name, status_name]
    else:
        bottom = [jump_start_name]
    layer = caffe_net.LayerParameter(name=output_name, type='Jump',
                bottom=bottom, top=[output_name])
    self.caffe_model.add_layer(layer)
    return jump_start_name

def add_copy(self, src_name, src_batch_stride, src_stride, src_offset,
             dst_name, dst_batch_stride, dst_stride, dst_offset,
             length, output_name, src_index_name=None, dst_index_name=None):
    src_index = None
    dst_index = None
    if (src_index_name is None):
        layer = caffe_net.LayerParameter(name=output_name, type='Copy',
                    bottom=[src_name, dst_name], top=[output_name])
    else:
        layer = caffe_net.LayerParameter(name=output_name, type='Copy',
                    bottom=[src_name, dst_name, src_index_name, dst_index_name], top=[output_name])
        src_index = self.data_dict[src_index_name]
        dst_index = self.data_dict[dst_index_name]
    layer.copy_param(src_batch_stride, src_stride, src_offset,
                     dst_batch_stride, dst_stride, dst_offset, length)
    self.caffe_model.add_layer(layer)
    self.data_dict[dst_name] = Operators.copy(self.data_dict[src_name],
                                   src_batch_stride, src_stride, src_offset,
                                   self.data_dict[dst_name],
                                   dst_batch_stride, dst_stride, dst_offset,
                                   length, dst_name, src_index, dst_index)
    return dst_name
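# Note on extract_lstm above: TensorFlow stores the fused cell kernel as
# [input_size + hidden_size, 4 * hidden_size]; Keras-style checkpoints keep
# kernel and recurrent_kernel separate, which is why the two are concatenated
# on axis 0 before transposing. Shape bookkeeping, with I = input size and
# H = hidden size:
#
#   kernel      [I + H, 4H]  ->  transposed to [4H, I + H] for the layer
#   bias        [4H]             (zero-filled when the checkpoint has none)
#   projection  [s0, s1]: s0 is passed as projection_size, s1 becomes
#               num_output; the matrix is transposed to [s1, s0]
#
# steps >= 0 selects a fixed-length forward LSTM, steps == -1 a single cell
# update with an explicit state tensor, and steps == -2 a bidirectional LSTM
# built from two scopes.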
def add_repeat(self, loops, repeat_start_name, output_name="repeat",
               status_name=None, axis_name=None, axis=-1):
    bottom = [repeat_start_name]
    if (status_name is not None):
        bottom.append(status_name)
    if (axis_name is not None):
        bottom.append(axis_name)
    layer = caffe_net.LayerParameter(name=output_name, type='Repeat',
                bottom=bottom, top=[output_name])
    layer.repeat_param(loops, axis)
    self.caffe_model.add_layer(layer)
    return repeat_start_name

def add_repeat_set_times(self, repeat_name, input_name, input_axis, output_name="set_repeat_times"):
    layer = caffe_net.LayerParameter(name=output_name, type='RepeatSetTimesByAxisLength',
                bottom=[repeat_name, input_name], top=[output_name])
    layer.repeat_set_times_by_axis_length_param(input_axis)
    self.caffe_model.add_layer(layer)
    return repeat_name

def add_memory(self, memory_name, memory_shapes, data_type):
    layer = caffe_net.LayerParameter(name=memory_name+"_mem", type='PreAllocatedMemory',
                top=[memory_name])
    layer.memory_param(memory_shapes, data_type)
    self.caffe_model.add_layer(layer)
    self.data_dict[memory_name] = Operators.zeros(memory_shapes, memory_name)
    return memory_name

def add_pad(self, input_name, output_name, padding_shapes, padding_values=None):
    layer = caffe_net.LayerParameter(name=output_name, type='Pad',
                bottom=[input_name], top=[output_name])
    layer.padding_param(padding_shapes, padding_values)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.pad(self.data_dict[input_name],
                                      padding_shapes, padding_values, output_name)
    return output_name

def add_relative_shift(self, input_name, output_name, axis, shift_length):
    layer = caffe_net.LayerParameter(name=output_name, type='RelativeShift',
                bottom=[input_name], top=[output_name])
    layer.relative_shift_param(axis, shift_length)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.relative_shift(self.data_dict[input_name],
                                      axis, shift_length, output_name)
    return output_name

def add_clip(self, input_name, output_name, min_value, max_value):
    layer = caffe_net.LayerParameter(name=output_name, type='Clip',
                bottom=[input_name], top=[output_name])
    layer.clip_param(min_value, max_value)
    self.caffe_model.add_layer(layer)
    self.data_dict[output_name] = Operators.clip(self.data_dict[input_name],
                                      min_value, max_value, output_name)
    return output_name
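# Usage sketch for the control-flow helpers above (add_check, add_jump,
# add_repeat, add_repeat_set_times, add_memory): these emit graph-level loop
# constructs rather than computing data. A hypothetical decoder loop, with all
# names and the condition value invented for illustration:
#
#   conv.add_memory("cache", [1, 128], "FLOAT32")      # preallocated buffer
#   conv.add_repeat(max_steps, "loop_start")           # repeat the body max_steps times
#   ...                                                # per-step layers
#   conv.add_check("token", "eos", condition, "done")  # condition as defined by check_param
#   conv.add_jump("loop_end", status_name="done")      # early exit when done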