Ejemplo n.º 1
0
 def add_prod(self, input_names, output_name):
     """Add an 'Eltwise' layer with operation code 0 (Prod) over input_names.

     The value recorded under output_name in data_dict starts as the data of
     the first input and is then combined, left to right, with each remaining
     input through Operators.matmultiply. Returns output_name.
     """
     prod_layer = caffe_net.LayerParameter(
         name=output_name,
         type='Eltwise',
         bottom=input_names,
         top=[output_name],
     )
     prod_layer.eltwise_param(0)  # Prod
     self.caffe_model.add_layer(prod_layer)
     # Seed with the first input, then fold in the others in order.
     self.data_dict[output_name] = self.data_dict[input_names[0]]
     for operand_name in input_names[1:]:
         self.data_dict[output_name] = Operators.matmultiply(
             self.data_dict[output_name],
             self.data_dict[operand_name],
             output_name,
         )
     return output_name
Ejemplo n.º 2
0
    def add_prod(self, input_names, output_name):
        """Add an 'Eltwise' layer with operation code 0 (Prod) over input_names.

        The value recorded under output_name in data_dict starts as the data
        of the first input and is then combined, left to right, with each
        remaining input through Operators.matmultiply. Returns output_name.
        """
        prod_layer = caffe_net.LayerParameter(
            name=output_name,
            type='Eltwise',
            bottom=input_names,
            top=[output_name],
        )
        prod_layer.eltwise_param(0)  # Prod
        self.caffe_model.add_layer(prod_layer)
        # Seed with the first input, then fold in the others in order.
        self.data_dict[output_name] = self.data_dict[input_names[0]]
        for operand_name in input_names[1:]:
            self.data_dict[output_name] = Operators.matmultiply(
                self.data_dict[output_name],
                self.data_dict[operand_name],
                output_name,
            )
        return output_name
		
	def add_l2norm(self, input_name, output_name):
        layer = caffe_net.LayerParameter(name=output_name, type='L2Norm',
                    bottom=[input_name],
                    top=[output_name])
        self.caffe_model.add_layer(layer)
        self.data_dict[output_name] = Operators.l2_norm(self.data_dict[input_name], output_name)
        return output_name

    def add_slice(self, input_name, output_names, axis, slice_point):
        """Add a 'Slice' layer splitting input_name into output_names.

        The layer is named after the first output. Operators.slice is called
        on the input's recorded data; when it returns None every output is
        recorded as None, otherwise output i receives element i of the result.
        Returns output_names.
        """
        slice_layer = caffe_net.LayerParameter(
            name=output_names[0],
            type='Slice',
            bottom=[input_name],
            top=output_names,
        )
        slice_layer.slice_param(axis, slice_point)
        self.caffe_model.add_layer(slice_layer)
        pieces = Operators.slice(self.data_dict[input_name], axis, slice_point, output_names)
        for idx, piece_name in enumerate(output_names):
            self.data_dict[piece_name] = None if pieces is None else pieces[idx]
        return output_names

    def add_attention(self, input_name, attention_num, from_seq_length, to_seq_length, output_name):
        """Add an 'Attention' layer and record Operators.attention's result.

        attention_num, from_seq_length and to_seq_length are forwarded both to
        the layer's attention_param and to Operators.attention.
        Returns output_name.
        """
        attention_layer = caffe_net.LayerParameter(
            name=output_name,
            type='Attention',
            bottom=[input_name],
            top=[output_name],
        )
        attention_layer.attention_param(attention_num, from_seq_length, to_seq_length)
        self.caffe_model.add_layer(attention_layer)
        source = self.data_dict[input_name]
        self.data_dict[output_name] = Operators.attention(
            source, attention_num, from_seq_length, to_seq_length, output_name)
        return output_name

    def add_attention_mask(self, input_name, output_name, attn_trunc_len, same_length, mask):
        """Add an 'AttentionMask' layer and record Operators.attention_mask's result.

        attn_trunc_len, same_length and mask are forwarded both to the layer's
        attention_mask_param and to Operators.attention_mask.
        Returns output_name.
        """
        mask_layer = caffe_net.LayerParameter(
            name=output_name,
            type='AttentionMask',
            bottom=[input_name],
            top=[output_name],
        )
        mask_layer.attention_mask_param(attn_trunc_len, same_length, mask)
        self.caffe_model.add_layer(mask_layer)
        source = self.data_dict[input_name]
        self.data_dict[output_name] = Operators.attention_mask(
            source, attn_trunc_len, same_length, mask, output_name)
        return output_name

    def add_sum(self, input_names, output_name):
        """Add an 'Eltwise' layer with operation code 1 (SUM) over input_names.

        The recorded data of every input is collected, in order, into a list
        that is handed to Operators.sum; the result is stored in data_dict
        under output_name. Returns output_name.
        """
        sum_layer = caffe_net.LayerParameter(
            name=output_name,
            type='Eltwise',
            bottom=input_names,
            top=[output_name],
        )
        sum_layer.eltwise_param(1)  # SUM
        self.caffe_model.add_layer(sum_layer)
        operands = [self.data_dict[operand_name] for operand_name in input_names]
        self.data_dict[output_name] = Operators.sum(operands, output_name)
        return output_name

    def add_softmax(self, input_name, output_name, axis):
        """Add a 'Softmax' layer along axis and record Operators.softmax's result.

        Returns output_name.
        """
        softmax_layer = caffe_net.LayerParameter(
            name=output_name,
            type='Softmax',
            bottom=[input_name],
            top=[output_name],
        )
        softmax_layer.softmax_param(axis)
        self.caffe_model.add_layer(softmax_layer)
        source = self.data_dict[input_name]
        self.data_dict[output_name] = Operators.softmax(source, axis, output_name)
        return output_name

    def add_gelu(self, input_name, output_name):
        """Add a 'Gelu' layer and record Operators.gelu's result for it.

        Returns output_name.
        """
        gelu_layer = caffe_net.LayerParameter(
            name=output_name,
            type='Gelu',
            bottom=[input_name],
            top=[output_name],
        )
        self.caffe_model.add_layer(gelu_layer)
        source = self.data_dict[input_name]
        self.data_dict[output_name] = Operators.gelu(source, output_name)
        return output_name

    def add_tanh(self, input_name, output_name):
        """Add a 'TanH' layer and record Operators.tanh's result for it.

        Returns output_name.
        """
        tanh_layer = caffe_net.LayerParameter(
            name=output_name,
            type='TanH',
            bottom=[input_name],
            top=[output_name],
        )
        self.caffe_model.add_layer(tanh_layer)
        source = self.data_dict[input_name]
        self.data_dict[output_name] = Operators.tanh(source, output_name)
        return output_name


    def add_relu(self, input_name, output_name):
        """Add a 'ReLU' layer and record Operators.relu's result for it.

        Returns output_name.
        """
        relu_layer = caffe_net.LayerParameter(
            name=output_name,
            type='ReLU',
            bottom=[input_name],
            top=[output_name],
        )
        self.caffe_model.add_layer(relu_layer)
        source = self.data_dict[input_name]
        self.data_dict[output_name] = Operators.relu(source, output_name)
        return output_name

    def add_relu6(self, input_name, output_name):
        """Add a 'ReLU6' layer; data is computed by Operators.relu with max_value=6.

        Returns output_name.
        """
        relu6_layer = caffe_net.LayerParameter(
            name=output_name,
            type='ReLU6',
            bottom=[input_name],
            top=[output_name],
        )
        self.caffe_model.add_layer(relu6_layer)
        source = self.data_dict[input_name]
        self.data_dict[output_name] = Operators.relu(source, output_name, max_value=6)
        return output_name

    def add_sigmoid(self, input_name, output_name):
        """Add a 'Sigmoid' layer and record Operators.sigmoid's result for it.

        Returns output_name.
        """
        sigmoid_layer = caffe_net.LayerParameter(
            name=output_name,
            type='Sigmoid',
            bottom=[input_name],
            top=[output_name],
        )
        self.caffe_model.add_layer(sigmoid_layer)
        source = self.data_dict[input_name]
        self.data_dict[output_name] = Operators.sigmoid(source, output_name)
        return output_name

    def add_swish(self, input_name, output_name, beta=1.0):
        """Add a 'Swish' layer and record Operators.swish's result for it.

        beta is passed only to Operators.swish; no layer parameter is set
        from it here. Returns output_name.
        """
        swish_layer = caffe_net.LayerParameter(
            name=output_name,
            type='Swish',
            bottom=[input_name],
            top=[output_name],
        )
        self.caffe_model.add_layer(swish_layer)
        source = self.data_dict[input_name]
        self.data_dict[output_name] = Operators.swish(source, beta, output_name)
        return output_name

    def add_weight(self, output_name, scope_id=None, weight_name=None, weight=None, transpose=None, data_type="FLOAT32"):
        """Add a 'SharedWeight' layer that exposes a weight as output_name.

        The weight is resolved in this order:
          * scope_id given: weight_name is generated with
            self.generate_name(self.scopes, scope_id), replacing any
            weight_name argument;
          * weight_name known: the weight is loaded with self.get_weight,
            replacing any weight argument;
          * otherwise the weight argument is used as-is.

        If transpose is not None the weight is transposed with it before the
        layer's weight_param / add_data are filled in. The layer is named
        output_name + "_weight". Operators.weight's result is stored in
        data_dict under output_name.

        Returns output_name. If no weight could be resolved, prints an error
        and terminates the process with exit status 1 (SystemExit).
        """
        # Local import keeps this method self-contained; sys.exit is used
        # instead of the site-provided exit() helper.
        import sys

        if scope_id is not None:
            weight_name = self.generate_name(self.scopes, scope_id)
        if weight_name is not None:
            weight = self.get_weight(weight_name)
        if weight is None:
            print("[ERROR] can not add null weight layer")
            # A failure must not report success (status 0) to the caller.
            sys.exit(1)
        layer = caffe_net.LayerParameter(name=output_name+"_weight", type='SharedWeight',
                                         top=[output_name])
        if transpose is not None:
            weight = weight.transpose(transpose)
        layer.weight_param(weight.shape, data_type)
        layer.add_data(weight)
        self.caffe_model.add_layer(layer)
        self.data_dict[output_name] = Operators.weight(weight, output_name)
        return output_name

    def add_embedding(self, input_name, weight_name, output_name, transpose=False):
        """Add an 'Embed' layer whose weight comes from another blob.

        The weight is read from data_dict[weight_name]. Its last two shape
        entries provide (input_dim, embedding_dim), swapped when transpose is
        truthy. Operators.embedding's result is stored under output_name.
        Returns output_name.
        """
        embed_layer = caffe_net.LayerParameter(
            name=output_name,
            type='Embed',
            bottom=[input_name, weight_name],
            top=[output_name],
        )
        table = self.data_dict[weight_name]
        if not transpose:
            input_dim = table.shape[-2]
            embedding_dim = table.shape[-1]
        else:
            input_dim = table.shape[-1]
            embedding_dim = table.shape[-2]
        embed_layer.embed_param(input_dim, embedding_dim, transpose)
        self.caffe_model.add_layer(embed_layer)
        indices = self.data_dict[input_name]
        self.data_dict[output_name] = Operators.embedding(indices, table, transpose, output_name)
        return output_name

    def extract_embedding(self, input_name, scope_id, tensorflow_weight_name, output_name):
        """Add an 'Embed' layer that carries its own weight data.

        tensorflow_weight_name is written into self.scopes[scope_id], the
        weight name is generated from the scopes up to scope_id + 1 and the
        weight is loaded with self.get_weight. The layer is configured with
        len(weight) and len(weight[0]) and transpose fixed to False.
        Operators.embedding's result is stored under output_name.
        Returns output_name.
        """
        self.scopes[scope_id] = tensorflow_weight_name
        table = self.get_weight(self.generate_name(self.scopes, scope_id + 1))
        embed_layer = caffe_net.LayerParameter(
            name=output_name,
            type='Embed',
            bottom=[input_name],
            top=[output_name],
        )
        embed_layer.add_data(table)
        embedding_dim = len(table[0])
        input_dim = len(table)
        embed_layer.embed_param(input_dim, embedding_dim, False)
        self.caffe_model.add_layer(embed_layer)
        indices = self.data_dict[input_name]
        self.data_dict[output_name] = Operators.embedding(indices, table, False, output_name)
        return output_name

    def add_relative_position_embedding(self, input_name, weight_name, axis, output_name, transpose=False):
        """Add a 'RelativePositionEmbed' layer whose weight comes from another blob.

        The weight is read from data_dict[weight_name]; len(weight) and
        len(weight[0]) provide (input_dim, embedding_dim), swapped when
        transpose is truthy. transpose is passed to the layer parameter only,
        not to Operators.relative_position_embedding. Returns output_name.
        """
        rel_layer = caffe_net.LayerParameter(
            name=output_name,
            type='RelativePositionEmbed',
            bottom=[input_name, weight_name],
            top=[output_name],
        )
        table = self.data_dict[weight_name]
        if not transpose:
            input_dim = len(table)
            embedding_dim = len(table[0])
        else:
            input_dim = len(table[0])
            embedding_dim = len(table)
        rel_layer.relative_position_embed_param(input_dim, embedding_dim, transpose, axis)
        self.caffe_model.add_layer(rel_layer)
        source = self.data_dict[input_name]
        self.data_dict[output_name] = Operators.relative_position_embedding(
            source, table, axis, output_name)
        return output_name

    def add_reduce_mean(self, input_name, axis, keep_dim, output_name):
        """Add a 'Reduction' layer with operation code 4 (MEAN).

        keep_dim is passed to the layer's reduction_param only.
        Operators.reduction is called with no mask (None) and its result is
        stored under output_name. Returns output_name.
        """
        mean_op = 4  # MEAN
        reduce_layer = caffe_net.LayerParameter(
            name=output_name,
            type='Reduction',
            bottom=[input_name],
            top=[output_name],
        )
        reduce_layer.reduction_param(mean_op, axis, keep_dim)
        self.caffe_model.add_layer(reduce_layer)
        source = self.data_dict[input_name]
        self.data_dict[output_name] = Operators.reduction(source, None, mean_op, axis, output_name)
        return output_name

    def add_reduce_sum(self, input_name, axis, keep_dim, output_name, mask_input_name=None):
        """Add a 'Reduction' layer with operation code 1 (SUM), optionally masked.

        When mask_input_name is given it becomes a second bottom of the layer
        and its recorded data is passed as the mask to Operators.reduction;
        otherwise the mask is None. keep_dim is passed to the layer's
        reduction_param only. Returns output_name.
        """
        sum_op = 1  # SUM
        has_mask = mask_input_name is not None
        bottom = [input_name, mask_input_name] if has_mask else [input_name]
        reduce_layer = caffe_net.LayerParameter(
            name=output_name,
            type='Reduction',
            bottom=bottom,
            top=[output_name],
        )
        reduce_layer.reduction_param(sum_op, axis, keep_dim)
        self.caffe_model.add_layer(reduce_layer)
        mask = self.data_dict[mask_input_name] if has_mask else None
        self.data_dict[output_name] = Operators.reduction(
            self.data_dict[input_name], mask, sum_op, axis, output_name)
        return output_name

    def add_expand_dims(self, input_name, axis, output_name):
        """Add an 'Unsqueeze' layer at axis and record Operators.expand_dims's result.

        Returns output_name.
        """
        unsqueeze_layer = caffe_net.LayerParameter(
            name=output_name,
            type='Unsqueeze',
            bottom=[input_name],
            top=[output_name],
        )
        unsqueeze_layer.unsqueeze_param(axis)
        self.caffe_model.add_layer(unsqueeze_layer)
        source = self.data_dict[input_name]
        self.data_dict[output_name] = Operators.expand_dims(source, axis, output_name)
        return output_name

    def add_tile(self, input_name, loops, axis, output_name):
        """Add a 'Tile' layer and record Operators.tile's result for it.

        The layer's tile_param receives (axis, loops) while Operators.tile
        receives (data, loops, axis, output_name). Returns output_name.
        """
        tile_layer = caffe_net.LayerParameter(
            name=output_name,
            type='Tile',
            bottom=[input_name],
            top=[output_name],
        )
        tile_layer.tile_param(axis, loops)
        self.caffe_model.add_layer(tile_layer)
        source = self.data_dict[input_name]
        self.data_dict[output_name] = Operators.tile(source, loops, axis, output_name)
        return output_name

    def add_argmax(self, input_name, axis, output_name):
        """Add an 'ArgMax' layer along axis and record Operators.argmax's result.

        Returns output_name.
        """
        argmax_layer = caffe_net.LayerParameter(
            name=output_name,
            type='ArgMax',
            bottom=[input_name],
            top=[output_name],
        )
        argmax_layer.argmax_param(axis)
        self.caffe_model.add_layer(argmax_layer)
        source = self.data_dict[input_name]
        self.data_dict[output_name] = Operators.argmax(source, axis, output_name)
        return output_name

    def extract_lstm(self, input_name, state_name, output_name, scope_id,
            steps=-1, scope_name="basic_lstm_cell",
            use_proj=False, zoneoutCell=0, zoneoutOutput=0):
        """Load LSTM weights for one or more scopes and add an 'LSTM' layer.

        scope_name may be a single string or a list of strings; a string is
        wrapped into a one-element list. For every scope the kernel and bias
        are loaded with self.get_weights and, when use_proj is set, the
        projection kernel and bias as well. The per-scope weights are
        concatenated along axis 0 and attached to the layer.

        The data recorded in data_dict depends on steps:
          * steps >= 0: Operators.fw_lstm with the first scope's weights;
          * steps == -1: Operators.lstm with the first scope's weights, which
            also replaces data_dict[state_name];
          * steps == -2: Operators.bi_lstm with the weights of all scopes;
          * any other value: nothing is recorded for output_name.

        Returns output_name. A bias whose length differs from the kernel's
        column count prints an error and calls exit(0).
        """
        # Accept a single scope name as well as a list of them.
        if (isinstance(scope_name, str)):
            scope_name = [scope_name]
        kernels = []
        biases = []
        projections = []
        projection_biases = []
        for i in range(len(scope_name)):
            kernel, bias = self.get_weights(scope_id, [scope_name[i], "kernel", "bias"])
            projection_size = 0;
            projection = None
            if (use_proj):
                # The projection weights are looked up one scope level deeper,
                # so the current scope name is recorded at scope_id first.
                self.scopes[scope_id] = scope_name[i]
                projection, projection_bias = self.get_weights(scope_id+1, ["projection", "kernel", "bias"])
                projection_size = projection.shape[0]
            # Column count of the kernel; named as 4x the output size, which
            # matches the "// 4" below when no projection is used.
            num_output_4 = len(kernel[0])
            if (bias is not None):
                if (len(bias) != num_output_4):
                    print("[ERROR] extract_lstm failed")
                    # NOTE(review): exits with status 0 on an error path.
                    exit(0)
            if (use_proj):
                num_output = projection.shape[1]
            else:
                num_output = num_output_4 // 4
            # If the kernel's row count is not (input last dim + num_output),
            # a separate "recurrent_kernel" is loaded and appended row-wise.
            if (len(kernel) != self.get_tensor_shape(input_name)[-1] + num_output):
                kernel_2, bias_2 = self.get_weights(scope_id, [scope_name[i], "recurrent_kernel", "bias"])
                kernel = np.concatenate([kernel, kernel_2], axis = 0)
            kernels.append(kernel.transpose([1, 0]))
            if (bias is None):
                # NOTE(review): the substitute bias has num_output_4 // 2
                # entries, while a loaded bias must have num_output_4 entries
                # (see the check above) - confirm the intended size.
                bias = np.zeros([num_output_4 // 2])
            biases.append(bias)
            if (use_proj):
                projections.append(projection.transpose([1, 0]))
                # NOTE(review): a loaded (non-None) projection bias is replaced
                # by zeros here, and a missing one stays None - confirm that
                # this condition is not inverted.
                if (projection_bias is not None):
                    projection_bias = np.zeros(num_output)
                projection_biases.append(projection_bias)
            else:
                projections.append(None)
                projection_biases.append(None)
        bottom = [input_name]
        if (state_name is not None):
            bottom.append(state_name)
        layer = caffe_net.LayerParameter(name=output_name, type='LSTM',
                    bottom=bottom, top=[output_name])
        # num_output and projection_size hold the values from the last scope
        # processed by the loop above.
        layer.lstm_param(num_output, steps, projection_size, zoneoutCell, zoneoutOutput)
        if (use_proj):
            # The projection bias is attached only when the first scope has one.
            if (projection_biases[0] is not None):
                layer.add_data(np.concatenate(kernels, axis=0), np.concatenate(biases, axis=0),
                    np.concatenate(projections, axis=0), np.concatenate(projection_biases, axis=0))
            else:
                layer.add_data(np.concatenate(kernels, axis=0), np.concatenate(biases, axis=0),
                    np.concatenate(projections, axis=0))
        else:
            layer.add_data(np.concatenate(kernels, axis=0),
                np.concatenate(biases, axis=0))
        self.caffe_model.add_layer(layer)
        # Dispatch on steps (an earlier variant keyed on len(scope_name), kept
        # below as commented-out conditions).
        #if (len(scope_name) == 1):
        if (steps >= 0):
            self.data_dict[output_name] = Operators.fw_lstm(self.data_dict[input_name],
                kernels[0],
                biases[0],
                projections[0],
                projection_biases[0],
                zoneoutCell, zoneoutOutput,
                output_name)
        elif (steps == -1):
            # This path reads and replaces the recorded state, so state_name
            # must already be present in data_dict.
            self.data_dict[output_name], self.data_dict[state_name] = Operators.lstm(self.data_dict[input_name],
                self.data_dict[state_name],
                kernels[0],
                biases[0],
                projections[0],
                projection_biases[0],
                zoneoutCell, zoneoutOutput,
                output_name,
                state_name)
        #elif (len(scope_name) == 2):
        elif (steps == -2):
            # The full per-scope lists are passed here, not only entry 0.
            self.data_dict[output_name] = Operators.bi_lstm(self.data_dict[input_name],
                kernels,
                biases,
                projections,
                projection_biases,
                zoneoutCell, zoneoutOutput,
                output_name)
        return output_name

    def add_check(self, left_name, right_name, condition, status_name):
        """Add a 'Check' layer comparing two blobs under condition.

        The layer is named status_name and outputs it. Operators.check's
        result on the two recorded inputs is stored under status_name.
        Returns status_name.
        """
        check_layer = caffe_net.LayerParameter(
            name=status_name,
            type='Check',
            bottom=[left_name, right_name],
            top=[status_name],
        )
        check_layer.check_param(condition)
        self.caffe_model.add_layer(check_layer)
        lhs = self.data_dict[left_name]
        rhs = self.data_dict[right_name]
        self.data_dict[status_name] = Operators.check(lhs, rhs, condition, status_name)
        return status_name

    def add_jump(self, jump_start_name, output_name="jump", status_name=None):
        """Add a 'Jump' layer whose bottoms are the jump start and optional status.

        status_name is appended as a second bottom only when it is not None.
        Nothing is recorded in data_dict. Returns jump_start_name.
        """
        if status_name is None:
            bottom = [jump_start_name]
        else:
            bottom = [jump_start_name, status_name]
        jump_layer = caffe_net.LayerParameter(
            name=output_name,
            type='Jump',
            bottom=bottom,
            top=[output_name],
        )
        self.caffe_model.add_layer(jump_layer)
        return jump_start_name

    def add_copy(self, src_name, src_batch_stride, src_stride, src_offset,
            dst_name, dst_batch_stride, dst_stride, dst_offset,
            length,
            output_name,
            src_index_name=None, dst_index_name=None):
        """Add a 'Copy' layer from src_name into dst_name.

        When src_index_name is given, both index blobs become extra bottoms
        and their recorded data is passed to Operators.copy; otherwise both
        indices are None. The result of Operators.copy replaces
        data_dict[dst_name]. Returns dst_name.
        """
        use_index = src_index_name is not None
        if use_index:
            bottom = [src_name, dst_name, src_index_name, dst_index_name]
        else:
            bottom = [src_name, dst_name]
        copy_layer = caffe_net.LayerParameter(
            name=output_name,
            type='Copy',
            bottom=bottom,
            top=[output_name],
        )
        src_index = self.data_dict[src_index_name] if use_index else None
        dst_index = self.data_dict[dst_index_name] if use_index else None
        copy_layer.copy_param(
            src_batch_stride, src_stride, src_offset,
            dst_batch_stride, dst_stride, dst_offset,
            length,
        )
        self.caffe_model.add_layer(copy_layer)

        # The destination blob is overwritten with the copy's result.
        self.data_dict[dst_name] = Operators.copy(
            self.data_dict[src_name],
            src_batch_stride, src_stride, src_offset,
            self.data_dict[dst_name],
            dst_batch_stride, dst_stride, dst_offset,
            length,
            dst_name,
            src_index, dst_index,
        )
        return dst_name

    def add_repeat(self, loops, repeat_start_name,
            output_name="repeat", status_name=None, axis_name=None, axis=-1):
        """Add a 'Repeat' layer starting at repeat_start_name.

        status_name and axis_name, when not None, are appended (in that order)
        to the layer's bottoms. loops and axis go to the layer's repeat_param.
        Nothing is recorded in data_dict. Returns repeat_start_name.
        """
        optional_bottoms = [
            extra for extra in (status_name, axis_name) if extra is not None
        ]
        repeat_layer = caffe_net.LayerParameter(
            name=output_name,
            type='Repeat',
            bottom=[repeat_start_name] + optional_bottoms,
            top=[output_name],
        )
        repeat_layer.repeat_param(loops, axis)
        self.caffe_model.add_layer(repeat_layer)
        return repeat_start_name

    def add_repeat_set_times(self, repeat_name, input_name, input_axis, output_name="set_repeat_times"):
        """Add a 'RepeatSetTimesByAxisLength' layer for repeat_name.

        The layer takes repeat_name and input_name as bottoms and input_axis
        as its parameter. Nothing is recorded in data_dict.
        Returns repeat_name.
        """
        set_times_layer = caffe_net.LayerParameter(
            name=output_name,
            type='RepeatSetTimesByAxisLength',
            bottom=[repeat_name, input_name],
            top=[output_name],
        )
        set_times_layer.repeat_set_times_by_axis_length_param(input_axis)
        self.caffe_model.add_layer(set_times_layer)
        return repeat_name

    def add_memory(self, memory_name, memory_shapes, data_type):
        """Add a 'PreAllocatedMemory' layer producing memory_name.

        The layer is named memory_name + "_mem" and has no bottoms. The
        recorded data is Operators.zeros(memory_shapes, memory_name).
        Returns memory_name.
        """
        memory_layer = caffe_net.LayerParameter(
            name=memory_name + "_mem",
            type='PreAllocatedMemory',
            top=[memory_name],
        )
        memory_layer.memory_param(memory_shapes, data_type)
        self.caffe_model.add_layer(memory_layer)
        self.data_dict[memory_name] = Operators.zeros(memory_shapes, memory_name)
        return memory_name

    def add_pad(self, input_name, output_name, padding_shapes, padding_values=None):
        """Add a 'Pad' layer and record Operators.pad's result for it.

        padding_shapes and padding_values are forwarded both to the layer's
        padding_param and to Operators.pad. Returns output_name.
        """
        pad_layer = caffe_net.LayerParameter(
            name=output_name,
            type='Pad',
            bottom=[input_name],
            top=[output_name],
        )
        pad_layer.padding_param(padding_shapes, padding_values)
        self.caffe_model.add_layer(pad_layer)
        source = self.data_dict[input_name]
        self.data_dict[output_name] = Operators.pad(source, padding_shapes, padding_values, output_name)
        return output_name

    def add_relative_shift(self, input_name, output_name, axis, shift_length):
        """Add a 'RelativeShift' layer and record Operators.relative_shift's result.

        axis and shift_length are forwarded both to the layer's
        relative_shift_param and to Operators.relative_shift.
        Returns output_name.
        """
        shift_layer = caffe_net.LayerParameter(
            name=output_name,
            type='RelativeShift',
            bottom=[input_name],
            top=[output_name],
        )
        shift_layer.relative_shift_param(axis, shift_length)
        self.caffe_model.add_layer(shift_layer)
        source = self.data_dict[input_name]
        self.data_dict[output_name] = Operators.relative_shift(source, axis, shift_length, output_name)
        return output_name

    def add_clip(self, input_name, output_name, min_value, max_value):
        """Add a 'Clip' layer and record Operators.clip's result for it.

        min_value and max_value are forwarded both to the layer's clip_param
        and to Operators.clip. Returns output_name.
        """
        clip_layer = caffe_net.LayerParameter(
            name=output_name,
            type='Clip',
            bottom=[input_name],
            top=[output_name],
        )
        clip_layer.clip_param(min_value, max_value)
        self.caffe_model.add_layer(clip_layer)
        source = self.data_dict[input_name]
        self.data_dict[output_name] = Operators.clip(source, min_value, max_value, output_name)
        return output_name