def extract_lstm(self, input_name, state_name, output_name, scope_id, steps=-1,
                 scope_name="basic_lstm_cell"):
    """Emit a Caffe 'LSTM' layer for one LSTM cell and record its reference output.

    The kernel and bias are looked up with ``self.get_tensor`` under the names
    produced by ``self.generate_name`` after writing ``scope_name`` and then
    ``"kernel"`` / ``"bias"`` into ``self.scopes`` at ``scope_id`` and
    ``scope_id + 1``.

    Args:
        input_name: key of the layer input in ``self.data_dict``; first bottom blob.
        state_name: key of the recurrent state in ``self.data_dict``; second
            bottom blob, overwritten with the new state.
        output_name: name of the layer and of its top blob.
        scope_id: index into ``self.scopes`` where the cell scope is written.
        steps: forwarded unchanged to ``layer.lstm_param``.
        scope_name: scope under which the cell's variables are stored.

    Returns:
        ``output_name``.

    Raises:
        SystemExit: with status 1 when a bias exists but its length differs
            from ``len(kernel[0])``.
    """
    # NOTE: self.scopes is mutated in place; entry scope_id + 1 is left as "bias".
    self.scopes[scope_id] = scope_name
    self.scopes[scope_id + 1] = "kernel"
    kernel = self.get_tensor(self.generate_name(self.scopes, scope_id + 2))
    self.scopes[scope_id + 1] = "bias"
    bias = self.get_tensor(self.generate_name(self.scopes, scope_id + 2))

    # Validate before building anything so a bad checkpoint fails fast.
    num_output_4 = len(kernel[0])
    if bias is not None and len(bias) != num_output_4:
        print("[ERROR] extract_lstm failed")
        # Non-zero status: the previous exit(0) reported success on failure.
        raise SystemExit(1)

    # The kernel's second dimension is split into four equal parts
    # (presumably the four LSTM gates -- confirm against Operators.lstm).
    num_output = num_output_4 // 4

    layer = caffe_net.LayerParameter(name=output_name, type='LSTM',
                                     bottom=[input_name, state_name],
                                     top=[output_name])
    layer.lstm_param(num_output, steps)
    layer.add_data(kernel.transpose([1, 0]), bias)
    self.caffe_model.add_layer(layer)

    # Compute the reference output/state with the untransposed kernel.
    self.data_dict[output_name], self.data_dict[state_name] = Operators.lstm(
        self.data_dict[input_name], self.data_dict[state_name],
        kernel, bias, output_name, state_name)
    return output_name
def extract_lstm(self, input_name, state_name, output_name, scope_id, steps=-1,
                 scope_name="basic_lstm_cell", use_proj=False, zoneout_cell=0,
                 zoneout_output=0):
    """Emit a Caffe 'LSTM' layer for one or more LSTM cells.

    For every entry of ``scope_name`` the cell's kernel and bias are fetched
    with ``self.get_weights``; when ``use_proj`` is set the cell's
    ``projection`` kernel/bias are fetched as well. The transposed weights of
    all cells are concatenated along axis 0 and attached to a single layer.

    Args:
        input_name: key of the layer input in ``self.data_dict``; first bottom blob.
        state_name: key of the recurrent state in ``self.data_dict``. When it
            is ``None`` the layer gets no state bottom blob.
            NOTE(review): the single-cell reference computation still indexes
            ``self.data_dict[state_name]`` -- confirm ``None`` is only used
            with two scopes.
        output_name: name of the layer and of its top blob.
        scope_id: scope depth handed to ``self.get_weights``.
        steps: forwarded unchanged to ``layer.lstm_param``.
        scope_name: one scope name or a sequence of them. One name runs
            ``Operators.lstm``, two run ``Operators.bi_lstm``; for any other
            count the layer is still added but no reference output is stored.
        use_proj: whether each cell has a projection whose weights must be exported.
        zoneout_cell: forwarded to ``layer.lstm_param`` and the reference operator.
        zoneout_output: forwarded to ``layer.lstm_param`` and the reference operator.

    Returns:
        ``output_name``.

    Raises:
        ValueError: if ``scope_name`` is an empty sequence.
        SystemExit: with status 1 when a bias exists but its length differs
            from ``len(kernel[0])``.
    """
    if isinstance(scope_name, str):
        scope_name = [scope_name]
    if len(scope_name) == 0:
        # Without at least one cell, num_output would be unbound below.
        raise ValueError("extract_lstm requires at least one scope name")

    kernels = []
    biases = []
    projections = []
    projection_biases = []
    for cell_scope in scope_name:
        kernel, bias = self.get_weights(scope_id, [cell_scope, "kernel", "bias"])
        projection_size = 0
        projection = None
        projection_bias = None
        if use_proj:
            self.scopes[scope_id] = cell_scope
            projection, projection_bias = self.get_weights(
                scope_id + 1, ["projection", "kernel", "bias"])
            projection_size = projection.shape[0]

        num_output_4 = len(kernel[0])
        if bias is not None and len(bias) != num_output_4:
            print("[ERROR] extract_lstm failed")
            # Non-zero status: the previous exit(0) reported success on failure.
            raise SystemExit(1)

        # num_output / projection_size of the last cell are used for the layer.
        num_output = projection.shape[1] if use_proj else num_output_4 // 4

        kernels.append(kernel.transpose([1, 0]))
        if bias is None:
            # A missing bias is replaced by zeros of the same length the
            # validation above requires of a real bias (was num_output_4 // 2).
            bias = np.zeros([num_output_4])
        biases.append(bias)

        if use_proj:
            projections.append(projection.transpose([1, 0]))
            # Only substitute zeros when the checkpoint has no projection
            # bias; the condition used to be inverted, which dropped real
            # biases and left None in the list for np.concatenate.
            if projection_bias is None:
                projection_bias = np.zeros(num_output)
            projection_biases.append(projection_bias)
        else:
            projections.append(None)
            projection_biases.append(None)

    bottom = [input_name]
    if state_name is not None:
        bottom.append(state_name)
    layer = caffe_net.LayerParameter(name=output_name, type='LSTM',
                                     bottom=bottom, top=[output_name])
    layer.lstm_param(num_output, steps, projection_size, zoneout_cell,
                     zoneout_output)
    if use_proj:
        layer.add_data(np.concatenate(kernels, axis=0),
                       np.concatenate(biases, axis=0),
                       np.concatenate(projections, axis=0),
                       np.concatenate(projection_biases, axis=0))
    else:
        layer.add_data(np.concatenate(kernels, axis=0),
                       np.concatenate(biases, axis=0))
    self.caffe_model.add_layer(layer)

    # Reference computation: unidirectional for one cell, bidirectional for two.
    if len(scope_name) == 1:
        self.data_dict[output_name], self.data_dict[state_name] = Operators.lstm(
            self.data_dict[input_name], self.data_dict[state_name],
            kernels[0], biases[0], projections[0], projection_biases[0],
            zoneout_cell, zoneout_output, output_name, state_name)
    elif len(scope_name) == 2:
        self.data_dict[output_name] = Operators.bi_lstm(
            self.data_dict[input_name], kernels, biases, projections,
            projection_biases, zoneout_cell, zoneout_output, output_name)
    return output_name