import io
from typing import List, Sequence

import numpy as np
import soundfile as sf
import tensorflow as tf


def _get_output_tensors(self,
                        interpreter: tf.lite.Interpreter) -> List[np.ndarray]:
  """Returns output tensors of given TFLite model Interpreter.

  Args:
    interpreter: a tf.lite.Interpreter object with allocated tensors.

  Returns:
    a list of numpy arrays representing output tensor results.
  """
  return [
      interpreter.get_tensor(tensor['index'])
      for tensor in interpreter.get_output_details()
  ]
def _set_input_tensors(self, interpreter: tf.lite.Interpreter,
                       tensor_data: Sequence[np.ndarray],
                       initialize: bool) -> None:
  """Sets input tensors into TFLite model Interpreter.

  Args:
    interpreter: a tf.lite.Interpreter object with allocated tensors.
    tensor_data: a list of Numpy array data.
    initialize: set to true when input is first set for the interpreter, to
      set input shapes and allocate tensors.

  Raises:
    ValueError: when inputs can't be set, or size of provided inputs does not
      match size of model inputs.
  """
  input_details = interpreter.get_input_details()
  if len(input_details) != len(tensor_data):
    raise ValueError(
        'Number of inputs provided ({}) does not match number of inputs to '
        'the model ({})'.format(len(tensor_data), len(input_details)))

  if initialize:
    for input_detail, tensor in zip(input_details, tensor_data):
      interpreter.resize_tensor_input(input_detail['index'], tensor.shape)
    interpreter.allocate_tensors()

  for input_detail, tensor in zip(input_details, tensor_data):
    # Quantize float inputs on the fly when the model expects int8.
    if tensor.dtype == np.float32 and input_detail['dtype'] == np.int8:
      quant_params = _get_quant_params(input_detail)
      if quant_params:
        scale, zero_point = quant_params
        tensor = np.round((tensor / scale) + zero_point).astype(np.int8)
    interpreter.set_tensor(input_detail['index'], tensor)
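# Both quantization-aware variants in this section call a _get_quant_params
# helper that is not shown here. Below is a minimal sketch of what it could
# look like, assuming it reads the standard 'quantization' entry (a
# (scale, zero_point) tuple) from the interpreter's tensor detail dict and
# treats a zero scale as "not quantized"; the original helper may differ.
from typing import Any, Dict, Optional, Tuple


def _get_quant_params(
    tensor_detail: Dict[str, Any]) -> Optional[Tuple[float, int]]:
  """Returns (scale, zero_point) from a tensor detail, or None if absent.

  NOTE: hypothetical reconstruction; not the original helper.
  """
  quant_params = tensor_detail.get('quantization')
  # The TFLite interpreter reports (0.0, 0) for tensors without quantization.
  if not quant_params or quant_params[0] == 0.0:
    return None
  return quant_params[0], quant_params[1]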
def _set_input_tensors(
    self,
    interpreter: tf.lite.Interpreter,
    tensor_data: Sequence[np.ndarray],
    initialize: bool,
) -> None:
  """Sets input tensors into TFLite model Interpreter.

  Args:
    interpreter: a tf.lite.Interpreter object with allocated tensors.
    tensor_data: a list of Numpy array data.
    initialize: set to true when input is first set for the interpreter, to
      set input shapes and allocate tensors.

  Raises:
    ValueError: when inputs can't be set, or size of provided inputs does not
      match size of model inputs.
  """
  input_indices = [
      detail['index'] for detail in interpreter.get_input_details()
  ]
  if len(input_indices) != len(tensor_data):
    raise ValueError(
        'Number of inputs provided ({}) does not match number of inputs to '
        'the model ({})'.format(len(tensor_data), len(input_indices)))

  if initialize:
    for input_idx, tensor in zip(input_indices, tensor_data):
      interpreter.resize_tensor_input(input_idx, tensor.shape)
    interpreter.allocate_tensors()

  for input_idx, tensor in zip(input_indices, tensor_data):
    interpreter.set_tensor(input_idx, tensor)
def _get_output_tensors(self,
                        interpreter: tf.lite.Interpreter) -> List[np.ndarray]:
  """Returns output tensors of given TFLite model Interpreter.

  Args:
    interpreter: a tf.lite.Interpreter object with allocated tensors.

  Returns:
    a list of numpy arrays representing output tensor results.
  """
  outputs = []
  for output_detail in interpreter.get_output_details():
    tensor = interpreter.get_tensor(output_detail['index'])
    # Dequantize int8 outputs back to float32.
    if output_detail['dtype'] == np.int8:
      quant_params = _get_quant_params(output_detail)
      if quant_params:
        scale, zero_point = quant_params
        tensor = ((tensor.astype(np.float32) - zero_point) * scale).astype(
            np.float32)
    outputs.append(tensor)
  return outputs
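# A minimal usage sketch of the round trip the setters and getters above
# implement, assuming a single-input float32 model at the hypothetical path
# 'model.tflite' and an assumed input shape; adjust both for a real model.
interpreter = tf.lite.Interpreter(model_path='model.tflite')

input_data = np.random.rand(1, 224, 224, 3).astype(np.float32)
input_detail = interpreter.get_input_details()[0]

# First call: resize and allocate, mirroring the initialize=True path above.
interpreter.resize_tensor_input(input_detail['index'], input_data.shape)
interpreter.allocate_tensors()

interpreter.set_tensor(input_detail['index'], input_data)
interpreter.invoke()

# Read outputs the same way _get_output_tensors does.
outputs = [
    interpreter.get_tensor(detail['index'])
    for detail in interpreter.get_output_details()
]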
def clean_speech(audio, interpreter_1: tf.lite.Interpreter,
                 interpreter_2: tf.lite.Interpreter):
  block_len = 512
  block_shift = 128
  # load models
  interpreter_1.allocate_tensors()
  interpreter_2.allocate_tensors()
  # get input and output tensor details
  input_details_1 = interpreter_1.get_input_details()
  output_details_1 = interpreter_1.get_output_details()
  input_details_2 = interpreter_2.get_input_details()
  output_details_2 = interpreter_2.get_output_details()
  # create states for the LSTMs
  states_1 = np.zeros(input_details_1[1]['shape']).astype('float32')
  states_2 = np.zeros(input_details_2[1]['shape']).astype('float32')
  # preallocate output audio
  out_file = np.zeros((len(audio)))
  # create buffers
  in_buffer = np.zeros((block_len)).astype('float32')
  out_buffer = np.zeros((block_len)).astype('float32')
  # calculate number of blocks
  num_blocks = (audio.shape[0] - (block_len - block_shift)) // block_shift
  # iterate over the number of blocks
  for idx in range(num_blocks):
    # shift values and write to buffer
    in_buffer[:-block_shift] = in_buffer[block_shift:]
    in_buffer[-block_shift:] = audio[idx * block_shift:(idx * block_shift) +
                                     block_shift]
    # calculate FFT of input block
    in_block_fft = np.fft.rfft(in_buffer)
    in_mag = np.abs(in_block_fft)
    in_phase = np.angle(in_block_fft)
    # reshape magnitude to input dimensions
    in_mag = np.reshape(in_mag, (1, 1, -1)).astype('float32')
    # set tensors to the first model
    interpreter_1.set_tensor(input_details_1[1]['index'], states_1)
    interpreter_1.set_tensor(input_details_1[0]['index'], in_mag)
    # run calculation
    interpreter_1.invoke()
    # get the output of the first model
    out_mask = interpreter_1.get_tensor(output_details_1[0]['index'])
    states_1 = interpreter_1.get_tensor(output_details_1[1]['index'])
    # apply the mask and calculate the IFFT
    estimated_complex = in_mag * out_mask * np.exp(1j * in_phase)
    estimated_block = np.fft.irfft(estimated_complex)
    # reshape the time-domain block
    estimated_block = np.reshape(estimated_block,
                                 (1, 1, -1)).astype('float32')
    # set tensors to the second model
    interpreter_2.set_tensor(input_details_2[1]['index'], states_2)
    interpreter_2.set_tensor(input_details_2[0]['index'], estimated_block)
    # run calculation
    interpreter_2.invoke()
    # get output tensors
    out_block = interpreter_2.get_tensor(output_details_2[0]['index'])
    states_2 = interpreter_2.get_tensor(output_details_2[1]['index'])
    # shift values and write to buffer
    out_buffer[:-block_shift] = out_buffer[block_shift:]
    out_buffer[-block_shift:] = np.zeros((block_shift))
    out_buffer += np.squeeze(out_block)
    # write block to output file
    out_file[idx * block_shift:(idx * block_shift) +
             block_shift] = out_buffer[:block_shift]
  # write the enhanced audio to the in-memory WAV buffer that is returned,
  # instead of a disk file the caller never sees
  output_bytes = io.BytesIO()
  sf.write(output_bytes, out_file, 16000, format='WAV')
  output_bytes.seek(0)
  return output_bytes
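# A usage sketch for clean_speech, assuming two TFLite model files with the
# hypothetical names 'model_1.tflite' and 'model_2.tflite' and a 16 kHz mono
# recording 'in.wav'; the section does not specify these names.
interpreter_1 = tf.lite.Interpreter(model_path='model_1.tflite')
interpreter_2 = tf.lite.Interpreter(model_path='model_2.tflite')

audio, sample_rate = sf.read('in.wav')
assert sample_rate == 16000, 'clean_speech assumes 16 kHz input'

wav_bytes = clean_speech(audio, interpreter_1, interpreter_2)
with open('enhanced.wav', 'wb') as f:
  f.write(wav_bytes.read())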