import io
from typing import List

import numpy as np
import soundfile as sf
import tensorflow as tf


def _get_output_tensors(self,
                        interpreter: tf.lite.Interpreter) -> List[np.ndarray]:
    """Returns output tensors of a given TFLite model Interpreter.

    Args:
        interpreter: a tf.lite.Interpreter object with allocated tensors.

    Returns:
        a list of numpy arrays representing output tensor results.
    """
    return [
        interpreter.get_tensor(tensor['index'])
        for tensor in interpreter.get_output_details()
    ]
def _get_output_tensors(self,
                        interpreter: tf.lite.Interpreter) -> List[np.ndarray]:
    """Returns output tensors of a given TFLite model Interpreter.

    Quantization-aware variant: int8 output tensors are dequantized back to
    float32 using the tensor's (scale, zero_point) quantization parameters.

    Args:
        interpreter: a tf.lite.Interpreter object with allocated tensors.

    Returns:
        a list of numpy arrays representing output tensor results.
    """
    outputs = []
    for output_detail in interpreter.get_output_details():
        tensor = interpreter.get_tensor(output_detail['index'])
        if output_detail['dtype'] == np.int8:
            quant_params = _get_quant_params(output_detail)
            if quant_params:
                scale, zero_point = quant_params
                # Dequantize: real_value = (quantized_value - zero_point) * scale.
                tensor = ((tensor.astype(np.float32) - zero_point) *
                          scale).astype(np.float32)
        outputs.append(tensor)
    return outputs
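# `_get_quant_params` is used above but not defined in this section. A minimal
# sketch, assuming it simply unpacks the standard (scale, zero_point) tuple
# that tf.lite.Interpreter exposes via get_output_details(); treat it as an
# illustration, not the original helper.
def _get_quant_params(tensor_detail):
    """Returns (scale, zero_point) for a quantized tensor, or None."""
    quantization = tensor_detail.get('quantization')
    # TFLite reports (0.0, 0) for tensors without quantization parameters.
    if quantization and quantization[0] != 0.0:
        scale, zero_point = quantization
        return float(scale), int(zero_point)
    return None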
def clean_speech(audio, interpreter_1: tf.lite.Interpreter,
                 interpreter_2: tf.lite.Interpreter):
    block_len = 512
    block_shift = 128
    # Load models.
    interpreter_1.allocate_tensors()
    interpreter_2.allocate_tensors()
    # Get input and output tensor details.
    input_details_1 = interpreter_1.get_input_details()
    output_details_1 = interpreter_1.get_output_details()
    input_details_2 = interpreter_2.get_input_details()
    output_details_2 = interpreter_2.get_output_details()
    # Create states for the LSTMs.
    states_1 = np.zeros(input_details_1[1]['shape']).astype('float32')
    states_2 = np.zeros(input_details_2[1]['shape']).astype('float32')
    # Preallocate output audio.
    out_file = np.zeros((len(audio)))
    # Create buffers.
    in_buffer = np.zeros((block_len)).astype('float32')
    out_buffer = np.zeros((block_len)).astype('float32')
    # Calculate number of blocks.
    num_blocks = (audio.shape[0] - (block_len - block_shift)) // block_shift
    # Iterate over the number of blocks.
    for idx in range(num_blocks):
        # Shift values and write to buffer.
        in_buffer[:-block_shift] = in_buffer[block_shift:]
        in_buffer[-block_shift:] = audio[idx * block_shift:
                                         (idx * block_shift) + block_shift]
        # Calculate the FFT of the input block.
        in_block_fft = np.fft.rfft(in_buffer)
        in_mag = np.abs(in_block_fft)
        in_phase = np.angle(in_block_fft)
        # Reshape magnitude to input dimensions.
        in_mag = np.reshape(in_mag, (1, 1, -1)).astype('float32')
        # Set tensors for the first model.
        interpreter_1.set_tensor(input_details_1[1]['index'], states_1)
        interpreter_1.set_tensor(input_details_1[0]['index'], in_mag)
        # Run calculation.
        interpreter_1.invoke()
        # Get the output of the first model.
        out_mask = interpreter_1.get_tensor(output_details_1[0]['index'])
        states_1 = interpreter_1.get_tensor(output_details_1[1]['index'])
        # Apply the mask and calculate the IFFT.
        estimated_complex = in_mag * out_mask * np.exp(1j * in_phase)
        estimated_block = np.fft.irfft(estimated_complex)
        # Reshape the time-domain block.
        estimated_block = np.reshape(estimated_block,
                                     (1, 1, -1)).astype('float32')
        # Set tensors for the second model.
        interpreter_2.set_tensor(input_details_2[1]['index'], states_2)
        interpreter_2.set_tensor(input_details_2[0]['index'], estimated_block)
        # Run calculation.
        interpreter_2.invoke()
        # Get output tensors.
        out_block = interpreter_2.get_tensor(output_details_2[0]['index'])
        states_2 = interpreter_2.get_tensor(output_details_2[1]['index'])
        # Shift values and write to buffer (overlap-add).
        out_buffer[:-block_shift] = out_buffer[block_shift:]
        out_buffer[-block_shift:] = np.zeros((block_shift))
        out_buffer += np.squeeze(out_block)
        # Write block to output.
        out_file[idx * block_shift:(idx * block_shift) +
                 block_shift] = out_buffer[:block_shift]
    # Write the enhanced audio to an in-memory WAV file and return it.
    output_bytes = io.BytesIO()
    sf.write(output_bytes, out_file, 16000, format='WAV')
    output_bytes.seek(0)
    return output_bytes
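# Example usage: a minimal sketch. The model file names and the input WAV path
# below are placeholders, and the two models are assumed to be DTLN-style
# TFLite models operating on mono 16 kHz audio.
if __name__ == '__main__':
    interpreter_1 = tf.lite.Interpreter(model_path='model_1.tflite')
    interpreter_2 = tf.lite.Interpreter(model_path='model_2.tflite')
    # clean_speech expects a 1-D float array sampled at 16 kHz.
    audio, sample_rate = sf.read('noisy.wav')
    enhanced_wav = clean_speech(audio, interpreter_1, interpreter_2)
    # clean_speech returns an in-memory WAV file (io.BytesIO).
    with open('enhanced.wav', 'wb') as f:
        f.write(enhanced_wav.read())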