def vectorize(self, inputs):
    """
    Vectorize inputs.

    Parameters
    ----------
    inputs: List[np.array]
        List[np.array] or List[malaya_speech.model.frame.Frame].

    Returns
    -------
    result: np.array
        returned [B, D].
    """
    # Frame objects are unwrapped to their `.array`; any other item is
    # passed through unchanged.
    signals = [
        item.array if isinstance(item, Frame) else item for item in inputs
    ]
    features = [self._vectorizer(signal) for signal in signals]

    # Batch the features (dim=0) and keep the lengths returned alongside,
    # because the graph is fed both.
    batch, lengths = padding_sequence_nd(features, dim=0, return_len=True)

    outputs = self._execute(
        inputs=[batch, lengths],
        input_labels=['Placeholder', 'Placeholder_1'],
        output_labels=['logits'],
    )
    return outputs['logits']
def predict(self, inputs):
    """
    Enhance inputs, will return melspectrogram.

    Parameters
    ----------
    inputs: List[np.array]
        Items may also be Frame objects; their `.array` is used.

    Returns
    -------
    result: List
        One entry per batch row, each the output of
        `featurization.unscale_mel`.
    """
    signals = [
        item.array if isinstance(item, Frame) else item for item in inputs
    ]
    # Each signal is converted with `scale_mel` and transposed before
    # batching.
    mels = [featurization.scale_mel(signal).T for signal in signals]
    x, lens = padding_sequence_nd(mels, maxlen=256, dim=0, return_len=True)

    outputs = self._execute(
        inputs=[x],
        input_labels=['Placeholder'],
        output_labels=['logits'],
    )
    logits = outputs['logits']

    enhanced = []
    for index in range(len(x)):
        length = lens[index]
        # Trim both the padded input and the model output back to this
        # item's length, then add them before un-scaling.
        scaled = x[index, :length].T
        predicted = logits[index, :length, :, 0].T
        enhanced.append(featurization.unscale_mel(scaled + predicted))
    return enhanced
def predict_proba(self, inputs):
    """
    Predict inputs, will return probability.

    Parameters
    ----------
    inputs: List[np.array]
        List[np.array] or List[malaya_speech.model.frame.Frame].

    Returns
    -------
    result: np.array
        returned [B, D].
    """
    signals = [
        item.array if isinstance(item, Frame) else item for item in inputs
    ]
    # `self._extra` carries additional keyword arguments for the vectorizer.
    features = [
        self._vectorizer(signal, **self._extra) for signal in signals
    ]

    # 'deep-speaker' features are padded along dim 0, every other model
    # along dim 1.
    dim = 0 if self.__model__ == 'deep-speaker' else 1
    batch = padding_sequence_nd(features, dim=dim)
    # Append a trailing axis of size 1 before feeding the graph.
    batch = np.expand_dims(batch, -1)

    outputs = self._execute(
        inputs=[batch],
        input_labels=['Placeholder'],
        output_labels=['logits'],
    )
    # Normalise the logits over the last axis.
    return softmax(outputs['logits'], axis=-1)
def vectorize(self, inputs):
    """
    Vectorize inputs.

    Parameters
    ----------
    inputs: List[np.array]
        List[np.array] or List[malaya_speech.model.frame.Frame].

    Returns
    -------
    result: np.array
        returned [B, D].
    """
    signals = [
        item.array if isinstance(item, Frame) else item for item in inputs
    ]
    features = [self._vectorizer(signal) for signal in signals]
    batch, lengths = padding_sequence_nd(features, dim=0, return_len=True)

    if not self._output_nodes:
        # No output nodes registered: call the eager graph instead and
        # take its first output.
        feed = to_tf(self._eager_g.inputs, [batch, lengths])
        return self._eager_g(**feed)[0].numpy()

    # Session path: feed the batch and its lengths to the two placeholders.
    feed_dict = {
        self._input_nodes['Placeholder']: batch,
        self._input_nodes['Placeholder_1']: lengths,
    }
    return self._sess.run(self._output_nodes['logits'], feed_dict=feed_dict)
def predict(self, inputs):
    """
    Enhance inputs, will return melspectrogram.

    Parameters
    ----------
    inputs: List[np.array]
        Items may also be Frame objects; their `.array` is used.

    Returns
    -------
    result: List
        One entry per batch row, each the output of
        `featurization.unscale_mel`.
    """
    signals = [
        item.array if isinstance(item, Frame) else item for item in inputs
    ]
    mels = [featurization.scale_mel(signal).T for signal in signals]
    x, lens = padding_sequence_nd(mels, maxlen=256, dim=0, return_len=True)

    if self._output_nodes:
        # Session path: run the 'logits' node on the padded batch.
        logits = self._sess.run(
            self._output_nodes['logits'],
            feed_dict={self._input_nodes['Placeholder']: x},
        )
    else:
        # Eager path: call the eager graph and take its first output.
        feed = to_tf(self._eager_g.inputs, [x])
        logits = self._eager_g(**feed)[0].numpy()

    # For every row, trim input and output to that row's length, add them
    # and convert back with `unscale_mel`.
    return [
        featurization.unscale_mel(
            x[row, :lens[row]].T + logits[row, :lens[row], :, 0].T
        )
        for row in range(len(x))
    ]
def predict_proba(self, inputs):
    """
    Predict inputs, will return probability.

    Parameters
    ----------
    inputs: List[np.array]
        List[np.array] or List[malaya_speech.model.frame.Frame].

    Returns
    -------
    result: np.array
        returned [B, D], softmax-normalised over the last axis.
    """
    # Frame objects are unwrapped to their `.array`; any other item is
    # passed through unchanged.
    signals = [
        item.array if isinstance(item, Frame) else item for item in inputs
    ]
    features = [
        self._vectorizer(signal, **self._extra) for signal in signals
    ]

    # 'deep-speaker' features are padded along dim 0, every other model
    # along dim 1.
    dim = 0 if self.__model__ == 'deep-speaker' else 1
    batch = padding_sequence_nd(features, dim=dim)
    # Append a trailing axis of size 1 before feeding the graph.
    batch = np.expand_dims(batch, -1)

    if self._output_nodes:
        # Session path: run the 'logits' node on the batch.
        logits = self._sess.run(
            self._output_nodes['logits'],
            feed_dict={self._input_nodes['Placeholder']: batch},
        )
    else:
        # Eager path: call the eager graph and take its first output.
        feed = to_tf(self._eager_g.inputs, [batch])
        logits = self._eager_g(**feed)[0].numpy()

    # The graph output is raw logits; normalise it so the method returns
    # the probabilities its docstring promises, matching the
    # `_execute`-based `predict_proba` in this file.
    return softmax(logits, axis=-1)