Example #1
    def _get_wav_and_melspec(wav_file, length, is_training=True):
        '''
        The waveform values are in the range [-1, 1].
        '''

        wav = read_wav(wav_file, sr=hp.signal.sr)
        wav = trim_wav(wav)
        # Split the wav into chunks of the given length; pick a random chunk in training, the first chunk in generation.
        n_clips = math.ceil(len(wav) / length) if is_training else 1
        idx = random.randrange(n_clips)
        start, end = length * idx, length * (idx + 1)
        wav = wav[start:end]
        assert len(wav) <= length
        wav = fix_length(wav, length)  # padding in case of last chunk.

        melspec = wav2melspec_db(wav,
                                 sr=hp.signal.sr,
                                 n_fft=hp.signal.n_fft,
                                 win_length=hp.signal.win_length,
                                 hop_length=hp.signal.hop_length,
                                 n_mels=hp.signal.n_mels,
                                 min_db=hp.signal.min_db,
                                 max_db=hp.signal.max_db)
        wav = np.expand_dims(wav, -1)
        return wav, melspec.astype(np.float32)
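The helper fix_length is referenced here but not defined on this page; it may simply wrap librosa.util.fix_length. A minimal sketch of such a pad-or-crop helper, assuming a 1-D numpy waveform (hypothetical, not the project's actual implementation):

    import numpy as np

    def fix_length(wav, length):
        # Hypothetical sketch: zero-pad on the right, or crop, so len(wav) == length.
        if len(wav) < length:
            return np.pad(wav, (0, length - len(wav)), mode='constant')
        return wav[:length]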
Example #2
def do_inference(num_tests, concurrency=1):
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    coord = _Coordinator(num_tests, concurrency)

    for _ in range(num_tests):
        # dummy audio parameters
        duration, sr = 4, 16000
        n_fft, win_length, hop_length, n_mels = 512, 512, 128, 80
        max_db, min_db = 35, -55
        filename = librosa.util.example_audio_file()
        wav = read_wav(filename, sr=sr, duration=duration)
        mel = wav2melspec_db(wav, sr, n_fft, win_length, hop_length, n_mels)
        mel = normalize_db(mel, max_db=max_db, min_db=min_db)
        mel = mel.astype(np.float32)
        mel = np.expand_dims(mel, axis=0)  # single batch
        n_timesteps = sr // hop_length * duration + 1  # integer frame count (a plain '/' would give a float in Python 3)

        # build request
        request = predict_pb2.PredictRequest()
        request.model_spec.name = 'voice_vector'
        request.model_spec.signature_name = 'predict'
        request.inputs['x'].CopyFrom(
            tf.contrib.util.make_tensor_proto(mel,
                                              shape=[1, n_timesteps, n_mels]))

        coord.throttle()

        # send the request asynchronously (recommended)
        result_future = stub.Predict.future(request, 10.0)  # 10 second timeout
        result_future.add_done_callback(_create_rpc_callback(coord))

        # send the request synchronously (NOT recommended)
        # result = stub.Predict(request, 5.0)

    coord.wait_all_done()
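_create_rpc_callback is referenced above but not shown. A minimal sketch of such a callback factory, assuming the _Coordinator exposes a method for marking a request as finished (the done() call below is an assumption) and without knowing the real output tensor names:

    def _create_rpc_callback(coord):
        # Hypothetical sketch of the callback attached to the async Predict future.
        def _callback(result_future):
            exception = result_future.exception()
            if exception:
                print('RPC failed: {}'.format(exception))
            else:
                response = result_future.result()  # PredictResponse
                print('received outputs: {}'.format(list(response.outputs.keys())))
            coord.done()  # assumed _Coordinator method; the real API may differ
        return _callback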
Example #3
    def get_random_wav_and_label(self, tar_wavfiles, ntar_wavfiles):
        """

        :return: wav: raw wave. float32. shape=(t, ),
                 label: 1 if target, 0 otherwise. int32.
                 melspec: mel-spectrogram. float32. shape=(t, n_mels)
        """
        is_target = np.random.sample() <= self.tar_ratio
        wavfiles, label = (tar_wavfiles, self.tar_labels) if is_target \
            else (ntar_wavfiles, self.ntar_labels)
        wavfile = wavfiles[np.random.randint(0, len(wavfiles))]
        if isinstance(wavfile, bytes):
            wavfile = wavfile.decode()
        if wavfile.endswith('arr'):  # pyarrow format
            wav = read_wav_from_arr(wavfile)
        else:
            wav = read_wav(wavfile, sr=hp.signal.sr)
        wav = trim_wav(wav)

        wav = crop_random_wav(wav, self.length)
        wav = augment_volume(wav)
        wav = fix_length(wav, self.length)  # padding
        melspec = wav2melspec_db(wav,
                                 sr=hp.signal.sr,
                                 n_fft=hp.signal.n_fft,
                                 win_length=hp.signal.win_length,
                                 hop_length=hp.signal.hop_length,
                                 n_mels=hp.signal.n_mels,
                                 min_db=hp.signal.min_db,
                                 max_db=hp.signal.max_db)
        melspec = np.float32(melspec)
        label = np.float32(label)
        return wav, melspec, label
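crop_random_wav and augment_volume are project helpers that are not reproduced here. One plausible implementation of each, shown only for illustration (a random-offset crop and a random gain; the original code may differ):

    import numpy as np

    def crop_random_wav(wav, length):
        # Hypothetical sketch: take a random window of at most `length` samples.
        if len(wav) <= length:
            return wav
        start = np.random.randint(0, len(wav) - length + 1)
        return wav[start:start + length]

    def augment_volume(wav, low=0.5, high=1.5):
        # Hypothetical sketch: scale the waveform by a random gain.
        return wav * np.random.uniform(low, high)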
Example #4
    def get_data(self):
        """Endlessly yield (wav, mel_spec, speaker_id) for a randomly chosen speaker."""
        while True:
            speaker_id = random.choice(list(self.speaker_dict.keys()))
            wav = self._load_random_wav(speaker_id)
            mel_spec = wav2melspec_db(wav, hp.signal.sr, hp.signal.n_fft,
                                      hp.signal.win_length,
                                      hp.signal.hop_length, hp.signal.n_mels)
            mel_spec = normalize_db(mel_spec,
                                    max_db=hp.signal.max_db,
                                    min_db=hp.signal.min_db)
            yield wav, mel_spec, speaker_id
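normalize_db appears in several of these examples to squash a dB-scaled mel-spectrogram into a fixed range. A minimal sketch, assuming it clips to [min_db, max_db] and rescales linearly to [0, 1] (the project's exact scaling may differ):

    import numpy as np

    def normalize_db(mel_db, max_db, min_db):
        # Hypothetical sketch: clip to [min_db, max_db], then rescale to [0, 1].
        return np.clip((mel_db - min_db) / (max_db - min_db), 0.0, 1.0)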
Example #5
    def get_random_wav(self, wavfile):
        """
        :param wavfile: path to a raw wave file.
        :return: wav: raw waveform. float32. shape=(t, ),
                 melspec: mel-spectrogram. float32. shape=(t, n_mels),
                 wavfile: the input wave file path (passed through).
        """
        wav = read_wav(wavfile, sr=hp.signal.sr)
        wav = trim_wav(wav)
        wav = fix_length(wav, self.length)  # crop from the beginning or pad to self.length.
        melspec = wav2melspec_db(wav,
                                 sr=hp.signal.sr,
                                 n_fft=hp.signal.n_fft,
                                 win_length=hp.signal.win_length,
                                 hop_length=hp.signal.hop_length,
                                 n_mels=hp.signal.n_mels,
                                 min_db=hp.signal.min_db,
                                 max_db=hp.signal.max_db)
        melspec = np.float32(melspec)
        return wav, melspec, wavfile
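read_wav and trim_wav are thin wrappers in many projects that follow this pattern. A hedged sketch, assuming they wrap librosa.load and librosa.effects.trim (the original wrappers and their trim threshold are not shown on this page):

    import librosa

    def read_wav(path, sr, duration=None):
        # Hypothetical sketch: load a mono waveform in [-1, 1] at the given sample rate.
        wav, _ = librosa.load(path, sr=sr, mono=True, duration=duration)
        return wav

    def trim_wav(wav, top_db=35):
        # Hypothetical sketch: trim leading/trailing silence below `top_db`.
        trimmed, _ = librosa.effects.trim(wav, top_db=top_db)
        return trimmed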
Example #6
    def _get_wav_and_melspec(wav_file, length=None, is_training=True):
        wav = read_wav(wav_file, sr=hp.signal.sr)
        wav = trim_wav(wav)
        if length:
            n_clips = math.ceil(len(wav) / length) if is_training else 1
            idx = random.randrange(n_clips)
            start, end = length * idx, length * (idx + 1)
            wav = wav[start:end]
            assert (len(wav) <= length)
            wav = fix_length(wav, length)  # padding

        melspec = wav2melspec_db(wav,
                                 sr=hp.signal.sr,
                                 n_fft=hp.signal.n_fft,
                                 win_length=hp.signal.win_length,
                                 hop_length=hp.signal.hop_length,
                                 n_mels=hp.signal.n_mels,
                                 min_db=hp.signal.min_db,
                                 max_db=hp.signal.max_db)
        wav = np.expand_dims(wav, -1)
        return wav, melspec.astype(np.float32)
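Every example above funnels through wav2melspec_db, whose implementation is not shown on this page. A plausible librosa-based sketch (mel power spectrogram converted to dB, optionally clipped to [min_db, max_db]); the original project's scaling and transposition may differ:

    import librosa
    import numpy as np

    def wav2melspec_db(wav, sr, n_fft, win_length, hop_length, n_mels,
                       min_db=None, max_db=None):
        # Hypothetical sketch: waveform -> dB mel-spectrogram of shape (t, n_mels).
        mel = librosa.feature.melspectrogram(y=wav, sr=sr, n_fft=n_fft,
                                             win_length=win_length,
                                             hop_length=hop_length,
                                             n_mels=n_mels)
        mel_db = librosa.power_to_db(mel)
        if min_db is not None and max_db is not None:
            mel_db = np.clip(mel_db, min_db, max_db)
        return mel_db.T  # (n_mels, t) -> (t, n_mels), matching the docstrings above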