コード例 #1
0
def generate(hop_size=256):
    """Yield an endless stream of training examples built from ``files``.

    Every pass reshuffles the module-level ``files`` list, loads each file
    at the module-level sample rate ``sr``, computes its mel features and,
    when the clip is longer than a randomly drawn budget, crops a random
    window from both the waveform and the mel features.

    Args:
        hop_size: Number of audio samples per mel frame, used to convert
            between sample offsets and frame offsets.

    Yields:
        dict with keys ``'mel'``, ``'mel_length'`` (a one-element list with
        the number of mel frames), ``'audio'`` and ``'v'`` (the first item
        returned by ``speaker_model`` for this clip -- presumably a speaker
        embedding; confirm against the model).
    """
    while True:
        # A fresh shuffle per pass, so the file order differs each epoch.
        for path in sklearn.utils.shuffle(files):
            waveform, _ = malaya_speech.load(path, sr=sr)
            features = malaya_speech.featurization.universal_mel(waveform)

            # Random crop budget in samples, then converted to whole frames.
            max_samples = random.randint(16384, 110_250)
            max_frames = max_samples // hop_size

            n_frames = len(features)
            if n_frames > max_frames:
                # Pick the first frame so the window fits inside the clip.
                first_frame = random.randint(0, n_frames - max_frames)
                first_sample = first_frame * hop_size
                # NOTE(review): the audio window is `max_samples` long, which
                # is not necessarily a multiple of `hop_size`, so it can be
                # slightly longer than `max_frames * hop_size` -- confirm
                # this is intended.
                waveform = waveform[first_sample:first_sample + max_samples]
                features = features[first_frame:first_frame + max_frames, :]

            # The model is called with a single-item batch.
            embedding = speaker_model([waveform])

            yield {
                'mel': features,
                'mel_length': [len(features)],
                'audio': waveform,
                'v': embedding[0],
            }
コード例 #2
0
def signal_augmentation(wav):
    """Randomly augment a waveform and return it as ``float32``.

    The waveform is always passed through ``calc`` with a freshly drawn
    seed. When a Gaussian draw (mean 0.5, std 0.14) exceeds 0.6 -- roughly
    24% of calls -- a randomly chosen clip from the module-level ``noises``
    list is loaded at 16 kHz, processed by ``calc`` with the same seed, and
    mixed in via ``augmentation.add_noise``.

    Args:
        wav: Input waveform; the result of ``calc`` must support
            ``.astype`` (presumably a NumPy array -- confirm with callers).

    Returns:
        The augmented waveform cast to ``'float32'``.
    """
    seed = random.randint(0, 100_000_000)
    processed = calc(wav, seed)

    add_background = random.gauss(0.5, 0.14) > 0.6
    if not add_background:
        return processed.astype('float32')

    noise_path = random.choice(noises)
    noise, _ = malaya_speech.load(noise_path, sr=16000)
    # Same seed as the main signal; the meaning of the third positional
    # flag is defined by `calc` -- TODO confirm.
    noise = calc(noise, seed, True)
    mix_factor = random.uniform(0.05, 0.3)
    noisy = augmentation.add_noise(processed, noise, factor=mix_factor)
    return noisy.astype('float32')
コード例 #3
0
def read_wav(f):
    """Load the audio file ``f`` at the module-level sample rate ``sr``.

    Args:
        f: Audio file to load, forwarded unchanged to ``malaya_speech.load``.

    Returns:
        Whatever ``malaya_speech.load`` returns (presumably an
        ``(audio, sample_rate)`` pair -- confirm against the library).
    """
    loaded = malaya_speech.load(f, sr=sr)
    return loaded