コード例 #1
0
def prepare_file2():
    """Build mel-spectrogram features for the ambient-sound noise set.

    Loads and concatenates every .wav under ./noise_raw/degradations/,
    extracts mel-spectrogram features, and saves the resulting array to
    ./noise/ambient-sounds.npy.
    """
    pattern = "./noise_raw/degradations/*.wav"
    print("Loading: ambient sounds")
    audio, sample_rate = load_and_concat(pattern)
    print("Extracting features: " + pattern)
    melspec = extract_features_melspec(audio, sample_rate)
    print(melspec.shape)
    np.save("./noise/ambient-sounds", melspec)
コード例 #2
0
def prepare_file():
    """Build mel-spectrogram features for the ambient-silence noise file.

    Loads ./noise_raw/ambient-silence.wav, extracts mel-spectrogram
    features, and saves them to ./noise/ambient-silence.npy.
    """
    source = "./noise_raw/ambient-silence.wav"
    print("Loading: " + source)
    audio, sample_rate = librosa.load(source)
    print("Extracting features: " + source)
    melspec = extract_features_melspec(audio, sample_rate)
    print(melspec.shape)
    np.save("./noise/ambient-silence", melspec)
コード例 #3
0
def prepare_librispeech():
    """Extract per-speaker mel-spectrogram features from LibriSpeech dev-clean.

    Each speaker directory's .flac files are concatenated into one signal,
    converted to mel-spectrogram features, and written to disk under a
    sequential speaker index.
    """
    libredir = "./voice_raw/librispeech/dev-clean/"
    speaker_count = 0
    for entry in os.listdir(libredir):
        candidate = libredir + entry + "/"
        if not os.path.isdir(candidate):
            continue
        audio, sample_rate = load_and_concat(candidate + "/**/*.flac")
        melspec = extract_features_melspec(audio, sample_rate)
        write_to_disk(melspec, speaker_count)
        print("Extracted features - LIBRE - speaker: %i" % speaker_count)
        speaker_count += 1
コード例 #4
0
def main():
    """Load a sample speech file and display its mel-frequency spectrogram."""
    sample_path = "./samples/speech-test.wav"

    signal, rate = librosa.load(sample_path)

    # Report raw sample count and duration in seconds.
    print("DATA", signal.shape, signal.shape[0] / rate)

    melspec = extract_features_melspec(signal, rate)

    # Plot title is Latvian: "Mel-frequency spectrogram".
    plt.figure(figsize=(12, 4))
    librosa.display.specshow(melspec, sr=rate, x_axis='time', y_axis='mel')
    plt.title('Mel-frekvenču spektrogramma')
    plt.colorbar(format='%+02.0f dB')
    plt.tight_layout()
    plt.show()
コード例 #5
0
def prepare_timit():
    """Extract per-speaker mel-spectrogram features from the TIMIT corpus.

    Walks `datasets` under the module-level `basepath`, concatenates each
    speaker's .WAV files, applies degradation, extracts mel-spectrogram
    features, and writes them to disk under a sequential speaker index.
    """
    speaker_count = 0
    for dataset in datasets:
        dataset_dir = basepath + dataset + "/"
        for group in os.listdir(dataset_dir):
            group_dir = dataset_dir + group + "/"
            if not os.path.isdir(group_dir):
                continue
            for speaker in os.listdir(group_dir):
                speaker_dir = group_dir + speaker + "/"
                if not os.path.isdir(speaker_dir):
                    continue
                audio, sample_rate = load_and_concat(speaker_dir + "/*.WAV")
                degraded = degrade(audio, sample_rate)
                melspec = extract_features_melspec(degraded, sample_rate)
                write_to_disk(melspec, speaker_count)
                print("Extracted features - TIMIT - speaker: %i" %
                      speaker_count)
                speaker_count += 1
コード例 #6
0
def prepare_urbansounds():
    """Extract mel-spectrogram features from the UrbanSounds noise corpus.

    Loads every .wav matched by ./noise_raw/urbansounds/data/**/*.wav
    (note: without recursive=True, `**` matches only a single directory
    level), skips files that fail to decode, checkpoints the accumulated
    features every 100 files, then shuffles and saves the full flattened
    feature set to ./noise/vad_noise.npy.
    """
    features = []
    files = glob.glob("./noise_raw/urbansounds/data/**/*.wav")
    for i, file in enumerate(files):
        print(str(i + 1) + " loading: " + file)
        try:
            data, sr = librosa.load(file)
        except Exception as e:
            # Corrupt/unreadable audio: report and skip. A bare `except:`
            # here would also swallow KeyboardInterrupt/SystemExit.
            print("Skipping " + file + ": " + str(e))
            continue
        f = extract_features_melspec(data, sr)
        features.append(f)
        # Periodic checkpoint so a crash late in the run loses nothing.
        if i > 0 and i % 100 == 0:
            np.save("./noise/vad_noise_" + str(i), flatten(features))
    shuffle(features)
    features = flatten(features)
    print(features.shape)
    np.save("./noise/vad_noise", features)
コード例 #7
0
from keras.models import load_model
import matplotlib.pyplot as plt
import datetime

from utils import extract_features_melspec, extract_features_mfcc, flatten

# Run tag for output artifacts, e.g. ./samples/speech_2018-05-25_13-32.npy
name = "speech"
run = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")

# audio_filename = "noise_raw/degradations/applause.wav"
audio_filename = "./samples/speech-test.wav"

# NOTE(review): `librosa` and `np` are not imported in this chunk —
# presumably imported elsewhere in the full file; verify.
data, sr = librosa.load(audio_filename)
print("SAMPLE RATE", sr)
print("DATA SHAPE", data.shape)
features = extract_features_melspec(data, sr)
np.save("./samples/" + name + "_" + run, features)
print("FEATURES SHAPE", features.shape)

# Pre-trained voice-activity-detection model (checkpoint from epoch 33).
model = load_model('models/vad2_2018-05-25_13-32/model_vad2.33.hdf5')

# Sliding-window parameters: window of 100 feature frames, stride of 25.
timeseries_length = 100
hop_length = 25

# Count how many full windows of `timeseries_length` frames fit into the
# feature sequence when sliding by `hop_length`.
length = 0
remainder = len(features)
while remainder >= timeseries_length:
    length += 1
    remainder -= hop_length

# Uninitialized buffer for the windowed features — presumably filled
# window-by-window in code past this chunk; verify before relying on contents.
x = np.ndarray((length, timeseries_length, features.shape[1]))
コード例 #8
0
def prepare_ljspeech():
    """Extract mel-spectrogram features for LJSpeech files LJ01* as speaker 0."""
    audio, sample_rate = load_and_concat("./voice_raw/ljspeech/wavs/LJ01*.wav")
    melspec = extract_features_melspec(audio, sample_rate)
    write_to_disk(melspec, 0)
コード例 #9
0
ファイル: test_seg.py プロジェクト: dmednis/speaker-segmenter
import numpy as np
from matplotlib import pyplot as plt
import librosa

from postprocess_utils import seg_metrics
from utils import extract_features_melspec

# Input audio and (currently unused) cached-artifact paths for the
# segmentation evaluation.
audio_filename = "./samples/seg-test16.wav"
features_filename = "./samples/seg-test_features.npy"
# predictions_filename = "samples/predictions_2018-05-24_17-48.npy"

# Force a 16 kHz sample rate on load.
audio, sr = librosa.load(audio_filename, sr=16000)
# predictions = np.load(predictions_filename)
# features = np.load(features_filename)
features = extract_features_melspec(audio, sr)

print("AUDIO", audio.shape)
# print("PREDICTIONS", predictions.shape)
print("FEATURES", features.shape)

# Sliding-window parameters: window of 100 feature frames, stride of 25.
timeseries_length = 100
hop_length = 25

# preds = deoverlap_predictions(predictions, features, hop_length)
# norm_preds = defragment_vad(preds)

# Reference segmentation as (start_sec, end_sec) tuples — presumably
# ground-truth speaker turns for the test clip; confirm against the audio.
# reference = [(6.42, 6.85), (13.49, 13.78)]
reference = [(0, 6.42), (6.42, 13.49), (13.49, 20.43)]

# Comparison segmentation — NOTE(review): "lium" presumably refers to the
# LIUM speaker diarization toolkit's output on the same clip; verify.
# lium = [(13.55, 13.67)]
lium = [(0, 13.55), (13.55, 20.43)]