def prepare_file2():
    """Load every degradation wav, extract mel-spectrogram features, save to disk."""
    pattern = "./noise_raw/degradations/*.wav"
    print("Loading: ambient sounds")
    audio, sample_rate = load_and_concat(pattern)
    print("Extracting features: " + pattern)
    mel_features = extract_features_melspec(audio, sample_rate)
    print(mel_features.shape)
    np.save("./noise/ambient-sounds", mel_features)
def prepare_file():
    """Extract mel-spectrogram features from the ambient-silence wav and save them."""
    source_path = "./noise_raw/ambient-silence.wav"
    print("Loading: " + source_path)
    audio, sample_rate = librosa.load(source_path)
    print("Extracting features: " + source_path)
    mel_features = extract_features_melspec(audio, sample_rate)
    print(mel_features.shape)
    np.save("./noise/ambient-silence", mel_features)
def prepare_librispeech():
    """Extract and persist mel-spectrogram features for each LibriSpeech dev-clean speaker.

    Each speaker directory's flac files are concatenated into one signal,
    featurized, and written to disk under a sequential speaker index.
    """
    libredir = "./voice_raw/librispeech/dev-clean/"
    speaker_count = 0
    for entry in os.listdir(libredir):
        entry_dir = libredir + entry + "/"
        if not os.path.isdir(entry_dir):
            continue  # skip stray files next to the speaker directories
        audio, sample_rate = load_and_concat(entry_dir + "/**/*.flac")
        mel_features = extract_features_melspec(audio, sample_rate)
        write_to_disk(mel_features, speaker_count)
        print("Extracted features - LIBRE - speaker: %i" % speaker_count)
        speaker_count += 1
def main():
    """Load a sample speech file, extract mel-spectrogram features, and plot them."""
    audio_filename = "./samples/speech-test.wav"
    data, sr = librosa.load(audio_filename)
    # Shape and duration (seconds) of the loaded mono signal.
    print("DATA", data.shape, data.shape[0] / sr)
    features = extract_features_melspec(data, sr)
    plt.figure(figsize=(12, 4))
    librosa.display.specshow(features, sr=sr, x_axis='time', y_axis='mel')
    # Fixed mojibake: was 'Mel-frekvenĨu' — Latvian for "Mel-frequency spectrogram".
    plt.title('Mel-frekvenču spektrogramma')
    plt.colorbar(format='%+02.0f dB')
    plt.tight_layout()
    plt.show()
def prepare_timit():
    """Walk the TIMIT dataset tree, degrade each speaker's audio, and save features.

    Iterates datasets -> groups -> speakers (module-level `datasets` and
    `basepath` define the roots), concatenates each speaker's WAV files,
    applies degradation, featurizes, and writes per-speaker feature files.
    """
    speaker_count = 0
    for dataset in datasets:
        dataset_dir = basepath + dataset + "/"
        for group in os.listdir(dataset_dir):
            group_dir = dataset_dir + group + "/"
            if not os.path.isdir(group_dir):
                continue
            for speaker in os.listdir(group_dir):
                speaker_dir = group_dir + speaker + "/"
                if not os.path.isdir(speaker_dir):
                    continue
                audio, sample_rate = load_and_concat(speaker_dir + "/*.WAV")
                degraded = degrade(audio, sample_rate)
                mel_features = extract_features_melspec(degraded, sample_rate)
                write_to_disk(mel_features, speaker_count)
                print("Extracted features - TIMIT - speaker: %i" % speaker_count)
                speaker_count += 1
def prepare_urbansounds():
    """Extract mel-spectrogram features from all UrbanSounds wavs and save them.

    Writes an intermediate checkpoint every 100 files (./noise/vad_noise_<i>),
    then shuffles the collected features and saves the final flattened array
    to ./noise/vad_noise.
    """
    features = []
    files = glob.glob("./noise_raw/urbansounds/data/**/*.wav")
    for i, file in enumerate(files):
        print(str(i + 1) + " loading: " + file)
        try:
            data, sr = librosa.load(file)
        except Exception as e:
            # Was a bare `except:` — that also swallowed KeyboardInterrupt/SystemExit
            # and hid the reason a file was skipped. Report and move on instead.
            print("Skipping " + file + ": " + str(e))
            continue
        f = extract_features_melspec(data, sr)
        features.append(f)
        # Periodic checkpoint so a crash doesn't lose hours of extraction work.
        if i > 0 and i % 100 == 0:
            np.save("./noise/vad_noise_" + str(i), flatten(features))
    shuffle(features)
    features = flatten(features)
    print(features.shape)
    np.save("./noise/vad_noise", features)
# --- VAD prediction script: featurize a sample wav and window it for the model ---
from keras.models import load_model
import matplotlib.pyplot as plt
import datetime
from utils import extract_features_melspec, extract_features_mfcc, flatten

name = "speech"
# Timestamp tag so each run's saved features get a unique filename.
run = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
# audio_filename = "noise_raw/degradations/applause.wav"
audio_filename = "./samples/speech-test.wav"
data, sr = librosa.load(audio_filename)
print("SAMPLE RATE", sr)
print("DATA SHAPE", data.shape)
features = extract_features_melspec(data, sr)
np.save("./samples/" + name + "_" + run, features)
print("FEATURES SHAPE", features.shape)
# Pre-trained VAD model checkpoint (run 2018-05-25, epoch 33).
model = load_model('models/vad2_2018-05-25_13-32/model_vad2.33.hdf5')
# Sliding-window parameters: 100-frame windows advanced by 25 frames.
timeseries_length = 100
hop_length = 25
# Count how many hop_length-stepped windows of timeseries_length frames fit.
length = 0
remainder = len(features)
while remainder >= timeseries_length:
    length += 1
    remainder -= hop_length
# Buffer for windowed sequences: (windows, frames-per-window, feature-dims).
# NOTE(review): np.ndarray allocates uninitialized memory — presumably filled
# by code past the end of this chunk; confirm before relying on contents.
x = np.ndarray((length, timeseries_length, features.shape[1]))
def prepare_ljspeech():
    """Featurize a subset of LJSpeech wavs (LJ01*) and store them as speaker 0."""
    audio, sample_rate = load_and_concat("./voice_raw/ljspeech/wavs/LJ01*.wav")
    mel_features = extract_features_melspec(audio, sample_rate)
    write_to_disk(mel_features, 0)
import numpy as np
from matplotlib import pyplot as plt
import librosa
from postprocess_utils import seg_metrics
from utils import extract_features_melspec

# --- Segmentation evaluation setup: featurize test audio, define reference segments ---
audio_filename = "./samples/seg-test16.wav"
features_filename = "./samples/seg-test_features.npy"
# predictions_filename = "samples/predictions_2018-05-24_17-48.npy"
# Force a 16 kHz resample on load (explicit sr=16000 overrides librosa's default).
audio, sr = librosa.load(audio_filename, sr=16000)
# predictions = np.load(predictions_filename)
# features = np.load(features_filename)
features = extract_features_melspec(audio, sr)
print("AUDIO", audio.shape)
# print("PREDICTIONS", predictions.shape)
print("FEATURES", features.shape)
# Windowing parameters — match the values used by the prediction script.
timeseries_length = 100
hop_length = 25
# preds = deoverlap_predictions(predictions, features, hop_length)
# norm_preds = defragment_vad(preds)
# Reference segmentation as (start, end) pairs — presumably seconds; confirm
# against seg_metrics' expected units.
# reference = [(6.42, 6.85), (13.49, 13.78)]
reference = [(0, 6.42), (6.42, 13.49), (13.49, 20.43)]
# Competing segmentation (LIUM toolkit output) for comparison.
# lium = [(13.55, 13.67)]
lium = [(0, 13.55), (13.55, 20.43)]