import datetime
import os
import sys
import time

import h5py
import librosa
import numpy as np

import localmodule


# Dataset-wide settings, all looked up through localmodule.
data_dir = localmodule.get_data_dir()
dataset_name = localmodule.get_dataset_name()
sample_rate = localmodule.get_sample_rate()

# Positional command-line arguments:
#   1. augmentation name, 2. integer instance index, 3. recording unit.
args = sys.argv[1:]
aug_str = args[0]
instance_id = int(args[1])
instance_str = str(instance_id)
unit_str = args[2]

# Only the "original" (non-augmented) data lacks an instance suffix.
instanced_aug_str = (
    aug_str if aug_str == "original" else "-".join([aug_str, instance_str])
)
pcen_settings = localmodule.get_pcen_settings()


# Announce the job on standard output and remember when it started.
start_time = int(time.time())
print("".join([str(datetime.datetime.now()), " Start."]))
print(
    "Computing per-channel energy normalization (PCEN) for "
    + dataset_name
    + " clips, with domain-specific librosa parameters."
)
print("Unit: " + unit_str + ".")
def _select_augmentations(aug_kind_str, fold_units):
    """Translate an augmentation kind string into a list of augmentation names.

    Parameters
    ----------
    aug_kind_str : str
        One of "none", "pitch", "stretch", "all-but-noise", "noise", "all".
    fold_units : sequence of str
        Units of the fold; each contributes one "noise-<unit>" augmentation
        when the kind is "noise" or "all".

    Returns
    -------
    list of str
        Augmentation names, noise augmentations (if any) first.

    Raises
    ------
    ValueError
        If aug_kind_str is not one of the recognized kinds.
    """
    noise_free_kinds = {
        "none": ["original"],
        "pitch": ["original", "pitch"],
        "stretch": ["original", "stretch"],
        "all-but-noise": ["original", "pitch", "stretch"],
    }
    if aug_kind_str in noise_free_kinds:
        return noise_free_kinds[aug_kind_str]

    noise_augs = ["noise-" + unit_str for unit_str in fold_units]
    if aug_kind_str == "all":
        return noise_augs + ["original", "pitch", "stretch"]
    if aug_kind_str == "noise":
        return noise_augs + ["original"]

    raise ValueError(
        "Unknown augmentation kind {!r}; expected one of 'none', 'pitch', "
        "'stretch', 'all-but-noise', 'noise', 'all'.".format(aug_kind_str))


def multiplex_lms_with_background(aug_kind_str, fold_units, n_input_hops, batch_size):
    """Build a batch generator over log-mel clips paired with backgrounds.

    One pescador streamer is created for every combination of augmentation
    instance and unit. The streamers are multiplexed, buffered into batches
    of `batch_size`, and converted to Keras-style tuples with inputs
    "X_spec" and "X_bg" and output "y".

    Parameters
    ----------
    aug_kind_str : str
        Which augmentations to draw from: "none", "pitch", "stretch",
        "all-but-noise", "noise" or "all".
    fold_units : sequence of str
        Units whose clips (and, for noise augmentations, whose noise) are
        used. It is iterated several times, so it must not be a one-shot
        iterator.
    n_input_hops : int
        Forwarded unchanged to `yield_lms_and_background`.
    batch_size : int
        Buffer size handed to `pescador.BufferedStreamer`.

    Returns
    -------
    The object returned by `pescador.maps.keras_tuples`.

    Raises
    ------
    ValueError
        If aug_kind_str is not a recognized augmentation kind.
    """
    # Validate the augmentation kind before touching any configuration, so
    # that a typo fails immediately with an explicit message instead of an
    # UnboundLocalError further down.
    augs = _select_augmentations(aug_kind_str, fold_units)

    # Define constants.
    aug_dict = localmodule.get_augmentations()
    data_dir = localmodule.get_data_dir()
    dataset_name = localmodule.get_dataset_name()
    tfr_name = "_".join([dataset_name, "clip-logmelspec"])
    tfr_dir = os.path.join(data_dir, tfr_name)
    bg_dir_name = "_".join([dataset_name, "clip-logmelspec-backgrounds"])
    bg_dir = os.path.join(data_dir, bg_dir_name)
    # NOTE(review): bg_duration is not a parameter of this function; it is
    # read from the enclosing (module) scope and must be defined there.
    T_str = "T-" + str(bg_duration).zfill(4)
    T_dir = os.path.join(bg_dir, T_str)

    # The background file depends only on the unit, so resolve each path
    # once instead of once per augmentation instance.
    units_and_bg_paths = [
        (
            unit_str,
            os.path.join(
                T_dir,
                "_".join([
                    dataset_name, "background_summaries",
                    unit_str, T_str + ".hdf5"
                ]),
            ),
        )
        for unit_str in fold_units
    ]

    # Loop over augmentations.
    streams = []
    for aug_str in augs:
        aug_dir = os.path.join(tfr_dir, aug_str)

        # Define instances. The original data has a single, unnumbered
        # instance; every other augmentation has aug_dict[aug_str] of them.
        if aug_str == "original":
            instances = [aug_str]
        else:
            n_instances = aug_dict[aug_str]
            instances = [
                "-".join([aug_str, str(instance_id)])
                for instance_id in range(n_instances)
            ]

        # Define bias: -17.0 for noise augmentations, 0.0 otherwise. It is
        # passed as-is to yield_lms_and_background.
        if aug_str.startswith("noise"):
            bias = np.float32(-17.0)
        else:
            bias = np.float32(0.0)

        # Loop over instances, then over units.
        for instanced_aug_str in instances:
            for unit_str, bg_path in units_and_bg_paths:
                # Define path to time-frequency representation.
                lms_name = "_".join(
                    [dataset_name, instanced_aug_str, unit_str])
                lms_path = os.path.join(aug_dir, lms_name + ".hdf5")

                # Define pescador streamer.
                stream = pescador.Streamer(
                    yield_lms_and_background,
                    lms_path, n_input_hops, bias, bg_path)
                streams.append(stream)

    # Multiplex streamers together, with as many active slots as streamers.
    mux = pescador.Mux(
        streams, k=len(streams), lam=None,
        with_replacement=True, revive=True)

    # Create buffered streamer with specified batch size.
    buffered_streamer = pescador.BufferedStreamer(mux, batch_size)

    return pescador.maps.keras_tuples(
        buffered_streamer, inputs=["X_spec", "X_bg"], outputs=["y"])