Example #1
0
    parser.add_argument('--data_location', type=str, default="/Users/quentin/Computer/DataSet/Music/speech_music_detection/",
                        help='the location of the data')

    parser.add_argument('--model', type=str, default="trained/model.hdf5",
                        help='path of the model to load when the starting is resumed')

    parser.add_argument('--mean_path', type=str, default="trained/mean.npy",
                        help='path of the mean of the normalization applied with the model')

    parser.add_argument('--std_path', type=str, default="trained/std.npy",
                        help='path of the std of the normalization applied with the model')

    args = parser.parse_args()

    experiments = utils.load_json('experiments.json')
    cfg = experiments[args.config]

    print("Creating the dataset..")

    datasets_config = utils.load_json("datasets.json")
    dataset = DatasetLoader(
        cfg["dataset"], args.data_location, datasets_config)

    print("Creating the data generator..")

    val_set = DataGenerator(dataset.get_val_set(),
                            cfg["batch_size"],
                            cfg["target_seq_length"],
                            validation_data_processing,
                            dataset.get_training_mean(),
    def __init__(self,
                 datasets,
                 dataset_folder,
                 datasets_config,
                 verify=False):
        """Index the preprocessed datasets and load normalization statistics.

        For each dataset name, the filelists folder matching the current
        audio hyper-parameters (sampling rate, max length, number of mel
        bands read from ``config``) is scanned. File lists are handed to
        ``self.load_list``, frame counts are read from ``info.json`` and,
        unless cached statistics already exist under ``checkpoint/``, the
        per-dataset ``mean.npy`` / ``var.npy`` files are combined into
        ``self.train_mean`` / ``self.train_std`` and written to that cache.

        Args:
            datasets: sequence of dataset names; each must be a key of
                ``datasets_config``. It is iterated more than once.
            dataset_folder: root folder holding the preprocessed datasets.
            datasets_config: mapping from dataset name to a dict providing
                the ``"filelists_folder"`` and ``"data_folder"`` entries.
            verify: stored as ``self.verify``; not otherwise used here.

        Raises:
            ValueError: if the preprocessed filelists folder or data folder
                of a dataset does not exist.
        """
        self.cfg = datasets_config
        self.verify = verify

        self.train_set = {
            "mixed": [],
            "music": [],
            "speech": [],
            "noise": [],
            "n_frame": 0,
            "n_frame_mixed": 0,
            "n_frame_speech": 0,
            "n_frame_music": 0,
            "n_frame_noise": 0
        }

        self.val_set = {
            "mixed": [],
            "speech": [],
            "music": [],
            "noise": [],
            "n_frame": 0
        }

        self.test_set = {"mixed": [], "n_frame": 0}

        # The cache file names are keyed by the ordered list of datasets.
        dataset_str = "".join("_" + dataset for dataset in datasets)
        mean_cache = "checkpoint/mean" + dataset_str + ".npy"
        std_cache = "checkpoint/std" + dataset_str + ".npy"

        ns_val, ns_test, ns, ns_mixed, ns_speech, ns_music, ns_noise = [], [], [], [], [], [], []
        ms, vs = [], []

        cached_mean_std = (os.path.isfile(mean_cache)
                           and os.path.isfile(std_cache))
        if cached_mean_std:
            self.train_mean = np.load(mean_cache)
            self.train_std = np.load(std_cache)

        # (file-name marker, audio type, destination set). Checked in this
        # order; the first marker found in the file name wins.
        list_routes = (
            ("mixed_train", "mixed", self.train_set),
            ("music_train", "music", self.train_set),
            ("speech_train", "speech", self.train_set),
            ("noise_train", "noise", self.train_set),
            ("noise_val", "noise", self.val_set),
            ("mixed_val", "mixed", self.val_set),
            ("speech_val", "speech", self.val_set),
            ("music_val", "music", self.val_set),
            ("mixed_test", "mixed", self.test_set),
        )

        # (key in info.json, list collecting that count across datasets).
        info_fields = (
            ("N_FRAME_TRAIN", ns),
            ("N_FRAME_VAL", ns_val),
            ("N_FRAME_TEST", ns_test),
            ("N_FRAME_TRAIN_MIXED", ns_mixed),
            ("N_FRAME_TRAIN_SPEECH", ns_speech),
            ("N_FRAME_TRAIN_MUSIC", ns_music),
            ("N_FRAME_TRAIN_NOISE", ns_noise),
        )

        suffix = '_' + str(config.SAMPLING_RATE) + '_' + str(
            config.AUDIO_MAX_LENGTH) + '_' + str(config.N_MELS)

        for dataset in datasets:
            filelist_path = os.path.join(
                dataset_folder, self.cfg[dataset]["filelists_folder"] + suffix)
            data_path = os.path.join(dataset_folder,
                                     self.cfg[dataset]["data_folder"] + suffix)

            if not os.path.exists(filelist_path) or not os.path.exists(
                    data_path):
                raise ValueError(
                    "The datatset " + dataset +
                    " is unfound or has not been preprocessed for the chosen hyper-parameters."
                )

            for file in glob.glob(filelist_path + "/*"):
                # Match on the file name only: matching on the full path
                # would misroute every file whenever a parent directory
                # name happens to contain one of the markers.
                name = os.path.basename(file)

                for marker, audio_type, target_set in list_routes:
                    if marker in name:
                        self.load_list(file, audio_type, target_set,
                                       data_path)
                        break
                else:
                    if "info.json" in name:
                        data = utils.load_json(file)
                        for field, collected in info_fields:
                            collected.append(data[field])
                    elif "mean.npy" in name:
                        # Per-dataset statistics are only needed when the
                        # combined ones were not found in the cache.
                        if not cached_mean_std:
                            ms.append(np.load(file))
                    elif "var.npy" in name:
                        if not cached_mean_std:
                            vs.append(np.load(file))

        if not cached_mean_std:
            self.train_mean = self.combine_means(ms, ns)
            self.train_std = np.sqrt(
                self.combine_var(vs, ns, ms, self.train_mean))
            # np.save does not create missing parent directories.
            os.makedirs("checkpoint", exist_ok=True)
            np.save(mean_cache, self.train_mean)
            np.save(std_cache, self.train_std)

        self.train_set["n_frame"] = np.sum(ns)
        self.val_set["n_frame"] = np.sum(ns_val)
        self.test_set["n_frame"] = np.sum(ns_test)
        self.train_set["n_frame_mixed"] = np.sum(ns_mixed)
        self.train_set["n_frame_speech"] = np.sum(ns_speech)
        self.train_set["n_frame_music"] = np.sum(ns_music)
        self.train_set["n_frame_noise"] = np.sum(ns_noise)
Example #3
0
def data():
    """Build the training and validation data generators.

    Loads ``datasets.json``, creates a ``DatasetLoader`` for the hard-coded
    configuration below and wraps its training and validation sets in
    ``DataGenerator`` objects. Training samples go through the augmentation
    chain; validation samples are only scaled and normalized.

    Returns:
        A ``(train_set, val_set)`` tuple of ``DataGenerator`` instances.
    """
    # Not read inside this function; kept unchanged.
    # NOTE(review): may be consumed by tooling outside this view - confirm.
    n_eval = 0
    cfg = {
        "dataset": ["ofai", "muspeak", "esc-50"],
        "data_location":
        "/Users/quentin/Computer/DataSet/Music/speech_music_detection/",
        "target_seq_length": 270,
        "batch_size": 16
    }

    def _augment(spec_path, annotation_path):
        # Load one spectrogram, apply the augmentation chain and build the
        # label with the stretching rate returned by the deformation step.
        deformed, rate = pitch_time_deformation_spec(np.load(spec_path))
        augmented = random_loudness_spec(random_filter_spec(deformed))
        target = preprocessing.get_label(annotation_path,
                                         augmented.shape[1],
                                         stretching_rate=rate)
        return augmented, target

    def _to_normalized_mels(spectrogram, mean, std):
        # Scale to mel bands, then normalize with the given statistics.
        scaled = preprocessing.get_scaled_mel_bands(spectrogram)
        return preprocessing.normalize(scaled, mean, std)

    def training_data_processing(spec_file,
                                 annotation_file,
                                 mean,
                                 std,
                                 spec_file2=None,
                                 annotation_file2=None):
        """Augment one sample, optionally block-mixing it with a second."""
        spec, label = _augment(spec_file, annotation_file)

        if spec_file2 is not None:
            other_spec, other_label = _augment(spec_file2, annotation_file2)
            spec, label = block_mixing_spec(spec, other_spec, label,
                                            other_label)

        return _to_normalized_mels(spec, mean, std), label

    def validation_data_processing(spec_file, annotation_file, mean, std):
        """Normalize one sample without augmentation (stretching rate 1)."""
        mels = _to_normalized_mels(np.load(spec_file), mean, std)
        label = preprocessing.get_label(annotation_file,
                                        mels.shape[1],
                                        stretching_rate=1)
        return mels, label

    datasets_config = utils.load_json("datasets.json")
    dataset = DatasetLoader(cfg["dataset"], cfg["data_location"],
                            datasets_config)

    # Batch size and target sequence length are shared by both generators.
    batching = (cfg["batch_size"], cfg["target_seq_length"])

    train_set = DataGenerator(dataset.get_train_set(),
                              *batching,
                              training_data_processing,
                              dataset.get_training_mean(),
                              dataset.get_training_std(),
                              set_type="train")

    val_set = DataGenerator(dataset.get_val_set(),
                            *batching,
                            validation_data_processing,
                            dataset.get_training_mean(),
                            dataset.get_training_std(),
                            set_type="val")
    return train_set, val_set
Example #4
0
def resample_dataset(dataset_folder, dataset):
    """Resample a dataset and write its spectrograms, filelists and stats.

    Every ``.WAV`` / ``.wav`` / ``.mp3`` file found in the dataset's data
    folder is passed to ``run_sox``. Each resulting file that is referenced
    by one of the dataset's filelists is converted with ``savespec`` (or
    ``savespec_and_get_bands`` for training files) and appended, with its
    length, to the filelist of the same name in the new filelists folder.
    A running mean and variance over the training bands is accumulated and
    saved as ``mean.npy`` / ``var.npy`` next to an ``info.json`` holding the
    frame counts and the audio configuration.

    Args:
        dataset_folder: root folder containing the dataset.
        dataset: dataset name, a key of ``../datasets.json``.

    Raises:
        ValueError: if either output folder already exists. Both folders
            are checked before anything is created.
    """
    cfg = utils.load_json('../datasets.json')

    # Output folders carry the hyper-parameters they were produced with.
    suffix = "_" + \
        str(audio_config.SAMPLING_RATE) + "_" + \
        str(audio_config.AUDIO_MAX_LENGTH) + "_" + \
        str(audio_config.N_MELS)

    DATA_PATH = os.path.join(dataset_folder, cfg[dataset]["data_folder"])
    NEW_DATA_PATH = DATA_PATH + suffix

    FILELISTS_FOLDER = os.path.join(dataset_folder,
                                    cfg[dataset]["filelists_folder"])
    NEW_FILELISTS_FOLDER = FILELISTS_FOLDER + suffix

    # Check both folders before creating either one; otherwise a failure on
    # the second would leave the first behind and block every later run.
    for folder in (NEW_DATA_PATH, NEW_FILELISTS_FOLDER):
        if os.path.isdir(folder):
            raise ValueError(folder + " already exists.")
    for folder in (NEW_DATA_PATH, NEW_FILELISTS_FOLDER):
        os.makedirs(folder)
        print("Output folder created: " + folder)

    audio_files = glob.glob(DATA_PATH + "/*.WAV")
    audio_files += glob.glob(DATA_PATH + "/*.wav")
    audio_files += glob.glob(DATA_PATH + "/*.mp3")

    new_audio_files = []

    filelists = utils.read_filelists(FILELISTS_FOLDER)

    n, n_mixed, n_music, n_speech, n_noise, n_val, n_test, n_tot = 0, 0, 0, 0, 0, 0, 0, 0
    mean = np.zeros(audio_config.N_MELS)
    var = np.zeros(audio_config.N_MELS)

    for file in tqdm(audio_files):
        basename = os.path.basename(file)
        # Swap only the trailing extension. A str.replace on the whole path
        # would also rewrite the same text appearing in a directory name or
        # earlier in the file name.
        new_file = os.path.join(NEW_DATA_PATH,
                                os.path.splitext(basename)[0] + '.wav')
        new_files = run_sox(file, new_file, audio_config.SAMPLING_RATE,
                            audio_config.AUDIO_MAX_LENGTH)
        new_audio_files += new_files

        for new_file in new_files:
            for key, listed in filelists.items():
                if basename not in listed:
                    continue

                audio_type, set_type = key.split('_')
                # Reset for every (file, list) pair so that an unrecognised
                # set type can never reuse the length of a previous file.
                length = 0

                if set_type == 'train':
                    length, bands = savespec_and_get_bands(new_file)
                    if length > 0:
                        n += length
                        n_tot += length
                        # Batched running update of the mean and of the sum
                        # of squared deviations (kept in `var` until the
                        # final division). `bands` is broadcast against
                        # mean[:, None] and reduced over axis 1.
                        delta1 = bands - mean[:, None]
                        mean += np.sum(delta1, axis=1) / n
                        delta2 = bands - mean[:, None]
                        var += np.sum(delta1 * delta2, axis=1)
                        if audio_type == "mixed":
                            n_mixed += length
                        elif audio_type == "speech":
                            n_speech += length
                        elif audio_type == "music":
                            n_music += length
                        elif audio_type == "noise":
                            n_noise += length
                elif set_type == 'val':
                    length = savespec(new_file)
                    if length > 0:
                        n_tot += length
                        n_val += length
                elif set_type == 'test':
                    length = savespec(new_file)
                    if length > 0:
                        n_tot += length
                        n_test += length

                if length > 0:
                    with open(os.path.join(NEW_FILELISTS_FOLDER, key),
                              'a') as f:
                        f.write(
                            os.path.basename(new_file).replace(".wav", '')
                            + '\t' + str(length) + '\n')

    # The unbiased estimate needs at least two frames; with fewer, the
    # accumulated sum is zero and dividing would give NaN (n == 1) or a
    # negative zero (n == 0).
    if n > 1:
        var /= (n - 1)

    infos = {
        "N_FRAME_TOT": n_tot,
        "N_FRAME_TRAIN": n,
        "N_FRAME_VAL": n_val,
        "N_FRAME_TEST": n_test,
        "N_FRAME_TRAIN_MIXED": n_mixed,
        "N_FRAME_TRAIN_SPEECH": n_speech,
        "N_FRAME_TRAIN_MUSIC": n_music,
        "N_FRAME_TRAIN_NOISE": n_noise,
        "N_MELS": audio_config.N_MELS,
        "SAMPLING_RATE": audio_config.SAMPLING_RATE,
        "FFT_WINDOW_SIZE": audio_config.FFT_WINDOW_SIZE,
        "HOP_LENGTH": audio_config.HOP_LENGTH,
        "F_MIN": audio_config.F_MIN,
        "F_MAX": audio_config.F_MAX,
        "AUDIO_MAX_LENGTH": audio_config.AUDIO_MAX_LENGTH
    }
    np.save(os.path.join(NEW_FILELISTS_FOLDER, "mean.npy"), mean)
    np.save(os.path.join(NEW_FILELISTS_FOLDER, "var.npy"), var)
    with open(os.path.join(NEW_FILELISTS_FOLDER, "info.json"), 'w') as f:
        json.dump(infos, f)