Code Example #1
def _parallel_feature_extraction(wav_paths,
                                 label_paths,
                                 out_path,
                                 feat_settings,
                                 num_threads=3):
    label_path_mapping = _gen_wav_label_path_mapping(label_paths)
    iters = enumerate(
        parallel_generator(_all_in_one_extract,
                           wav_paths,
                           max_workers=num_threads,
                           use_thread=True,
                           chunk_size=num_threads,
                           label_path_mapping=label_path_mapping,
                           feat_settings=feat_settings))
    for idx, ((patch_cqt, m_beat_arr, label_128, label_13),
              audio_idx) in iters:
        audio = wav_paths[audio_idx]
        # print(f"Progress: {idx+1}/{len(wav_paths)} - {audio}" + " "*6, end="\r")  # noqa: E226
        logger.info("Progress: %d/%d - %s", idx + 1, len(wav_paths),
                    audio)  # noqa: E226

        basename = os.path.basename(audio)
        filename, _ = os.path.splitext(basename)
        out_hdf = jpath(out_path, filename + ".hdf")

        saved = False
        retry_times = 5
        # Retry the HDF5 write with exponential backoff on transient OSErrors.
        for retry in range(retry_times):
            if saved:
                break
            try:
                with h5py.File(out_hdf, "w") as out_f:
                    out_f.create_dataset("feature",
                                         data=patch_cqt,
                                         compression="gzip",
                                         compression_opts=3)
                    out_f.create_dataset("label",
                                         data=label_13,
                                         compression="gzip",
                                         compression_opts=3)
                    out_f.create_dataset("label_128",
                                         data=label_128,
                                         compression="gzip",
                                         compression_opts=3)
                    out_f.create_dataset("cqt_mini_beat_arr",
                                         data=m_beat_arr,
                                         compression="gzip",
                                         compression_opts=3)
                    saved = True
            except OSError as exp:
                logger.warning(
                    "OSError occurred, retrying %d times. Reason: %s",
                    retry + 1, str(exp))
                time.sleep(0.5 * 2**retry)
        if not saved:
            logger.error(
                "H5py failed to save the feature file after %d retries.",
                retry_times)
            raise OSError
    print("")
Code Example #2
def parallel_extract(x, samples, max_sample, fr, fs, Hop, h, fc, tc, g,
                     bin_per_octave):
    freq_width = max_sample * Hop
    iters = np.ceil(samples / max_sample).astype("int")
    tmpL0, tmpLF, tmpLQ, tmpZ = {}, {}, {}, {}

    slice_list = [x[i * freq_width:(i + 1) * freq_width] for i in range(iters)]

    feat_generator = enumerate(
        parallel_generator(cfp_filterbank,
                           slice_list,
                           fr=fr,
                           fs=fs,
                           Hop=Hop,
                           h=h,
                           fc=fc,
                           tc=tc,
                           g=g,
                           bin_per_octave=bin_per_octave,
                           max_workers=3))
    for idx, (feat_list, slice_idx) in feat_generator:
        logger.debug("Slice feature extracted: %d/%d", idx + 1,
                     len(slice_list))
        tfrL0, tfrLF, tfrLQ, f, q, t, cen_freq = feat_list
        tmpL0[slice_idx] = tfrL0
        tmpLF[slice_idx] = tfrLF
        tmpLQ[slice_idx] = tfrLQ
        tmpZ[slice_idx] = tfrLF * tfrLQ
    # f, q, t, and cen_freq keep the values from the last processed slice.
    return tmpL0, tmpLF, tmpLQ, tmpZ, f, q, t, cen_freq
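
The per-slice results are keyed by slice_idx so that, once the (possibly out-of-order) generator is exhausted, the caller can stitch them back together in the original time order. A hypothetical reassembly helper might look like the following; _concat_slices is not part of omnizart, and the concatenation axis depends on how cfp_filterbank orients its output matrices.

import numpy as np


def _concat_slices(slice_dict, axis=1):
    # Concatenate the per-slice matrices back into one array, in slice order.
    return np.concatenate([slice_dict[i] for i in sorted(slice_dict)], axis=axis)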
Code Example #3
def _parallel_feature_extraction(audio_list,
                                 out_path,
                                 feat_settings,
                                 num_threads=4):
    feat_extract_params = {
        "hop": feat_settings.hop_size,
        "win_size": feat_settings.window_size,
        "fr": feat_settings.frequency_resolution,
        "fc": feat_settings.frequency_center,
        "tc": feat_settings.time_center,
        "g": feat_settings.gamma,
        "bin_per_octave": feat_settings.bins_per_octave,
        "harmonic_num": feat_settings.harmonic_number
    }

    iters = enumerate(
        parallel_generator(extract_cfp_feature,
                           audio_list,
                           max_workers=num_threads,
                           use_thread=True,
                           chunk_size=num_threads,
                           harmonic=feat_settings.harmonic,
                           **feat_extract_params))
    for idx, (feature, audio_idx) in iters:
        audio = audio_list[audio_idx]
        # logger.info("Progress: %s/%s - %s", idx+1, len(audio_list), audio)
        print(f"Progress: {idx+1}/{len(audio_list)} - {audio}" + " " * 6,
              end="\r")  # noqa: E226

        basename = os.path.basename(audio)
        filename, _ = os.path.splitext(basename)
        out_hdf = jpath(out_path, filename + ".hdf")

        saved = False
        retry_times = 5
        for retry in range(retry_times):
            if saved:
                break
            try:
                with h5py.File(out_hdf, "w") as out_f:
                    out_f.create_dataset("feature", data=feature)
                    saved = True
            except OSError as exp:
                logger.warning(
                    "OSError occurred, retrying %d times. Reason: %s",
                    retry + 1, str(exp))
        if not saved:
            logger.error(
                "H5py failed to save the feature file after %d retries.",
                retry_times)
            raise OSError
    print("")
Code Example #4
def _parallel_feature_extraction(data_pair, out_path, num_threads=4):
    iters = enumerate(
        parallel_generator(_extract_feature_arg_wrapper,
                           data_pair,
                           max_workers=num_threads,
                           chunk_size=num_threads))
    for idx, (feature, feat_idx) in iters:
        f_name = os.path.dirname(data_pair[feat_idx][0])

        # logger.info("Progress: %d/%d - %s", idx + 1, len(data_pair), f_name)
        print(f"Progress: {idx+1}/{len(data_pair)} - {f_name}", end="\r")
        out_hdf = jpath(out_path, os.path.basename(f_name) + ".hdf")
        _write_feature(feature, out_path=out_hdf)
Code Example #5
File: app.py  Project: ykhorzon/omnizart
def _parallel_feature_extraction(data_pair,
                                 out_path,
                                 label_extractor,
                                 feat_settings,
                                 num_threads=4):
    feat_extract_params = {
        "hop": feat_settings.hop_size,
        "down_fs": feat_settings.sampling_rate,
        "win_size": feat_settings.window_size
    }

    iters = enumerate(
        parallel_generator(_all_in_one_extract,
                           data_pair,
                           max_workers=num_threads,
                           use_thread=True,
                           chunk_size=num_threads,
                           label_extractor=label_extractor,
                           t_unit=feat_settings.hop_size,
                           **feat_extract_params))

    for idx, ((feature, label), audio_idx) in iters:
        audio = data_pair[audio_idx][0]

        print(f"Progress: {idx+1}/{len(data_pair)} - {audio}" + " " * 6,
              end="\r")  # noqa: E226

        filename, _ = os.path.splitext(os.path.basename(audio))
        out_hdf = jpath(out_path, filename + ".hdf")
        saved = False
        retry_times = 5
        for retry in range(retry_times):
            if saved:
                break
            try:
                with h5py.File(out_hdf, "w") as out_f:
                    out_f.create_dataset("feature", data=feature)
                    out_f.create_dataset("label", data=label)
                    saved = True
            except OSError as exp:
                logger.warning(
                    "OSError occurred, retrying %d times. Reason: %s",
                    retry + 1, str(exp))
        if not saved:
            logger.error(
                "H5py failed to save the feature file after %d retries.",
                retry_times)
            raise OSError
    print("")
Code Example #6
def _parallel_feature_extraction(data_pair,
                                 label_extractor,
                                 out_path,
                                 feat_settings,
                                 num_threads=4):
    feat_extract_params = {
        "hop": feat_settings.hop_size,
        "fr": feat_settings.frequency_resolution,
        "fc": feat_settings.frequency_center,
        "tc": feat_settings.time_center,
        "g": feat_settings.gamma,
        "bin_per_octave": feat_settings.bins_per_octave
    }

    iters = enumerate(
        parallel_generator(_all_in_one_extract,
                           data_pair,
                           max_workers=num_threads,
                           chunk_size=num_threads,
                           label_extractor=label_extractor,
                           t_unit=feat_settings.hop_size,
                           **feat_extract_params))
    for idx, ((feature, label), audio_idx) in iters:
        audio = data_pair[audio_idx][0]
        logger.info("Progress: %s/%s - %s", idx + 1, len(data_pair), audio)
        # print(f"Progress: {idx+1}/{len(data_pair)} - {audio}" + " "*6, end="\r")  # noqa: E226

        # Trim to the same length
        max_len = min(len(feature), len(label))
        feature = feature[:max_len]
        label = label[:max_len]

        basename = os.path.basename(audio)
        filename, _ = os.path.splitext(basename)
        out_hdf = jpath(out_path, filename + ".hdf")
        with h5py.File(out_hdf, "w") as out_f:
            out_f.create_dataset("feature",
                                 data=feature,
                                 compression="gzip",
                                 compression_opts=3)
            out_f.create_dataset("label",
                                 data=label,
                                 compression="gzip",
                                 compression_opts=3)
Code Example #7
File: app.py  Project: ykhorzon/omnizart
def _parallel_feature_extraction(data_pair_list,
                                 out_path,
                                 feat_settings,
                                 num_threads=4):
    feat_params = {
        "patch_size": feat_settings.patch_size,
        "threshold": feat_settings.peak_threshold,
        "down_fs": feat_settings.sampling_rate,
        "hop": feat_settings.hop_size,
        "win_size": feat_settings.window_size,
        "fr": feat_settings.frequency_resolution,
        "fc": feat_settings.frequency_center,
        "tc": feat_settings.time_center,
        "g": feat_settings.gamma,
        "bin_per_octave": feat_settings.bins_per_octave,
    }

    iters = enumerate(
        parallel_generator(_all_in_one_extract,
                           data_pair_list,
                           max_workers=num_threads,
                           use_thread=True,
                           chunk_size=num_threads,
                           **feat_params))
    for idx, ((feat, mapping, zzz, label), audio_idx) in iters:
        audio = data_pair_list[audio_idx][0]

        # logger.info("Progress: %s/%s - %s", idx+1, len(data_pair_list), audio)
        print(f"Progress: {idx + 1}/{len(data_pair_list)} - {audio}", end="\r")

        filename = get_filename(audio)
        out_hdf = jpath(out_path, filename + ".hdf")
        with h5py.File(out_hdf, "w") as out_f:
            out_f.create_dataset("feature", data=feat)
            out_f.create_dataset("mapping", data=mapping)
            out_f.create_dataset("Z", data=zzz)
            out_f.create_dataset("label", data=label)
    print("")
Code Example #8
def _parallel_feature_extraction(feat_list, out_path, feat_settings, num_threads=4):
    iters = enumerate(
        parallel_generator(
            _all_in_one_extract,
            feat_list,
            max_workers=num_threads,
            chunk_size=num_threads,
            t_unit=feat_settings.time_unit
        )
    )

    for idx, ((feature, beat_arr, down_beat_arr), feat_idx) in iters:
        feat = feat_list[feat_idx]

        print(f"Progress: {idx+1}/{len(feat_list)} - {feat}" + " "*6, end="\r")  # noqa: E226
        # logger.info("Progress: %s/%s - %s", idx+1, len(feat_list), feat)

        filename, _ = os.path.splitext(os.path.basename(feat))
        out_hdf = jpath(out_path, filename + ".hdf")
        with h5py.File(out_hdf, "w") as out_f:
            out_f.create_dataset("feature", data=feature)
            out_f.create_dataset("beat", data=beat_arr)
            out_f.create_dataset("down_beat", data=down_beat_arr)
    print("")