def _parallel_feature_extraction(wav_paths, label_paths, out_path, feat_settings, num_threads=3):
    label_path_mapping = _gen_wav_label_path_mapping(label_paths)
    iters = enumerate(
        parallel_generator(
            _all_in_one_extract,
            wav_paths,
            max_workers=num_threads,
            use_thread=True,
            chunk_size=num_threads,
            label_path_mapping=label_path_mapping,
            feat_settings=feat_settings
        )
    )
    for idx, ((patch_cqt, m_beat_arr, label_128, label_13), audio_idx) in iters:
        audio = wav_paths[audio_idx]

        # print(f"Progress: {idx+1}/{len(wav_paths)} - {audio}" + " "*6, end="\r")  # noqa: E226
        logger.info("Progress: %d/%d - %s", idx + 1, len(wav_paths), audio)

        basename = os.path.basename(audio)
        filename, _ = os.path.splitext(basename)
        out_hdf = jpath(out_path, filename + ".hdf")

        # Retry writing the HDF5 file a few times on OSError, backing off exponentially
        # between attempts.
        saved = False
        retry_times = 5
        for retry in range(retry_times):
            if saved:
                break
            try:
                with h5py.File(out_hdf, "w") as out_f:
                    out_f.create_dataset("feature", data=patch_cqt, compression="gzip", compression_opts=3)
                    out_f.create_dataset("label", data=label_13, compression="gzip", compression_opts=3)
                    out_f.create_dataset("label_128", data=label_128, compression="gzip", compression_opts=3)
                    out_f.create_dataset("cqt_mini_beat_arr", data=m_beat_arr, compression="gzip", compression_opts=3)
                saved = True
            except OSError as exp:
                logger.warning("OSError occurred, retrying %d times. Reason: %s", retry + 1, str(exp))
                time.sleep(0.5 * 2**retry)

        if not saved:
            logger.error("H5py failed to save the feature file after %d retries.", retry_times)
            raise OSError

    print("")
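# Illustrative sketch, not part of the original module: shows how the four datasets
# written by the function above ("feature", "label", "label_128", "cqt_mini_beat_arr")
# can be read back with h5py. The helper name and the path passed to it are hypothetical.
def _example_load_extracted_drum_feature(hdf_path):
    with h5py.File(hdf_path, "r") as fin:
        patch_cqt = fin["feature"][:]             # saved from patch_cqt
        label_13 = fin["label"][:]                # saved from label_13
        label_128 = fin["label_128"][:]           # saved from label_128
        m_beat_arr = fin["cqt_mini_beat_arr"][:]  # saved from m_beat_arr
    return patch_cqt, m_beat_arr, label_128, label_13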
def parallel_extract(x, samples, max_sample, fr, fs, Hop, h, fc, tc, g, bin_per_octave):
    # Number of audio samples per slice, and the number of slices to process.
    freq_width = max_sample * Hop
    iters = np.ceil(samples / max_sample).astype("int")
    tmpL0, tmpLF, tmpLQ, tmpZ = {}, {}, {}, {}

    # Split the signal into fixed-size slices and extract CFP features from each slice in parallel.
    slice_list = [x[i * freq_width:(i + 1) * freq_width] for i in range(iters)]
    feat_generator = enumerate(
        parallel_generator(
            cfp_filterbank,
            slice_list,
            fr=fr,
            fs=fs,
            Hop=Hop,
            h=h,
            fc=fc,
            tc=tc,
            g=g,
            bin_per_octave=bin_per_octave,
            max_workers=3
        )
    )
    for idx, (feat_list, slice_idx) in feat_generator:
        logger.debug("Slice feature extracted: %d/%d", idx + 1, len(slice_list))
        tfrL0, tfrLF, tfrLQ, f, q, t, cen_freq = feat_list
        tmpL0[slice_idx] = tfrL0
        tmpLF[slice_idx] = tfrLF
        tmpLQ[slice_idx] = tfrLQ
        tmpZ[slice_idx] = tfrLF * tfrLQ

    return tmpL0, tmpLF, tmpLQ, tmpZ, f, q, t, cen_freq
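# Illustrative sketch, not part of the original module: parallel_extract returns its
# per-slice results as dicts keyed by slice index because slices may finish out of
# order, so a caller has to stitch them back together. The helper name is hypothetical,
# and treating the last axis as the time axis is an assumption about the shape of the
# cfp_filterbank output.
def _example_merge_slices(slice_dict):
    ordered = [slice_dict[i] for i in range(len(slice_dict))]  # restore slice order
    return np.concatenate(ordered, axis=-1)                    # join along the assumed time axis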
def _parallel_feature_extraction(audio_list, out_path, feat_settings, num_threads=4):
    feat_extract_params = {
        "hop": feat_settings.hop_size,
        "win_size": feat_settings.window_size,
        "fr": feat_settings.frequency_resolution,
        "fc": feat_settings.frequency_center,
        "tc": feat_settings.time_center,
        "g": feat_settings.gamma,
        "bin_per_octave": feat_settings.bins_per_octave,
        "harmonic_num": feat_settings.harmonic_number
    }

    iters = enumerate(
        parallel_generator(
            extract_cfp_feature,
            audio_list,
            max_workers=num_threads,
            use_thread=True,
            chunk_size=num_threads,
            harmonic=feat_settings.harmonic,
            **feat_extract_params
        )
    )
    for idx, (feature, audio_idx) in iters:
        audio = audio_list[audio_idx]

        # logger.info("Progress: %s/%s - %s", idx+1, len(audio_list), audio)
        print(f"Progress: {idx+1}/{len(audio_list)} - {audio}" + " " * 6, end="\r")  # noqa: E226

        basename = os.path.basename(audio)
        filename, _ = os.path.splitext(basename)
        out_hdf = jpath(out_path, filename + ".hdf")

        saved = False
        retry_times = 5
        for retry in range(retry_times):
            if saved:
                break
            try:
                with h5py.File(out_hdf, "w") as out_f:
                    out_f.create_dataset("feature", data=feature)
                saved = True
            except OSError as exp:
                logger.warning("OSError occurred, retrying %d times. Reason: %s", retry + 1, str(exp))

        if not saved:
            logger.error("H5py failed to save the feature file after %d retries.", retry_times)
            raise OSError

    print("")
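# Illustrative usage sketch, not part of the original module: the settings object only
# needs the attributes read above, so types.SimpleNamespace can stand in for the
# project's real settings class here. The helper name and every attribute value are
# hypothetical placeholders, not documented defaults.
def _example_run_cfp_extraction(audio_list, out_path):
    from types import SimpleNamespace

    feat_settings = SimpleNamespace(
        hop_size=0.02,
        window_size=2049,
        frequency_resolution=2.0,
        frequency_center=80,
        time_center=1 / 800,
        gamma=[0.24, 0.6, 1.0],
        bins_per_octave=48,
        harmonic_number=5,
        harmonic=False,
    )
    _parallel_feature_extraction(audio_list, out_path, feat_settings)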
def _parallel_feature_extraction(data_pair, out_path, num_threads=4):
    iters = enumerate(
        parallel_generator(
            _extract_feature_arg_wrapper,
            data_pair,
            max_workers=num_threads,
            chunk_size=num_threads
        )
    )
    for idx, (feature, feat_idx) in iters:
        f_name = os.path.dirname(data_pair[feat_idx][0])

        # logger.info("Progress: %d/%d - %s", idx + 1, len(data_pair), f_name)
        print(f"Progress: {idx+1}/{len(data_pair)} - {f_name}", end="\r")

        out_hdf = jpath(out_path, os.path.basename(f_name) + ".hdf")
        _write_feature(feature, out_path=out_hdf)
def _parallel_feature_extraction(data_pair, out_path, label_extractor, feat_settings, num_threads=4):
    feat_extract_params = {
        "hop": feat_settings.hop_size,
        "down_fs": feat_settings.sampling_rate,
        "win_size": feat_settings.window_size
    }

    iters = enumerate(
        parallel_generator(
            _all_in_one_extract,
            data_pair,
            max_workers=num_threads,
            use_thread=True,
            chunk_size=num_threads,
            label_extractor=label_extractor,
            t_unit=feat_settings.hop_size,
            **feat_extract_params
        )
    )
    for idx, ((feature, label), audio_idx) in iters:
        audio = data_pair[audio_idx][0]
        print(f"Progress: {idx+1}/{len(data_pair)} - {audio}" + " " * 6, end="\r")  # noqa: E226

        filename, _ = os.path.splitext(os.path.basename(audio))
        out_hdf = jpath(out_path, filename + ".hdf")

        saved = False
        retry_times = 5
        for retry in range(retry_times):
            if saved:
                break
            try:
                with h5py.File(out_hdf, "w") as out_f:
                    out_f.create_dataset("feature", data=feature)
                    out_f.create_dataset("label", data=label)
                saved = True
            except OSError as exp:
                logger.warning("OSError occurred, retrying %d times. Reason: %s", retry + 1, str(exp))

        if not saved:
            logger.error("H5py failed to save the feature file after %d retries.", retry_times)
            raise OSError

    print("")
def _parallel_feature_extraction(data_pair, label_extractor, out_path, feat_settings, num_threads=4):
    feat_extract_params = {
        "hop": feat_settings.hop_size,
        "fr": feat_settings.frequency_resolution,
        "fc": feat_settings.frequency_center,
        "tc": feat_settings.time_center,
        "g": feat_settings.gamma,
        "bin_per_octave": feat_settings.bins_per_octave
    }

    iters = enumerate(
        parallel_generator(
            _all_in_one_extract,
            data_pair,
            max_workers=num_threads,
            chunk_size=num_threads,
            label_extractor=label_extractor,
            t_unit=feat_settings.hop_size,
            **feat_extract_params
        )
    )
    for idx, ((feature, label), audio_idx) in iters:
        audio = data_pair[audio_idx][0]
        logger.info("Progress: %s/%s - %s", idx + 1, len(data_pair), audio)
        # print(f"Progress: {idx+1}/{len(data_pair)} - {audio}" + " "*6, end="\r")  # noqa: E226

        # Trim feature and label to the same length.
        max_len = min(len(feature), len(label))
        feature = feature[:max_len]
        label = label[:max_len]

        basename = os.path.basename(audio)
        filename, _ = os.path.splitext(basename)
        out_hdf = jpath(out_path, filename + ".hdf")
        with h5py.File(out_hdf, "w") as out_f:
            out_f.create_dataset("feature", data=feature, compression="gzip", compression_opts=3)
            out_f.create_dataset("label", data=label, compression="gzip", compression_opts=3)
def _parallel_feature_extraction(data_pair_list, out_path, feat_settings, num_threads=4):
    feat_params = {
        "patch_size": feat_settings.patch_size,
        "threshold": feat_settings.peak_threshold,
        "down_fs": feat_settings.sampling_rate,
        "hop": feat_settings.hop_size,
        "win_size": feat_settings.window_size,
        "fr": feat_settings.frequency_resolution,
        "fc": feat_settings.frequency_center,
        "tc": feat_settings.time_center,
        "g": feat_settings.gamma,
        "bin_per_octave": feat_settings.bins_per_octave,
    }

    iters = enumerate(
        parallel_generator(
            _all_in_one_extract,
            data_pair_list,
            max_workers=num_threads,
            use_thread=True,
            chunk_size=num_threads,
            **feat_params
        )
    )
    for idx, ((feat, mapping, zzz, label), audio_idx) in iters:
        audio = data_pair_list[audio_idx][0]

        # logger.info("Progress: %s/%s - %s", idx+1, len(data_pair_list), audio)
        print(f"Progress: {idx + 1}/{len(data_pair_list)} - {audio}", end="\r")

        filename = get_filename(audio)
        out_hdf = jpath(out_path, filename + ".hdf")
        with h5py.File(out_hdf, "w") as out_f:
            out_f.create_dataset("feature", data=feat)
            out_f.create_dataset("mapping", data=mapping)
            out_f.create_dataset("Z", data=zzz)
            out_f.create_dataset("label", data=label)

    print("")
def _parallel_feature_extraction(feat_list, out_path, feat_settings, num_threads=4):
    iters = enumerate(
        parallel_generator(
            _all_in_one_extract,
            feat_list,
            max_workers=num_threads,
            chunk_size=num_threads,
            t_unit=feat_settings.time_unit
        )
    )
    for idx, ((feature, beat_arr, down_beat_arr), feat_idx) in iters:
        feat = feat_list[feat_idx]
        print(f"Progress: {idx+1}/{len(feat_list)} - {feat}" + " "*6, end="\r")  # noqa: E226
        # logger.info("Progress: %s/%s - %s", idx+1, len(feat_list), feat)

        filename, _ = os.path.splitext(os.path.basename(feat))
        out_hdf = jpath(out_path, filename + ".hdf")
        with h5py.File(out_hdf, "w") as out_f:
            out_f.create_dataset("feature", data=feature)
            out_f.create_dataset("beat", data=beat_arr)
            out_f.create_dataset("down_beat", data=down_beat_arr)

    print("")
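# Illustrative stand-in, not the project's real parallel_generator: a minimal sketch of
# the contract the extraction loops above rely on, namely that results are yielded as
# (result, original_index) pairs so each output can be mapped back to its input even
# when workers finish out of order. The real helper also supports use_thread and
# chunk_size, which are omitted here.
def _example_parallel_generator(func, input_list, max_workers=4, **kwargs):
    from concurrent.futures import ProcessPoolExecutor, as_completed

    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        # Submit one task per input and remember the original position of each.
        future_to_idx = {
            executor.submit(func, item, **kwargs): idx
            for idx, item in enumerate(input_list)
        }
        for future in as_completed(future_to_idx):
            yield future.result(), future_to_idx[future]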