def _output_midi(self, output, input_audio, midi=None, verbose=True):
    if output is None:
        return None

    if os.path.isdir(output):
        output = jpath(output, get_filename(input_audio))
    if midi is not None:
        out_path = output if output.endswith(".mid") else f"{output}.mid"
        midi.write(out_path)
        if verbose:
            logger.info("MIDI file has been written to %s.", out_path)
    return output
def _parallel_feature_extraction(data_pair_list, out_path, feat_settings, num_threads=4):
    feat_params = {
        "patch_size": feat_settings.patch_size,
        "threshold": feat_settings.peak_threshold,
        "down_fs": feat_settings.sampling_rate,
        "hop": feat_settings.hop_size,
        "win_size": feat_settings.window_size,
        "fr": feat_settings.frequency_resolution,
        "fc": feat_settings.frequency_center,
        "tc": feat_settings.time_center,
        "g": feat_settings.gamma,
        "bin_per_octave": feat_settings.bins_per_octave,
    }

    iters = enumerate(
        parallel_generator(
            _all_in_one_extract,
            data_pair_list,
            max_workers=num_threads,
            use_thread=True,
            chunk_size=num_threads,
            **feat_params
        )
    )
    for idx, ((feat, mapping, zzz, label), audio_idx) in iters:
        audio = data_pair_list[audio_idx][0]

        # logger.info("Progress: %s/%s - %s", idx+1, len(data_pair_list), audio)
        print(f"Progress: {idx + 1}/{len(data_pair_list)} - {audio}", end="\r")

        filename = get_filename(audio)
        out_hdf = jpath(out_path, filename + ".hdf")
        with h5py.File(out_hdf, "w") as out_f:
            out_f.create_dataset("feature", data=feat)
            out_f.create_dataset("mapping", data=mapping)
            out_f.create_dataset("Z", data=zzz)
            out_f.create_dataset("label", data=label)
    print("")
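# Illustrative sketch (not part of the original module): each "<filename>.hdf" written
# above holds four datasets named "feature", "mapping", "Z", and "label". The helper
# below shows one way such a file could be read back; the function name
# ``_load_extracted_hdf`` is a hypothetical choice for illustration only.
def _load_extracted_hdf(hdf_path):
    """Load the datasets written by ``_parallel_feature_extraction``."""
    with h5py.File(hdf_path, "r") as fin:
        feature = fin["feature"][:]
        mapping = fin["mapping"][:]
        zzz = fin["Z"][:]
        label = fin["label"][:]
    return feature, mapping, zzz, label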
def transcribe(self, input_audio, model_path=None, output="./"):
    """Transcribe vocal notes in the audio.

    This function transcribes the onset, offset, and pitch of the vocal in the audio.
    This module is responsible for predicting the onset and offset time of each note,
    while pitches are estimated by the `vocal-contour` submodule.

    Parameters
    ----------
    input_audio: Path
        Path to the raw audio file (.wav).
    model_path: Path
        Path to the trained model or the supported transcription mode.
    output: Path (optional)
        Path for writing out the transcribed MIDI file. Defaults to the current path.

    Returns
    -------
    midi: pretty_midi.PrettyMIDI
        The transcribed vocal notes.

    Outputs
    -------
    This function will output three files as listed below:

    - <song>.mid: the MIDI file with complete transcription results in piano soundfont.
    - <song>_f0.csv: pitch contour information of the vocal.
    - <song>_trans.wav: the rendered pitch contour audio.

    See Also
    --------
    omnizart.cli.vocal.transcribe: CLI entry point of this function.
    omnizart.vocal_contour.transcribe: Pitch estimation function.
    """
    logger.info("Separating vocal track from the audio...")
    command = ["spleeter", "separate", input_audio, "-o", "./"]
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    _, error = process.communicate()
    if process.returncode != 0:
        raise SpleeterError(error.decode("utf-8"))

    # Resolve the path of the separated output files.
    folder_path = jpath("./", get_filename(input_audio))
    vocal_wav_path = jpath(folder_path, "vocals.wav")
    wav, fs = load_audio(vocal_wav_path)

    # Clean out the separated output files.
    shutil.rmtree(folder_path)

    logger.info("Loading model...")
    model, model_settings = self._load_model(model_path)

    logger.info("Extracting feature...")
    feature = _extract_vocal_cfp(
        wav,
        fs,
        down_fs=model_settings.feature.sampling_rate,
        hop=model_settings.feature.hop_size,
        fr=model_settings.feature.frequency_resolution,
        fc=model_settings.feature.frequency_center,
        tc=model_settings.feature.time_center,
        g=model_settings.feature.gamma,
        bin_per_octave=model_settings.feature.bins_per_octave
    )

    logger.info("Predicting...")
    pred = predict(feature, model)

    logger.info("Inferring notes...")
    interval = infer_interval(
        pred,
        ctx_len=model_settings.inference.context_length,
        threshold=model_settings.inference.threshold,
        min_dura=model_settings.inference.min_duration,
        t_unit=model_settings.feature.hop_size
    )

    logger.info("Extracting pitch contour...")
    agg_f0 = vcapp.app.transcribe(
        input_audio, model_path=model_settings.inference.pitch_model, output=output
    )

    logger.info("Inferring MIDI...")
    midi = infer_midi(interval, agg_f0, t_unit=model_settings.feature.hop_size)

    self._output_midi(output=output, input_audio=input_audio, midi=midi)
    logger.info("Transcription finished")
    return midi
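# Usage sketch (illustrative only): assuming an instance of the enclosing transcription
# class is available, e.g. as ``app``, a single file can be transcribed as below. The
# audio path and output directory are placeholders, not values from this module.
#
#     midi = app.transcribe("path/to/song.wav", output="./out")
#     midi.write("copy_of_transcription.mid")  # ``midi`` is a pretty_midi.PrettyMIDI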