def test_logger(caplog):
    """Exercise ``utils.get_logger`` and check it does not stack handlers.

    A logger is requested by name, an INFO message is emitted through it, and
    every record captured by ``caplog`` is checked. The same name is then
    requested again and the returned logger must carry exactly one handler.
    """
    logger_name = "MyLogger"

    first = utils.get_logger(logger_name)
    first.info("hello")

    # Only records that ``caplog`` actually captured are inspected; the loop
    # body does not run when nothing was captured.
    for captured in caplog.records:
        assert captured.levelname == "INFO"
        assert "hello" in captured.message

    second = utils.get_logger(logger_name)
    assert len(second.handlers) == 1
import numpy as np from omnizart.utils import get_logger logger = get_logger("Vocal Predict") def create_batches(feature, ctx_len=9, batch_size=64): feat_pad = np.pad(feature, ((ctx_len, ctx_len), (0, 0), (0, 0))) slices = [feat_pad[idx - ctx_len:idx + ctx_len + 1] for idx in range(ctx_len, len(feat_pad) - ctx_len)] pad_size = batch_size - len(slices) % batch_size payload = np.zeros_like(slices[0]) for _ in range(pad_size): slices.append(payload) slices = np.array(slices) assert len(slices) % batch_size == 0 batches = [slices[idx:idx + batch_size] for idx in range(0, len(slices), batch_size)] return np.array(batches, dtype=np.float32), pad_size def merge_batches(batch_pred): assert len(batch_pred.shape) == 4 batches, batch_size, frm_len, out_classes = batch_pred.shape total_len = batches * batch_size + frm_len - 1 output = np.zeros((total_len, out_classes)) for bidx, batch in enumerate(batch_pred): for fidx, frame in enumerate(batch):
import tensorflow as tf from omnizart.feature.wrapper_func import extract_patch_cqt from omnizart.drum.prediction import predict from omnizart.drum.labels import extract_label_13_inst from omnizart.drum.inference import inference from omnizart.models.spectral_norm_net import drum_model, ConvSN2D from omnizart.utils import get_logger, ensure_path_exists, parallel_generator from omnizart.io import write_yaml from omnizart.base import BaseTranscription, BaseDatasetLoader from omnizart.setting_loaders import DrumSettings from omnizart.train import get_train_val_feat_file_list from omnizart.constants.datasets import PopStructure from omnizart.constants.feature import NOTE_PRIORITY_ARRAY logger = get_logger("Drum Transcription") class DrumTranscription(BaseTranscription): """Application class for drum transcriptions.""" def __init__(self): super().__init__(DrumSettings) self.custom_objects = {"ConvSN2D": ConvSN2D} def transcribe(self, input_audio, model_path=None, output="./"): """Transcribe drum in the audio. This function transcribes drum activations in the music. Currently the model predicts 13 classes of different drum sets, and 3 of them will be written to the MIDI file.
import numpy as np import pretty_midi from scipy.stats import norm from omnizart.utils import get_logger logger = get_logger("Vocal Inference") def _conv(seq, window): half_len = len(window) // 2 end_idx = len(seq) - half_len total = sum(window) container = [] for val in seq[:half_len]: container.append(val) for idx in range(half_len, end_idx): container.append( np.dot(seq[idx - half_len:idx + half_len + 1], window) / total) # noqa: E226 for val in seq[-half_len:]: container.append(val) return np.array(container) def _find_peaks(seq, ctx_len=2, threshold=0.5): # Discard the first and the last <ctx_len> frames. peaks = [] for idx in range(ctx_len, len(seq) - ctx_len - 1): cur_val = seq[idx] if cur_val < threshold:
# pylint: disable=E1101 import numpy as np from omnizart.utils import get_logger logger = get_logger("Drum Prediction") def create_batches(feature, mini_beat_per_seg, b_size=6): """Create a 4D input for model prediction. Parameters ---------- feature: 3D numpy array Should be in shape [mini_beat_pos x time x freq]. mini_beat_per_seg: int Number of mini beats in one segment (a beat). b_size: int Output batch size. Returns ------- batch_feature: 5D numpy array Dimensions are [batches x b_size x time x freq x mini_beat_per_seg]. pad_size: int The additional padded size at the end of the batch. """ assert ( len(feature.shape) == 3 ), f"Invalid feature shape: {feature.shape}. Should be three dimensional."
import os import abc import numpy as np from omnizart.constants import datasets as dset from omnizart.constants.midi import MUSICNET_INSTRUMENT_PROGRAMS, LOWEST_MIDI_NOTE from omnizart.io import dump_pickle from omnizart.utils import get_logger logger = get_logger("Music Labels") class LabelType: """Defines different types of `music` label for training. Defines functions that convert the customized label format into numpy array. With the customized format, it is more flexible to transform labels into different numpy formats according to the usage scenario, and also saves a lot of storage space by using the customized format. Parameters ---------- mode: ['note', 'note-stream', 'pop-note-stream', 'frame', 'frame-stream'] Mode of label conversion. * note: outputs onset and duration channel * note-stream: outputs onset and duration channel of instruments (for MusicNet) * pop-note-stream: similar to ``note-stream`` mode, but is for ``Pop`` dataset * frame: same as ``note`` mode. To truly output duration channel only, use \
import tensorflow as tf from omnizart.io import write_yaml from omnizart.base import BaseTranscription, BaseDatasetLoader from omnizart.train import get_train_val_feat_file_list from omnizart.utils import get_logger, ensure_path_exists, parallel_generator from omnizart.constants.datasets import MusicNetStructure from omnizart.setting_loaders import BeatSettings from omnizart.beat.features import extract_musicnet_feature, extract_musicnet_label, extract_feature_from_midi from omnizart.beat.prediction import predict from omnizart.beat.inference import inference from omnizart.models.rnn import blstm, blstm_attn from omnizart.models.t2t import MultiHeadAttention logger = get_logger("Beat Transcription") class BeatTranscription(BaseTranscription): """Application class for beat tracking in MIDI domain.""" def __init__(self, conf_path=None): super().__init__(BeatSettings, conf_path=conf_path) self.custom_objects = {"MultiHeadAttention": MultiHeadAttention} def transcribe(self, input_audio, model_path=None, output="./"): """Transcribe beat positions in the given MIDI. Tracks the beat in symbolic domain. Outputs three files if the output path is given: *<filename>.mid*, <filename>_beat.csv, and <filename>_down_beat.csv, where *filename* is the name of the input MIDI without extension. The *.csv files record the beat
# pylint: disable=W0201
import os
import abc

import six
import numpy as np
import tensorflow as tf
from tensorflow.python.keras.utils import tf_utils

from omnizart.io import write_yaml
from omnizart.utils import get_logger, ensure_path_exists


logger = get_logger("Callbacks")


class Callback(metaclass=abc.ABCMeta):
    """Base class of all callback classes.

    Parameters
    ----------
    monitor: str, optional
        Name of the quantity to watch. When given, it is stored as
        ``self.monitor`` and a comparison operator is stored as
        ``self.monitor_op``: ``np.greater`` if the name contains ``"acc"``,
        ``np.less`` otherwise. When omitted, neither attribute is set.
    """

    def __init__(self, monitor=None):
        # Without a monitored quantity there is nothing to configure; the
        # attributes are deliberately left undefined in that case.
        if monitor is None:
            return

        self.monitor = monitor
        self.monitor_op = np.greater if "acc" in monitor else np.less

    def on_train_begin(self, history=None):
        """Do nothing; placeholder in the base class."""

    def on_train_end(self, history=None):
        """Do nothing; placeholder in the base class."""
# pylint: disable=W0102,R0914 import math import pretty_midi import numpy as np from scipy.interpolate import CubicSpline from scipy.signal import find_peaks from librosa import note_to_midi from omnizart.constants.midi import MUSICNET_INSTRUMENT_PROGRAMS, MIDI_PROGRAM_NAME_MAPPING from omnizart.utils import get_logger logger = get_logger("Music Inference") def roll_down_sample(data, occur_num=3, base=88): """Down sample feature size for a single pitch. Down sample the feature size from 354 to 88 for inferring the notes. Parameters ---------- data: 2D numpy array The thresholded 2D prediction. occur_num: int For each pitch, the original prediction expands 4 bins wide. This value determines how many positive bins there should be to say there is a real activation after down sampling. base Should be constant as there are 88 pitches on the piano.
# -*- coding: utf-8 -*- """ Author: Lisu Maintainer: BreezeWhite """ # pylint: disable=C0103,W0102,R0914 import numpy as np import scipy from omnizart.io import load_audio from omnizart.utils import get_logger, parallel_generator logger = get_logger("CFP Feature") def STFT(x, fr, fs, Hop, h): t = np.arange(Hop, np.ceil(len(x) / float(Hop)) * Hop, Hop) N = int(fs / float(fr)) window_size = len(h) f = fs * np.linspace(0, 0.5, np.round(N / 2).astype("int"), endpoint=True) Lh = int(np.floor(float(window_size - 1) / 2)) tfr = np.zeros((int(N), len(t)), dtype=np.float) for icol, ti in enumerate(t): ti = int(ti) tau = np.arange(int(-min([round(N / 2.0) - 1, Lh, ti - 1])), int(min([round(N / 2.0) - 1, Lh, len(x) - ti]))) indices = np.mod(N + tau, N) + 1 tfr[indices - 1,
import librosa
import numpy as np

from omnizart.io import load_audio
from omnizart.utils import get_logger


logger = get_logger("CQT Feature")


def post_process_cqt(gram):
    """Log-scale and L2-normalize a Constant-Q spectrogram.

    Parameters
    ----------
    gram: np.ndarray
        Constant-Q spectrogram, constructed from ``librosa.cqt``.

    Returns
    -------
    log_normalized_gram: np.ndarray
        Log-magnitude, L2-normalized constant-Q spectrogram, transposed
        relative to the input and cast to ``float32``.
    """
    # Magnitude -> decibels.
    magnitude = np.abs(gram)
    log_amp = librosa.amplitude_to_db(magnitude, amin=1e-06, top_db=80.0)

    # Offset by 80.001 and rescale by 100/80.
    scaled = (log_amp + 80.001) * (100.0 / 80.0)

    # L2-normalize along axis 1 of the transposed gram.
    normalized = librosa.util.normalize(scaled.T, norm=2.0, axis=1)
    return normalized.astype(np.float32)
from mir_eval import sonify from mir_eval.util import midi_to_hz from scipy.io.wavfile import write as wavwrite from omnizart.io import write_yaml, write_agg_f0_results from omnizart.utils import get_logger, parallel_generator, get_filename, ensure_path_exists, aggregate_f0_info from omnizart.base import BaseTranscription, BaseDatasetLoader from omnizart.constants import datasets as d_struct from omnizart.feature.cfp import extract_patch_cfp from omnizart.setting_loaders import PatchCNNSettings from omnizart.models.patch_cnn import patch_cnn_model from omnizart.patch_cnn.inference import inference from omnizart.vocal.labels import MIR1KlabelExtraction from omnizart.train import get_train_val_feat_file_list logger = get_logger("Patch CNN Transcription") class PatchCNNTranscription(BaseTranscription): """Application class of PatchCNN module.""" def __init__(self, conf_path=None): super().__init__(PatchCNNSettings, conf_path=conf_path) def transcribe(self, input_audio, model_path=None, output="./"): """Transcribe frame-level fundamental frequency of vocal from the given audio. Parameters ---------- input_audio: Path Path to the wav audio file. model_path: Path
"""Utility functions for Music module""" import numpy as np from scipy.special import expit from omnizart.utils import get_logger logger = get_logger("Music Prediction") def cut_frame(frm, ori_feature_size=352, feature_num=384): feat_num = frm.shape[1] assert feat_num == feature_num cut_start = (feat_num - ori_feature_size) // 2 # noqa: E226 c_range = range(cut_start, cut_start + ori_feature_size) return frm[:, c_range] def cut_batch_pred(b_pred): t_len = len(b_pred[0]) cut_rr = range(round(t_len * 0.25), round(t_len * 0.75)) cut_pp = [] for pred in b_pred: cut_pp.append(pred[cut_rr]) return np.array(cut_pp) def create_batches(feature, b_size, timesteps, feature_num=384):
from concurrent.futures import ProcessPoolExecutor import scipy import numpy as np from madmom.features import ( DBNDownBeatTrackingProcessor, RNNDownBeatProcessor, DBNBeatTrackingProcessor, RNNBeatProcessor, BeatTrackingProcessor, ) from omnizart.io import load_audio from omnizart.utils import get_logger logger = get_logger("Beat Extraction") class MadmomBeatTracking: """Extract beat information with madmom library. Three different beat tracking methods are used together for producing a more stable beat tracking result. """ def __init__(self, num_threads=3): self.num_threads = num_threads def _get_dbn_down_beat(self, audio_data_in1, min_bpm_in=50, max_bpm_in=230):
import math import numpy as np from omnizart.utils import get_logger logger = get_logger("Beat Prediction") #: Step size for slicing the feature. Ratio to the timesteps of the model input feature. STEP_SIZE_RATIO = 0.5 def create_batches(feature, timesteps, batch_size=8): """Create a 4D output from the 2D feature for model prediction. Create overlapped input features, and collect feature slices into batches. The overlap size is 1/4 length to the timesteps. Parameters ---------- feature: 2D numpy array The feature representation for the model. timesteps: int Size of the input feature dimension. batch_size: int Batch size. Returns ------- batches: 4D numpy array Batched feature slices with dimension: batches x batch_size x timesteps x feat. """
import math

import numpy as np
import pretty_midi
from scipy.interpolate import interp1d

from omnizart.utils import get_logger
from omnizart.base import Label
from omnizart.constants.midi import LOWEST_MIDI_NOTE
from omnizart.constants.datasets import MusicNetStructure


logger = get_logger("Beat features")


def extract_feature_from_midi(midi_path, t_unit=0.01):
    """Extract feature for beat module from MIDI file.

    Every note of every instrument in the MIDI file is converted into a
    ``Label`` (start time, end time, pitch), and the collected labels are
    handed to ``extract_feature``.

    Parameters
    ----------
    midi_path: Path
        Path to the MIDI file, passed to ``pretty_midi.PrettyMIDI``.
    t_unit: float
        Forwarded unchanged to ``extract_feature`` as ``t_unit``.

    Returns
    -------
    The value returned by ``extract_feature`` for the collected labels.

    See Also
    --------
    omnizart.beat.features.extract_feature:
        The main feature extraction function of beat module.
    """
    parsed = pretty_midi.PrettyMIDI(midi_path)
    # Flatten instruments -> notes, keeping the original traversal order.
    labels = [
        Label(start_time=note.start, end_time=note.end, note=note.pitch)
        for inst in parsed.instruments
        for note in inst.notes
    ]
    return extract_feature(labels, t_unit=t_unit)
import os import glob import random from os.path import join as jpath from abc import ABCMeta, abstractmethod import h5py import tensorflow as tf from tensorflow.keras.models import model_from_yaml from omnizart import MODULE_PATH from omnizart.utils import get_logger, ensure_path_exists, get_filename from omnizart.constants.midi import LOWEST_MIDI_NOTE, HIGHEST_MIDI_NOTE logger = get_logger("Base Class") class BaseTranscription(metaclass=ABCMeta): """Base class of transcription applications.""" def __init__(self, setting_class, conf_path=None): self.setting_class = setting_class self.settings = setting_class(conf_path=conf_path) self.custom_objects = {} @abstractmethod def transcribe(self, input_audio, model_path, output="./"): raise NotImplementedError def get_model(self, settings): """Get the model from the python source file.
+-----------+--------------+-----------+--------------+ """ # pylint: disable=C0112 import os import csv import glob from os.path import join as jpath from shutil import copy from omnizart.io import load_yaml from omnizart.utils import ensure_path_exists, get_logger from omnizart.remote import download_large_file_from_google_drive logger = get_logger("Constant Datasets") def _get_file_list(dataset_path, dirs, ext): files = [] for _dir in dirs: files += glob.glob(os.path.join(dataset_path, _dir, "*" + ext)) return files class BaseStructure: """Defines the necessary attributes and common functions for each sub-dataset structure class. All sub-dataset structure classes should inherit this base class to ensure the necessary attributes and methods are overridden. """
import os
import csv
import pickle

import yaml
import librosa

from omnizart.utils import ensure_path_exists, LazyLoader, get_logger


# Lazy load the Spleeter package for avoiding pulling large dependencies
# and boosting the import speed.
adapter = LazyLoader("adapter", globals(), "spleeter.audio.adapter")
logger = get_logger("IO")


def dump_pickle(data, save_to):
    """Dump data to the given path.

    Parameters
    ----------
    data: python objects
        Data to store. Should be python built-in types like `dict`, `list`, `str`, `int`, etc
    save_to: Path
        The full path to store the pickle file, including file name.
        Will create the directory if the given path doesn't exist.
        A bare file name (no directory part) is written to the current
        working directory.
    """
    # ``os.path.dirname`` returns "" for a bare file name. Resolve to an
    # absolute path first so the directory handed to ``ensure_path_exists``
    # is never an empty string.
    base_dir = os.path.dirname(os.path.abspath(save_to))
    ensure_path_exists(base_dir)
    with open(save_to, "wb") as pkl_file:
        pickle.dump(data, pkl_file)
from spleeter.separator import Separator from spleeter.utils.logging import get_logger as sp_get_logger from omnizart.io import load_audio, write_yaml from omnizart.utils import get_logger, resolve_dataset_type, parallel_generator, ensure_path_exists, LazyLoader from omnizart.constants import datasets as d_struct from omnizart.base import BaseTranscription, BaseDatasetLoader from omnizart.feature.cfp import extract_vocal_cfp, _extract_vocal_cfp from omnizart.setting_loaders import VocalSettings from omnizart.vocal import labels as lextor from omnizart.vocal.prediction import predict from omnizart.vocal.inference import infer_interval, infer_midi from omnizart.train import get_train_val_feat_file_list from omnizart.models.pyramid_net import PyramidNet logger = get_logger("Vocal Transcription") vcapp = LazyLoader("vcapp", globals(), "omnizart.vocal_contour") class VocalTranscription(BaseTranscription): """Application class for vocal note transcription. This application implements the training procedure in a semi-supervised way. """ def __init__(self, conf_path=None): super().__init__(VocalSettings, conf_path=conf_path) # Disable logging information of Spleeter sp_logger = sp_get_logger() sp_logger.setLevel(40) # logging.ERROR
import h5py import numpy as np import tensorflow as tf from omnizart.base import BaseTranscription, BaseDatasetLoader from omnizart.setting_loaders import ChordSettings from omnizart.io import write_yaml from omnizart.utils import get_logger, ensure_path_exists, parallel_generator from omnizart.constants.datasets import McGillBillBoard from omnizart.feature.chroma import extract_chroma from omnizart.chord.features import extract_feature_label from omnizart.chord.inference import inference, write_csv from omnizart.train import get_train_val_feat_file_list from omnizart.models.chord_model import ChordModel, ReduceSlope logger = get_logger("Chord Application") class ChordTranscription(BaseTranscription): """Application class for chord transcription.""" def __init__(self, conf_path=None): super().__init__(ChordSettings, conf_path=conf_path) def transcribe(self, input_audio, model_path=None, output="./"): """Transcribe chords in the audio. This function transcribes chord progression in the audio and will output MIDI and CSV files. The MIDI file is provided for quick validation by directly listening to the chords. The complete transcription results are listed in the CSV file, which contains the chord's name and the start and end time.
from omnizart.music.prediction import predict from omnizart.music.labels import (LabelType, MaestroLabelExtraction, MapsLabelExtraction, MusicNetLabelExtraction, PopLabelExtraction) from omnizart.music.losses import focal_loss, smooth_loss from omnizart.base import BaseTranscription, BaseDatasetLoader from omnizart.utils import get_logger, parallel_generator, ensure_path_exists, resolve_dataset_type from omnizart.io import dump_pickle, write_yaml from omnizart.train import train_epochs, get_train_val_feat_file_list from omnizart.callbacks import EarlyStopping, ModelCheckpoint from omnizart.setting_loaders import MusicSettings from omnizart.constants.midi import MUSICNET_INSTRUMENT_PROGRAMS, POP_INSTRUMENT_PROGRAMES from omnizart.constants.feature import FEATURE_NAME_TO_NUMBER import omnizart.constants.datasets as d_struct logger = get_logger("Music Transcription") class MusicTranscription(BaseTranscription): """Application class for music transcription. Inherits from the BaseTranscription class to make sure everything needed gets overridden. """ def __init__(self, conf_path=None): super().__init__(MusicSettings, conf_path=conf_path) self.mode_inst_mapping = { "Piano": MUSICNET_INSTRUMENT_PROGRAMS, "Stream": MUSICNET_INSTRUMENT_PROGRAMS, "Pop": POP_INSTRUMENT_PROGRAMES }
from mir_eval import sonify from omnizart.base import BaseTranscription, BaseDatasetLoader from omnizart.setting_loaders import VocalContourSettings from omnizart.feature.wrapper_func import extract_cfp_feature from omnizart.utils import get_logger, ensure_path_exists, parallel_generator, resolve_dataset_type, aggregate_f0_info from omnizart.io import write_yaml, write_agg_f0_results from omnizart.train import train_epochs, get_train_val_feat_file_list from omnizart.callbacks import EarlyStopping, ModelCheckpoint from omnizart.vocal_contour.inference import inference from omnizart.vocal_contour import labels as lextor from omnizart.constants import datasets as d_struct from omnizart.models.u_net import semantic_segmentation from omnizart.music.losses import focal_loss logger = get_logger("Vocal Contour") class VocalContourTranscription(BaseTranscription): """Application class for vocal-contour transcription.""" def __init__(self, conf_path=None): super().__init__(VocalContourSettings, conf_path=conf_path) def transcribe(self, input_audio, model_path=None, output="./"): """Transcribe frame-level fundamental frequency of vocal from the given audio. Parameters ---------- input_audio: Path Path to the wav audio file. model_path: Path