def __init__(self, experiment_dir: Path, training: bool = False):
    """Set up the experiment described by the configuration file.

    Reads the YAML file whose path is obtained by formatting
    `self.CONFIG_YML` with `experiment_dir`, then builds the
    preprocessors and the pipeline it describes and, when requested,
    freezes some of the pipeline parameters.

    Parameters
    ----------
    experiment_dir : Path
        Experiment directory, substituted into `self.CONFIG_YML`.
    training : bool, optional
        Accepted for interface compatibility; not used by this method.

    Raises
    ------
    KeyError
        If the configuration has no `pipeline` section or that section
        has no `name`.
    """
    super().__init__()

    self.experiment_dir = experiment_dir

    # load configuration file
    config_yml = self.CONFIG_YML.format(experiment_dir=self.experiment_dir)
    with open(config_yml, 'r') as fp:
        # An explicit loader is required: calling `yaml.load` without one
        # is deprecated since PyYAML 5.1 and rejected by PyYAML 6.
        # SafeLoader also refuses to build arbitrary Python objects.
        self.config_ = yaml.load(fp, Loader=yaml.SafeLoader)

    # initialize preprocessors
    preprocessors = {}
    for key, preprocessor in self.config_.get('preprocessors', {}).items():

        # preprocessors:
        #    key:
        #       name: package.module.ClassName
        #       params:
        #          param1: value1
        #          param2: value2
        if isinstance(preprocessor, dict):
            Klass = get_class_by_name(
                preprocessor['name'],
                default_module_name='pyannote.pipeline')
            preprocessors[key] = Klass(**preprocessor.get('params', {}))
            continue

        try:
            # preprocessors:
            #    key: /path/to/database.yml
            preprocessors[key] = FileFinder(preprocessor)

        except FileNotFoundError:
            # preprocessors:
            #    key: /path/to/{uri}.wav
            # the value is kept as a plain template string
            preprocessors[key] = preprocessor

    self.preprocessors_ = preprocessors

    # initialize pipeline
    pipeline_name = self.config_['pipeline']['name']
    Klass = get_class_by_name(
        pipeline_name,
        default_module_name='pyannote.pipeline.blocks')
    self.pipeline_ = Klass(**self.config_['pipeline'].get('params', {}))

    # freeze parameters
    if 'freeze' in self.config_:
        params = self.config_['freeze']
        self.pipeline_.freeze(params)
def __init__(self, experiment_dir: Path, training: bool = False):
    """Set up the experiment described by the configuration file.

    Reads the YAML file whose path is obtained by formatting
    `self.CONFIG_YML` with `experiment_dir`, then builds the
    preprocessors and the pipeline it describes and, when requested,
    freezes some of the pipeline parameters.

    Parameters
    ----------
    experiment_dir : Path
        Experiment directory, substituted into `self.CONFIG_YML`.
    training : bool, optional
        Accepted for interface compatibility; not used by this method.

    Raises
    ------
    KeyError
        If the configuration has no `pipeline` section or that section
        has no `name`.
    """
    super().__init__()

    self.experiment_dir = experiment_dir

    # load configuration file
    config_yml = self.CONFIG_YML.format(experiment_dir=self.experiment_dir)
    with open(config_yml, 'r') as fp:
        # An explicit loader is required: calling `yaml.load` without one
        # is deprecated since PyYAML 5.1 and rejected by PyYAML 6.
        # SafeLoader also refuses to build arbitrary Python objects.
        self.config_ = yaml.load(fp, Loader=yaml.SafeLoader)

    # initialize preprocessors
    preprocessors = {}
    for key, db_yml in self.config_.get('preprocessors', {}).items():
        try:
            # value is the path to a database configuration file
            preprocessors[key] = FileFinder(db_yml)
        except FileNotFoundError:
            # FileFinder could not open it: keep the value as a plain
            # template string instead
            preprocessors[key] = db_yml

    self.preprocessors_ = preprocessors

    # initialize pipeline
    pipeline_name = self.config_['pipeline']['name']
    Klass = get_class_by_name(
        pipeline_name,
        default_module_name='pyannote.pipeline.blocks')
    self.pipeline_ = Klass(**self.config_['pipeline'].get('params', {}))

    # freeze parameters
    if 'freeze' in self.config_:
        params = self.config_['freeze']
        self.pipeline_.freeze(params)
def load_pretrained_pipeline(train_dir: Path) -> Pipeline:
    """Instantiate a pipeline and load its trained parameters

    Parameters
    ----------
    train_dir : Path
        Training directory, i.e. the one holding the `params.yml` file
        written by `pyannote-pipeline train ...`. It must exist.

    Returns
    -------
    pipeline : Pipeline
        Whatever `load_params` returns for the instantiated pipeline.
    """

    resolved_dir = Path(train_dir).expanduser().resolve(strict=True)

    # the experiment configuration lives two levels above `train_dir`
    with open(resolved_dir.parents[1] / 'config.yml', 'r') as config_file:
        experiment_cfg = yaml.load(config_file, Loader=yaml.SafeLoader)

    pipeline_cfg = experiment_cfg['pipeline']
    PipelineClass = get_class_by_name(
        pipeline_cfg['name'],
        default_module_name='pyannote.audio.pipeline')
    init_kwargs = pipeline_cfg.get('params', {})

    instance = PipelineClass(**init_kwargs)
    return instance.load_params(resolved_dir / 'params.yml')
def extract(protocol_name, file_finder, experiment_dir, robust=False, parallel=False):
    """Run feature extraction on every file of a protocol.

    Each non-None result returned by `helper_extract` is printed.

    Parameters
    ----------
    protocol_name : str
        Name passed to `get_protocol`.
    file_finder
        Forwarded as-is to `helper_extract`.
    experiment_dir : str
        Experiment directory containing `config.yml`. It is concatenated
        with a string, so it must be a `str`.
    robust : bool, optional
        Forwarded as-is to `helper_extract`.
    parallel : bool, optional
        When True, files are dispatched to a pool of `cpu_count()` worker
        processes, each of which is handed the path to the configuration
        file. Otherwise files are processed sequentially in this process
        with a single shared feature extractor.
    """

    protocol = get_protocol(protocol_name)

    # path handed over to worker processes in parallel mode
    config_yml = experiment_dir + "/config.yml"

    # Built exactly once from the configuration file and shared between
    # the metadata below and the sequential extraction branch.
    feature_extraction = init_feature_extraction(experiment_dir)

    # create metadata file at root that contains
    # sliding window and dimension information
    # (instantiated for that side effect only; the instance is not used)
    Precomputed(
        root_dir=experiment_dir,
        sliding_window=feature_extraction.sliding_window,
        dimension=feature_extraction.dimension,
    )

    pool = None
    if parallel:
        extract_one = functools.partial(
            helper_extract,
            file_finder=file_finder,
            experiment_dir=experiment_dir,
            config_yml=config_yml,
            robust=robust,
        )
        pool = Pool(cpu_count())
        imap = pool.imap

    else:
        extract_one = functools.partial(
            helper_extract,
            file_finder=file_finder,
            experiment_dir=experiment_dir,
            feature_extraction=feature_extraction,
            robust=robust,
        )
        imap = map

    try:
        for result in imap(extract_one, protocol.files()):
            if result is None:
                continue
            print(result)
    finally:
        # Always release worker processes. On the normal path every result
        # has already been received, so no work is lost; on error this
        # stops the remaining workers instead of leaking them.
        if pool is not None:
            pool.terminate()
            pool.join()
def init_feature_extraction(experiment_dir):
    """Build the feature extractor described in the experiment configuration.

    Parameters
    ----------
    experiment_dir : str
        Directory containing `config.yml` (concatenated with a string).

    Returns
    -------
    feature_extraction
        Instance of the class named in the `feature_extraction` section,
        constructed with that section's optional `params`.
    """

    # read the experiment configuration
    with open(experiment_dir + '/config.yml', 'r') as config_file:
        experiment_cfg = yaml.load(config_file, Loader=yaml.SafeLoader)

    section = experiment_cfg['feature_extraction']
    ExtractorClass = get_class_by_name(
        section['name'],
        default_module_name='pyannote.audio.features')

    init_kwargs = section.get('params', {})
    return ExtractorClass(**init_kwargs)
def init_feature_extraction(experiment_dir):
    """Instantiate the feature extraction configured for an experiment.

    Parameters
    ----------
    experiment_dir : str
        Directory containing `config.yml` (concatenated with a string).

    Returns
    -------
    feature_extraction
        Instance of the class named in the `feature_extraction` section,
        constructed with that section's optional `params`.
    """

    config_path = experiment_dir + "/config.yml"
    with open(config_path, "r") as stream:
        loaded = yaml.load(stream, Loader=yaml.SafeLoader)

    spec = loaded["feature_extraction"]

    # bare class names are looked up in pyannote.audio.features
    Extractor = get_class_by_name(
        spec["name"], default_module_name="pyannote.audio.features"
    )
    return Extractor(**spec.get("params", {}))
def load_config(
    config_yml: Path,
    training: bool = False,
    config_default_module: Text = None,
    pretrained_config_yml: Path = None,
) -> Dict:
    """Load a YAML configuration and replace its sections by live objects

    When `pretrained_config_yml` is given, the user configuration is
    layered on top of it with `merge_cfg`; the user file (if it exists) is
    first copied to `backup+config.yml` next to it, then overwritten with
    the merged configuration.

    Parameters
    ----------
    config_yml : Path
        User-provided configuration file.
    training : bool, optional
        Data augmentation is only instantiated when this is True.
    config_default_module : Text, optional
        Module in which the task class is looked up first.
        Defaults to "pyannote.audio.labeling.tasks".
    pretrained_config_yml : Path, optional
        Configuration file of a pretrained model.

    Returns
    -------
    config : Dict
        ['preprocessors']
        ['learning_rate']
        ['scheduler']
        ['get_optimizer']  (missing when the optimizer module is not found)
        ['callbacks']
        ['feature_extraction']
        ['task']
        ['get_model_from_specs']
        ['model_resolution']
        ['model_alignment']

    Raises
    ------
    FileNotFoundError
        If `config_yml` does not exist and no pretrained configuration
        was provided.
    """

    use_pretrained = pretrained_config_yml is not None

    # pretrained model configuration (empty when none is provided)
    base_cfg = dict()
    if use_pretrained:
        with open(pretrained_config_yml, "r") as fp:
            base_cfg = yaml.load(fp, Loader=yaml.SafeLoader)

    # user-provided configuration
    user_cfg = dict()
    if config_yml.exists():
        with open(config_yml, "r") as fp:
            user_cfg = yaml.load(fp, Loader=yaml.SafeLoader)

        # keep a copy of the user file as it is about to be overwritten
        if use_pretrained:
            shutil.copy(config_yml, config_yml.parent / "backup+config.yml")

    elif not use_pretrained:
        raise FileNotFoundError(f"{config_yml} configuration file is missing.")

    # user-provided values take precedence over pretrained ones
    cfg = merge_cfg(base_cfg, user_cfg)

    # persist the merged configuration
    if use_pretrained:
        with open(config_yml, "w") as fp:
            yaml.dump(cfg, fp, default_flow_style=False)

    # --- preprocessors -----------------------------------------------------
    built = dict()
    for key, spec in cfg.get("preprocessors", {}).items():

        if isinstance(spec, dict):
            # key:
            #    name: package.module.ClassName
            #    params:
            #       param1: value1
            PreprocessorClass = get_class_by_name(spec["name"])
            built[key] = PreprocessorClass(**spec.get("params", {}))

        else:
            try:
                # key: /path/to/database.yml
                built[key] = FileFinder(database_yml=spec)
            except FileNotFoundError:
                # key: /path/to/{uri}.wav
                built[key] = spec

    cfg["preprocessors"] = built

    # --- scheduler ---------------------------------------------------------
    scheduler_spec = cfg.get(
        "scheduler",
        {"name": "DavisKingScheduler", "params": {"learning_rate": "auto"}},
    )
    SchedulerClass = get_class_by_name(
        scheduler_spec["name"],
        default_module_name="pyannote.audio.train.schedulers",
    )
    scheduler_kwargs = scheduler_spec.get("params", {})
    # the learning rate is exposed on its own rather than given to the scheduler
    cfg["learning_rate"] = scheduler_kwargs.pop("learning_rate", "auto")
    cfg["scheduler"] = SchedulerClass(**scheduler_kwargs)

    # --- optimizer ---------------------------------------------------------
    optimizer_spec = cfg.get(
        "optimizer",
        {
            "name": "SGD",
            "params": {
                "momentum": 0.9,
                "dampening": 0,
                "weight_decay": 0,
                "nesterov": True,
            },
        },
    )
    try:
        OptimizerClass = get_class_by_name(
            optimizer_spec["name"], default_module_name="torch.optim"
        )
        optimizer_kwargs = optimizer_spec.get("params", {})
        cfg["get_optimizer"] = partial(OptimizerClass, **optimizer_kwargs)

    except ModuleNotFoundError as error:
        # only warn: the optimizer may not be needed at all
        # (e.g. in pipeline training)
        warnings.warn(error.args[0])

    # --- data augmentation (training only) ---------------------------------
    augmentation = None
    if training and "data_augmentation" in cfg:
        AugmentationClass = get_class_by_name(
            cfg["data_augmentation"]["name"],
            default_module_name="pyannote.audio.augmentation",
        )
        augmentation = AugmentationClass(
            **cfg["data_augmentation"].get("params", {})
        )

    # --- custom callbacks --------------------------------------------------
    cfg["callbacks"] = [
        get_class_by_name(entry["name"])(**entry.get("params", {}))
        for entry in cfg.get("callbacks", {})
    ]

    # --- feature extraction ------------------------------------------------
    feature_spec = cfg.get(
        "feature_extraction",
        {"name": "RawAudio", "params": {"sample_rate": 16000}},
    )
    FeatureClass = get_class_by_name(
        feature_spec["name"], default_module_name="pyannote.audio.features"
    )
    cfg["feature_extraction"] = FeatureClass(
        **feature_spec.get("params", {}), augmentation=augmentation
    )

    # --- task --------------------------------------------------------------
    task_module = (
        "pyannote.audio.labeling.tasks"
        if config_default_module is None
        else config_default_module
    )
    task_spec = cfg["task"]
    try:
        TaskClass = get_class_by_name(
            task_spec["name"], default_module_name=task_module
        )
    except AttributeError:
        # not found in the default module: try embedding approaches
        TaskClass = get_class_by_name(
            task_spec["name"],
            default_module_name="pyannote.audio.embedding.approaches",
        )
    cfg["task"] = TaskClass(**task_spec.get("params", {}))

    # --- architecture ------------------------------------------------------
    architecture_spec = cfg["architecture"]
    ArchitectureClass = get_class_by_name(
        architecture_spec["name"], default_module_name="pyannote.audio.models"
    )
    architecture_kwargs = architecture_spec.get("params", {})

    cfg["get_model_from_specs"] = partial(ArchitectureClass, **architecture_kwargs)

    task_type = cfg["task"].task
    cfg["model_resolution"] = ArchitectureClass.get_resolution(
        task_type, **architecture_kwargs
    )
    cfg["model_alignment"] = ArchitectureClass.get_alignment(
        task_type, **architecture_kwargs
    )

    return cfg
def _generic(
        name: str,
        duration: float = None,
        step: float = 0.25,
        batch_size: int = 32,
        device: typing.Optional[typing.Union[typing.Text, torch.device]] = None,
        pipeline: typing.Optional[bool] = None,
        force_reload: bool = False) -> typing.Union[_Pretrained, _Pipeline]:
    """Load pretrained model or pipeline

    Parameters
    ----------
    name : str
        Name of pretrained model or pipeline
    duration : float, optional
        Override audio chunks duration.
        Defaults to the one used during training.
    step : float, optional
        Ratio of audio chunk duration used for the internal sliding window.
        Defaults to 0.25 (i.e. 75% overlap between two consecutive windows).
        Reducing this value might lead to better results (at the expense of
        slower processing).
    batch_size : int, optional
        Batch size used for inference. Defaults to 32.
    device : torch.device, optional
        Device used for inference.
    pipeline : bool, optional
        Wrap pretrained model in a (not fully optimized) pipeline.
    force_reload : bool
        Whether to discard the existing cache and force a fresh download.
        Defaults to use existing cache.

    Returns
    -------
    pretrained: `Pretrained` or `Pipeline`

    Raises
    ------
    ValueError
        If `name` is unknown, if `name` is ambiguous and `pipeline` is not
        set, or if the requested kind does not exist for `name`.
    NotImplementedError
        If `name` is registered but has no checksum yet (not released).
    RuntimeError
        If the download fails.

    Usage
    -----
    >>> sad = torch.hub.load('pyannote/pyannote-audio', 'sad_ami')
    >>> output = sad({'audio': '/path/to/audio.wav'})
    """

    model_exists = name in _MODELS
    pipeline_exists = name in _PIPELINES

    # --- decide what to load: a 'model' or a 'pipeline' --------------------
    if model_exists and pipeline_exists:

        if pipeline is None:
            msg = (
                f'Both a pretrained model and a pretrained pipeline called '
                f'"{name}" are available. Use option "pipeline=True" to '
                f'load the pipeline, and "pipeline=False" to load the model.')
            raise ValueError(msg)

        kind = 'pipeline' if pipeline else 'model'
        return_pipeline = bool(pipeline)

    elif pipeline_exists:

        if pipeline is None:
            pipeline = True

        if not pipeline:
            msg = (f'Could not find any pretrained "{name}" model. '
                   f'A pretrained "{name}" pipeline does exist. '
                   f'Did you mean "pipeline=True"?')
            raise ValueError(msg)

        kind = 'pipeline'
        return_pipeline = True

    elif model_exists:

        if pipeline is None:
            pipeline = False

        kind = 'model'
        return_pipeline = pipeline

        if name.startswith('emb_') and return_pipeline:
            msg = (
                f'Pretrained model "{name}" has no associated pipeline. Use '
                f'"pipeline=False" or remove "pipeline" option altogether.')
            raise ValueError(msg)

    else:
        msg = (
            f'Could not find any pretrained model nor pipeline called "{name}".'
        )
        raise ValueError(msg)

    zip_url = _URL.format(kind=kind, name=name)
    sha256 = _PIPELINES[name] if kind == 'pipeline' else _MODELS[name]

    if sha256 is None:
        msg = (f'Pretrained {kind} "{name}" is not available yet but will be '
               f'released shortly. Stay tuned...')
        raise NotImplementedError(msg)

    # path where pre-trained models and pipelines are downloaded and cached
    hub_dir = pathlib.Path(
        os.environ.get("PYANNOTE_AUDIO_HUB",
                       "~/.pyannote/hub")).expanduser().resolve()

    pretrained_dir = hub_dir / f'{kind}s'
    pretrained_subdir = pretrained_dir / f'{name}'
    pretrained_zip = pretrained_dir / f'{name}.zip'

    # --- download and unzip, unless already cached -------------------------
    if not pretrained_subdir.exists() or force_reload:

        if pretrained_subdir.exists():
            shutil.rmtree(pretrained_subdir)

        from pyannote.audio.utils.path import mkdir_p
        mkdir_p(pretrained_zip.parent)
        try:
            msg = (
                f'Downloading pretrained {kind} "{name}" to "{pretrained_zip}".'
            )
            print(msg)
            torch.hub.download_url_to_file(zip_url,
                                           pretrained_zip,
                                           hash_prefix=sha256,
                                           progress=True)
        except RuntimeError as e:
            # Nothing has been extracted at this point, so the directory is
            # normally absent: ignore errors so that the cleanup cannot mask
            # the actual download failure with a FileNotFoundError.
            shutil.rmtree(pretrained_subdir, ignore_errors=True)
            msg = (f'Failed to download pretrained {kind} "{name}". '
                   f'Please try again.')
            raise RuntimeError(msg) from e

        # unzip downloaded file
        with zipfile.ZipFile(pretrained_zip) as z:
            z.extractall(path=pretrained_dir)

    if kind == 'model':

        # exactly one params.yml is expected (unpacking fails otherwise)
        params_yml, = pretrained_subdir.glob('*/*/*/*/params.yml')
        pretrained = _Pretrained(validate_dir=params_yml.parent,
                                 duration=duration,
                                 step=step,
                                 batch_size=batch_size,
                                 device=device)

        if return_pipeline:
            if name.startswith('sad_'):
                from pyannote.audio.pipeline.speech_activity_detection import SpeechActivityDetection
                pipeline = SpeechActivityDetection(scores=pretrained)
            elif name.startswith('scd_'):
                from pyannote.audio.pipeline.speaker_change_detection import SpeakerChangeDetection
                pipeline = SpeakerChangeDetection(scores=pretrained)
            elif name.startswith('ovl_'):
                from pyannote.audio.pipeline.overlap_detection import OverlapDetection
                pipeline = OverlapDetection(scores=pretrained)
            else:
                # this should never happen
                msg = (
                    f'Pretrained model "{name}" has no associated pipeline. Use '
                    f'"pipeline=False" or remove "pipeline" option altogether.'
                )
                raise ValueError(msg)

            return pipeline.load_params(params_yml)

        return pretrained

    elif kind == 'pipeline':

        # exactly one params.yml is expected (unpacking fails otherwise)
        params_yml, = pretrained_subdir.glob('*/*/params.yml')

        config_yml = params_yml.parents[2] / 'config.yml'
        with open(config_yml, 'r') as fp:
            config = yaml.load(fp, Loader=yaml.SafeLoader)

        from pyannote.core.utils.helper import get_class_by_name
        pipeline_name = config['pipeline']['name']
        Pipeline = get_class_by_name(
            pipeline_name, default_module_name='pyannote.audio.pipeline')
        pipeline = Pipeline(**config['pipeline'].get('params', {}))

        return pipeline.load_params(params_yml)
def __init__(self, feature_extraction: Optional[dict] = None,
                   architecture: Optional[dict] = None,
                   overlap: Optional[bool] = False,
                   keep_sad: Optional[bool] = False,
                   mask: Optional[dict] = None,
                   augmentation: Optional[bool] = False,
                   duration: Optional[float] = 2.0,
                   batch_size: Optional[float] = 32,
                   gpu: Optional[bool] = False):
    """Store settings and declare the tunable hyper-parameters.

    Parameters
    ----------
    feature_extraction : dict, optional
        {'name': ..., 'params': {...}} description of the feature
        extraction. Defaults to a `LibrosaMFCC` instance.
    architecture : dict, optional
        {'name': ..., 'params': {...}} description of the network
        architecture. Defaults to `PyanNet` with
        {'sincnet': {'skip': True}}.
    overlap : bool, optional
        When True, an additional `overlap_threshold` hyper-parameter is
        declared.
    keep_sad : bool, optional
        Stored as-is.
    mask : dict, optional
        When provided, must contain the 'dimension' and 'log_scale' keys.
    augmentation : bool, optional
        Stored as-is.
    duration : float, optional
        Stored as-is. Defaults to 2.0.
    batch_size : int, optional
        Stored as-is. Defaults to 32.
    gpu : bool, optional
        Use the 'cuda' device when True, 'cpu' otherwise.
    """

    # feature extraction
    if feature_extraction is not None:
        ExtractorClass = get_class_by_name(
            feature_extraction['name'],
            default_module_name='pyannote.audio.features')
        extractor_kwargs = feature_extraction.get('params', {})
        self.feature_extraction_ = ExtractorClass(
            **extractor_kwargs, augmentation=None)
    else:
        from pyannote.audio.features import LibrosaMFCC
        self.feature_extraction_ = LibrosaMFCC(
            e=False, De=True, DDe=True,
            coefs=19, D=True, DD=True,
            duration=0.025, step=0.010, sample_rate=16000,
        )

    # network architecture
    if architecture is not None:
        self.Architecture_ = get_class_by_name(
            architecture['name'],
            default_module_name='pyannote.audio.models')
        self.architecture_params_ = architecture.get('params', {})
    else:
        from pyannote.audio.models import PyanNet
        self.Architecture_ = PyanNet
        self.architecture_params_ = {'sincnet': {'skip': True}}

    self.overlap = overlap
    self.keep_sad = keep_sad

    # masking options
    self.mask = mask
    if mask is not None:
        self.mask_dimension_ = mask['dimension']
        self.mask_logscale_ = mask['log_scale']
    else:
        self.mask_dimension_ = None
        self.mask_logscale_ = False

    self.augmentation = augmentation
    self.duration = duration
    self.batch_size = batch_size

    self.gpu = gpu
    self.device_ = torch.device('cuda' if self.gpu else 'cpu')

    # hyper-parameters
    self.learning_rate = LogUniform(1e-3, 1)
    self.epochs = Integer(10, 50)
    self.ensemble = Integer(1, 5)
    if self.overlap:
        self.overlap_threshold = Uniform(0, 1)
def load_config(config_yml: Path,
                training: bool = False,
                config_default_module: Text = None,
                pretrained_config_yml: Path = None) -> Dict:
    """Turn a YAML configuration into a dictionary of ready-to-use objects

    When `pretrained_config_yml` is given, the user configuration is
    layered on top of it with `merge_cfg`; the user file (if it exists) is
    first copied to `backup+config.yml` next to it, then overwritten with
    the merged configuration.

    Parameters
    ----------
    config_yml : Path
        User-provided configuration file.
    training : bool, optional
        Data augmentation is only instantiated when this is True.
    config_default_module : Text, optional
        Module in which the task class is looked up first.
        Defaults to 'pyannote.audio.labeling.tasks'.
    pretrained_config_yml : Path, optional
        Configuration file of a pretrained model.

    Returns
    -------
    config : Dict
        ['preprocessors']  (always contains 'audio' and 'duration')
        ['learning_rate']
        ['scheduler']
        ['get_optimizer']  (missing when the optimizer module is not found)
        ['callbacks']
        ['feature_extraction']
        ['task']
        ['get_model_from_specs']
        ['model_resolution']
        ['model_alignment']

    Raises
    ------
    FileNotFoundError
        If `config_yml` does not exist and no pretrained configuration
        was provided.
    """

    has_pretrained = pretrained_config_yml is not None

    # configuration of the pretrained model, if any
    pretrained_part = dict()
    if has_pretrained:
        with open(pretrained_config_yml, 'r') as stream:
            pretrained_part = yaml.load(stream, Loader=yaml.SafeLoader)

    # configuration provided by the user
    user_part = dict()
    if config_yml.exists():
        with open(config_yml, 'r') as stream:
            user_part = yaml.load(stream, Loader=yaml.SafeLoader)

        if has_pretrained:
            # the user file gets rewritten below: back it up first
            backup_path = config_yml.parent / 'backup+config.yml'
            shutil.copy(config_yml, backup_path)

    elif not has_pretrained:
        raise FileNotFoundError(f'{config_yml} configuration file is missing.')

    # user-provided values override pretrained ones
    cfg = merge_cfg(pretrained_part, user_part)

    if has_pretrained:
        # save (updated) config to disk
        with open(config_yml, 'w') as stream:
            yaml.dump(cfg, stream, default_flow_style=False)

    # preprocessors
    instantiated = dict()
    for key, entry in cfg.get('preprocessors', {}).items():

        if isinstance(entry, dict):
            # key:
            #    name: package.module.ClassName
            #    params:
            #       param1: value1
            EntryClass = get_class_by_name(entry['name'])
            instantiated[key] = EntryClass(**entry.get('params', {}))

        else:
            try:
                # key: /path/to/database.yml
                instantiated[key] = FileFinder(entry)
            except FileNotFoundError:
                # key: /path/to/{uri}.wav
                instantiated[key] = entry

    # fall back to defaults for the two preprocessors that must exist
    if 'audio' not in instantiated:
        instantiated['audio'] = FileFinder()
    if 'duration' not in instantiated:
        instantiated['duration'] = get_audio_duration

    cfg['preprocessors'] = instantiated

    # scheduler
    default_scheduler = {
        'name': 'DavisKingScheduler',
        'params': {'learning_rate': 'auto'},
    }
    scheduler_section = cfg.get('scheduler', default_scheduler)
    SchedulerClass = get_class_by_name(
        scheduler_section['name'],
        default_module_name='pyannote.audio.train.schedulers')
    scheduler_kwargs = scheduler_section.get('params', {})
    # the learning rate is exposed on its own rather than given to the scheduler
    cfg['learning_rate'] = scheduler_kwargs.pop('learning_rate', 'auto')
    cfg['scheduler'] = SchedulerClass(**scheduler_kwargs)

    # optimizer
    default_optimizer = {
        'name': 'SGD',
        'params': {
            'momentum': 0.9,
            'dampening': 0,
            'weight_decay': 0,
            'nesterov': True,
        },
    }
    optimizer_section = cfg.get('optimizer', default_optimizer)
    try:
        OptimizerClass = get_class_by_name(
            optimizer_section['name'], default_module_name='torch.optim')
        optimizer_kwargs = optimizer_section.get('params', {})
        cfg['get_optimizer'] = partial(OptimizerClass, **optimizer_kwargs)

    except ModuleNotFoundError as error:
        # only warn: the optimizer may not be needed at all
        # (e.g. in pipeline training)
        warnings.warn(error.args[0])

    # data augmentation is only active when training a model
    augmentation = None
    if training and 'data_augmentation' in cfg:
        AugmentationClass = get_class_by_name(
            cfg['data_augmentation']['name'],
            default_module_name='pyannote.audio.augmentation')
        augmentation = AugmentationClass(
            **cfg['data_augmentation'].get('params', {}))

    # custom callbacks
    cfg['callbacks'] = [
        get_class_by_name(entry['name'])(**entry.get('params', {}))
        for entry in cfg.get('callbacks', {})
    ]

    # feature extraction
    default_features = {'name': 'RawAudio', 'params': {'sample_rate': 16000}}
    feature_section = cfg.get('feature_extraction', default_features)
    FeatureClass = get_class_by_name(
        feature_section['name'],
        default_module_name='pyannote.audio.features')
    cfg['feature_extraction'] = FeatureClass(
        **feature_section.get('params', {}), augmentation=augmentation)

    # task
    if config_default_module is None:
        task_module = 'pyannote.audio.labeling.tasks'
    else:
        task_module = config_default_module

    task_section = cfg['task']
    try:
        TaskClass = get_class_by_name(
            task_section['name'], default_module_name=task_module)
    except AttributeError:
        # not found in the default module: try embedding approaches
        TaskClass = get_class_by_name(
            task_section['name'],
            default_module_name='pyannote.audio.embedding.approaches')
    cfg['task'] = TaskClass(**task_section.get('params', {}))

    # architecture
    architecture_section = cfg['architecture']
    ArchitectureClass = get_class_by_name(
        architecture_section['name'],
        default_module_name='pyannote.audio.models')
    architecture_kwargs = architecture_section.get('params', {})

    cfg['get_model_from_specs'] = partial(ArchitectureClass,
                                          **architecture_kwargs)
    cfg['model_resolution'] = ArchitectureClass.get_resolution(
        **architecture_kwargs)
    cfg['model_alignment'] = ArchitectureClass.get_alignment(
        **architecture_kwargs)

    return cfg