Example #1
    def __init__(self,
                 protocol=None,
                 subset='train',
                 db_yml=None,
                 snr_min=5,
                 snr_max=20):
        super().__init__()

        self.protocol = protocol
        self.subset = subset
        self.db_yml = db_yml

        self.snr_min = snr_min
        self.snr_max = snr_max

        # returns gaps in annotation as pyannote.core.Timeline instance
        get_gaps = lambda f: f['annotation'].get_timeline().gaps(
            support=get_annotated(f))

        if isinstance(protocol, str):
            preprocessors = {
                'audio': FileFinder(config_yml=db_yml),
                'duration': get_audio_duration,
                'gaps': get_gaps
            }
            protocol = get_protocol(self.protocol, preprocessors=preprocessors)
        else:
            protocol.preprocessors['gaps'] = get_gaps

        self.files_ = list(getattr(protocol, self.subset)())
Example #2
    def __init__(self,
                 protocol=None,
                 subset: Subset = "train",
                 snr_min=5,
                 snr_max=20):
        super().__init__()

        self.protocol = protocol
        self.subset = subset
        self.snr_min = snr_min
        self.snr_max = snr_max

        # returns gaps in annotation as pyannote.core.Timeline instance
        get_gaps = (lambda f: f["annotation"].get_timeline().gaps(
            support=get_annotated(f)))

        if isinstance(protocol, str):
            preprocessors = {
                "audio": FileFinder(),
                "duration": get_audio_duration,
                "gaps": get_gaps,
            }
            protocol = get_protocol(self.protocol, preprocessors=preprocessors)
        else:
            protocol.preprocessors["gaps"] = get_gaps

        self.files_ = list(getattr(protocol, self.subset)())
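Both variants resolve the 'audio' key with FileFinder, which maps each protocol file's metadata to an actual path using the pyannote.database configuration. A minimal sketch of that resolution step, assuming the MUSAN collection is registered in a database.yml (the path template below is hypothetical):

from pyannote.database import FileFinder, get_protocol

# database.yml (hypothetical) registering the collection:
#
#   Databases:
#     MUSAN: /path/to/musan/{uri}.wav

preprocessors = {'audio': FileFinder()}
protocol = get_protocol('MUSAN.Collection.BackgroundNoise',
                        preprocessors=preprocessors)

for current_file in protocol.files():
    # FileFinder filled 'audio' with the resolved path
    print(current_file['uri'], current_file['audio'])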
Example #3
    def __init__(self, experiment_dir: Path, training: bool = False):

        super().__init__()

        self.experiment_dir = experiment_dir

        # load configuration file
        config_yml = self.CONFIG_YML.format(experiment_dir=self.experiment_dir)
        with open(config_yml, 'r') as fp:
            self.config_ = yaml.load(fp, Loader=yaml.SafeLoader)

        # initialize preprocessors
        preprocessors = {}
        for key, db_yml in self.config_.get('preprocessors', {}).items():
            try:
                preprocessors[key] = FileFinder(db_yml)
            except FileNotFoundError as e:
                # not a database.yml file: keep it as a path template
                # (e.g. /path/to/{uri}.wav)
                preprocessors[key] = db_yml
        self.preprocessors_ = preprocessors

        # initialize pipeline
        pipeline_name = self.config_['pipeline']['name']
        Klass = get_class_by_name(
            pipeline_name, default_module_name='pyannote.pipeline.blocks')
        self.pipeline_ = Klass(**self.config_['pipeline'].get('params', {}))

        # freeze parameters
        if 'freeze' in self.config_:
            params = self.config_['freeze']
            self.pipeline_.freeze(params)
Example #4
    def __init__(self, experiment_dir, db_yml=None):
        super(Application, self).__init__()

        self.db_yml = db_yml

        self.preprocessors_ = {'audio': FileFinder(self.db_yml)}

        self.experiment_dir = experiment_dir

        # load configuration
        config_yml = self.CONFIG_YML.format(experiment_dir=self.experiment_dir)
        with open(config_yml, 'r') as fp:
            self.config_ = yaml.load(fp, Loader=yaml.SafeLoader)

        # scheduler
        SCHEDULER_DEFAULT = {
            'name': 'DavisKingScheduler',
            'params': {
                'learning_rate': 'auto'
            }
        }
        scheduler_cfg = self.config_.get('scheduler', SCHEDULER_DEFAULT)
        scheduler_name = scheduler_cfg['name']
        schedulers = __import__('pyannote.audio.train.schedulers',
                                fromlist=[scheduler_name])
        Scheduler = getattr(schedulers, scheduler_name)
        scheduler_params = scheduler_cfg.get('params', {})
        self.get_scheduler_ = partial(Scheduler, **scheduler_params)
        self.learning_rate_ = scheduler_params.get('learning_rate', 'auto')

        # optimizer
        OPTIMIZER_DEFAULT = {
            'name': 'SGD',
            'params': {
                'momentum': 0.9,
                'dampening': 0,
                'weight_decay': 0,
                'nesterov': True
            }
        }
        optimizer_cfg = self.config_.get('optimizer', OPTIMIZER_DEFAULT)
        optimizer_name = optimizer_cfg['name']
        optimizers = __import__('torch.optim', fromlist=[optimizer_name])
        Optimizer = getattr(optimizers, optimizer_name)
        optimizer_params = optimizer_cfg.get('params', {})
        self.get_optimizer_ = partial(Optimizer, **optimizer_params)

        # feature extraction
        if 'feature_extraction' in self.config_:
            extraction_name = self.config_['feature_extraction']['name']
            features = __import__('pyannote.audio.features',
                                  fromlist=[extraction_name])
            FeatureExtraction = getattr(features, extraction_name)
            self.feature_extraction_ = FeatureExtraction(
                **self.config_['feature_extraction'].get('params', {}))
Example #5
    def __init__(self, experiment_dir: Path, training: bool = False):

        super().__init__()

        self.experiment_dir = experiment_dir

        # load configuration file
        config_yml = self.CONFIG_YML.format(experiment_dir=self.experiment_dir)
        with open(config_yml, 'r') as fp:
            self.config_ = yaml.load(fp, Loader=yaml.SafeLoader)

        # initialize preprocessors
        preprocessors = {}
        for key, preprocessor in self.config_.get('preprocessors', {}).items():

            # preprocessors:
            #    key:
            #       name: package.module.ClassName
            #       params:
            #          param1: value1
            #          param2: value2
            if isinstance(preprocessor, dict):
                Klass = get_class_by_name(
                    preprocessor['name'],
                    default_module_name='pyannote.pipeline')
                preprocessors[key] = Klass(**preprocessor.get('params', {}))
                continue

            try:
                # preprocessors:
                #    key: /path/to/database.yml
                preprocessors[key] = FileFinder(preprocessor)

            except FileNotFoundError as e:
                # preprocessors:
                #    key: /path/to/{uri}.wav
                preprocessors[key] = preprocessor

        self.preprocessors_ = preprocessors

        # initialize pipeline
        pipeline_name = self.config_['pipeline']['name']
        Klass = get_class_by_name(
            pipeline_name, default_module_name='pyannote.pipeline.blocks')
        self.pipeline_ = Klass(**self.config_['pipeline'].get('params', {}))

        # freeze parameters
        if 'freeze' in self.config_:
            params = self.config_['freeze']
            self.pipeline_.freeze(params)
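For reference, the three preprocessor forms handled by the loop above correspond to the following config.yml entries; a sketch with hypothetical keys and values, parsed the same way the constructor does:

import yaml

config_text = """
preprocessors:
  custom:                        # dict form: instantiated via get_class_by_name
    name: package.module.ClassName
    params:
      param1: value1
      param2: value2
  audio: /path/to/database.yml   # existing database.yml: wrapped in a FileFinder
  video: /path/to/{uri}.mp4      # anything else: kept as a path template
"""

config = yaml.load(config_text, Loader=yaml.SafeLoader)
for key, preprocessor in config['preprocessors'].items():
    print(key, preprocessor)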
Example #6
    def __init__(self, collection: Optional[NoiseCollection] = None):

        if collection is None:
            collection = "MUSAN.Collection.BackgroundNoise"

        if not isinstance(collection, (list, tuple)):
            collection = [collection]

        self.collection = collection

        self.files_ = []
        preprocessors = {'audio': FileFinder(), 'duration': get_audio_duration}
        for collection in self.collection:
            protocol = get_protocol(collection, preprocessors=preprocessors)
            self.files_.extend(protocol.files())
Example #7
def main():

    arguments = docopt(__doc__, version='Feature extraction')

    file_finder = FileFinder()

    protocol_name = arguments['<database.task.protocol>']
    experiment_dir = arguments['<experiment_dir>']

    if arguments['check']:
        check(protocol_name, file_finder, experiment_dir)
    else:
        robust = arguments['--robust']
        parallel = arguments['--parallel']
        extract(protocol_name, file_finder, experiment_dir,
                robust=robust, parallel=parallel)
Example #8
def main():

    arguments = docopt(__doc__, version="Feature extraction")

    file_finder = FileFinder()

    protocol_name = arguments["<database.task.protocol>"]
    experiment_dir = arguments["<experiment_dir>"]

    if arguments["check"]:
        check(protocol_name, file_finder, experiment_dir)
    else:
        robust = arguments["--robust"]
        parallel = arguments["--parallel"]
        extract(
            protocol_name, file_finder, experiment_dir, robust=robust, parallel=parallel
        )
Example #9
    def __init__(self, collection=None, snr_min=5, snr_max=20):
        super().__init__()

        if collection is None:
            collection = 'MUSAN.Collection.BackgroundNoise'
        if not isinstance(collection, (list, tuple)):
            collection = [collection]
        self.collection = collection

        self.snr_min = snr_min
        self.snr_max = snr_max

        # load noise database
        self.files_ = []
        preprocessors = {'audio': FileFinder(), 'duration': get_audio_duration}
        for collection in self.collection:
            protocol = get_protocol(collection, preprocessors=preprocessors)
            self.files_.extend(protocol.files())
Example #10
    def apply(self, protocol_name, output_dir, step=None):

        model = self.model_.to(self.device)
        model.eval()

        duration = self.duration
        if step is None:
            step = 0.25 * duration

        # do not use memmap as this would lead to too many open files
        if isinstance(self.feature_extraction_, Precomputed):
            self.feature_extraction_.use_memmap = False

        # initialize embedding extraction
        sequence_embedding = SequenceEmbedding(model,
                                               self.feature_extraction_,
                                               duration=duration,
                                               step=step,
                                               batch_size=self.batch_size,
                                               device=self.device)
        sliding_window = sequence_embedding.sliding_window
        dimension = sequence_embedding.dimension

        # create metadata file at root that contains
        # sliding window and dimension information
        precomputed = Precomputed(root_dir=output_dir,
                                  sliding_window=sliding_window,
                                  dimension=dimension)

        # file generator
        protocol = get_protocol(protocol_name,
                                progress=True,
                                preprocessors=self.preprocessors_)

        for current_file in FileFinder.protocol_file_iter(protocol,
                                                          extra_keys=['audio']):

            fX = sequence_embedding.apply(current_file)
            precomputed.dump(current_file, fX)
Example #11
    def apply(self, protocol_name, output_dir, step=None):

        model = self.model_.to(self.device)
        model.eval()

        duration = self.task_.duration
        if step is None:
            step = 0.25 * duration

        # do not use memmap as this would lead to too many open files
        if isinstance(self.feature_extraction_, Precomputed):
            self.feature_extraction_.use_memmap = False

        # initialize sequence labeling extraction
        sequence_labeling = SequenceLabeling(
            model, self.feature_extraction_, duration=duration,
            step=step, batch_size=self.batch_size,
            source='audio', device=self.device)

        sliding_window = sequence_labeling.sliding_window
        n_classes = self.task_.n_classes

        # create metadata file at root that contains
        # sliding window and dimension information
        precomputed = Precomputed(
            root_dir=output_dir,
            sliding_window=sliding_window,
            dimension=n_classes)

        # file generator
        protocol = get_protocol(protocol_name, progress=True,
                                preprocessors=self.preprocessors_)

        for current_file in FileFinder.protocol_file_iter(
            protocol, extra_keys=['audio']):

            fX = sequence_labeling.apply(current_file)
            precomputed.dump(current_file, fX)
Example #12
def xp_objective(args, **kwargs):
    import sys
    sys.path.append("/people/yin/projects/")
    from pyannote.database import get_protocol, get_annotated, FileFinder
    protocol = get_protocol('Etape.SpeakerDiarization.TV',
                            preprocessors={'audio': FileFinder()})

    from pyannote.metrics.diarization import GreedyDiarizationErrorRate
    metric = GreedyDiarizationErrorRate()

    from optimize_cluster import speaker_diarization
    from pyannote.audio.features import Precomputed

    feature_extraction = Precomputed(
        '/vol/work1/bredin/feature_extraction/mfcc')
    sad_pre = '/vol/work1/yin/speech_activity_detection/shallow/train/REPERE.SpeakerDiarization.All.train/tune/Etape.SpeakerDiarization.TV.development/apply'
    scd_pre = '/vol/work1/yin/speaker_change_detection/paper/train/REPERE.SpeakerDiarization.All.train/tune/Etape.SpeakerDiarization.Debug.development/apply'
    emb_pre = '/vol/work1/yin/embedding/20180124'

    args['cls__damping'] = float(args['cls__damping'])
    args['cls__preference'] = float(args['cls__preference'])

    pipeline = speaker_diarization.SpeakerDiarizationPre(
        feature_extraction, sad_pre, scd_pre, emb_pre, **args)
    try:
        for current_file in protocol.train():
            hypothesis = pipeline(current_file, annotated=True)
            if hypothesis is None:
                return 100
            reference = current_file['annotation']
            uem = get_annotated(current_file)
            metric(reference, hypothesis, uem=uem)
    except MemoryError as error:
        return 100

    return abs(metric)
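xp_objective is shaped like an objective function for a black-box hyperparameter optimizer: it maps a parameter dictionary to a diarization error rate to minimize (or 100 when the pipeline fails). A minimal sketch of a direct call, with hypothetical parameter values (the cls__* keys are the ones the function casts to float):

args = {'cls__damping': 0.8, 'cls__preference': -5.0}
der = xp_objective(args)
print(f'DER = {der:.3f}')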
Example #13
    def validate_init(self, protocol_name: Text, subset: Subset = "development"):
        """Initialize validation data

        Parameters
        ----------
        protocol_name : `str`
        subset : {'train', 'development', 'test'}
            Defaults to 'development'.

        Returns
        -------
        validation_data : object
            Validation data.

        """

        preprocessors = self.preprocessors_
        if "audio" not in preprocessors:
            preprocessors["audio"] = FileFinder()
        if "duration" not in preprocessors:
            preprocessors["duration"] = get_audio_duration
        protocol = get_protocol(protocol_name, preprocessors=preprocessors)
        files = getattr(protocol, subset)()

        # convert lazy ProtocolFile to regular dict for multiprocessing
        files = [dict(file) for file in files]

        if isinstance(self.feature_extraction_, (Precomputed, RawAudio)):
            return files

        validation_data = []
        for current_file in tqdm(files, desc="Feature extraction"):
            current_file["features"] = self.feature_extraction_(current_file)
            validation_data.append(current_file)

        return validation_data
Example #14
    def apply(self, protocol_name, output_dir):

        # file generator
        protocol = get_protocol(protocol_name, progress=True,
                                preprocessors=self.preprocessors_)

        mkdir_p(output_dir)
        path = Path(output_dir) / f'{protocol_name}.txt'

        with open(path, mode='w') as fp:

            for current_file in FileFinder.protocol_file_iter(
                protocol, extra_keys=['audio']):

                uri = get_unique_identifier(current_file)
                hypothesis = self.pipeline_.apply(current_file)

                if isinstance(hypothesis, Timeline):
                    for s in hypothesis:
                        fp.write(f'{uri} {s.start:.3f} {s.end:.3f}\n')
                    continue

                for s, t, l in hypothesis.itertracks(yield_label=True):
                    fp.write(f'{uri} {s.start:.3f} {s.end:.3f} {t} {l}\n')
Example #15
import time
import torch

from pyannote.database import FileFinder, get_protocol
from pyannote.metrics.diarization import DiarizationErrorRate, JaccardErrorRate

preprocessors = {'audio': FileFinder()}
protocol = get_protocol('VOXCON.SpeakerDiarization.Challenge', preprocessors=preprocessors)

diarization_pipeline = torch.hub.load('pyannote/pyannote-audio', 'dia_dihard', device='cuda')

ders = []
jers = []
hypotheses = []

derMetric = DiarizationErrorRate(collar=0.25)
jerMetric = JaccardErrorRate(collar=0.25)

for file in protocol.test():
    hypothesis = diarization_pipeline(file)
    hypotheses.append(hypothesis)

    reference = file["annotation"]
    # uem = file['annotated']
    der = derMetric(reference, hypothesis)
    jer = jerMetric(reference, hypothesis)
    ders.append(der)
    jers.append(jer)

    uri = file['uri']
    print(f'{uri} DER = {100 * der:.1f}% JER = {100 * jer:.1f}% {time.strftime("%H:%M:%S")}')
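Since pyannote.metrics objects accumulate statistics across calls, corpus-level values can be read from the same metric instances once the loop is done; abs() and report(display=True) are the same idioms used in the other examples on this page:

print(f'Global DER = {100 * abs(derMetric):.1f}%')
print(f'Global JER = {100 * abs(jerMetric):.1f}%')
derMetric.report(display=True)  # per-file breakdown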
Example #16
def load_config(
    config_yml: Path,
    training: bool = False,
    config_default_module: Text = None,
    pretrained_config_yml: Path = None,
) -> Dict:
    """

    Returns
    -------
    config : Dict
        ['preprocessors']
        ['learning_rate']
        ['scheduler']
        ['get_optimizer']
        ['callbacks']
        ['feature_extraction']
        ['task']
        ['get_model_from_specs']
        ['model_resolution']
        ['model_alignment']
    """

    # load pretrained model configuration
    pretrained_cfg = dict()
    if pretrained_config_yml is not None:
        with open(pretrained_config_yml, "r") as fp:
            pretrained_cfg = yaml.load(fp, Loader=yaml.SafeLoader)

    # load configuration or complain it's missing
    cfg = dict()
    if config_yml.exists():
        with open(config_yml, "r") as fp:
            cfg = yaml.load(fp, Loader=yaml.SafeLoader)

        # backup user-provided config because it will be updated
        if pretrained_config_yml is not None:
            shutil.copy(config_yml, config_yml.parent / "backup+config.yml")

    elif pretrained_config_yml is None:
        msg = f"{config_yml} configuration file is missing."
        raise FileNotFoundError(msg)

    # override pretrained model config with user-provided config
    cfg = merge_cfg(pretrained_cfg, cfg)

    # save (updated) config to disk
    if pretrained_config_yml is not None:
        with open(config_yml, "w") as fp:
            yaml.dump(cfg, fp, default_flow_style=False)

    # preprocessors
    preprocessors = dict()

    for key, preprocessor in cfg.get("preprocessors", {}).items():
        # preprocessors:
        #    key:
        #       name: package.module.ClassName
        #       params:
        #          param1: value1
        #          param2: value2
        if isinstance(preprocessor, dict):
            Klass = get_class_by_name(preprocessor["name"])
            preprocessors[key] = Klass(**preprocessor.get("params", {}))
            continue

        try:
            # preprocessors:
            #    key: /path/to/database.yml
            preprocessors[key] = FileFinder(database_yml=preprocessor)

        except FileNotFoundError as e:
            # preprocessors:
            #    key: /path/to/{uri}.wav
            preprocessors[key] = preprocessor

    cfg["preprocessors"] = preprocessors

    # scheduler
    SCHEDULER_DEFAULT = {
        "name": "DavisKingScheduler",
        "params": {
            "learning_rate": "auto"
        },
    }
    scheduler_cfg = cfg.get("scheduler", SCHEDULER_DEFAULT)
    Scheduler = get_class_by_name(
        scheduler_cfg["name"],
        default_module_name="pyannote.audio.train.schedulers")
    scheduler_params = scheduler_cfg.get("params", {})

    cfg["learning_rate"] = scheduler_params.pop("learning_rate", "auto")
    cfg["scheduler"] = Scheduler(**scheduler_params)

    # optimizer
    OPTIMIZER_DEFAULT = {
        "name": "SGD",
        "params": {
            "momentum": 0.9,
            "dampening": 0,
            "weight_decay": 0,
            "nesterov": True,
        },
    }
    optimizer_cfg = cfg.get("optimizer", OPTIMIZER_DEFAULT)
    try:
        Optimizer = get_class_by_name(optimizer_cfg["name"],
                                      default_module_name="torch.optim")
        optimizer_params = optimizer_cfg.get("params", {})
        cfg["get_optimizer"] = partial(Optimizer, **optimizer_params)

    # do not raise an error here as it is possible that the optimizer is
    # not really needed (e.g. in pipeline training)
    except ModuleNotFoundError as e:
        warnings.warn(e.args[0])

    # data augmentation should only be active when training a model
    if training and "data_augmentation" in cfg:
        DataAugmentation = get_class_by_name(
            cfg["data_augmentation"]["name"],
            default_module_name="pyannote.audio.augmentation",
        )
        augmentation = DataAugmentation(
            **cfg["data_augmentation"].get("params", {}))
    else:
        augmentation = None

    # custom callbacks
    callbacks = []
    for callback_config in cfg.get("callbacks", {}):
        Callback = get_class_by_name(callback_config["name"])
        callback = Callback(**callback_config.get("params", {}))
        callbacks.append(callback)
    cfg["callbacks"] = callbacks

    # feature extraction
    FEATURE_DEFAULT = {"name": "RawAudio", "params": {"sample_rate": 16000}}
    feature_cfg = cfg.get("feature_extraction", FEATURE_DEFAULT)
    FeatureExtraction = get_class_by_name(
        feature_cfg["name"], default_module_name="pyannote.audio.features")
    feature_params = feature_cfg.get("params", {})
    cfg["feature_extraction"] = FeatureExtraction(**feature_params,
                                                  augmentation=augmentation)

    # task
    if config_default_module is None:
        config_default_module = "pyannote.audio.labeling.tasks"

    try:
        TaskClass = get_class_by_name(
            cfg["task"]["name"], default_module_name=config_default_module)
    except AttributeError:
        TaskClass = get_class_by_name(
            cfg["task"]["name"],
            default_module_name="pyannote.audio.embedding.approaches",
        )

    cfg["task"] = TaskClass(**cfg["task"].get("params", {}))

    # architecture
    Architecture = get_class_by_name(
        cfg["architecture"]["name"],
        default_module_name="pyannote.audio.models")
    params = cfg["architecture"].get("params", {})

    cfg["get_model_from_specs"] = partial(Architecture, **params)
    task = cfg["task"].task
    cfg["model_resolution"] = Architecture.get_resolution(task, **params)
    cfg["model_alignment"] = Architecture.get_alignment(task, **params)

    return cfg
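A minimal config.yml exercising the main branches of load_config; the key names follow the cfg[...] accesses above, while the task and architecture class names are pyannote.audio 1.x examples and may differ in other versions:

import yaml

config_text = """
task:
  name: SpeechActivityDetection   # resolved in pyannote.audio.labeling.tasks
  params:
    duration: 2.0
feature_extraction:
  name: RawAudio
  params:
    sample_rate: 16000
architecture:
  name: StackedRNN                # resolved in pyannote.audio.models
scheduler:
  name: DavisKingScheduler
  params:
    learning_rate: auto
"""

cfg = yaml.load(config_text, Loader=yaml.SafeLoader)
print(sorted(cfg))  # top-level sections consumed by load_config()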
Example #17
    def _create_config(self, segment_size_sec: float):
        return metrics.SpeakerValidationConfig(
            protocol_name='VoxCeleb.SpeakerVerification.VoxCeleb2',
            feature_extraction=RawAudio(sample_rate=self.sample_rate),
            preprocessors={'audio': FileFinder()},
            duration=segment_size_sec)
Example #18
def extract(protocol_name,
            file_finder,
            experiment_dir,
            robust=False,
            parallel=False):

    protocol = get_protocol(protocol_name, progress=False)

    # load configuration file
    config_yml = experiment_dir + '/config.yml'
    with open(config_yml, 'r') as fp:
        config = yaml.load(fp, Loader=yaml.SafeLoader)

    feature_extraction_name = config['feature_extraction']['name']
    features = __import__('pyannote.audio.features',
                          fromlist=[feature_extraction_name])
    FeatureExtraction = getattr(features, feature_extraction_name)
    feature_extraction = FeatureExtraction(
        **config['feature_extraction'].get('params', {}))

    sliding_window = feature_extraction.sliding_window()
    dimension = feature_extraction.dimension()

    if 'normalization' in config:
        normalization_name = config['normalization']['name']
        normalization_module = __import__(
            'pyannote.audio.features.normalization',
            fromlist=[normalization_name])
        Normalization = getattr(normalization_module, normalization_name)
        normalization = Normalization(
            **config['normalization'].get('params', {}))
    else:
        normalization = None

    # create metadata file at root that contains
    # sliding window and dimension information

    precomputed = Precomputed(root_dir=experiment_dir,
                              sliding_window=sliding_window,
                              dimension=dimension)

    if parallel:

        extract_one = functools.partial(helper_extract,
                                        file_finder=file_finder,
                                        experiment_dir=experiment_dir,
                                        config_yml=config_yml,
                                        normalization=normalization,
                                        robust=robust)

        n_jobs = cpu_count()
        pool = Pool(n_jobs)
        imap = pool.imap

    else:

        feature_extraction = init_feature_extraction(experiment_dir)
        extract_one = functools.partial(helper_extract,
                                        file_finder=file_finder,
                                        experiment_dir=experiment_dir,
                                        feature_extraction=feature_extraction,
                                        normalization=normalization,
                                        robust=robust)
        imap = map

    for result in imap(
            extract_one,
            FileFinder.protocol_file_iter(protocol, extra_keys=['audio'])):
        if result is None:
            continue
        print(result)
Example #19
    def _validate_epoch_diarization(
        self,
        epoch,
        validation_data,
        protocol=None,
        subset: Subset = "development",
        device: Optional[torch.device] = None,
        batch_size: int = 32,
        n_jobs: int = 1,
        duration: float = None,
        step: float = 0.25,
        metric: str = None,
        **kwargs,
    ):

        # initialize embedding extraction
        pretrained = Pretrained(
            validate_dir=self.validate_dir_,
            epoch=epoch,
            duration=duration,
            step=step,
            batch_size=batch_size,
            device=device,
        )

        preprocessors = self.preprocessors_
        if "audio" not in preprocessors:
            preprocessors["audio"] = FileFinder()
        if "duration" not in preprocessors:
            preprocessors["duration"] = get_audio_duration
        _protocol = get_protocol(protocol, preprocessors=preprocessors)

        Z, t = dict(), dict()
        min_d, max_d = np.inf, -np.inf

        for current_file in getattr(_protocol, subset)():

            uri = get_unique_identifier(current_file)
            uem = get_annotated(current_file)
            reference = current_file["annotation"]

            X_, t_ = [], []
            embedding = pretrained(current_file)
            for i, (turn, _) in enumerate(reference.itertracks()):

                # extract embedding for current speech turn
                x_ = embedding.crop(turn, mode="center")
                if len(x_) < 1:
                    x_ = embedding.crop(turn, mode="loose")
                if len(x_) < 1:
                    msg = f"No embedding for {turn} in {uri:s}."
                    raise ValueError(msg)

                # each speech turn is represented by its average embedding
                X_.append(np.mean(x_, axis=0))
                t_.append(turn)

            X_ = np.array(X_)
            # apply hierarchical agglomerative clustering
            # all the way up to just one cluster (ie complete dendrogram)
            D = pdist(X_, metric=metric)
            min_d = min(np.min(D), min_d)
            max_d = max(np.max(D), max_d)

            Z[uri] = linkage(X_, method="pool", metric=metric)
            t[uri] = np.array(t_)

        def fun(threshold):

            _metric = DiarizationPurityCoverageFMeasure(weighted=False)

            for current_file in getattr(_protocol, subset)():

                uri = get_unique_identifier(current_file)
                uem = get_annotated(current_file)
                reference = current_file["annotation"]

                clusters = fcluster(Z[uri], threshold, criterion="distance")

                hypothesis = Annotation(uri=uri)
                for (start_time, end_time), cluster in zip(t[uri], clusters):
                    hypothesis[Segment(start_time, end_time)] = cluster

                _ = _metric(reference, hypothesis, uem=uem)

            return 1.0 - abs(_metric)

        # search the best threshold within the range of observed distances
        res = scipy.optimize.minimize_scalar(fun,
                                             bounds=(min_d, max_d),
                                             method="bounded",
                                             options={"maxiter": 10})

        threshold = res.x.item()

        return {
            "metric": "diarization_fscore",
            "minimize": False,
            "value": float(1.0 - res.fun),
        }
Example #20
    def _validate_epoch_verification(
        self,
        epoch,
        validation_data,
        protocol=None,
        subset: Subset = "development",
        device: Optional[torch.device] = None,
        batch_size: int = 32,
        n_jobs: int = 1,
        duration: float = None,
        step: float = 0.25,
        metric: str = None,
        **kwargs,
    ):

        # initialize embedding extraction
        pretrained = Pretrained(
            validate_dir=self.validate_dir_,
            epoch=epoch,
            duration=duration,
            step=step,
            batch_size=batch_size,
            device=device,
        )

        preprocessors = self.preprocessors_
        if "audio" not in preprocessors:
            preprocessors["audio"] = FileFinder()
        if "duration" not in preprocessors:
            preprocessors["duration"] = get_audio_duration
        _protocol = get_protocol(protocol, preprocessors=preprocessors)

        y_true, y_pred, cache = [], [], {}

        for trial in getattr(_protocol, f"{subset}_trial")():

            # compute embedding for file1
            file1 = trial["file1"]
            hash1 = self.get_hash(file1)
            if hash1 in cache:
                emb1 = cache[hash1]
            else:
                emb1 = self.get_embedding(file1, pretrained)
                cache[hash1] = emb1

            # compute embedding for file2
            file2 = trial["file2"]
            hash2 = self.get_hash(file2)
            if hash2 in cache:
                emb2 = cache[hash2]
            else:
                emb2 = self.get_embedding(file2, pretrained)
                cache[hash2] = emb2

            # compare embeddings
            distance = cdist(emb1, emb2, metric=metric)[0, 0]
            y_pred.append(distance)

            y_true.append(trial["reference"])
        _, _, _, eer = det_curve(np.array(y_true),
                                 np.array(y_pred),
                                 distances=True)

        return {
            "metric": "equal_error_rate",
            "minimize": True,
            "value": float(eer)
        }
Example #21
    def train(
        self,
        protocol_name: Text,
        subset: Subset = "train",
        warm_start: Union[int, Literal["last"], Path] = 0,
        epochs: int = 1000,
        device: Optional[torch.device] = None,
        n_jobs: int = 1,
    ):
        """Train model

        Parameters
        ----------
        protocol_name : `str`
        subset : {'train', 'development', 'test'}, optional
            Defaults to 'train'.
        warm_start : `int`, "last", or `Path`, optional
            When `int`, restart training at this epoch.
            When "last", restart from last epoch.
            When `Path`, restart from this model checkpoint.
            Defaults to training from scratch (warm_start = 0).
        epochs : `int`, optional
            Train for that many epochs. Defaults to 1000.
        device : `torch.device`, optional
            Device on which the model will be allocated. Defaults to using CPU.
        n_jobs : `int`, optional
        """

        # initialize batch generator
        preprocessors = self.preprocessors_
        if "audio" not in preprocessors:
            preprocessors["audio"] = FileFinder()
        if "duration" not in preprocessors:
            preprocessors["duration"] = get_audio_duration
        protocol = get_protocol(protocol_name, preprocessors=preprocessors)

        batch_generator = self.task_.get_batch_generator(
            self.feature_extraction_,
            protocol,
            subset=subset,
            resolution=self.model_resolution_,
            alignment=self.model_alignment_,
        )

        # initialize model architecture based on specifications
        model = self.get_model_from_specs_(batch_generator.specifications)

        # freeze (when requested)
        model.freeze(getattr(self, "freeze_", []))

        train_dir = Path(
            self.TRAIN_DIR.format(
                experiment_dir=self.experiment_dir,
                protocol=protocol_name,
                subset=subset,
            ))

        # use last available epoch as starting point
        if warm_start == "last":
            warm_start = self.get_number_of_epochs(train_dir=train_dir) - 1

        iterations = self.task_.fit_iter(
            model,
            batch_generator,
            warm_start=warm_start,
            epochs=epochs,
            get_optimizer=self.get_optimizer_,
            scheduler=self.scheduler_,
            learning_rate=self.learning_rate_,
            train_dir=train_dir,
            device=device,
            callbacks=self.callbacks_,
            n_jobs=n_jobs,
        )

        for _ in iterations:
            pass
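Typical invocations of this method, assuming an application object app that exposes it (the protocol name is a standard pyannote.database protocol; epoch counts are hypothetical):

# train from scratch for 200 epochs
app.train('AMI.SpeakerDiarization.MixHeadset', subset='train', epochs=200)

# resume from the last checkpointed epoch
app.train('AMI.SpeakerDiarization.MixHeadset', warm_start='last', epochs=1000)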
Example #22
    def apply(self,
              protocol_name: str,
              output_dir: Path,
              subset: Optional[str] = None):
        """Apply current best pipeline

        Parameters
        ----------
        protocol_name : `str`
            Name of pyannote.database protocol to process.
        subset : `str`, optional
            Subset to process. Defaults to processing all subsets.
        """

        # file generator
        protocol = get_protocol(protocol_name,
                                progress=True,
                                preprocessors=self.preprocessors_)

        output_dir.mkdir(parents=True, exist_ok=True)
        extension = self.pipeline_.write_format
        if subset is None:
            path = output_dir / f'{protocol_name}.all.{extension}'
        else:
            path = output_dir / f'{protocol_name}.{subset}.{extension}'

        # initialize evaluation metric
        try:
            metric = self.pipeline_.get_metric()
        except NotImplementedError as e:
            metric = None
            losses = []

        skip_metric = False

        with open(path, mode='w') as fp:

            if subset is None:
                files = FileFinder.protocol_file_iter(protocol)
            else:
                files = getattr(protocol, subset)()

            for current_file in files:

                # apply pipeline and dump output to file
                output = self.pipeline_(current_file)
                self.pipeline_.write(fp, output)

                if skip_metric:
                    continue

                try:

                    if metric is None:
                        loss = self.pipeline_.loss(current_file, output)
                        losses.append(loss)

                    else:
                        from pyannote.database import get_annotated
                        _ = metric(current_file['annotation'],
                                   output,
                                   uem=get_annotated(current_file))

                except Exception as e:
                    # this may happen for files with no available groundtruth.
                    # in this case, we simply do not perform evaluation
                    skip_metric = True

        if skip_metric:
            msg = (f'For some (possibly good) reason, the output of this '
                   f'pipeline could not be evaluated on {protocol_name}.')
            print(msg)
            return

        # report evaluation metric
        if metric is None:
            loss = np.mean(losses)
            print(f'Loss = {loss:g}')
        else:
            _ = metric.report(display=True)
Example #23
def apply_pretrained(
    validate_dir: Path,
    protocol_name: Text,
    subset: Subset = "test",
    duration: Optional[float] = None,
    step: float = 0.25,
    device: Optional[torch.device] = None,
    batch_size: int = 32,
    pretrained: Optional[str] = None,
    Pipeline: type = None,
    **kwargs,
):
    """Apply pre-trained model

    Parameters
    ----------
    validate_dir : Path
    protocol_name : `str`
    subset : 'train' | 'development' | 'test', optional
        Defaults to 'test'.
    duration : `float`, optional
    step : `float`, optional
    device : `torch.device`, optional
    batch_size : `int`, optional
    pretrained : `str`, optional
    Pipeline : `type`
    """

    if pretrained is None:
        pretrained = Pretrained(
            validate_dir=validate_dir,
            duration=duration,
            step=step,
            batch_size=batch_size,
            device=device,
        )
        output_dir = validate_dir / "apply" / f"{pretrained.epoch_:04d}"
    else:

        if pretrained in torch.hub.list("pyannote/pyannote-audio"):
            output_dir = validate_dir / pretrained
        else:
            output_dir = validate_dir

        pretrained = Wrapper(
            pretrained,
            duration=duration,
            step=step,
            batch_size=batch_size,
            device=device,
        )

    params = {}
    try:
        params["classes"] = pretrained.classes
    except AttributeError as e:
        pass
    try:
        params["dimension"] = pretrained.dimension
    except AttributeError as e:
        pass

    # create metadata file at root that contains
    # sliding window and dimension information
    precomputed = Precomputed(root_dir=output_dir,
                              sliding_window=pretrained.sliding_window,
                              **params)

    # file generator
    preprocessors = getattr(pretrained, "preprocessors_", dict())
    if "audio" not in preprocessors:
        preprocessors["audio"] = FileFinder()
    if "duration" not in preprocessors:
        preprocessors["duration"] = get_audio_duration
    protocol = get_protocol(protocol_name, preprocessors=preprocessors)

    files = getattr(protocol, subset)()
    for current_file in tqdm(iterable=files,
                             desc=f"{subset.title()}",
                             unit="file"):
        fX = pretrained(current_file)
        precomputed.dump(current_file, fX)

    # do not proceed with the full pipeline
    # when there is no such thing for current task
    if Pipeline is None:
        return

    # do not proceed with the full pipeline when its parameters cannot be loaded.
    # this might happen when applying a model that has not been validated yet
    try:
        pipeline_params = pretrained.pipeline_params_
    except AttributeError as e:
        return

    # instantiate pipeline
    pipeline = Pipeline(scores=output_dir)
    pipeline.instantiate(pipeline_params)

    # load pipeline metric (when available)
    try:
        metric = pipeline.get_metric()
    except NotImplementedError as e:
        metric = None

    # apply pipeline and dump output to RTTM files
    output_rttm = output_dir / f"{protocol_name}.{subset}.rttm"
    with open(output_rttm, "w") as fp:
        files = getattr(protocol, subset)()
        for current_file in tqdm(iterable=files,
                                 desc=f"{subset.title()}",
                                 unit="file"):
            hypothesis = pipeline(current_file)
            pipeline.write_rttm(fp, hypothesis)

            # compute evaluation metric (when possible)
            reference = current_file.get("annotation", None)
            if reference is None:
                metric = None

            # compute evaluation metric (when available)
            if metric is None:
                continue

            uem = get_annotated(current_file)
            _ = metric(reference, hypothesis, uem=uem)

    # print pipeline metric (when available)
    if metric is None:
        return

    output_eval = output_dir / f"{protocol_name}.{subset}.eval"
    with open(output_eval, "w") as fp:
        fp.write(str(metric))
Example #24
def extract(protocol_name, file_finder, experiment_dir,
            robust=False, parallel=False):

    protocol = get_protocol(protocol_name, progress=False)

    # load configuration file
    config_yml = experiment_dir + '/config.yml'
    with open(config_yml, 'r') as fp:
        config = yaml.load(fp, Loader=yaml.SafeLoader)

    feature_extraction_name = config['feature_extraction']['name']
    features = __import__('pyannote.audio.features',
                          fromlist=[feature_extraction_name])
    FeatureExtraction = getattr(features, feature_extraction_name)
    feature_extraction = FeatureExtraction(
        **config['feature_extraction'].get('params', {}))

    sliding_window = feature_extraction.sliding_window()
    dimension = feature_extraction.dimension()

    if 'normalization' in config:
        normalization_name = config['normalization']['name']
        normalization_module = __import__('pyannote.audio.features.normalization',
                                          fromlist=[normalization_name])
        Normalization = getattr(normalization_module, normalization_name)
        normalization = Normalization(
            **config['normalization'].get('params', {}))
    else:
        normalization = None

    # create metadata file at root that contains
    # sliding window and dimension information

    precomputed = Precomputed(root_dir=experiment_dir,
                              sliding_window=sliding_window,
                              dimension=dimension)

    if parallel:

        extract_one = functools.partial(helper_extract,
                                        file_finder=file_finder,
                                        experiment_dir=experiment_dir,
                                        config_yml=config_yml,
                                        normalization=normalization,
                                        robust=robust)

        n_jobs = cpu_count()
        pool = Pool(n_jobs)
        imap = pool.imap

    else:

        feature_extraction = init_feature_extraction(experiment_dir)
        extract_one = functools.partial(helper_extract,
                                        file_finder=file_finder,
                                        experiment_dir=experiment_dir,
                                        feature_extraction=feature_extraction,
                                        normalization=normalization,
                                        robust=robust)
        imap = map


    for result in imap(extract_one, FileFinder.protocol_file_iter(
        protocol, extra_keys=['audio'])):
        if result is None:
            continue
        print(result)
Example #25
def load_config(config_yml: Path,
                training: bool = False,
                config_default_module: Text = None,
                pretrained_config_yml: Path = None) -> Dict:
    """

    Returns
    -------
    config : Dict
        ['preprocessors']
        ['learning_rate']
        ['scheduler']
        ['get_optimizer']
        ['callbacks']
        ['feature_extraction']
        ['task']
        ['get_model_from_specs']
        ['model_resolution']
        ['model_alignment']
    """

    # load pretrained model configuration
    pretrained_cfg = dict()
    if pretrained_config_yml is not None:
        with open(pretrained_config_yml, 'r') as fp:
            pretrained_cfg = yaml.load(fp, Loader=yaml.SafeLoader)

    # load configuration or complain it's missing
    cfg = dict()
    if config_yml.exists():
        with open(config_yml, 'r') as fp:
            cfg = yaml.load(fp, Loader=yaml.SafeLoader)

        # backup user-provided config because it will be updated
        if pretrained_config_yml is not None:
            shutil.copy(config_yml, config_yml.parent / 'backup+config.yml')

    elif pretrained_config_yml is None:
        msg = f'{config_yml} configuration file is missing.'
        raise FileNotFoundError(msg)

    # override pretrained model config with user-provided config
    cfg = merge_cfg(pretrained_cfg, cfg)

    # save (updated) config to disk
    if pretrained_config_yml is not None:
        with open(config_yml, 'w') as fp:
            yaml.dump(cfg, fp, default_flow_style=False)

    # preprocessors
    preprocessors = dict()

    for key, preprocessor in cfg.get('preprocessors', {}).items():
        # preprocessors:
        #    key:
        #       name: package.module.ClassName
        #       params:
        #          param1: value1
        #          param2: value2
        if isinstance(preprocessor, dict):
            Klass = get_class_by_name(preprocessor['name'])
            preprocessors[key] = Klass(**preprocessor.get('params', {}))
            continue

        try:
            # preprocessors:
            #    key: /path/to/database.yml
            preprocessors[key] = FileFinder(preprocessor)

        except FileNotFoundError as e:
            # preprocessors:
            #    key: /path/to/{uri}.wav
            preprocessors[key] = preprocessor

    if 'audio' not in preprocessors:
        preprocessors['audio'] = FileFinder()

    if 'duration' not in preprocessors:
        preprocessors['duration'] = get_audio_duration

    cfg['preprocessors'] = preprocessors

    # scheduler
    SCHEDULER_DEFAULT = {
        'name': 'DavisKingScheduler',
        'params': {
            'learning_rate': 'auto'
        }
    }
    scheduler_cfg = cfg.get('scheduler', SCHEDULER_DEFAULT)
    Scheduler = get_class_by_name(
        scheduler_cfg['name'],
        default_module_name='pyannote.audio.train.schedulers')
    scheduler_params = scheduler_cfg.get('params', {})

    cfg['learning_rate'] = scheduler_params.pop('learning_rate', 'auto')
    cfg['scheduler'] = Scheduler(**scheduler_params)

    # optimizer
    OPTIMIZER_DEFAULT = {
        'name': 'SGD',
        'params': {
            'momentum': 0.9,
            'dampening': 0,
            'weight_decay': 0,
            'nesterov': True
        }
    }
    optimizer_cfg = cfg.get('optimizer', OPTIMIZER_DEFAULT)
    try:
        Optimizer = get_class_by_name(optimizer_cfg['name'],
                                      default_module_name='torch.optim')
        optimizer_params = optimizer_cfg.get('params', {})
        cfg['get_optimizer'] = partial(Optimizer, **optimizer_params)

    # do not raise an error here as it is possible that the optimizer is
    # not really needed (e.g. in pipeline training)
    except ModuleNotFoundError as e:
        warnings.warn(e.args[0])

    # data augmentation should only be active when training a model
    if training and 'data_augmentation' in cfg:
        DataAugmentation = get_class_by_name(
            cfg['data_augmentation']['name'],
            default_module_name='pyannote.audio.augmentation')
        augmentation = DataAugmentation(
            **cfg['data_augmentation'].get('params', {}))
    else:
        augmentation = None

    # custom callbacks
    callbacks = []
    for callback_config in cfg.get('callbacks', []):
        Callback = get_class_by_name(callback_config['name'])
        callback = Callback(**callback_config.get('params', {}))
        callbacks.append(callback)
    cfg['callbacks'] = callbacks

    # feature extraction
    FEATURE_DEFAULT = {'name': 'RawAudio', 'params': {'sample_rate': 16000}}
    feature_cfg = cfg.get('feature_extraction', FEATURE_DEFAULT)
    FeatureExtraction = get_class_by_name(
        feature_cfg['name'], default_module_name='pyannote.audio.features')
    feature_params = feature_cfg.get('params', {})
    cfg['feature_extraction'] = FeatureExtraction(**feature_params,
                                                  augmentation=augmentation)

    # task
    if config_default_module is None:
        config_default_module = 'pyannote.audio.labeling.tasks'

    try:
        TaskClass = get_class_by_name(
            cfg['task']['name'], default_module_name=config_default_module)
    except AttributeError:
        TaskClass = get_class_by_name(
            cfg['task']['name'],
            default_module_name='pyannote.audio.embedding.approaches')

    cfg['task'] = TaskClass(**cfg['task'].get('params', {}))

    # architecture
    Architecture = get_class_by_name(
        cfg['architecture']['name'],
        default_module_name='pyannote.audio.models')
    params = cfg['architecture'].get('params', {})

    cfg['get_model_from_specs'] = partial(Architecture, **params)
    cfg['model_resolution'] = Architecture.get_resolution(**params)
    cfg['model_alignment'] = Architecture.get_alignment(**params)

    return cfg
Example #26
    def apply(self,
              protocol_name: str,
              output_dir: Path,
              subset: Optional[str] = None):
        """Apply current best pipeline

        Parameters
        ----------
        protocol_name : `str`
            Name of pyannote.database protocol to process.
        subset : `str`, optional
            Subset to process. Defaults to processing all subsets.
        """

        # file generator
        protocol = get_protocol(protocol_name,
                                progress=True,
                                preprocessors=self.preprocessors_)

        output_dir.mkdir(parents=True, exist_ok=False)
        if subset is None:
            path = output_dir / f'{protocol_name}.all.txt'
        else:
            path = output_dir / f'{protocol_name}.{subset}.txt'

        # initialize evaluation metric
        try:
            metric = self.pipeline_.get_metric()
        except NotImplementedError as e:
            metric = None
            losses = []

        with open(path, mode='w') as fp:

            if subset is None:
                files = FileFinder.protocol_file_iter(protocol)
            else:
                files = getattr(protocol, subset)()

            for current_file in files:
                output = self.pipeline_(current_file)

                # evaluate output
                if metric is None:
                    loss = self.pipeline_.loss(current_file, output)
                    losses.append(loss)

                else:
                    from pyannote.database import get_annotated
                    _ = metric(current_file['annotation'],
                               output,
                               uem=get_annotated(current_file))

                self.pipeline_.write(fp, output)

        # report evaluation metric
        if metric is None:
            loss = np.mean(losses)
            print(f'Loss = {loss:g}')
        else:
            _ = metric.report(display=True)