Python MDTMParser.MDTMParser Exemples, pyannote.parser.MDTMParser.MDTMParser Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : __init__.py Projet : langdoc/pyannote-db-ikdp

    def dev_iter(self):
        """Yield one item per file of the development set.

        Annotations are read from 'protocol1.dev.mdtm', located in the 'data'
        directory next to this module. Files are visited in sorted uri order.

        Yields
        ------
        dict
            'database' (always 'ikdp'), 'uri' (file identifier) and
            'annotation' (what the parsed MDTM returns for that uri).
        """
        # resolve the MDTM path relative to the real location of this module
        package_dir = op.dirname(op.realpath(__file__))
        mdtm_path = op.join(package_dir, 'data', 'protocol1.dev.mdtm')

        parsed = MDTMParser().read(mdtm_path)

        # one item per file of the development set
        for file_uri in sorted(parsed.uris):
            yield {
                'database': 'ikdp',
                'uri': file_uri,
                'annotation': parsed(file_uri),
            }

Exemple #2

0

Afficher le fichier

    def tst_iter(self):
        """Yield one item per file of the test set.

        Annotations are read from 'fullref.mdtm', located in the 'data'
        directory next to this module. Files are visited in sorted uri order.

        Yields
        ------
        dict
            'database' (always 'CallHome'), 'uri', 'annotation' and
            'annotated' (extent of the annotation's timeline).
        """
        # resolve the MDTM path relative to the real location of this module
        package_dir = op.dirname(op.realpath(__file__))
        reference = MDTMParser().read(
            op.join(package_dir, 'data', 'fullref.mdtm'))

        # one item per file of the test set
        for file_uri in sorted(reference.uris):
            file_annotation = reference(file_uri)

            # NOTE(review): the original comment described 'annotated' as a
            # pyannote.core.Timeline; confirm that `extent()` returns the type
            # downstream consumers expect.
            whole_extent = file_annotation.get_timeline().extent()

            yield {
                'database': 'CallHome',
                'uri': file_uri,
                'annotation': file_annotation,
                'annotated': whole_extent,
            }

Exemple #3

0

Afficher le fichier

Fichier : __init__.py Projet : hbredin/pyannote-db-timit

    def dev_iter(self):
        """Yield one item per file of the development set.

        Annotations are read from 'TimitSpeakerVerificationProtocol.val.mdtm',
        located in the 'data' directory next to this module. MDTM is used here
        because a parser for it is readily available; it is not a requirement.
        Files are visited in sorted uri order.

        Yields
        ------
        dict
            'database' (always 'Timit'), 'uri' (file identifier) and
            'annotation' (what the parsed MDTM returns for that uri).
        """
        # resolve the MDTM path relative to the real location of this module
        package_dir = op.dirname(op.realpath(__file__))
        mdtm_path = op.join(package_dir, 'data',
                            'TimitSpeakerVerificationProtocol.val.mdtm')

        parsed = MDTMParser().read(mdtm_path)

        # one item per file of the development set
        for file_uri in sorted(parsed.uris):
            yield {
                'database': 'Timit',
                'uri': file_uri,
                'annotation': parsed(file_uri),
            }

Exemple #4

0

Afficher le fichier

Fichier : __init__.py Projet : langdoc/pyannote-db-ikdp

    def trn_iter(self):
        """Yield one item per file of the training set.

        Annotations are read from 'protocol1.train.mdtm', located in the
        'data' directory next to this module. MDTM is used here because a
        parser for it is readily available; it is not a requirement.
        Files are visited in sorted uri order.

        Yields
        ------
        dict
            'database' (always 'ikdp'), 'uri' (file identifier) and
            'annotation' (what the parsed MDTM returns for that uri).
        """
        # resolve the MDTM path relative to the real location of this module
        package_dir = op.dirname(op.realpath(__file__))
        mdtm_path = op.join(package_dir, 'data', 'protocol1.train.mdtm')

        # Niko's comment: reading ELAN files more directly with pympi could
        # be tested here later
        parsed = MDTMParser().read(mdtm_path)

        # one item per file of the training set
        for file_uri in sorted(parsed.uris):
            yield {
                'database': 'ikdp',
                'uri': file_uri,
                'annotation': parsed(file_uri),
            }

Exemple #5

0

Afficher le fichier

def test_load(sample):
    """Check the tracks parsed from `sample` for uri 'uri1', modality 'speech'."""
    expected_tracks = [
        (Segment(1, 3.5), 0, 'alice'),
        (Segment(3, 7.5), 1, 'barbara'),
        (Segment(6, 9), 2, 'chris'),
    ]

    loaded = MDTMParser().read(sample)
    uri1_speech = loaded(uri="uri1", modality="speech")
    observed_tracks = list(uri1_speech.itertracks(label=True))

    assert observed_tracks == expected_tracks

Exemple #6

0

Afficher le fichier

Fichier : pyannote_hmm.py Projet : AdolfVonKleist/pyannote-algorithms

def do_apply(model_pkl,
             features_pkl,
             hypothesis_mdtm,
             min_duration=0.250,
             constraint_mdtm=None):
    """Apply a pickled model to pickled features and write the result as MDTM.

    Parameters
    ----------
    model_pkl : str
        Path to the pickled model; its `min_duration` attribute is overwritten.
    features_pkl : str
        Path to the pickled features passed to the model's `apply`.
    hypothesis_mdtm : str
        Path of the MDTM file to write the hypothesis to.
    min_duration : float, optional
        Value assigned to the model's `min_duration`. Defaults to 0.250.
    constraint_mdtm : str, optional
        Path to an MDTM file; when given, its content is passed to the model
        as `constraint`. Otherwise `constraint` is None.
    """
    # NOTE: pickle can execute arbitrary code on load; only use files that
    # come from a trusted source.
    with open(model_pkl, 'rb') as model_file:
        hmm = pickle.load(model_file)
    hmm.min_duration = min_duration

    with open(features_pkl, 'rb') as features_file:
        features = pickle.load(features_file)

    # the constraint is optional: only parse it when a path was provided
    constraint = (MDTMParser().read(constraint_mdtm)()
                  if constraint_mdtm else None)

    hypothesis = hmm.apply(features, constraint=constraint)

    with open(hypothesis_mdtm, 'w') as output_file:
        MDTMParser().write(hypothesis, f=output_file)

Exemple #7

0

Afficher le fichier

        llss = []
        trials = getattr(protocol, '{subset}_trial'.format(subset=subset))()
        for current_trial in trials:
            reference = current_trial.pop('reference')
            hypothesis = speaker_spotting_try_diarization(current_trial)
            llss.append(process_trial(current_trial, hypothesis))

        import simplejson as json
        with open(output_file, 'w') as outfile:
            json.dump(llss, outfile)

    if arguments['automatic']:
        from pyannote.parser import MDTMParser
        diarization_mdtm = arguments['<diarization.mdtm>']
        parser = MDTMParser()
        annotations = parser.read(diarization_mdtm)

        REFERENCE = {}
        for uri_part in annotations.uris:
            uri = uri_part.split('_')[0] + '.Mix-Headset'
            if uri not in REFERENCE:
                REFERENCE[uri] = Annotation(uri=uri)
            REFERENCE[uri].update(annotations(uri=uri_part,
                                              modality="speaker"))

        llss = []
        trials = getattr(protocol, '{subset}_trial'.format(subset=subset))()

        for current_trial in trials:
            reference = current_trial.pop('reference')

Exemple #8

0

Afficher le fichier

import numpy as np

from pyannote.database import get_protocol, FileFinder
protocol = get_protocol('AMI.SpeakerSpotting.MixHeadset', progress=True)
from pyannote.core import Annotation, Segment, Timeline

# Build a uri -> Annotation mapping for the development subset: every item
# yielded by the protocol has its 'annotation' merged into the Annotation
# stored for its uri.
REFERENCE = {}
for current_file in protocol.development():
    uri = current_file['uri']
    # first item seen for this uri: start from an empty Annotation
    if uri not in REFERENCE:
        REFERENCE[uri] = Annotation(uri=uri)
    REFERENCE[uri].update(current_file['annotation'])

from pyannote.parser import MDTMParser
# NOTE(review): hard-coded absolute path to the development MDTM file;
# presumably a speech activity detection output (going by the `sad_` prefix)
# -- confirm before reuse on another machine.
sad_dev = '/people/yin/projects/online_clustering/spotting/AMI.SpeakerSpotting.MixHeadset.development.mdtm'
parser_dev = MDTMParser()
annotations_dev = parser_dev.read(sad_dev)
# uri -> support of the timeline of that file's "speaker" modality annotation
SAD = {}
for item in protocol.development():
    uri = item['uri']
    SAD[uri] = annotations_dev(uri=uri,
                               modality="speaker").get_timeline().support()


# Exception type with no behavior beyond `Exception`; where it is raised is
# not visible in this excerpt.
class PyannoteFeatureExtractionError(Exception):
    pass


class Precomputed(object):
    """Load precomputed features from HDF5 file
    Parameters

Exemple #9

0

Afficher le fichier

Fichier : __init__.py Projet : Yanivs24/SpeechSegmentor

 def __init__(self, preprocessors=None, **kwargs):
     """Initialise the parent protocol and create the MDTM parser.

     Parameters
     ----------
     preprocessors : dict, optional
         Forwarded to the parent constructor. When omitted (or None), a new
         empty dict is used.
     **kwargs
         Forwarded unchanged to the parent constructor.
     """
     # A fresh dict per call: a `{}` default would be one object shared by
     # every instance created without an explicit `preprocessors`.
     if preprocessors is None:
         preprocessors = {}
     super(SwitchBoardSpeakerRecognitionProtocol,
           self).__init__(preprocessors=preprocessors, **kwargs)
     self.mdtm_parser_ = MDTMParser()

Exemple #10

0

Afficher le fichier

Fichier : bic_clustering.py Projet : zXpp/pyannote-audio

    def tune(self, protocol_name, subset='development'):
        """Tune clustering hyper-parameters on one subset of a protocol.

        Runs `skopt.gp_minimize` (20 calls, 10 of them random starts) over the
        covariance type ('full' or 'diag') and the penalty coefficient (real
        value between 0 and 5). The objective is `abs()` of a
        `GreedyDiarizationErrorRate` instance handed to `helper_cluster_tune`
        for every file. After each call, a convergence plot and the current
        best parameters are written to the tuning directory.

        Parameters
        ----------
        protocol_name : str
            Name passed to `get_protocol`; also used to build file paths.
        subset : str, optional
            Name of the protocol method yielding the files to tune on.
            Defaults to 'development'.

        Returns
        -------
        best_params : dict
            {'covariance_type': ...} for the best point found.
        best_value
            `res.fun` of the optimisation result.
        """
        tune_dir = self.TUNE_DIR.format(experiment_dir=self.experiment_dir,
                                        protocol=protocol_name,
                                        subset=subset)

        mkdir_p(tune_dir)

        tune_yml = self.TUNE_YML.format(tune_dir=tune_dir)
        tune_png = self.TUNE_PNG.format(tune_dir=tune_dir)

        protocol = get_protocol(protocol_name,
                                progress=False,
                                preprocessors=self.preprocessors_)

        items = list(getattr(protocol, subset)())

        # segmentation
        segmentation_mdtm = self.SEGMENTATION_MDTM.format(
            segmentation_dir=self.segmentation_dir_,
            protocol=protocol_name,
            subset=subset)
        parser = MDTMParser().read(segmentation_mdtm)
        segmentations = [parser(item['uri']) for item in items]

        # features
        features = [self.feature_extraction_(item) for item in items]

        n_jobs = min(cpu_count(), len(items))
        pool = Pool(n_jobs)

        print(n_jobs, 'jobs')

        def callback(res):
            """Save the convergence plot and current best parameters."""

            # plot convergence
            import matplotlib
            matplotlib.use('Agg')
            import matplotlib.pyplot as plt
            import skopt.plots
            _ = skopt.plots.plot_convergence(res)
            plt.savefig(tune_png, dpi=75)
            plt.close()

            # save state
            params = {
                'status': {
                    'objective': float(res.fun)
                },
                'covariance_type': str(res.x[0]),
                'penalty_coef': float(res.x[1])
            }

            with io.open(tune_yml, 'w') as fp:
                yaml.dump(params, fp, default_flow_style=False)

        def objective_function(params):
            """Evaluate one (covariance_type, penalty_coef) point."""

            metric = GreedyDiarizationErrorRate()

            covariance_type, penalty_coef = params
            process_one_file = functools.partial(
                helper_cluster_tune,
                metric=metric,
                covariance_type=covariance_type,
                penalty_coef=penalty_coef)

            # the per-file return values are not used: only `metric` is read
            if n_jobs > 1:
                # NOTE(review): `metric` is sent to worker processes here;
                # confirm that per-file results really end up accumulated in
                # this process' `metric` before `abs(metric)` is taken.
                pool.map(process_one_file,
                         zip(items, segmentations, features))
            else:
                for isf in zip(items, segmentations, features):
                    process_one_file(isf)

            return abs(metric)

        space = [
            skopt.space.Categorical(['full', 'diag']),
            skopt.space.Real(0., 5., prior='uniform')
        ]

        try:
            res = skopt.gp_minimize(objective_function,
                                    space,
                                    random_state=1337,
                                    n_calls=20,
                                    n_random_starts=10,
                                    verbose=True,
                                    callback=callback)
        finally:
            # release the worker processes whether or not optimisation
            # succeeded (they were previously left running)
            pool.close()
            pool.join()

        # NOTE(review): only the covariance type is returned although the
        # penalty coefficient is tuned (and saved by `callback`) as well;
        # confirm this is what callers expect.
        return {'covariance_type': str(res.x[0])}, res.fun

Exemple #11

0

Afficher le fichier

Fichier : __init__.py Projet : Ruslanmlnkv/pyannote-db-odessa-ami

 def __init__(self, preprocessors=None, **kwargs):
     """Initialise the parent protocol and create the MDTM and UEM parsers.

     Parameters
     ----------
     preprocessors : dict, optional
         Forwarded to the parent constructor. When omitted (or None), a new
         empty dict is used.
     **kwargs
         Forwarded unchanged to the parent constructor.
     """
     # A fresh dict per call: a `{}` default would be one object shared by
     # every instance created without an explicit `preprocessors`.
     if preprocessors is None:
         preprocessors = {}
     super(OdessaAMISpeakerDiarizationProtocol, self).__init__(
         preprocessors=preprocessors, **kwargs)
     self.mdtm_parser_ = MDTMParser()
     self.uem_parser_ = UEMParser()

Exemple #12

0

Afficher le fichier

Fichier : speaker_spotting_no_cluster_pyannote_sad_ivec_test.py Projet : twistedmove/speaker_spotting

import numpy as np

from pyannote.database import get_protocol, FileFinder
protocol = get_protocol('AMI.SpeakerSpotting.MixHeadset', progress=True)
from pyannote.core import Annotation,Segment, Timeline

# Build a uri -> Annotation mapping for the test subset: every item yielded
# by the protocol has its 'annotation' merged into the Annotation stored for
# its uri.
REFERENCE = {}
for current_file in protocol.test():
    uri = current_file['uri']
    # first item seen for this uri: start from an empty Annotation
    if uri not in REFERENCE:
        REFERENCE[uri] = Annotation(uri=uri)
    REFERENCE[uri].update(current_file['annotation'])
   
from pyannote.parser import MDTMParser
# NOTE(review): hard-coded absolute path to the test MDTM file; presumably a
# speech activity detection output (going by the `sad_` prefix) -- confirm
# before reuse on another machine.
sad_tst = '/people/yin/projects/online_clustering/spotting_test/AMI.SpeakerSpotting.MixHeadset.test.mdtm'
parser_tst = MDTMParser()
annotations_tst = parser_tst.read(sad_tst)
# uri -> support of the timeline of that file's "speaker" modality annotation
SAD = {}
for item in protocol.test():
    uri = item['uri']
    SAD[uri] = annotations_tst(uri=uri, modality="speaker").get_timeline().support()

    
# Exception type with no behavior beyond `Exception`; where it is raised is
# not visible in this excerpt.
class PyannoteFeatureExtractionError(Exception):
    pass
class Precomputed(object):
    """Load precomputed features from HDF5 file
    Parameters
    ----------
    features_h5 : str
        Path to HDF5 file generated by script 'feature_extraction.py'.

Exemple #13

0

Afficher le fichier

    def apply(self, protocol_name, subset='test'):
        """Apply the tuned model to a protocol subset and write the results.

        Loads the tuning results (stored in `self.tune_`), loads the model of
        the tuned epoch, writes the aggregated predictions of every file to
        HDF5 files below the "apply" directory, then binarizes them with the
        tuned onset/offset and writes all files to a single MDTM file with
        modality 'speaker'.

        Parameters
        ----------
        protocol_name : str
            Name passed to `get_protocol`; also used in the MDTM file path.
        subset : str, optional
            Name of the protocol method yielding the files to process.
            Defaults to 'test'.
        """

        apply_dir = self.APPLY_DIR.format(tune_dir=self.tune_dir_)

        mkdir_p(apply_dir)

        # load tuning results
        tune_yml = self.TUNE_YML.format(tune_dir=self.tune_dir_)
        with io.open(tune_yml, 'r') as fp:
            # The loader is named explicitly because recent PyYAML releases
            # refuse `yaml.load` without one.
            # NOTE: `yaml.Loader` can build arbitrary Python objects; this is
            # acceptable only as long as the tuning file is trusted.
            self.tune_ = yaml.load(fp, Loader=yaml.Loader)

        # load model for epoch 'epoch'
        epoch = self.tune_['epoch']
        sequence_labeling = SequenceLabeling.from_disk(self.train_dir_, epoch)

        # initialize sequence labeling
        duration = self.config_['sequences']['duration']
        step = self.config_['sequences']['step']
        aggregation = SequenceLabelingAggregation(sequence_labeling,
                                                  self.feature_extraction_,
                                                  duration=duration,
                                                  step=step)

        # initialize protocol
        protocol = get_protocol(protocol_name,
                                progress=True,
                                preprocessors=self.preprocessors_)

        def write_window_attrs(h5_file, sliding_window):
            """Store sliding window and dimension information as attributes."""
            h5_file.attrs['start'] = sliding_window.start
            h5_file.attrs['duration'] = sliding_window.duration
            h5_file.attrs['step'] = sliding_window.step
            h5_file.attrs['dimension'] = 2

        for i, item in enumerate(getattr(protocol, subset)()):

            prediction = aggregation.apply(item)

            if i == 0:
                # create metadata file at root that contains
                # sliding window and dimension information
                path = Precomputed.get_config_path(apply_dir)
                # mode 'a' (read/write, create if missing) is requested
                # explicitly: recent h5py releases open read-only by default
                with h5py.File(path, mode='a') as f:
                    write_window_attrs(f, prediction.sliding_window)

            path = Precomputed.get_path(apply_dir, item)

            # create parent directory
            mkdir_p(dirname(path))

            # `with` closes the file even if writing fails
            with h5py.File(path, mode='a') as f:
                write_window_attrs(f, prediction.sliding_window)
                f.create_dataset('features', data=prediction.data)

        # initialize binarizer
        onset = self.tune_['onset']
        offset = self.tune_['offset']
        binarize = Binarize(onset=onset, offset=offset)

        precomputed = Precomputed(root_dir=apply_dir)

        writer = MDTMParser()
        path = self.HARD_MDTM.format(apply_dir=apply_dir,
                                     protocol=protocol_name,
                                     subset=subset)
        with io.open(path, mode='w') as gp:
            for item in getattr(protocol, subset)():
                prediction = precomputed(item)
                segmentation = binarize.apply(prediction, dimension=1)
                writer.write(segmentation.to_annotation(),
                             f=gp,
                             uri=item['uri'],
                             modality='speaker')

Exemple #14

0

Afficher le fichier

Fichier : __init__.py Projet : Ruslanmlnkv/pyannote-db-etape

 def __init__(self, **kwargs):
     """Initialise the parent protocol, then create the UEM and MDTM parsers.

     All keyword arguments are forwarded unchanged to the parent constructor.
     """
     super(EtapeSpeakerDiarizationProtocol, self).__init__(**kwargs)
     self.uem_parser_, self.mdtm_parser_ = UEMParser(), MDTMParser()

Exemple #15

0

Afficher le fichier

Fichier : __init__.py Projet : dhillonr/db

 def __init__(self, preprocessors=None, **kwargs):
     """Initialise the parent protocol and create the MDTM parser.

     Parameters
     ----------
     preprocessors : dict, optional
         Forwarded to the parent constructor. When omitted (or None), a new
         empty dict is used.
     **kwargs
         Forwarded unchanged to the parent constructor.
     """
     # A fresh dict per call: a `{}` default would be one object shared by
     # every instance created without an explicit `preprocessors`.
     if preprocessors is None:
         preprocessors = {}
     super(GameOfThronesSpeakerDiarizationProtocol,
           self).__init__(preprocessors=preprocessors, **kwargs)
     self.mdtm_parser_ = MDTMParser()