Example #1
def test_load(sample):
    parser = MDTMParser()
    annotations = parser.read(sample)
    speech1 = annotations(uri="uri1", modality="speech")
    assert list(speech1.itertracks(label=True)) == [
        (Segment(1, 3.5), 0, 'alice'),
        (Segment(3, 7.5), 1, 'barbara'),
        (Segment(6, 9), 2, 'chris') ]
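The sample fixture itself is not shown on this page; below is a minimal sketch of an MDTM file that would yield the three tracks asserted above, assuming the usual column order (uri, channel, start, duration, modality, confidence, subtype, label):

# Sketch only: not the actual 'sample' fixture used by this test.
SAMPLE_MDTM = """\
uri1 1 1.0 2.5 speech NA unknown alice
uri1 1 3.0 4.5 speech NA unknown barbara
uri1 1 6.0 3.0 speech NA unknown chris
"""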
Example #2
def test_load(sample):
    parser = MDTMParser()
    annotations = parser.read(sample)
    speech1 = annotations(uri="uri1", modality="speech")
    assert list(speech1.itertracks(label=True)) == [
        (Segment(1, 3.5), 0, 'alice'), (Segment(3, 7.5), 1, 'barbara'),
        (Segment(6, 9), 2, 'chris')
    ]
Example #3
    def dev_iter(self):
        # here, you should do the same as above, but for the development set
        # absolute path to 'data' directory where annotations are stored

        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')

        annotations = MDTMParser().read(op.join(data_dir,
                                                'protocol1.dev.mdtm'))

        # iterate over each file in the development set
        for uri in sorted(annotations.uris):

            # get annotations as pyannote.core.Annotation instance
            annotation = annotations(uri)

            # `trn_iter` (as well as `dev_iter` and `tst_iter`) are expected
            # to yield dictionary with the following fields:
            yield {
                # name of the database class
                'database': 'ikdp',
                # unique file identifier
                'uri': uri,
                # reference as pyannote.core.Annotation instance
                'annotation': annotation
            }
Example #4
    def trn_iter(self):

        # absolute path to 'data' directory where annotations are stored

        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')

        # in this example, we assume annotations are distributed in MDTM format.
        # this is obviously not mandatory but pyannote.parser conveniently
        # provides a built-in parser for MDTM files...
        # Niko's comment: This can be later tested with reading ELAN files
        # more directly with pympi
        annotations = MDTMParser().read(
            op.join(data_dir, 'protocol1.train.mdtm'))

        # iterate over each file in training set
        for uri in sorted(annotations.uris):

            # get annotations as pyannote.core.Annotation instance
            annotation = annotations(uri)

            # `trn_iter` (as well as `dev_iter` and `tst_iter`) are expected
            # to yield dictionary with the following fields:
            yield {
                # name of the database class
                'database': 'ikdp',
                # unique file identifier
                'uri': uri,
                # reference as pyannote.core.Annotation instance
                'annotation': annotation
            }
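Once `trn_iter`, `dev_iter` and `tst_iter` are defined on a protocol class and the database plugin is installed, pyannote.database exposes them as `train()`, `development()` and `test()` generators. A rough consumption sketch, with a hypothetical protocol name:

# Hypothetical protocol name; protocol.train() is backed by trn_iter() above.
from pyannote.database import get_protocol

protocol = get_protocol('ikdp.SpeakerDiarization.Protocol1')
for current_file in protocol.train():
    uri = current_file['uri']                # unique file identifier
    annotation = current_file['annotation']  # pyannote.core.Annotation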
Example #5
class EsterSpeakerDiarizationProtocol(SpeakerDiarizationProtocol):
    """Base speaker diarization protocol for ESTER database"""
    def __init__(self, preprocessors={}, **kwargs):
        super(EsterSpeakerDiarizationProtocol,
              self).__init__(preprocessors=preprocessors, **kwargs)
        self.uem_parser_ = UEMParser()
        self.mdtm_parser_ = MDTMParser()

    def _subset(self, protocol, subset):

        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')

        # load annotated parts
        # e.g. /data/{tv|radio|all}.{train|dev|test}.uem
        path = op.join(
            data_dir, '{protocol}.{subset}.uem'.format(subset=subset,
                                                       protocol=protocol))
        uems = self.uem_parser_.read(path)

        # load annotations
        path = op.join(
            data_dir, '{protocol}.{subset}.mdtm'.format(subset=subset,
                                                        protocol=protocol))
        mdtms = self.mdtm_parser_.read(path)

        for uri in sorted(uems.uris):
            annotated = uems(uri)
            annotation = mdtms(uri)
            current_file = {
                'database': 'Ester',
                'uri': uri,
                'annotated': annotated,
                'annotation': annotation
            }
            yield current_file
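All the work in this base class happens in `_subset`; concrete protocols typically just bind a protocol name and subset to it. A sketch of what such a subclass might look like (the subclass and the 'tv' prefix are assumptions inferred from the path comment above):

# Hypothetical subclass; it only illustrates how _subset is usually wired up.
class TV(EsterSpeakerDiarizationProtocol):
    """Speaker diarization restricted to the 'tv' files"""

    def trn_iter(self):
        return self._subset('tv', 'train')

    def dev_iter(self):
        return self._subset('tv', 'dev')

    def tst_iter(self):
        return self._subset('tv', 'test')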
Example #6
class LibriSpeechSpeakerRecognitionProtocol(SpeakerDiarizationProtocol):
    """My first speaker diarization protocol """
    def __init__(self, preprocessors={}, **kwargs):
        super(LibriSpeechSpeakerRecognitionProtocol,
              self).__init__(preprocessors=preprocessors, **kwargs)
        self.mdtm_parser_ = MDTMParser()

    def _subset(self, protocol, subset):

        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')

        # load annotations
        path = op.join(
            data_dir,
            'librispeech-{protocol}.{subset}.mdtm'.format(subset=subset,
                                                          protocol=protocol))
        mdtms = self.mdtm_parser_.read(path)

        for uri in sorted(mdtms.uris):
            annotation = mdtms(uri)
            current_file = {
                'database': 'LibriSpeech',
                'uri': uri,
                'annotation': annotation,
                # annotated part as pyannote.core.Timeline instance
                'annotated': Timeline(
                    uri=uri,
                    segments=[annotation.get_timeline().extent()])
            }

            yield current_file
Example #7
    def tst_iter(self):
        # absolute path to 'data' directory where annotations are stored
        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')

        annotations = MDTMParser().read(
            op.join(data_dir, 'fullref.mdtm'))

        # iterate over each file in the test set
        for uri in sorted(annotations.uris):

            # get annotations as pyannote.core.Annotation instance
            annotation = annotations(uri)

            # `trn_iter` (as well as `dev_iter` and `tst_iter`) are expected
            # to yield dictionary with the following fields:
            yield {
                # name of the database class
                'database': 'CallHome',
                # unique file identifier
                'uri': uri,
                # reference as pyannote.core.Annotation instance
                'annotation': annotation,
                # annotated part as pyannote.core.Timeline instance
                'annotated': annotation.get_timeline().extent()
            }
Example #8
class SwitchBoardSpeakerRecognitionProtocol(SpeakerDiarizationProtocol):
    """My first speaker diarization protocol """
    def __init__(self, preprocessors={}, **kwargs):
        super(SwitchBoardSpeakerRecognitionProtocol,
              self).__init__(preprocessors=preprocessors, **kwargs)
        self.mdtm_parser_ = MDTMParser()

    def _subset(self, protocol, subset):

        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')

        # load annotations
        path = op.join(
            data_dir,
            'switchboard-{protocol}.{subset}.mdtm'.format(subset=subset,
                                                          protocol=protocol))
        mdtms = self.mdtm_parser_.read(path)

        for uri in sorted(mdtms.uris):
            annotation = mdtms(uri)
            current_file = {
                'database': 'SwitchBoard',
                'uri': uri,
                'annotation': annotation
            }
            yield current_file
Example #9
    def dev_iter(self):
        # here, you should do the same as above, but for the development set
        # absolute path to 'data' directory where annotations are stored
        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')

        # in this example, we assume annotations are distributed in MDTM format.
        # this is obviously not mandatory but pyannote.parser conveniently
        # provides a built-in parser for MDTM files...
        annotations = MDTMParser().read(
            op.join(data_dir, 'TimitSpeakerVerificationProtocol.val.mdtm'))

        # iterate over each file in the development set
        for uri in sorted(annotations.uris):

            # get annotations as pyannote.core.Annotation instance
            annotation = annotations(uri)

            # `trn_iter` (as well as `dev_iter` and `tst_iter`) are expected
            # to yield dictionary with the following fields:
            yield {
                # name of the database class
                'database': 'Timit',
                # unique file identifier
                'uri': uri,
                # reference as pyannote.core.Annotation instance
                'annotation': annotation
            }
Example #10
def do_apply(model_pkl,
             features_pkl,
             hypothesis_mdtm,
             min_duration=0.250,
             constraint_mdtm=None):

    with open(model_pkl, 'rb') as f:
        hmm = pickle.load(f)

    hmm.min_duration = min_duration

    with open(features_pkl, 'rb') as f:
        features = pickle.load(f)

    constraint = None
    if constraint_mdtm:
        constraint = MDTMParser().read(constraint_mdtm)()

    hypothesis = hmm.apply(features, constraint=constraint)

    with open(hypothesis_mdtm, 'w') as f:
        MDTMParser().write(hypothesis, f=f)
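A short usage sketch for `do_apply`; every file name below is a placeholder rather than a path from the original project:

# Placeholder paths; the optional constraint_mdtm would restrict decoding.
do_apply('hmm_model.pkl',      # pickled, trained HMM
         'features.pkl',       # pickled precomputed features
         'hypothesis.mdtm',    # output: hypothesis written in MDTM format
         min_duration=0.250)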
Example #11
class EtapeSpeakerDiarizationProtocol(SpeakerDiarizationProtocol):
    """Base speaker diarization protocol for ETAPE database

    This class should be inherited from, not used directly.

    Parameters
    ----------
    preprocessors : dict or (key, preprocessor) iterable
        When provided, each protocol item (dictionary) is preprocessed, such
        that item[key] = preprocessor(**item). In case 'preprocessor' is not
        callable, it should be a string containing a placeholder for item keys
        (e.g. {'wav': '/path/to/{uri}.wav'})
    """
    def __init__(self, preprocessors={}, **kwargs):
        super(EtapeSpeakerDiarizationProtocol,
              self).__init__(preprocessors=preprocessors, **kwargs)
        self.uem_parser_ = UEMParser()
        self.mdtm_parser_ = MDTMParser()

    def _subset(self, protocol, subset):

        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')

        # load annotated parts
        # e.g. /data/{tv|radio|all}.{train|dev|test}.uem
        path = op.join(
            data_dir, '{protocol}.{subset}.uem'.format(subset=subset,
                                                       protocol=protocol))
        uems = self.uem_parser_.read(path)

        # load annotations
        path = op.join(
            data_dir, '{protocol}.{subset}.mdtm'.format(subset=subset,
                                                        protocol=protocol))
        mdtms = self.mdtm_parser_.read(path)

        for uri in sorted(uems.uris):
            annotated = uems(uri)
            annotation = mdtms(uri)
            current_file = {
                'database': 'Etape',
                'uri': uri,
                'annotated': annotated,
                'annotation': annotation
            }
            yield current_file
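The `preprocessors` mechanism described in the docstring can be exercised with a non-callable (string template) preprocessor; the protocol name and path template below are purely illustrative:

# Illustrative only: a string preprocessor acts as a format template filled
# with the item's keys, so every yielded file also carries a 'wav' key.
from pyannote.database import get_protocol

protocol = get_protocol('Etape.SpeakerDiarization.TV',
                        preprocessors={'wav': '/path/to/{uri}.wav'})
for current_file in protocol.development():
    current_file['wav']   # e.g. '/path/to/<uri>.wav'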
Example #12
        llss = []
        trials = getattr(protocol, '{subset}_trial'.format(subset=subset))()
        for current_trial in trials:
            reference = current_trial.pop('reference')
            hypothesis = speaker_spotting_try_diarization(current_trial)
            llss.append(process_trial(current_trial, hypothesis))

        import simplejson as json
        with open(output_file, 'w') as outfile:
            json.dump(llss, outfile)

    if arguments['automatic']:
        from pyannote.parser import MDTMParser
        diarization_mdtm = arguments['<diarization.mdtm>']
        parser = MDTMParser()
        annotations = parser.read(diarization_mdtm)

        REFERENCE = {}
        for uri_part in annotations.uris:
            uri = uri_part.split('_')[0] + '.Mix-Headset'
            if uri not in REFERENCE:
                REFERENCE[uri] = Annotation(uri=uri)
            REFERENCE[uri].update(annotations(uri=uri_part,
                                              modality="speaker"))

        llss = []
        trials = getattr(protocol, '{subset}_trial'.format(subset=subset))()

        for current_trial in trials:
            reference = current_trial.pop('reference')
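The uri remapping above folds per-part diarization output back onto the corresponding mixed-headset file; a tiny illustration, assuming part identifiers of the form '<meeting>_<index>':

# Hypothetical part identifier; only its '<meeting>_<index>' shape matters.
uri_part = 'ES2004a_0'
uri = uri_part.split('_')[0] + '.Mix-Headset'
assert uri == 'ES2004a.Mix-Headset'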
Example #13
File: __init__.py Project: dhillonr/db
    def __init__(self, preprocessors={}, **kwargs):
        super(GameOfThronesSpeakerDiarizationProtocol,
              self).__init__(preprocessors=preprocessors, **kwargs)
        self.mdtm_parser_ = MDTMParser()
Example #14
    def __init__(self, preprocessors={}, **kwargs):
        super(SwitchBoardSpeakerRecognitionProtocol,
              self).__init__(preprocessors=preprocessors, **kwargs)
        self.mdtm_parser_ = MDTMParser()
Example #15
import numpy as np

from pyannote.database import get_protocol, FileFinder
protocol = get_protocol('AMI.SpeakerSpotting.MixHeadset', progress=True)
from pyannote.core import Annotation, Segment, Timeline

REFERENCE = {}
for current_file in protocol.development():
    uri = current_file['uri']
    if uri not in REFERENCE:
        REFERENCE[uri] = Annotation(uri=uri)
    REFERENCE[uri].update(current_file['annotation'])

from pyannote.parser import MDTMParser
sad_dev = '/people/yin/projects/online_clustering/spotting/AMI.SpeakerSpotting.MixHeadset.development.mdtm'
parser_dev = MDTMParser()
annotations_dev = parser_dev.read(sad_dev)
SAD = {}
for item in protocol.development():
    uri = item['uri']
    SAD[uri] = annotations_dev(uri=uri,
                               modality="speaker").get_timeline().support()


class PyannoteFeatureExtractionError(Exception):
    pass


class Precomputed(object):
    """Load precomputed features from HDF5 file
    Parameters
Example #16
        llss = []
        trials = getattr(protocol, '{subset}_trial'.format(subset=subset))()
        for current_trial in trials:
            reference = current_trial.pop('reference')
            hypothesis = speaker_spotting_try_diarization(current_trial)
            llss.append(process_trial(current_trial, hypothesis))

        import simplejson as json
        with open(output_file, 'w') as outfile:  
            json.dump(llss, outfile)

    if arguments['automatic']:
        from pyannote.parser import MDTMParser
        diarization_mdtm = arguments['<diarization.mdtm>']
        parser = MDTMParser()
        annotations = parser.read(diarization_mdtm)

        REFERENCE = {}
        for uri_part in annotations.uris:
            uri = uri_part.split('_')[0] + '.Mix-Headset'
            if uri not in REFERENCE:
                REFERENCE[uri] = Annotation(uri=uri)
            REFERENCE[uri].update(annotations(uri=uri_part, modality="speaker"))

        llss = []
        trials = getattr(protocol, '{subset}_trial'.format(subset=subset))()

        for current_trial in trials:
            reference = current_trial.pop('reference')
            hypothesis = speaker_spotting_try_diarization(current_trial)
Example #17
    def __init__(self, **kwargs):
        super(EtapeSpeakerDiarizationProtocol, self).__init__(**kwargs)
        self.uem_parser_ = UEMParser()
        self.mdtm_parser_ = MDTMParser()
Example #18
# enrolment consists in summing all relevant embeddings
def speaker_spotting_enrol(current_enrolment):
    enrol_with = current_enrolment['enrol_with']
    embeddings = precomputed(current_enrolment)
    return np.sum(embeddings.crop(enrol_with), axis=0, keepdims=True)


models = {}
for current_enrolment in protocol.development_enrolment():
    model_id = current_enrolment.pop('model_id')
    models[model_id] = speaker_spotting_enrol(current_enrolment)

from pyannote.parser import MDTMParser
cluster_mdtm = '/people/yin/projects/online_clustering/spotting/EURECOM-online-diarization-pyannote-VAD.dev.WithOffset.mdtm'
parser_dev = MDTMParser()
annotations_dev = parser_dev.read(cluster_mdtm)

REFERENCE = {}
for uri_part in annotations_dev.uris:
    uri = uri_part.split('_')[0] + '.Mix-Headset'
    if uri not in REFERENCE:
        REFERENCE[uri] = Annotation(uri=uri)
    REFERENCE[uri].update(annotations_dev(uri=uri_part, modality="speaker"))

# Trials

from pyannote.core import SlidingWindow, SlidingWindowFeature
from pyannote.audio.embedding.utils import cdist

Example #19
    def tune(self, protocol_name, subset='development'):

        tune_dir = self.TUNE_DIR.format(experiment_dir=self.experiment_dir,
                                        protocol=protocol_name,
                                        subset=subset)

        mkdir_p(tune_dir)

        tune_yml = self.TUNE_YML.format(tune_dir=tune_dir)
        tune_png = self.TUNE_PNG.format(tune_dir=tune_dir)

        protocol = get_protocol(protocol_name,
                                progress=False,
                                preprocessors=self.preprocessors_)

        items = list(getattr(protocol, subset)())

        # segmentation
        segmentation_mdtm = self.SEGMENTATION_MDTM.format(
            segmentation_dir=self.segmentation_dir_,
            protocol=protocol_name,
            subset=subset)
        parser = MDTMParser().read(segmentation_mdtm)
        segmentations = [parser(item['uri']) for item in items]

        # features
        features = [self.feature_extraction_(item) for item in items]

        n_jobs = min(cpu_count(), len(items))
        pool = Pool(n_jobs)

        print(n_jobs, 'jobs')

        def callback(res):

            # plot convergence
            import matplotlib
            matplotlib.use('Agg')
            import matplotlib.pyplot as plt
            import skopt.plots
            _ = skopt.plots.plot_convergence(res)
            plt.savefig(tune_png, dpi=75)
            plt.close()

            # save state
            params = {
                'status': {
                    'objective': float(res.fun)
                },
                'covariance_type': str(res.x[0]),
                'penalty_coef': float(res.x[1])
            }

            with io.open(tune_yml, 'w') as fp:
                yaml.dump(params, fp, default_flow_style=False)

        def objective_function(params):

            metric = GreedyDiarizationErrorRate()

            covariance_type, penalty_coef, = params
            process_one_file = functools.partial(
                helper_cluster_tune,
                metric=metric,
                covariance_type=covariance_type,
                penalty_coef=penalty_coef)

            if n_jobs > 1:
                results = list(
                    pool.map(process_one_file,
                             zip(items, segmentations, features)))
            else:
                results = [
                    process_one_file(isf)
                    for isf in zip(items, segmentations, features)
                ]

            return abs(metric)

        space = [
            skopt.space.Categorical(['full', 'diag']),
            skopt.space.Real(0., 5., prior='uniform')
        ]

        res = skopt.gp_minimize(objective_function,
                                space,
                                random_state=1337,
                                n_calls=20,
                                n_random_starts=10,
                                verbose=True,
                                callback=callback)

        return {'covariance_type': str(res.x[0])}, res.fun
Example #20
# enrolment consists in summing all relevant embeddings
def speaker_spotting_enrol(current_enrolment):
    enrol_with = current_enrolment['enrol_with']
    embeddings = precomputed(current_enrolment)
    return np.sum(embeddings.crop(enrol_with), axis=0, keepdims=True)


models = {}
for current_enrolment in protocol.test_enrolment():
    model_id = current_enrolment.pop('model_id')
    models[model_id] = speaker_spotting_enrol(current_enrolment)

from pyannote.parser import MDTMParser

cluster_mdtm = 'OD_AVAD_tst.mdtm'
parser_tst = MDTMParser()
annotations_tst = parser_tst.read(cluster_mdtm)

REFERENCE = {}
for uri_part in annotations_tst.uris:
    uri = uri_part.split('_')[0] + '.Mix-Headset'
    if uri not in REFERENCE:
        REFERENCE[uri] = Annotation(uri=uri)
    REFERENCE[uri].update(annotations_tst(uri=uri_part, modality="speaker"))

# Trials

from pyannote.core import SlidingWindow, SlidingWindowFeature
from pyannote.audio.embedding.utils import cdist

Example #21
    def __init__(self, preprocessors={}, **kwargs):
        super(OdessaAMISpeakerDiarizationProtocol, self).__init__(
            preprocessors=preprocessors, **kwargs)
        self.mdtm_parser_ = MDTMParser()
        self.uem_parser_ = UEMParser()
Example #22
import numpy as np

from pyannote.database import get_protocol, FileFinder
protocol = get_protocol('AMI.SpeakerSpotting.MixHeadset', progress=True)
from pyannote.core import Annotation, Segment, Timeline

REFERENCE = {}
for current_file in protocol.test():
    uri = current_file['uri']
    if uri not in REFERENCE:
        REFERENCE[uri] = Annotation(uri=uri)
    REFERENCE[uri].update(current_file['annotation'])
   
from pyannote.parser import MDTMParser
sad_tst = '/people/yin/projects/online_clustering/spotting_test/AMI.SpeakerSpotting.MixHeadset.test.mdtm'
parser_tst = MDTMParser()
annotations_tst = parser_tst.read(sad_tst)
SAD = {}
for item in protocol.test():
    uri = item['uri']
    SAD[uri] = annotations_tst(uri=uri,
                               modality="speaker").get_timeline().support()


class PyannoteFeatureExtractionError(Exception):
    pass


class Precomputed(object):
    """Load precomputed features from HDF5 file
    Parameters
    ----------
    features_h5 : str
        Path to HDF5 file generated by script 'feature_extraction.py'.
Example #23
    def apply(self, protocol_name, subset='test'):

        apply_dir = self.APPLY_DIR.format(tune_dir=self.tune_dir_)

        mkdir_p(apply_dir)

        # load tuning results
        tune_yml = self.TUNE_YML.format(tune_dir=self.tune_dir_)
        with io.open(tune_yml, 'r') as fp:
            self.tune_ = yaml.load(fp)

        # load model for epoch 'epoch'
        epoch = self.tune_['epoch']
        sequence_labeling = SequenceLabeling.from_disk(self.train_dir_, epoch)

        # initialize sequence labeling
        duration = self.config_['sequences']['duration']
        step = self.config_['sequences']['step']
        aggregation = SequenceLabelingAggregation(sequence_labeling,
                                                  self.feature_extraction_,
                                                  duration=duration,
                                                  step=step)

        # initialize protocol
        protocol = get_protocol(protocol_name,
                                progress=True,
                                preprocessors=self.preprocessors_)

        for i, item in enumerate(getattr(protocol, subset)()):

            prediction = aggregation.apply(item)

            if i == 0:
                # create metadata file at root that contains
                # sliding window and dimension information
                path = Precomputed.get_config_path(apply_dir)
                f = h5py.File(path)
                f.attrs['start'] = prediction.sliding_window.start
                f.attrs['duration'] = prediction.sliding_window.duration
                f.attrs['step'] = prediction.sliding_window.step
                f.attrs['dimension'] = 2
                f.close()

            path = Precomputed.get_path(apply_dir, item)

            # create parent directory
            mkdir_p(dirname(path))

            f = h5py.File(path)
            f.attrs['start'] = prediction.sliding_window.start
            f.attrs['duration'] = prediction.sliding_window.duration
            f.attrs['step'] = prediction.sliding_window.step
            f.attrs['dimension'] = 2
            f.create_dataset('features', data=prediction.data)
            f.close()

        # initialize binarizer
        onset = self.tune_['onset']
        offset = self.tune_['offset']
        binarize = Binarize(onset=onset, offset=offset)

        precomputed = Precomputed(root_dir=apply_dir)

        writer = MDTMParser()
        path = self.HARD_MDTM.format(apply_dir=apply_dir,
                                     protocol=protocol_name,
                                     subset=subset)
        with io.open(path, mode='w') as gp:
            for item in getattr(protocol, subset)():
                prediction = precomputed(item)
                segmentation = binarize.apply(prediction, dimension=1)
                writer.write(segmentation.to_annotation(),
                             f=gp,
                             uri=item['uri'],
                             modality='speaker')
Example #24
videos = [line.strip() for line in f.readlines()]
f.close()

# standard condition
standard_condition = UEMParser("data/standard_condition.uem")

# annotated frames
annotated_frames = UEMParser("data/annotated_frames.uem")

# list of anchors
f = open("data/anchors.txt", "r")
anchors = [line.strip() for line in f.readlines()]
f.close()

# manual speaker identification
manual_speaker_identification = MDTMParser("data/manual_speaker.mdtm", \
                                           multitrack=True)

# --------------------------------------------------
# LOAD MONOMODAL COMPONENTS OUTPUT ON TEST SET
# as described in Section "2. Monomodal Components"
# --------------------------------------------------

# automatic speaker diarization
auto_speaker_diarization = MDTMParser("data/auto_speaker_diarization.mdtm", \
                                      multitrack=True)

# automatic speaker identification
auto_speaker_identification = \
                     REPEREParser("data/auto_speaker_identification.repere", \
                                  multitrack=True, confidence=False)