Esempio n. 1
0
def test_load(sample):
    """Parse `sample` with UEMParser and check the segments of 'uri1'."""
    loaded = UEMParser().read(sample)
    expected_segments = [
        Segment(1, 3.5),
        Segment(3, 7.5),
        Segment(6, 9),
    ]
    assert list(loaded(uri="uri1")) == expected_segments
Esempio n. 2
0
class EsterSpeakerDiarizationProtocol(SpeakerDiarizationProtocol):
    """Base speaker diarization protocol for ESTER database

    Parameters
    ----------
    preprocessors : dict or (key, preprocessor) iterable, optional
        Forwarded as-is to the parent protocol constructor. When omitted
        (or None), an empty dict is used.
    **kwargs
        Extra keyword arguments forwarded to the parent protocol constructor.
    """
    def __init__(self, preprocessors=None, **kwargs):
        # Use a None sentinel instead of a `{}` default: a dict literal in the
        # signature is created once and shared by every call.
        if preprocessors is None:
            preprocessors = {}
        super(EsterSpeakerDiarizationProtocol,
              self).__init__(preprocessors=preprocessors, **kwargs)
        self.uem_parser_ = UEMParser()
        self.mdtm_parser_ = MDTMParser()

    def _subset(self, protocol, subset):
        """Iterate over the files of one subset of one protocol

        Both '{protocol}.{subset}.uem' and '{protocol}.{subset}.mdtm' are
        read from the 'data' directory located next to this module.

        Parameters
        ----------
        protocol : str
            First component of the data file names (e.g. 'tv', 'radio', 'all').
        subset : str
            Second component of the data file names (e.g. 'train', 'dev',
            'test').

        Yields
        ------
        current_file : dict
            Dictionary with keys 'database', 'uri', 'annotated' (looked up in
            the UEM file) and 'annotation' (looked up in the MDTM file), one
            per URI of the UEM file, in sorted URI order.
        """

        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')

        # both files share the same '{protocol}.{subset}' stem
        # e.g. /data/{tv|radio|all}.{train|dev|test}.{uem|mdtm}
        stem = '{protocol}.{subset}'.format(protocol=protocol, subset=subset)

        # load annotated parts
        uems = self.uem_parser_.read(op.join(data_dir, stem + '.uem'))

        # load annotations
        mdtms = self.mdtm_parser_.read(op.join(data_dir, stem + '.mdtm'))

        # the UEM file drives the iteration: one item per annotated URI
        for uri in sorted(uems.uris):
            yield {
                'database': 'Ester',
                'uri': uri,
                'annotated': uems(uri),
                'annotation': mdtms(uri),
            }
Esempio n. 3
0
class EtapeSpeakerDiarizationProtocol(SpeakerDiarizationProtocol):
    """Base speaker diarization protocol for ETAPE database

    This class should be inherited from, not used directly.

    Parameters
    ----------
    preprocessors : dict or (key, preprocessor) iterable, optional
        When provided, each protocol item (dictionary) is preprocessed, such
        that item[key] = preprocessor(**item). In case 'preprocessor' is not
        callable, it should be a string containing placeholder for item keys
        (e.g. {'wav': '/path/to/{uri}.wav'}). When omitted (or None), an
        empty dict is forwarded to the parent constructor.
    **kwargs
        Extra keyword arguments forwarded to the parent protocol constructor.
    """
    def __init__(self, preprocessors=None, **kwargs):
        # Use a None sentinel instead of a `{}` default: a dict literal in the
        # signature is created once and shared by every call.
        if preprocessors is None:
            preprocessors = {}
        super(EtapeSpeakerDiarizationProtocol,
              self).__init__(preprocessors=preprocessors, **kwargs)
        self.uem_parser_ = UEMParser()
        self.mdtm_parser_ = MDTMParser()

    def _subset(self, protocol, subset):
        """Iterate over the files of one subset of one protocol

        Both '{protocol}.{subset}.uem' and '{protocol}.{subset}.mdtm' are
        read from the 'data' directory located next to this module.

        Parameters
        ----------
        protocol : str
            First component of the data file names (e.g. 'tv', 'radio', 'all').
        subset : str
            Second component of the data file names (e.g. 'train', 'dev',
            'test').

        Yields
        ------
        current_file : dict
            Dictionary with keys 'database', 'uri', 'annotated' (looked up in
            the UEM file) and 'annotation' (looked up in the MDTM file), one
            per URI of the UEM file, in sorted URI order.
        """

        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')

        # both files share the same '{protocol}.{subset}' stem
        # e.g. /data/{tv|radio|all}.{train|dev|test}.{uem|mdtm}
        stem = '{protocol}.{subset}'.format(protocol=protocol, subset=subset)

        # load annotated parts
        uems = self.uem_parser_.read(op.join(data_dir, stem + '.uem'))

        # load annotations
        mdtms = self.mdtm_parser_.read(op.join(data_dir, stem + '.mdtm'))

        # the UEM file drives the iteration: one item per annotated URI
        for uri in sorted(uems.uris):
            yield {
                'database': 'Etape',
                'uri': uri,
                'annotated': uems(uri),
                'annotation': mdtms(uri),
            }