def test_load(sample):
    """Check the segments that UEMParser reads for 'uri1' from `sample`.

    `sample` is handed straight to `UEMParser.read` (presumably a pytest
    fixture pointing at a UEM file -- confirm against the fixture
    definition).
    """
    loaded = UEMParser().read(sample)

    # Calling the parsed result with a URI selects that file's entries.
    observed = list(loaded(uri="uri1"))

    assert observed == [Segment(1, 3.5), Segment(3, 7.5), Segment(6, 9)]
class EsterSpeakerDiarizationProtocol(SpeakerDiarizationProtocol):
    """Base speaker diarization protocol for ESTER database

    Parameters
    ----------
    preprocessors : dict, optional
        Forwarded unchanged to the parent protocol constructor. When
        omitted (or None), a new empty dict is created for this instance.
    **kwargs
        Extra keyword arguments forwarded to the parent constructor.
    """

    def __init__(self, preprocessors=None, **kwargs):
        # A literal `{}` default is evaluated once at definition time and
        # would be shared by every instance built without an explicit
        # argument; create a fresh dict per call instead.
        if preprocessors is None:
            preprocessors = {}
        super(EsterSpeakerDiarizationProtocol, self).__init__(
            preprocessors=preprocessors, **kwargs)
        self.uem_parser_ = UEMParser()
        self.mdtm_parser_ = MDTMParser()

    def _subset(self, protocol, subset):
        """Iterate over the files of one protocol subset.

        Reads `{protocol}.{subset}.uem` and `{protocol}.{subset}.mdtm` from
        the `data` directory located next to this module.

        Parameters
        ----------
        protocol : str
            First component of the data file names.
        subset : str
            Second component of the data file names.

        Yields
        ------
        current_file : dict
            One dictionary per URI found in the UEM file, in sorted URI
            order, with keys 'database' (always 'Ester'), 'uri',
            'annotated' (UEM content for that URI) and 'annotation'
            (MDTM content for that URI).
        """
        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')

        # load annotated parts
        # e.g. /data/{tv|radio|all}.{train|dev|test}.uem
        path = op.join(
            data_dir,
            '{protocol}.{subset}.uem'.format(subset=subset,
                                             protocol=protocol))
        uems = self.uem_parser_.read(path)

        # load annotations
        path = op.join(
            data_dir,
            '{protocol}.{subset}.mdtm'.format(subset=subset,
                                              protocol=protocol))
        mdtms = self.mdtm_parser_.read(path)

        # The UEM file drives the iteration: only URIs listed there are
        # yielded, and sorting makes the order deterministic.
        for uri in sorted(uems.uris):
            annotated = uems(uri)
            annotation = mdtms(uri)
            current_file = {
                'database': 'Ester',
                'uri': uri,
                'annotated': annotated,
                'annotation': annotation,
            }
            yield current_file
class EtapeSpeakerDiarizationProtocol(SpeakerDiarizationProtocol):
    """Base speaker diarization protocol for ETAPE database

    This class should be inherited from, not used directly.

    Parameters
    ----------
    preprocessors : dict or (key, preprocessor) iterable, optional
        When provided, each protocol item (dictionary) is preprocessed, such
        that item[key] = preprocessor(**item). In case 'preprocessor' is not
        callable, it should be a string containing placeholders for item
        keys (e.g. {'wav': '/path/to/{uri}.wav'}).
        When omitted (or None), a new empty dict is created for this
        instance.
    **kwargs
        Extra keyword arguments forwarded to the parent constructor.
    """

    def __init__(self, preprocessors=None, **kwargs):
        # A literal `{}` default is evaluated once at definition time and
        # would be shared by every instance built without an explicit
        # argument; create a fresh dict per call instead.
        if preprocessors is None:
            preprocessors = {}
        super(EtapeSpeakerDiarizationProtocol, self).__init__(
            preprocessors=preprocessors, **kwargs)
        self.uem_parser_ = UEMParser()
        self.mdtm_parser_ = MDTMParser()

    def _subset(self, protocol, subset):
        """Iterate over the files of one protocol subset.

        Reads `{protocol}.{subset}.uem` and `{protocol}.{subset}.mdtm` from
        the `data` directory located next to this module.

        Parameters
        ----------
        protocol : str
            First component of the data file names.
        subset : str
            Second component of the data file names.

        Yields
        ------
        current_file : dict
            One dictionary per URI found in the UEM file, in sorted URI
            order, with keys 'database' (always 'Etape'), 'uri',
            'annotated' (UEM content for that URI) and 'annotation'
            (MDTM content for that URI).
        """
        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')

        # load annotated parts
        # e.g. /data/{tv|radio|all}.{train|dev|test}.uem
        path = op.join(
            data_dir,
            '{protocol}.{subset}.uem'.format(subset=subset,
                                             protocol=protocol))
        uems = self.uem_parser_.read(path)

        # load annotations
        path = op.join(
            data_dir,
            '{protocol}.{subset}.mdtm'.format(subset=subset,
                                              protocol=protocol))
        mdtms = self.mdtm_parser_.read(path)

        # The UEM file drives the iteration: only URIs listed there are
        # yielded, and sorting makes the order deterministic.
        for uri in sorted(uems.uris):
            annotated = uems(uri)
            annotation = mdtms(uri)
            current_file = {
                'database': 'Etape',
                'uri': uri,
                'annotated': annotated,
                'annotation': annotation,
            }
            yield current_file