Example #1
def test_positional_dissimilarity_figure20_scale_effect():
    pos_dis = PositionalDissimilarity(delta_empty=1.0)
    unit_align_a = UnitaryAlignment(
        (("pierrot", Unit(Segment(0, 7))), ("liza", Unit(Segment(0, 10)))))
    unit_align_b = UnitaryAlignment(
        (("pierrot", Unit(Segment(0, 21))), ("liza", Unit(Segment(0, 30)))))

    assert (unit_align_a.compute_disorder(pos_dis) ==
            unit_align_b.compute_disorder(pos_dis))
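
The two alignments disagree by the same relative amount, which is why their disorders match: positional dissimilarity in pygamma-agreement normalizes the boundary differences by the combined duration of the two units, so scaling both segments by the same factor (here 3x) leaves the value unchanged. A back-of-envelope check of that normalized ratio (the exact squaring and delta_empty scaling live in the library):

# ratio (|dstart| + |dend|) / (duration_a + duration_b) for both alignments
ratio_a = (abs(0 - 0) + abs(7 - 10)) / ((7 - 0) + (10 - 0))    # 3/17
ratio_b = (abs(0 - 0) + abs(21 - 30)) / ((21 - 0) + (30 - 0))  # 9/51 == 3/17
assert ratio_a == ratio_b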
Example #2
def test_relabel_tracks(annotation):
    actual = annotation.relabel_tracks()
    expected = Annotation(uri='TheBigBangTheory.Season01.Episode01',
                          modality='speaker')
    expected[Segment(3, 5), '_'] = 'A'
    expected[Segment(5.5, 7), '_'] = 'B'
    expected[Segment(8, 10), '_'] = 'C'
    expected[Segment(8, 10), 'anything'] = 'D'
    assert actual == expected
Example #3
def test_support(timeline):
    # No collar (default).
    assert list(timeline.support()) == [Segment(0.5, 4),
                                        Segment(5, 8),
                                        Segment(8.5, 10)]

    # Collar of 600 ms.
    assert list(timeline.support(.600)) == [Segment(0.5, 4),
                                            Segment(5, 10)]
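
The timeline fixture itself is not shown; a minimal sketch of one consistent with both assertions (overlap across 0.5-4, contiguity at 7, and a 0.5 s gap at 8 that only the 600 ms collar bridges) could be:

import pytest
from pyannote.core import Segment, Timeline

@pytest.fixture
def timeline():
    # hypothetical fixture: overlapping, contiguous, and gapped segments
    return Timeline([Segment(0.5, 3), Segment(2, 4),
                     Segment(5, 7), Segment(7, 8),
                     Segment(8.5, 10)])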
Example #4
@pytest.fixture
def annotation():
    annotation = Annotation(uri='TheBigBangTheory.Season01.Episode01',
                            modality='speaker')
    annotation[Segment(3, 5), '_'] = 'Penny'
    annotation[Segment(5.5, 7), '_'] = 'Leonard'
    annotation[Segment(8, 10), '_'] = 'Penny'
    annotation[Segment(8, 10), 'anything'] = 'Sheldon'

    return annotation
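
Note that annotation is the pytest fixture consumed by Examples #2, #5, #6 and #24: the same four tracks are what gets relabeled, cropped, merged and renamed there.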
Example #5
def test_crop_loose(annotation):
    expected = Annotation(
        uri='TheBigBangTheory.Season01.Episode01',
        modality='speaker')
    expected[Segment(5.5, 7), '_'] = 'Leonard'
    expected[Segment(8, 10), '_'] = 'Penny'
    expected[Segment(8, 10), 'anything'] = 'Sheldon'
    actual = annotation.crop(Segment(5, 9), mode='loose')
    assert actual == expected, str(actual)
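
For contrast, a sketch of the other two crop modes on the same fixture (assuming the 'strict' and 'intersection' modes documented by pyannote.core):

def test_crop_strict_and_intersection(annotation):
    # 'strict' keeps only tracks fully inside the focus segment
    strict = annotation.crop(Segment(5, 9), mode='strict')
    assert list(strict.itersegments()) == [Segment(5.5, 7)]

    # 'intersection' (the default) trims partially covered tracks to the focus
    inter = annotation.crop(Segment(5, 9), mode='intersection')
    assert list(inter.itersegments()) == [Segment(5.5, 7), Segment(8, 9)]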
Example #6
def test_support(annotation):
    actual = annotation.support(collar=3.5)
    expected = Annotation(
            uri='TheBigBangTheory.Season01.Episode01',
            modality='speaker')
    expected[Segment(3, 10), 'B'] = 'Penny'
    expected[Segment(5.5, 7), 'A'] = 'Leonard'
    expected[Segment(8, 10), 'C'] = 'Sheldon'
    assert actual == expected
Example #7
def test_load(sample):
    parser = JSONParser()
    annotations = parser.read(sample)
    speech1 = annotations(uri="uri1", modality="speech")
    assert list(speech1.itertracks(label=True)) == [
        (Segment(1, 3.5), 'track1', 'alice'),
        (Segment(3, 7.5), 'track2', 'barbara'),
        (Segment(6, 9), 'track3', 'chris')
    ]
Example #8
    def extrude(self, uem, reference, collar=0.0, skip_overlap=False):
        """Extrude reference boundary collars from uem

        reference     |----|     |--------------|       |-------------|
        uem       |---------------------|    |-------------------------------|
        extruded  |--| |--| |---| |-----|    |-| |-----| |-----------| |-----|

        Parameters
        ----------
        uem : Timeline
            Evaluation map.
        reference : Annotation
            Reference annotation.
        collar : float, optional
            When provided, set the duration of collars centered around
            reference segment boundaries that are extruded from both reference
            and hypothesis. Defaults to 0. (i.e. no collar).
        skip_overlap : bool, optional
            Set to True to not evaluate overlap regions.
            Defaults to False (i.e. keep overlap regions).

        Returns
        -------
        extruded_uem : Timeline
        """

        if collar == 0. and not skip_overlap:
            return uem

        collars, overlap_regions = [], []

        # build list of collars if needed
        if collar > 0.:
            # iterate over all segments in reference
            for segment in reference.itersegments():

                # add collar centered on start time
                t = segment.start
                collars.append(Segment(t - .5 * collar, t + .5 * collar))

                # add collar centered on end time
                t = segment.end
                collars.append(Segment(t - .5 * collar, t + .5 * collar))

        # build list of overlap regions if needed
        if skip_overlap:
            # iterate over pair of intersecting segments
            for (segment1, track1), (segment2,
                                     track2) in reference.co_iter(reference):
                if segment1 == segment2 and track1 == track2:
                    continue
                # add their intersection
                overlap_regions.append(segment1 & segment2)

        segments = collars + overlap_regions

        return Timeline(segments=segments).support().gaps(support=uem)
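
The recipe is self-contained enough to replay with plain Timeline operations; a minimal sketch on hypothetical toy data (the method above takes self, so in context it belongs to a metric class):

from pyannote.core import Annotation, Segment, Timeline

reference = Annotation()
reference[Segment(2, 4)] = 'spk1'
reference[Segment(3, 6)] = 'spk2'   # overlaps spk1 on [3, 4]
uem = Timeline([Segment(0, 10)])

collar = 1.0
removed = [Segment(t - collar / 2, t + collar / 2)
           for segment in reference.itersegments()
           for t in (segment.start, segment.end)]
removed.append(Segment(3, 4))  # the overlap region
extruded = Timeline(removed).support().gaps(support=uem)
# leaves Segment(0, 1.5), Segment(4.5, 5.5) and Segment(6.5, 10)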
Example #9
def test_get_track_scores(scores):

    assert scores.get_track_scores(Segment(0, 2.5), 'track') == {
        'A': 0.2,
        'B': 0.3,
        'C': 0.4,
        'D': 0.1
    }

    assert np.isnan(scores.get_track_scores(Segment(3, 4), 'other_track')['A'])
Example #10
def test_intersection():

    segment = Segment(start=1, end=9)
    other_segment = Segment(4, 13)

    assert segment.intersects(other_segment)
    assert segment & other_segment == Segment(4, 9)

    other_segment = Segment(13, 20)

    assert not segment.intersects(other_segment)
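
The & operator never raises on a disjoint pair; it simply returns an empty (zero-extent) Segment, which is falsy:

    assert not (segment & other_segment)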
Example #11
def random_subsegment(
        segment: Segment,
        duration: float,
        min_duration: Optional[float] = None) -> Iterator[Segment]:
    """Pick a subsegment at random

    Parameters
    ----------
    segment : Segment
    duration : float
        Duration of random subsegment
    min_duration : float, optional
        When provided, choose segment duration at random between `min_duration`
        and `duration` (instead of fixed `duration`).

    Yields
    ------
    segment : `Segment`

    Usage
    -----
    >>> for subsegment in random_subsegment(segment, duration):
    ...     # do something with subsegment
    ...     pass

    >>> generator = random_subsegment(segment, duration)
    >>> subsegment = next(generator)
    """
    if min_duration is None:

        if duration > segment.duration:
            msg = (f'`duration` (= {duration:g}) should be smaller '
                   f'than `segment` duration (= {segment.duration:g}).')
            raise ValueError(msg)

        while True:
            # draw start time from [segment.start, segment.end - duration]
            t = segment.start + \
                np.random.random() * (segment.duration - duration)
            yield Segment(t, t + duration)

    else:
        # make sure max duration is smaller than actual segment duration
        max_duration = min(segment.duration, duration)

        while True:
            # draw duration from [min_duration, max_duration] interval
            rnd_duration = min_duration + \
                           np.random.random() * (max_duration - min_duration)

            # draw start from [segment.start, segment.end - rnd_duration] interval
            t = segment.start + np.random.random() * (segment.duration -
                                                      rnd_duration)
            yield Segment(t, t + rnd_duration)
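
A quick way to exercise the generator (seeding NumPy for reproducibility, and using itertools.islice since the generator is infinite):

from itertools import islice

np.random.seed(0)
for subsegment in islice(random_subsegment(Segment(0, 10), 2.0), 3):
    assert abs(subsegment.duration - 2.0) < 1e-9
    assert subsegment in Segment(0, 10)   # fully included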
Example #12
def test_del(scores):
    segment, track = Segment(0., 2.5), 'track'
    del scores[segment, track]
    assert not scores.has_track(segment, track)
    assert segment not in scores

    segment, track = Segment(3., 4.), 'other_track'
    del scores[segment, track]
    assert not scores.has_track(segment, track)
    assert segment in scores
    segment, track = Segment(3., 4.), 'track'
    assert scores.has_track(segment, track)
Example #13
    def compute_gamma(self, ref_tg: TextGrid,
                      target_tg: TextGrid) -> Optional[float]:
        continuum = Continuum()
        for annot in ref_tg.getFirst(self.name):
            continuum.add("ref", Segment(annot.minTime, annot.maxTime))
        for annot in target_tg.getFirst(self.name):
            continuum.add("target", Segment(annot.minTime, annot.maxTime))
        dissim = PositionalSporadicDissimilarity(delta_empty=1)
        gamma_results = continuum.compute_gamma(dissim,
                                                n_samples=10,
                                                precision_level="medium")
        return gamma_results.gamma
Example #14
    def compute_gamma(self, ref_tg: TextGrid, target_tg: TextGrid):
        continuum = Continuum()
        for annot in ref_tg.getFirst(self.name):
            continuum.add("ref", Segment(annot.minTime, annot.maxTime),
                          annot.mark)
        for annot in target_tg.getFirst(self.name):
            continuum.add("target", Segment(annot.minTime, annot.maxTime),
                          annot.mark)
        dissim = CombinedCategoricalDissimilarity(alpha=1, beta=1)
        gamma_results = continuum.compute_gamma(dissim,
                                                n_samples=10,
                                                precision_level="medium")
        return gamma_results.gamma
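
The difference from Example #13: here each unit carries a category (annot.mark), so the combined dissimilarity mixes a positional term and a categorical term weighted by alpha and beta, whereas Example #13 compares segment positions only.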
Example #15
def test_unitary_alignment():
    categories = ['Carol', 'Bob', 'Alice', 'Jeremy']
    cat = np.array([[0, 0.5, 0.3, 0.7], [0.5, 0., 0.6, 0.4],
                    [0.3, 0.6, 0., 0.7], [0.7, 0.4, 0.7, 0.]])
    combi_dis = CombinedCategoricalDissimilarity(categories=categories,
                                                 delta_empty=0.5,
                                                 cat_dissimilarity_matrix=cat,
                                                 alpha=1)
    n_tuple = (('liza', Unit(Segment(12, 18), "Carol")),
               ('pierrot', Unit(Segment(12, 18), "Alice")), ('hadrien', None))
    unitary_alignment = UnitaryAlignment(n_tuple)

    assert (unitary_alignment.compute_disorder(combi_dis) == pytest.approx(
        0.383, 0.001))
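
One reading consistent with the asserted value (assuming the disorder of a unitary alignment averages pairwise dissimilarities over the C(3,2) = 3 annotator pairs, with the categorical matrix scaled by delta_empty): the liza/pierrot pair costs 0 positionally (identical segments) plus 0.5 × 0.3 = 0.15 categorically (the Carol/Alice entry), and each pairing with the missing 'hadrien' unit costs delta_empty = 0.5, giving (0.15 + 0.5 + 0.5) / 3 ≈ 0.383.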
Example #16
    def _subset(self, subset):

        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')

        # load the list of file URIs for this subset
        path = op.join(data_dir, '{subset}.lst'.format(subset=subset))
        with open(path) as f:
            uris = [line.strip() for line in f]

        rttms = {}
        for file in listdir(op.join(data_dir, 'rttm', subset)):
            if subset == 'trn':
                rttm = read_rttm_file_trn(
                    op.join(data_dir, 'rttm', subset, file))
            else:
                rttm = read_rttm_file_other(
                    op.join(data_dir, 'rttm', subset, file))
            uri = rttm['uri'].iloc[0]
            annotation = Annotation(uri=uri)
            for index, row in rttm.iterrows():
                annotation[Segment(
                    float(row['start']),
                    float(row['start']) + float(row['duration']))] = str(
                        row['label'])
            rttms[uri] = annotation

        # by default, consider the whole file duration as annotated
        path = op.join(data_dir, '{subset}.time'.format(subset=subset))
        with open(path) as f:
            rows = f.readlines()
        times = {}
        for row in rows:
            kv = row.split(' ')
            times[kv[0]] = Timeline([Segment(0, float(kv[1]))])

        for uri in uris:
            annotated = times[uri]
            annotation = rttms[uri]
            current_file = {
                'database': 'Albayzin2016',
                'uri': uri,
                'annotated': annotated,
                'annotation': annotation
            }
            yield current_file
Example #17
def calculate_der(reference_filename, hypothesis_filename):
    lbls = Util.read_audacity_labels(reference_filename)
    reference = Annotation()
    for lbl in lbls:
        reference[Segment(lbl.start_seconds, lbl.end_seconds)] = lbl.label

    predicted_lbls = Util.read_audacity_labels(hypothesis_filename)
    hypothesis = Annotation()
    for lbl in predicted_lbls:
        if lbl.label != 'non_speech':
            hypothesis[Segment(lbl.start_seconds, lbl.end_seconds)] = lbl.label

    metric = DiarizationErrorRate()
    der = metric(reference, hypothesis)
    return der
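
The metric can also be exercised without label files; a minimal sketch with hypothetical inline annotations:

from pyannote.core import Annotation, Segment
from pyannote.metrics.diarization import DiarizationErrorRate

reference = Annotation()
reference[Segment(0, 10)] = 'A'
reference[Segment(10, 20)] = 'B'

hypothesis = Annotation()
hypothesis[Segment(0, 12)] = 'spk1'   # 2 s of confusion with B
hypothesis[Segment(12, 20)] = 'spk2'

der = DiarizationErrorRate()(reference, hypothesis)
# 2 s of confusion over 20 s of reference -> der ≈ 0.1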
Example #18
def test_get(scores):
    segment, track = Segment(0., 2.5), 'track'
    assert scores[segment, track, 'A'] == 0.2
    assert scores[segment, track, 'B'] == 0.3
    assert scores[segment, track, 'C'] == 0.4
    assert scores[segment, track, 'D'] == 0.1

    segment, track = Segment(3., 4.), 'track'
    assert scores[segment, track, 'A'] == 0.4
    assert scores[segment, track, 'B'] == 0.5

    segment, track = Segment(3., 4.), 'other_track'
    assert scores[segment, track, 'B'] == 0.1
    assert scores[segment, track, 'C'] == 0.3
    assert scores[segment, track, 'D'] == 0.1
Example #19
    def preprocess(self, current_file, identifier=None):
        """Pre-compute file-wise X and y"""

        # extract features for the whole file
        # (if it has not been done already)
        current_file = self.periodic_preprocess(current_file,
                                                identifier=identifier)

        # if labels have already been extracted, do nothing
        if identifier in self.preprocessed_.setdefault('y', {}):
            return current_file

        # get features as pyannote.core.SlidingWindowFeature instance
        X = self.preprocessed_['X'][identifier]
        sw = X.sliding_window
        n_samples = X.getNumber()

        y = np.zeros((n_samples + 4, 1), dtype=np.int8) - 1
        # [-1] ==> unknown / [0] ==> not change part / [1] ==> change part

        annotated = get_annotated(current_file)
        annotation = current_file['annotation']

        segments = []
        for segment, _ in annotation.itertracks():
            segments.append(
                Segment(segment.start - self.balance,
                        segment.start + self.balance))
            segments.append(
                Segment(segment.end - self.balance,
                        segment.end + self.balance))
        change_part = Timeline(segments).support().crop(annotated,
                                                        mode='intersection')

        # iterate over non-change regions
        for non_changes in change_part.gaps(annotated):
            indices = sw.crop(non_changes, mode='loose')
            y[indices, 0] = 0

        # iterate over change regions
        for changes in change_part:
            indices = sw.crop(changes, mode='loose')
            y[indices, 0] = 1

        y = SlidingWindowFeature(y[:-1], sw)
        self.preprocessed_['y'][identifier] = y

        return current_file
Example #20
def run_metrics(references_f,
                hypothesis_f,
                metrics,
                visualization=False,
                class_to_keep=None):
    if len(references_f) != len(hypothesis_f):
        raise ValueError(
            "The number of reference files and hypothesis files must match! (%d != %d)"
            % (len(references_f), len(hypothesis_f)))
    if visualization:
        visualization_dir = os.path.join(os.path.dirname(hypothesis_f[0]),
                                         "visualization")
        if not os.path.exists(visualization_dir):
            os.makedirs(visualization_dir)
    for ref_f, hyp_f in zip(references_f, hypothesis_f):
        ref, hyp = rttm_to_annotation(
            ref_f, class_to_keep=class_to_keep), rttm_to_annotation(
                hyp_f, class_to_keep=class_to_keep)
        basename = os.path.basename(ref_f)
        # Set the uri as the basename for both reference and hypothesis
        ref.uri, hyp.uri = basename, basename
        # Let's accumulate the score for each metric
        for m in metrics.values():
            m(ref, hyp)

        # Let's generate a visualization of the results
        if visualization:
            moment = find_1mn_highest_volubility(ref)
            if moment is not None:
                # Set figure size, and crop the annotation
                # around the most voluble minute
                start, end = moment[0], moment[1]
                notebook.width = end / 4
                plt.rcParams['figure.figsize'] = (notebook.width, 10)
                notebook.crop = Segment(start, end)

                # Plot reference
                plt.subplot(211)
                notebook.plot_annotation(ref, legend=True, time=False)
                plt.gca().set_title(
                    'reference ' +
                    os.path.basename(ref_f).replace('.rttm', ''),
                    fontdict={'fontsize': 18})

                # Plot hypothesis
                plt.subplot(212)
                notebook.plot_annotation(hyp, legend=True, time=True)
                plt.gca().set_title(
                    'hypothesis ' +
                    os.path.basename(hyp_f).replace('.rttm', ''),
                    fontdict={'fontsize': 18})

                plt.savefig(
                    os.path.join(
                        visualization_dir,
                        os.path.basename(hyp_f).replace('.rttm', '.png')))
                plt.close()
    return metrics
Example #21
    def _xxx_iter(self, subset):

        if not isinstance(subset, list):
            subsets = [subset]
        else:
            subsets = subset

        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')
        data_csv = op.join(data_dir, 'voxceleb1.csv')
        data = pd.read_csv(data_csv, index_col=['segment'])
        data = data.groupby('verification')

        # segment                          uri                      start end  speaker      verification identification
        # A.J._Buckley/1zcIwhmdeo4_0000001 A.J._Buckley/1zcIwhmdeo4 14.7  22.8 A.J._Buckley dev          trn

        for subset in subsets:

            subset_data = data.get_group(subset)

            for uri, datum in subset_data.iterrows():

                annotation = Annotation(uri=uri)
                segment = Segment(0., datum.end - datum.start)
                annotation[segment] = datum.speaker

                annotated = annotation.get_timeline()

                current_file = {
                    'uri': uri,
                    'database': 'VoxCeleb',
                    'annotation': annotation,
                    'annotated': annotated,
                }

                yield current_file
Example #22
def load_mdtm(file_mdtm):
    """Load MDTM file

    Parameter
    ---------
    file_mdtm : `str`
        Path to MDTM file.

    Returns
    -------
    annotations : `dict`
        Speaker diarization as a {uri: pyannote.core.Annotation} dictionary.
    """

    names = ['uri', 'NA1', 'start', 'duration', 'NA2', 'NA3', 'NA4', 'speaker']
    dtype = {'uri': str, 'start': float, 'duration': float, 'speaker': str}
    data = pd.read_csv(file_mdtm, names=names, dtype=dtype,
                       delim_whitespace=True)

    annotations = dict()
    for uri, turns in data.groupby('uri'):
        annotation = Annotation(uri=uri)
        for i, turn in turns.iterrows():
            segment = Segment(turn.start, turn.start + turn.duration)
            annotation[segment, i] = turn.speaker
        annotations[uri] = annotation

    return annotations
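
Since pd.read_csv accepts any file-like object, the loader can be smoke-tested without touching disk (hypothetical two-line MDTM content, columns matching the names list above):

import io

sample = io.StringIO(
    "file1 1 0.0 2.5 speaker NA unknown alice\n"
    "file1 1 2.5 1.5 speaker NA unknown bob\n"
)
annotations = load_mdtm(sample)
assert annotations['file1'].labels() == ['alice', 'bob']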
Example #23
def load_uem(file_uem):
    """Load UEM file

    Parameter
    ---------
    file_uem : `str`
        Path to UEM file.

    Returns
    -------
    timelines : `dict`
        Evaluation map as a {uri: pyannote.core.Timeline} dictionary.
    """

    names = ['uri', 'NA1', 'start', 'end']
    dtype = {'uri': str, 'start': float, 'end': float}
    data = pd.read_csv(file_uem, names=names, dtype=dtype,
                       delim_whitespace=True)

    timelines = dict()
    for uri, parts in data.groupby('uri'):
        segments = [Segment(part.start, part.end)
                    for i, part in parts.iterrows()]
        timelines[uri] = Timeline(segments=segments, uri=uri)

    return timelines
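
The same file-like trick works here (hypothetical UEM content: uri, channel, start, end):

import io

sample = io.StringIO("file1 1 0.0 300.0\nfile2 1 0.0 180.5\n")
timelines = load_uem(sample)
assert timelines['file1'].extent() == Segment(0.0, 300.0)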
Example #24
def test_labels(annotation):
    assert annotation.labels() == ['Leonard', 'Penny', 'Sheldon']
    assert annotation.get_labels(Segment(8, 10)) == {'Penny', 'Sheldon'}

    expected_res = Annotation(
        uri='TheBigBangTheory.Season01.Episode01',
        modality='speaker')
    expected_res[Segment(3, 5), '_'] = 'Kaley Cuoco'
    expected_res[Segment(5.5, 7), '_'] = 'Johnny Galecki'
    expected_res[Segment(8, 10), '_'] = 'Kaley Cuoco'
    expected_res[Segment(8, 10), 'anything'] = 'Jim Parsons'

    mapping = {'Penny': 'Kaley Cuoco',
               'Sheldon': 'Jim Parsons',
               'Leonard': 'Johnny Galecki'}
    assert annotation.rename_labels(mapping) == expected_res
Example #25
def load_mdtm(file_mdtm):
    """Load MDTM file

    Parameter
    ---------
    file_mdtm : `str`
        Path to MDTM file.

    Returns
    -------
    annotations : `dict`
        Speaker diarization as a {uri: pyannote.core.Annotation} dictionary.
    """

    names = ["uri", "NA1", "start", "duration", "NA2", "NA3", "NA4", "speaker"]
    dtype = {"uri": str, "start": float, "duration": float, "speaker": str}
    data = pd.read_csv(
        file_mdtm,
        names=names,
        dtype=dtype,
        delim_whitespace=True,
        keep_default_na=False,
    )

    annotations = dict()
    for uri, turns in data.groupby("uri"):
        annotation = Annotation(uri=uri)
        for i, turn in turns.iterrows():
            segment = Segment(turn.start, turn.start + turn.duration)
            annotation[segment, i] = turn.speaker
        annotations[uri] = annotation

    return annotations
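
This variant differs from Example #22 only by keep_default_na=False, which stops pandas from converting literal fields such as "NA" into NaN and thereby corrupting the string columns.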
Example #26
    def _xxx_enrol_iter(self, subset):

        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')
        data_csv = op.join(data_dir, 'voxceleb1.csv')
        data = pd.read_csv(data_csv, index_col=['segment'])

        trial_csv = op.join(
            data_dir,
            'voxceleb1.verification.{subset}.csv'.format(subset=subset))
        trials = pd.read_csv(trial_csv)

        for model_id in trials['enrolment'].unique():

            try:
                row = data.loc[model_id]
            except KeyError:
                # file_id = model_id.split('/')[1][:-8]
                # msg = '{file_id} marked as duplicate in VoxCeleb 1.1'
                # warnings.warn(msg.format(file_id=file_id))
                continue

            uri = model_id
            segment = Segment(0., row.end - row.start)
            current_enrolment = {
                'database': 'VoxCeleb',
                'uri': uri,
                'model_id': model_id,
                'enrol_with': Timeline(uri=uri, segments=[segment]),
            }

            yield current_enrolment
Example #27
    def common_enrol_iter(self):

        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')
        data_csv = op.join(data_dir, 'voxceleb1.csv')
        data = pd.read_csv(data_csv, index_col=['segment'])

        data = data.groupby('identification').get_group('trn')

        for model_id, model_rows in data.groupby('speaker'):
            uris = []
            enrol_with = []
            for uri, rows in model_rows.groupby('uri'):
                uris.append(uri)
                segments = []
                for row in rows.itertuples():
                    segments.append(Segment(row.start, row.end))
                enrol_with.append(Timeline(uri=uri, segments=segments))

            current_enrolment = {
                'database': 'VoxCeleb',
                'model_id': model_id,
                'uri': uris,
                'enrol_with': enrol_with
            }

            yield current_enrolment
Example #28
    def _subset(self, subset):

        path = Path(__file__).parent / 'data' / f'subtitles.{subset}.txt.gz'
        names = ['uri', 'start', 'stop', 'track', 'label', 'subtitle']
        with gzip.open(path, 'rb') as fp:
            data = pd.read_table(fp,
                                 sep="|",
                                 names=names,
                                 converters={'uri': str})

        for uri, datum in data.groupby('uri'):

            annotation = Annotation(uri=uri, modality='subtitles')
            subtitles = dict()
            for _, row in datum.iterrows():
                segment = Segment(row.start, row.stop)
                annotation[segment, row.track] = row.label
                subtitles[segment, row.track] = row.subtitle

            current_file = {
                'database': 'Eastenders',
                'uri': uri,
                'annotation': annotation,
                'subtitles': subtitles
            }

            yield current_file
Example #29
    def _xxx_iter(self, subset):

        if not isinstance(subset, list):
            subsets = [subset]
        else:
            subsets = subset

        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')
        data_csv = op.join(data_dir, 'voxceleb1.csv')
        data = pd.read_csv(data_csv, index_col=['segment'])
        data = data.groupby('verification')

        for subset in subsets:

            subset_data = data.get_group(subset)

            for uri, rows in subset_data.groupby('uri'):
                annotation = Annotation(uri=uri)
                for row in rows.itertuples():
                    segment = Segment(row.start, row.end)
                    annotation[segment] = row.speaker
                annotated = annotation.get_timeline()

                current_file = {
                    'uri': uri,
                    'database': 'VoxCeleb',
                    'annotation': annotation,
                    'annotated': annotated,
                }

                yield current_file
Example #30
    def _xxx_enrol_iter(self, subset):

        # load enrolments
        data_dir = Path(__file__).parent / 'data' / 'speaker_spotting'
        enrolments = data_dir / f'{subset}.enrol.txt'
        names = [
            'uri', 'NA0', 'start', 'duration', 'NA1', 'NA2', 'NA3', 'model_id'
        ]
        enrolments = read_table(enrolments, delim_whitespace=True, names=names)

        for model_id, turns in enrolments.groupby(by='model_id'):

            # gather enrolment data
            segments = []
            for t, turn in enumerate(turns.itertuples()):
                if t == 0:
                    raw_uri = turn.uri
                    uri = f'{raw_uri}.Mix-Headset'
                segment = Segment(start=turn.start,
                                  end=turn.start + turn.duration)
                if segment:
                    segments.append(segment)
            enrol_with = Timeline(segments=segments, uri=uri)

            current_enrolment = {
                'database': 'Test',
                'uri': uri,
                'model_id': model_id,
                'enrol_with': enrol_with,
            }

            yield current_enrolment