Beispiel #1
0
    def setUp(self):
        """Prepare a point tier, an interval tier and a mapping for the tests.

        ``tierP`` receives 8 point annotations labelled "0" to "7".
        ``tierI`` receives 8 intervals [i, i+1] labelled "0", "10", ... "70",
        then one interval [9, 10] whose label contains two variants
        separated by "|".
        ``tier_map`` associates number strings with French words; the keys
        "70" and "80" are each added twice, with two different words.
        """
        # Create tiers
        self.tierP = Tier("PointTier")
        self.tierI = Tier("IntervalTier")
        for idx in range(8):
            point_ann = Annotation(TimePoint(idx), Label(str(idx)))
            self.tierP.Append(point_ann)
            span = TimeInterval(TimePoint(idx), TimePoint(idx + 1))
            self.tierI.Append(Annotation(span, Label(str(idx * 10))))

        last_span = TimeInterval(TimePoint(9), TimePoint(10))
        self.tierI.Append(
            Annotation(last_span, Label("quatre-vingts-dix|nonante")))

        # Create TierMapping (entries are added in the order listed here)
        number_words = (
            ("1", "un"),
            ("2", "deux"),
            ("3", "trois"),
            ("4", "quatre"),
            ("5", "cinq"),
            ("6", "six"),
            ("7", "sept"),
            ("8", "huit"),
            ("9", "neuf"),
            ("10", "dix"),
            ("20", "vingt"),
            ("30", "trente"),
            ("40", "quarante"),
            ("50", "cinquante"),
            ("60", "soixante"),
            ("70", "septante"),
            ("70", "soixante-dix"),
            ("80", "octante"),
            ("80", "quatre-vingts"),
        )
        self.tier_map = TierMapping()
        for digits, word in number_words:
            self.tier_map.add(digits, word)
Beispiel #2
0
def unalign(aligned_tier, ipus_separators=('#', 'dummy')):
    """ Convert a time-aligned tier into a non-aligned tier.

    Consecutive annotations whose label is neither a separator nor empty are
    merged into one interval. Its label is the concatenation of the merged
    labels, each one preceded by a space and with every '.' replaced by a
    space. Annotations with a separator label or an empty label are added to
    the result as they are (the same objects, not copies).

    :param aligned_tier: (Tier) the time-aligned tier to convert
    :param ipus_separators: (list or tuple) label values delimiting the
    merged units. The default is an immutable tuple so that it can not be
    altered between calls.
    :returns: (Tier) a new tier named "Un-aligned"

    """
    new_tier = Tier("Un-aligned")
    begin = aligned_tier.GetBegin()
    end = begin
    pieces = []  # labels of the unit being merged, each with its leading space
    for ann in aligned_tier:
        label = ann.GetLabel().GetValue()
        if label in ipus_separators or ann.GetLabel().IsEmpty() is True:
            # A separator closes the unit in progress, if there is one
            if end > begin:
                merged = Annotation(TimeInterval(begin, end),
                                    Label("".join(pieces)))
                new_tier.Add(merged)
            new_tier.Add(ann)
            # The next unit starts where this separator ends
            begin = ann.GetLocation().GetEnd()
            end = begin
            pieces = []
        else:
            end = ann.GetLocation().GetEnd()
            pieces.append(" " + label.replace('.', ' '))

    # Unit still open after the last annotation
    if end > begin:
        # The end is read again from the last annotation of the tier
        end = aligned_tier[-1].GetLocation().GetEnd()
        merged = Annotation(TimeInterval(begin, end), Label("".join(pieces)))
        new_tier.Add(merged)

    return new_tier
Beispiel #3
0
    def __add_repetition(repetition, spk1_tier, spk2_tier, start_spk1,
                         start_spk2, src_tier, echo_tier):
        """ Store a repetition - its source and its echos - in the tiers.

        The source is labelled "S<n>" and every echo "R<n>", where <n> is
        the number of annotations already in src_tier, plus one.

        :param repetition: (DataRepetition)
        :param spk1_tier: (Tier) The tier of speaker 1 (to detect sources)
        :param spk2_tier: (Tier) The tier of speaker 2 (to detect echos)
        :param start_spk1: start index of the interval in spk1_tier
        :param start_spk2: start index of the interval in spk2_tier
        :param src_tier: (Tier) The resulting tier with sources
        :param echo_tier: (Tier) The resulting tier with echos
        :returns: (bool) False if adding the source raised an exception
        (echos are then not added), True otherwise

        """
        def interval_of(tier, offset, first, last):
            # Interval from the begin of tier[offset+first] to the end of
            # tier[offset+last]; both boundaries are copied.
            begin = tier[offset + first].GetLocation().GetBegin()
            end = tier[offset + last].GetLocation().GetEnd()
            return TimeInterval(begin.Copy(), end.Copy())

        rank = str(len(src_tier) + 1)

        # Source
        first, last = repetition.get_source()
        source = Annotation(
            interval_of(spk1_tier, start_spk1, first, last),
            Label("S" + rank))
        try:
            src_tier.Add(source)
        except Exception:
            return False

        # Echos
        for (first, last) in repetition.get_echos():
            echo = Annotation(
                interval_of(spk2_tier, start_spk2, first, last),
                Label("R" + rank))
            echo_tier.Add(echo)

        return True
Beispiel #4
0
    def _append_tuples(self, tier, tdata, delta, unitend):
        """ Append a list of (start,end,text,score) into the tier.

        Shift start/end of a delta value and set the last end value.
        This is a best-effort operation: if anything fails, the annotations
        appended so far are kept, the remaining ones are dropped, and the
        failure is logged instead of being raised.

        :param tier: object receiving the annotations through its
        ``Append`` method
        :param tdata: (list) tuples of (start, end, text, score)
        :param delta: value added to each start and each end
        :param unitend: end value assigned to the last tuple, in place of
        its shifted end

        """
        import logging

        try:
            for i, t in enumerate(tdata):
                (loc_s, loc_e, lab, scr) = t
                loc_s += delta
                loc_e += delta
                if i == (len(tdata) - 1):
                    # the last annotation ends where the unit ends
                    loc_e = unitend

                # NOTE: only one (text, score) value is stored in the label.
                # If alternative phonetizations/tokenizations are supported
                # one day, each alternative could be added as one more Text
                # value of the same Label.
                label = Label(Text(lab, scr))
                annotationw = Annotation(
                    TimeInterval(TimePoint(loc_s, self._radius),
                                 TimePoint(loc_e, self._radius)), label)
                tier.Append(annotationw)

        except Exception as e:
            # Still best-effort, but no longer silent: report what happened.
            logging.warning("Annotations not fully appended to the tier: %s",
                            e)
Beispiel #5
0
 def setUp(self):
     """Create four interval annotations and a tier that contains them.

     The annotations x, y, a and b cover [1, 2], [3, 4], [5, 6.5] and
     [6.5, 9.5]; they are appended to the tier in that order. The second
     argument of each TimePoint is presumably its radius - to be confirmed
     against the TimePoint class.
     """
     x_span = TimeInterval(TimePoint(1, 0.), TimePoint(2, 0.01))
     self.x = Annotation(x_span, Label('toto'))

     y_span = TimeInterval(TimePoint(3, 0.01), TimePoint(4, 0.01))
     self.y = Annotation(y_span, Label('titi'))

     a_span = TimeInterval(TimePoint(5, 0.01), TimePoint(6.5, 0.005))
     self.a = Annotation(a_span, Label('toto'))

     b_span = TimeInterval(TimePoint(6.5, 0.005), TimePoint(9.5, 0.))
     self.b = Annotation(b_span, Label('toto'))

     self.tier = Tier()
     for ann in (self.x, self.y, self.a, self.b):
         self.tier.Append(ann)
Beispiel #6
0
 def __write_trs_track(track_filename, track_content, duration):
     """Write a transcription file made of one annotation.

     The file contains a tier named "Transcription" with a single interval
     from 0 to ``duration``, labelled with ``track_content``.

     :param track_filename: name of the file to write
     :param track_content: text of the label
     :param duration: end time of the interval
     """
     whole_track = TimeInterval(TimePoint(0.), TimePoint(duration))
     content_ann = Annotation(whole_track, Label(track_content))
     transcription = Transcription()
     transcription.NewTier("Transcription").Append(content_ann)
     sppas.src.annotationdata.aio.write(track_filename, transcription)
Beispiel #7
0
    def get_tier(self, trs):
        """ Build and return the tier of activities.

        Each token is mapped to an activity name through self._activities
        ("speech" when the token is not in it). An interval is created each
        time the activity changes, and a last one up to the end of the
        tokens.

        :param trs: (Transcription) a Transcription containing a tier
        with exactly the name 'TokensAlign'.
        :returns: Tier
        :raises: NoInputError

        """
        aligned = sppasSearchTier.aligned_tokens(trs)
        tokens = fill_gaps(aligned, trs.GetMinTime(), trs.GetMaxTime())

        activity_tier = Tier('Activity')
        current = "<INIT>"  # marker: no activity seen yet

        for token in tokens:

            # Activity name of this token; an empty label counts as unknown
            token_label = token.GetLabel()
            key = unk_stamp if token_label.IsEmpty() else token_label.GetValue()
            upcoming = self._activities.get(key, "speech")

            # Another activity starts: close the interval of the current one
            if current != "<INIT>" and current != upcoming:
                span = TimeInterval(activity_tier.GetEnd(),
                                    token.GetLocation().GetBegin())
                activity_tier.Append(Annotation(span, Label(current)))

            # In any case, this is now the current activity
            current = upcoming

        # Interval of the last activity, up to the end of the tokens
        if activity_tier.GetEnd() < tokens.GetEnd():
            span = TimeInterval(activity_tier.GetEnd(), tokens.GetEnd())
            activity_tier.Append(Annotation(span, Label(current)))

        return unfill_gaps(activity_tier)
Beispiel #8
0
    window_end = windows[idx_end]
    i = w - 1
    while window_end[i] == 0:
        i = i - 1
    ann_idx_end = idx_end + i

    # Assign a label to the new annotation
    max_dist = round(max(distances[idx_begin:idx_end + 1]), 2)
    if max_dist == 0:
        print(" ERROR: max dist equal to 0...")

    begin = tier[ann_idx_begin].GetLocation().GetBegin()
    end = tier[ann_idx_end].GetLocation().GetEnd()
    label = Label(max_dist, data_type="float")

    a = Annotation(TimeInterval(begin, end), label)
    filtered_tier.Append(a)

#     for i in range(idxbegin,idxend+1):
#         print windows[i],distances[i]
#         for j in range (w):
#             print tier[i+j].GetLocation().GetDuration().GetValue(),tier[i+j].GetLabel().GetValue(), "/",
#     print " -> maxdist=",maxdist
#     print a
#     print

# ----------------------------------------------------------------------------
# Save result

if file_output is None:
    for a in filtered_tier:
Beispiel #9
0
    def tracks2transcription(self, ipustrs, ipusaudio, add_ipu_idx=False):
        """ Build a Transcription object from the tracks.

        Two tiers are created. "IPUs" contains one "ipu_<n>" interval per
        track; "Transcription" contains one interval per track, labelled
        with the unit text if units are available. Both tiers get a "#"
        interval wherever there is a gap before, between or after tracks.

        :param ipustrs: (IPUsTrs)
        :param ipusaudio: (IPUsAudio)
        :param add_ipu_idx: (bool) if not False, the "ipu_<n>" name is kept
        at the beginning of the labels of the "Transcription" tier
        :returns: (Transcription)
        :raises: IOError if there is no track, Exception if units exist but
        their number differs from the number of tracks

        """
        if len(self.tracks) == 0:
            raise IOError('No IPUs to write.\n')

        # Information taken from the audio
        framerate = ipusaudio.get_channel().get_framerate()
        end_time = ipusaudio.get_channel().get_duration()

        # Information taken from the transcription: first media, if any
        media = None
        try:
            media_list = ipustrs.trsinput.GetMedia()
            if len(media_list) > 0:
                media = media_list[0]
        except Exception:
            media = None

        units = ipustrs.get_units()
        nb_units = len(units)
        if nb_units != 0 and len(self.tracks) != nb_units:
            raise Exception('Inconsistent number of tracks and units. '
                            'Got %d audio tracks, and %d units.\n' %
                            (len(self.tracks), nb_units))
        has_units = nb_units > 0

        # Create the transcription and tiers
        trs = Transcription("IPU-Segmentation")
        ipu_tier = trs.NewTier("IPUs")
        text_tier = trs.NewTier("Transcription")
        # vagueness is win_length divided by 4 (see "refine" method of
        # sppasChannelSilence class); radius is vagueness divided by 2
        radius = ipusaudio.get_win_length() / 8.

        def interval(begin, end):
            # Interval whose two boundaries have the same radius
            return TimeInterval(TimePoint(begin, radius),
                                TimePoint(end, radius))

        # Convert the tracks from frames to times, then fill in the tiers
        previous_end = 0.
        track_times = frames2times(self.tracks, framerate)
        for idx, (from_time, to_time) in enumerate(track_times):

            # Gap between the previous track and this one: silence
            if previous_end < from_time:
                silence = Annotation(interval(previous_end, from_time),
                                     Label("#"))
                ipu_tier.Append(silence)
                text_tier.Append(silence.Copy())

            # ... IPU tier
            ipu_name = "ipu_%d" % (idx + 1)
            ipu_tier.Append(
                Annotation(interval(from_time, to_time), Label(ipu_name)))

            # ... Transcription tier
            text = "" if add_ipu_idx is False else ipu_name
            if has_units:
                text = text + " " + units[idx]
            text_tier.Append(
                Annotation(interval(from_time, to_time), Label(text)))

            previous_end = to_time

        # Gap between the last track and the end of the audio: silence
        if previous_end < end_time:
            last_begin = TimePoint(previous_end, radius)
            last_end = TimePoint(end_time, radius)
            if last_begin < last_end:
                silence = Annotation(TimeInterval(last_begin, last_end),
                                     Label("#"))
                ipu_tier.Append(silence)
                text_tier.Append(silence.Copy())

        # Link both tiers: IPU and Transcription (a failure is ignored)
        try:
            trs.GetHierarchy().add_link('TimeAssociation', ipu_tier,
                                        text_tier)
        except Exception:
            pass

        # Set media
        if media is not None:
            trs.AddMedia(media)
            for each_tier in trs:
                each_tier.SetMedia(media)

        return trs
Beispiel #10
0
    def _add_anchors(anchorlist, anchor_tier):
        """ Add anchors in the anchor tier.

        Each anchor is a (start, end, index) tuple. Before an anchor is
        added, overlaps with its neighbours are solved: an overlapping
        silence is shortened (and removed if nothing is left of it), while
        for any other overlapping neighbour it is the new anchor that is
        shortened. The anchor is then added only if its index is consistent
        with the indexes of the nearest indexed anchors before and after it.
        Finally, the evident holes of the tier are filled.

        :param anchorlist: (list) of (start, end, index) tuples
        :param anchor_tier: the tier of anchors, modified in place

        """
        if len(anchorlist) == 0:
            return

        logging.debug('... ... ... Anchors:')
        for (s, e, i) in anchorlist:

            # prevent overlaps with a previous anchor
            previ = anchor_tier.Near(s, -1)
            if previ != -1:
                prevann = anchor_tier[previ]
                if prevann.GetLocation().GetEnd().GetMidpoint() > s:
                    if prevann.GetLabel().IsSilence():
                        # the silence is shortened to end where we start
                        prevann.GetLocation().GetEnd().SetMidpoint(s)
                        if prevann.GetLocation().GetEnd() < prevann.GetLocation().GetBegin():
                            anchor_tier.Pop(previ)
                    else:
                        # the previous anchor is kept as it is: our start
                        # is moved to its end
                        s = prevann.GetLocation().GetEnd().GetMidpoint()

            # prevent overlaps with a following anchor
            nexti = anchor_tier.Near(e, 1)
            if nexti != -1:
                nextann = anchor_tier[nexti]
                if nextann.GetLocation().GetBegin().GetMidpoint() < e:
                    if nextann.GetLabel().IsSilence():
                        # the silence is shortened to start where we end
                        nextann.GetLocation().GetBegin().SetMidpoint(e)
                        if nextann.GetLocation().GetEnd() < nextann.GetLocation().GetBegin():
                            anchor_tier.Pop(nexti)
                    else:
                        # the following anchor is kept as it is: our end
                        # is moved to its begin
                        e = nextann.GetLocation().GetBegin().GetMidpoint()

            valid = True
            # previous index must be lesser
            p = anchor_tier.near_indexed_anchor(s, -1)
            if p is not None:
                pidx = p.GetLabel().GetTypedValue()
                if i <= pidx:
                    valid = False
                else:
                    # solve a small amount of issues...
                    # duration between the previous and the one we want to add
                    deltatime = s - p.GetLocation().GetEnd().GetMidpoint()
                    if deltatime < 0.2:
                        # so close in time, the index can't be more than
                        # 10 higher than the previous one
                        if (i - 10) > pidx:
                            valid = False

            # next index must be higher
            n = anchor_tier.near_indexed_anchor(e, 1)
            if n is not None and i >= n.GetLabel().GetTypedValue():
                valid = False

            # add the anchor
            if valid is True:
                time = TimeInterval(TimePoint(s), TimePoint(e))
                label = Label(Text(i, data_type="int"))
                anchor_tier.Add(Annotation(time, label))
                logging.debug("... ... ... ... Add: %f %f %d", s, e, i)
            else:
                logging.debug("... ... ... ... ... Ignore: %f %f %d", s, e, i)

        # Then, fill the very evident holes
        anchor_tier.fill_evident_holes()