def __convert(self, tier, actions):
        """Build the "Tokens" tier from an orthographic transcription tier.

        For each annotation, only the best tag of each label is considered:
        a speech tag is normalized with the given actions, a silence tag is
        replaced by SIL_ORTHO, and any other tag keeps its content.

        :param tier: the tier to normalize; it is iterated annotation by
        annotation
        :param actions: forwarded unchanged to the normalizer
        :returns: (sppasTier) the new tier, named "Tokens"

        """
        result = sppasTier("Tokens")

        for index, annotation in enumerate(tier):
            progress = (info(1220, "annotations")).format(number=index + 1)
            self.logfile.print_message(progress, indent=1)

            new_location = annotation.get_location().copy()
            new_labels = []

            for ortho_label in annotation.get_labels():
                # Each label of an ortho transcription is expected to hold
                # a single tag, so only the best one is normalized.
                best = ortho_label.get_best()

                if best.is_speech() is True:
                    # On failure the error is logged and no token is
                    # produced for this label.
                    words = []
                    try:
                        words = self.__normalizer.normalize(
                            best.get_content(), actions)
                    except Exception as err:
                        failure = (info(1258, "annotations")).format(index) + \
                                  "{:s}".format(str(err))
                        self.logfile.print_message(failure, indent=2)
                elif best.is_silence():
                    # in ortho a silence could be one of "#" or "gpf_":
                    # a single symbol is used in the result.
                    words = [SIL_ORTHO]
                else:
                    # noises, laughter, empty label...: copied as it is
                    words = [best.get_content()]

                # One label per token. A token written "{a|b}" holds
                # variants, which are stored as alternative tags.
                for word in words:
                    has_variants = word.startswith('{') and word.endswith('}')
                    if not has_variants:
                        content = sppasTag(word)
                    else:
                        content = [sppasTag(v) for v in word[1:-1].split('|')]
                    new_labels.append(sppasLabel(content))

            result.create_annotation(new_location, new_labels)

        return result
Beispiel #2
0
    def syllabify_interval(self, phonemes, from_p, to_p, syllables):
        """Syllabify a range of phonemes and store the result in a tier.

        One annotation is created in syllables for each syllable returned
        by the syllabifier. Nothing is returned.

        :param phonemes: (sppasTier) tier of phonemes
        :param from_p: (int) index of the first phoneme to be syllabified
        :param to_p: (int) index of the last phoneme to be syllabified
        :param syllables: (sppasTier) tier in which syllables are appended

        """
        # typed content of the best tag of each phoneme of the range
        phon_seq = [
            phon_ann.get_best_tag().get_typed_content()
            for phon_ann in phonemes[from_p:to_p + 1]
        ]

        # each syllable is a pair of indexes relative to phon_seq
        boundaries = self.__syllabifier.annotate(phon_seq)

        for syll in boundaries:
            first, last = syll

            # indexes are shifted by from_p to address the phonemes tier
            lowest = phonemes[first + from_p].get_lowest_localization().copy()
            highest = phonemes[last + from_p].get_highest_localization().copy()
            where = sppasLocation(sppasInterval(lowest, highest))

            # the label is the phonetized form of this single syllable
            phonetized = Syllabifier.phonetize_syllables(phon_seq, [syll])
            what = sppasLabel(sppasTag(phonetized))

            syllables.create_annotation(where, what)
Beispiel #3
0
    def __add_repetition(repetition, spk1_tier, spk2_tier, start_idx1,
                         start_idx2, src_tier, echo_tier):
        """Store an other-repetition - its source and its echos - in tiers.

        The source is labelled "S<n>" and each echo "R<n>", where <n> is the
        number of annotations already in src_tier, plus one.

        :param repetition: (DataRepetition)
        :param spk1_tier: (Tier) The tier of speaker 1 (to detect sources)
        :param spk2_tier: (Tier) The tier of speaker 2 (to detect echos)
        :param start_idx1: start index of the interval in spk1_tier
        :param start_idx2: start index of the interval in spk2_tier
        :param src_tier: (Tier) The resulting tier with sources
        :param echo_tier: (Tier) The resulting tier with echos
        :returns: (bool) False if the source could not be added to src_tier
        (TierAddError), True otherwise

        """
        rank = len(src_tier) + 1

        # Source: indexes are relative to start_idx1 in the speaker 1 tier
        first, last = repetition.get_source()
        lowest = spk1_tier[start_idx1 + first].get_lowest_localization()
        highest = spk1_tier[start_idx1 + last].get_highest_localization()
        span = sppasInterval(lowest.copy(), highest.copy())
        try:
            source = src_tier.create_annotation(
                sppasLocation(span),
                sppasLabel(sppasTag("S" + str(rank))))
            src_id = source.get_meta('id')
        except TierAddError:
            return False

        # Echos: indexes are relative to start_idx2 in the speaker 2 tier
        for (first, last) in repetition.get_echos():
            lowest = spk2_tier[start_idx2 + first].get_lowest_localization()
            highest = spk2_tier[start_idx2 + last].get_highest_localization()
            span = sppasInterval(lowest.copy(), highest.copy())
            echo_label = sppasLabel(sppasTag("R" + str(rank)))
            try:
                echo = echo_tier.create_annotation(sppasLocation(span),
                                                   echo_label)
                echo.set_meta('is_other_repetition_of', src_id)
            except TierAddError:
                # The echo was not added: its label is appended to the
                # first annotation found at the same place, if any.
                found = echo_tier.find(lowest, highest)
                if len(found) > 0:
                    found[0].append_label(echo_label)

        return True
Beispiel #4
0
    def __add_repetition(repetition, spk_tier, start_idx, src_tier, echo_tier):
        """Add a repetition - source and echos - in tiers.

        The source is labelled "S<n>" and each echo "R<n>", where <n> is the
        number of annotations already in src_tier, plus one. Each echo
        annotation stores the identifier of its source annotation in its
        'is_self_repetition_of' metadata.

        :param repetition: (DataRepetition)
        :param spk_tier: (sppasTier) The tier of the speaker (to detect sources)
        :param start_idx: (int) start index of the interval in spk_tier
        :param src_tier: (sppasTier) The resulting tier with sources
        :param echo_tier: (sppasTier) The resulting tier with echos
        :returns: (bool) False if the source annotation could not be created
        in src_tier, True otherwise

        """
        index = len(src_tier)

        # Source: indexes are relative to start_idx in the speaker tier
        s, e = repetition.get_source()
        src_begin = spk_tier[start_idx + s].get_lowest_localization()
        src_end = spk_tier[start_idx + e].get_highest_localization()
        time = sppasInterval(src_begin.copy(), src_end.copy())
        try:
            a = src_tier.create_annotation(
                sppasLocation(time),
                sppasLabel(sppasTag("S" + str(index + 1))))
            src_id = a.get_meta('id')
        except Exception:
            # Best effort: any failure to add the source cancels the whole
            # repetition. Unlike a bare "except:", KeyboardInterrupt and
            # SystemExit are not swallowed.
            return False

        # Echos
        for (s, e) in repetition.get_echos():
            rep_begin = spk_tier[start_idx + s].get_lowest_localization()
            rep_end = spk_tier[start_idx + e].get_highest_localization()
            time = sppasInterval(rep_begin.copy(), rep_end.copy())
            a = echo_tier.create_annotation(
                sppasLocation(time),
                sppasLabel(sppasTag("R" + str(index + 1))))
            a.set_meta('is_self_repetition_of', src_id)

        return True
Beispiel #5
0
    def make_stop_words(self, tier):
        """Create a tier of booleans telling which tokens are stop-words.

        Annotations whose serialized labels are one of symbols.all are
        ignored: they have no counterpart in the result.

        :param tier: (sppasTier) Time-aligned tokens.
        :returns: (sppasTier) the new tier, named 'StopWord'

        """
        result = sppasTier('StopWord')

        for annotation in tier:
            word = annotation.serialize_labels()
            if word in symbols.all:
                continue

            is_stop = self._stop_words.is_in(word)
            where = annotation.get_location().copy()
            what = sppasLabel(sppasTag(is_stop, tag_type="bool"))
            result.create_annotation(where, what)

        return result
Beispiel #6
0
    def anchors_to_tier(anchors):
        """Create a tier of points from a list of anchors.

        The x value of an anchor is expressed in frames; it is converted
        into seconds, each frame lasting 10ms. The y value is stored as a
        tag of type "float".

        :param anchors: (List of Anchor)
        :returns: (sppasTier) the new tier, named 'Momel'

        """
        result = sppasTier('Momel')

        for target in anchors:
            # 0.005 is the 2nd argument given to each sppasPoint
            # (presumably its radius - to be confirmed)
            where = sppasLocation(sppasPoint(target.x * 0.01, 0.005))
            what = sppasLabel(sppasTag(target.y, "float"))
            result.create_annotation(where, what)

        return result
Beispiel #7
0
    def convert(self, tier):
        """Phonetize annotations of a tokenized tier.

        Each label of each annotation results in one label, whose
        alternative tags are the distinct pronunciation variants of all
        the tags of the source label. Variants are kept in the order they
        are produced, so that the result is the same from one run to
        another.

        :param tier: (Tier) the ortho transcription previously tokenized.
        :returns: (Tier) phonetized tier with name "Phones"
        :raises: IOError if tier is None
        :raises: EmptyInputError if tier is empty

        """
        if tier is None:
            raise IOError('No given tier.')
        if tier.is_empty() is True:
            raise EmptyInputError(name=tier.get_name())

        phones_tier = sppasTier("Phones")
        for i, ann in enumerate(tier):
            self.logfile.print_message(
                (info(1220, "annotations")).format(number=i + 1), indent=1)

            location = ann.get_location().copy()
            labels = list()

            # Phonetize all labels of the tokenized transcription
            for label in ann.get_labels():

                phonetizations = list()
                for text, _score in label:
                    if text.is_pause() or text.is_silence():
                        # It's in case the pronunciation dictionary
                        # were not properly fixed.
                        phonetizations.append(SIL)

                    elif text.is_empty() is False:
                        phones = self._phonetize(text.get_content())
                        for p in phones:
                            phonetizations.extend(p.split(separators.variants))

                # New in SPPAS 1.9.6.
                #  - The result is a sequence of labels.
                #  - Variants are alternative tags.
                # Duplicates are removed but the first-seen order is kept:
                # iterating over a set of strings gives an order which can
                # change at each run of the program.
                seen = set()
                tags = list()
                for variant in phonetizations:
                    if variant not in seen:
                        seen.add(variant)
                        tags.append(sppasTag(variant))
                labels.append(sppasLabel(tags))

            phones_tier.create_annotation(location, labels)

        return phones_tier
Beispiel #8
0
    def make_classes(self, syllables):
        """Create the tier with the classes of the given syllables.

        The name of the given tier is stored in the
        'syllabification_classes_of_tier' metadata of the result.

        :param syllables: (sppasTier)
        :returns: (sppasTier) the new tier, named "SyllClassAlign"

        """
        result = sppasTier("SyllClassAlign")
        result.set_meta('syllabification_classes_of_tier',
                        syllables.get_name())

        for annotation in syllables:
            where = annotation.get_location().copy()
            # the classes are estimated from the best tag only
            phonetized = annotation.get_best_tag().get_typed_content()
            syll_class = self.__syllabifier.classes_phonetized(phonetized)
            result.create_annotation(where, sppasLabel(sppasTag(syll_class)))

        return result
Beispiel #9
0
    def make_word_strain(self, tier):
        """Create a tier in which tokens are replaced by their strain.

        A token without an entry in the word strain is kept as it is.
        If the word strain is empty, the given tier itself is returned.

        :param tier: (sppasTier) Time-aligned tokens.
        :returns: (sppasTier) the given tier, or a new one named
        'TokenStrain'

        """
        if len(self._word_strain) == 0:
            return tier

        self.logfile.print_message("Words strain enabled.", indent=1, status=2)

        result = sppasTier('TokenStrain')
        for annotation in tier:
            word = annotation.serialize_labels()
            strained = self._word_strain.get(word, word)
            where = annotation.get_location().copy()
            result.create_annotation(where, sppasLabel(sppasTag(strained)))

        return result
Beispiel #10
0
    def tones_to_tier(tones, anchors_tier):
        """Create a tier from the INTSINT tones.

        Each tone is the label of an annotation located at a copy of the
        location of the anchor at the same position.

        :param tones: (list)
        :param anchors_tier: (sppasTier)
        :returns: (sppasTier) the new tier, named "INTSINT"
        :raises: AnnDataEqError if there are not as many tones as anchors

        """
        nb_tones = len(tones)
        nb_anchors = len(anchors_tier)
        if nb_tones != nb_anchors:
            raise AnnDataEqError("tones:" + str(nb_tones),
                                 "anchors:" + str(nb_anchors))

        result = sppasTier("INTSINT")
        for tone, anchor in zip(tones, anchors_tier):
            what = sppasLabel(sppasTag(tone))
            where = anchor.get_location().copy()
            result.create_annotation(where, what)

        return result
Beispiel #11
0
    # Append in new tier
    # The annotation covers [b, e]; 0.0001 is the 2nd argument given to each
    # sppasPoint (presumably its radius - to be confirmed).
    ti = sppasInterval(sppasPoint(b, 0.0001), sppasPoint(e, 0.0001))
    if len(texts) > 1:
        # Several texts: they are joined with ';', except if at least one of
        # them is blank, in which case the resulting text is empty.
        missing = False
        for t in texts:
            if len(t.strip()) == 0:
                # missing annotation label...
                missing = True
        if missing is True:
            text = ""
        else:
            text = ";".join(texts)
    else:
        # A single text: used as it is, converted to a string.
        text = str(texts[0])
    behavior_tier.create_annotation(sppasLocation(ti),
                                    sppasLabel(sppasTag(text)))

# ----------------------------------------------------------------------------

# Create the "Synchronicity" tier from the non-empty labels of behavior_tier
synchro_tier = trs_input.create_tier("Synchronicity")
for ann in behavior_tier:
    text = ann.serialize_labels()
    if len(text) > 0:
        # NOTE(review): values[1] requires at least one ';' in the text, but
        # a label made of a single text (see above) has none - confirm that
        # texts always contains at least two items.
        values = text.split(';')
        v1 = values[0].strip()
        v2 = values[1].strip()
        if v1 == "0" or v2 == "0":
            # v is -1 if both values are "0", and 0 if only one of them is.
            if v1 == "0" and v2 == "0":
                v = -1
            else:
                v = 0
Beispiel #12
0
                if is_silence(h) is False:
                    # the middle of the hyp must be inside the ref
                    # or the contrary!
                    hb, he, hl = get_ann_infos(h)
                    # hm and rm are the middles of [hb, he] and of [rb, re]
                    hm = hb + (he-hb)/2.
                    rm = rb + (re-rb)/2.
                    if rb < hm < re or hb < rm < he:
                        ipus_hyp_anns.append(h)

            # the ipu of the ref does not match any ipu in the hyp.
            if len(ipus_hyp_anns) == 0:
                # this is the critical situation.
                nb_ref_not_match += 1
                logging.debug('        REF IPU: [ {:f} ; {:f} ; {:s} ] has no HYP.'
                              ''.format(rb, re, etiquette))
                # the label of the result is the string 'Missing'
                result_ann.set_labels(sppasLabel(sppasTag('Missing')))

            # the ipu of the ref is matching only one ipu in the hyp
            elif len(ipus_hyp_anns) == 1:
                # this is a success.
                nb_ref_perfect_match += 1
                # the result is assigned the labels of the matching hyp
                result_ann.set_labels(ipus_hyp_anns[0].get_labels())

            # the ipu of the ref is matching several ipus in the hyp.
            else:
                # This over-segmentation could correspond to a short-pause,
                # or a silence into a laugh.
                # This is an error but not a critical one.
                nb_ref_several_match += 1
                logging.debug('        REF IPU: [ {:f} ; {:f} ; {:s} ] has several HYPs:'
                              ''.format(rb, re, etiquette))
Beispiel #13
0
        continue
    # Annotations whose best tag is a silence are left unchanged
    if ann.get_best_tag().is_silence():
        continue

    old_label = ann.serialize_labels(separator=" ", empty="", alt=True)
    if old_label.startswith("ipu_"):
        # Remove the existing "ipu_..." prefix: only what follows the first
        # space is kept. Without any space, nothing remains.
        try:
            space = old_label.index(' ')
            old_label = old_label[space:].strip()
        except ValueError:
            old_label = ""

    if len(old_label) > 0:
        # Non-empty text: the annotation gets two labels, the new IPU
        # number then the text.
        ipu += 1
        new_labels = list()
        new_labels.append(sppasLabel(sppasTag('ipu_%d' % ipu)))
        new_labels.append(sppasLabel(sppasTag(old_label)))
        ann.set_labels(new_labels)
    else:
        # Empty text: the annotation is turned into a silence
        ann.set_labels(sppasLabel(sppasTag(SIL_ORTHO)))


# Merge continuous silences
# The tier is browsed from its last annotation to its first one.
i = len(tier)-1
while i >= 0:
    label = tier[i].serialize_labels()

    i -= 1
    # NOTE(review): c is -1 when the annotation just read was the first one,
    # so tier[c] would be the last annotation - the end of this loop is not
    # visible here, confirm it is handled.
    c = i
    while label == SIL_ORTHO:
        label = tier[c].serialize_labels()
Beispiel #14
0
# The tier of time-aligned tokens is required.
tier_token = trs_input.find('TokensAlign')
if tier_token is None:
    print("Error: can't find the tier TokensAlign.")
    sys.exit(1)

# ----------------------------------------------------------------------------
# 2. Create the expected data

new_tier = trs_input.create_tier('PhnTokAlign')

for ann_token in tier_token:

    # Phonemes found between the bounds of the token
    phonemes = tier_phon.find(ann_token.get_lowest_localization(),
                              ann_token.get_highest_localization())
    phon_texts = [phon.serialize_labels() for phon in phonemes]

    # The new annotation is at a copy of the location of the token, and its
    # label is the sequence of phonemes separated by "-"
    new_tier.create_annotation(
        ann_token.get_location().copy(),
        sppasLabel(sppasTag("-".join(phon_texts))))

trs_input.add_hierarchy_link("TimeAssociation", tier_token, new_tier)

# ----------------------------------------------------------------------------
# 3. Save new version of the file

if args.quiet is False:
    print("Override input file: {:s}".format(args.i))
parser.write(trs_input)
Beispiel #15
0
# At least two intervals are required.
if len(tier_input) < 2:
    print('The tier does not contains enough intervals.')
    sys.exit(1)

# With the "o" option, a renamed copy is filled instead of the input tier.
if not args.o:
    tier = tier_input
else:
    tier = tier_input.copy()
    tier.set_name(tier_input.get_name() + "-fill")

# ---------------------------------------------------------------------------
# Create the tag to fill empty intervals
# ---------------------------------------------------------------------------

# The type of the filler is the one of the tier; no type is given to the
# tag if the tier is none of int, float and bool.
if tier.is_int():
    tag_type = "int"
elif tier.is_float():
    tag_type = "float"
elif tier.is_bool():
    tag_type = "bool"
else:
    tag_type = None

filler = sppasTag(args.f) if tag_type is None else sppasTag(args.f, tag_type)

# If the tier has a controlled vocabulary, the filler must be one of its
# entries.
ctrl_vocab = tier.get_ctrl_vocab()
if ctrl_vocab is not None and ctrl_vocab.contains(filler) is False:
    ctrl_vocab.add(filler, description="Filler")

# ----------------------------------------------------------------------------
# Fill in
# ----------------------------------------------------------------------------