Beispiel #1
0
    def run(self, input_file, opt_input_file=None, output_file=None):
        """Annotate the pitch anchors of a file with INTSINT tones.

        The first entry of input_file is read, the tier returned by
        sppasFindTier.pitch_anchors() is turned into targets, and the tones
        estimated on these targets are stored in a new tier. This tier is
        the only one of the returned transcription.

        :param input_file: (list of str) momel anchors. Only the first
        entry is used.
        :param opt_input_file: (list of str) ignored
        :param output_file: (str) the output file name. Nothing is written
        if None.
        :returns: (sppasTranscription)

        """
        anchors_filename = input_file[0]

        # Load the file and pick the tier to be annotated.
        reader = sppasRW(anchors_filename)
        loaded = reader.read()
        anchors_tier = sppasFindTier.pitch_anchors(loaded)

        # Tier -> targets -> tones -> tier.
        anchors = sppasIntsint.tier_to_anchors(anchors_tier)
        estimated_tones = self.__intsint.annotate(anchors)
        tones_tier = sppasIntsint.tones_to_tier(estimated_tones, anchors_tier)

        # Wrap the resulting tier into a transcription.
        result = sppasTranscription(self.name)
        result.append(tones_tier)
        result.set_meta('intsint_result_of', anchors_filename)

        # Writing the result is optional.
        if output_file is not None:
            writer = sppasRW(output_file)
            writer.write(result)

        return result
Beispiel #2
0
    def run(self, input_file, opt_input_file=None, output_file=None):
        """Search for anchors in the pitch values of a file.

        The pitch values are obtained with fix_pitch() from the first entry
        of input_file, then converted into a tier of anchors. The number of
        anchors is reported in the log file.

        :param input_file: (list of str) pitch values. Only the first entry
        is used.
        :param opt_input_file: (list of str) ignored
        :param output_file: (str) the output file name. Nothing is written
        if None.
        :returns: (sppasTranscription)

        """
        pitch_filename = input_file[0]

        # Pitch values -> tier of anchors.
        pitch_values = self.fix_pitch(pitch_filename)
        anchors = self.convert(pitch_values)

        # Report how many anchors were found.
        self.logfile.print_message(
            str(len(anchors)) + " anchors found.",
            indent=2,
            status=annots.info)

        # Wrap the tier of anchors into a transcription.
        result = sppasTranscription(self.name)
        result.append(anchors)
        result.set_meta('annotation_result_of', pitch_filename)

        # Writing the result is optional.
        if output_file is not None:
            writer = sppasRW(output_file)
            writer.write(result)

        return result
Beispiel #3
0
    def run(self, input_file, opt_input_file=None, output_file=None):
        """Detect the repetitions of one speaker by another one.

        Two files are expected in input_file: the first one is the main
        (source) speaker and the second one is the echoing speaker. The
        aligned tokens of each file are turned into a tier with
        make_word_strain(), then other_detection() is applied to both.

        :param input_file: (list of str) time-aligned tokens, source
        speaker then echoing speaker
        :param opt_input_file: (list of str) ignored
        :param output_file: (str) the output file name. Nothing is written
        if None.
        :returns: (sppasTranscription)
        :raises: EmptyOutputError if an output file is requested but the
        result has no tier

        """
        self.print_options()
        self.print_diagnosis(input_file[0])
        self.print_diagnosis(input_file[1])

        def load_tokens_tier(filename, suffix):
            # Read a file, build the tier to be compared and tag its name.
            trs = sppasRW(filename).read()
            tokens = sppasFindTier.aligned_tokens(trs)
            strain_tier = self.make_word_strain(tokens)
            strain_tier.set_name(strain_tier.get_name() + suffix)
            return strain_tier

        # The source speaker is loaded first, then the echoing one.
        source_tokens = load_tokens_tier(input_file[0], "-source")
        echo_tokens = load_tokens_tier(input_file[1], "-echo")

        # Repetition Automatic Detection
        (sources, echoes) = self.other_detection(source_tokens, echo_tokens)

        # Fill in the result. The input tiers are added only if the list
        # of word strains is not empty.
        result = sppasTranscription(self.name)
        result.set_meta('other_repetition_result_of_src', input_file[0])
        result.set_meta('other_repetition_result_of_echo', input_file[1])
        if len(self._word_strain) > 0:
            result.append(source_tokens)
        if self._options['stopwords'] is True:
            result.append(self.make_stop_words(source_tokens))
        for detected in (sources, echoes):
            result.append(detected)
        if len(self._word_strain) > 0:
            result.append(echo_tokens)

        # Nothing to save: the result is only returned.
        if output_file is None:
            return result

        # An empty result is never written.
        if len(result) == 0:
            raise EmptyOutputError

        sppasRW(output_file).write(result)
        self.print_filename(output_file)
        return result
Beispiel #4
0
    def run(self, input_file, opt_input_file=None, output_file=None):
        """Syllabify the time-aligned phonemes of a file.

        Depending on the options, the phonemes are syllabified as a whole
        ('usesphons') and/or inside the intervals of the tier named by the
        'tiername' option ('usesintervals'). With 'createclasses', a tier
        of classes is added for each tier of syllables.

        :param input_file: (list of str) time-aligned phonemes. Only the
        first entry is used.
        :param opt_input_file: (list of str) ignored
        :param output_file: (str) the output file name. Nothing is written
        if None.
        :returns: (sppasTranscription)
        :raises: EmptyOutputError if an output file is requested but the
        result has no tier

        """
        phones_filename = input_file[0]
        options = self._options

        # Load the file and pick the tier to syllabify.
        trs_input = sppasRW(phones_filename).read()
        phones = sppasFindTier.aligned_phones(trs_input)

        # Prepare the result.
        result = sppasTranscription(self.name)
        result.set_meta('syllabification_result_of', phones_filename)

        # Syllabification of the whole tier.
        if options['usesphons'] is True:
            syllables = self.convert(phones)
            result.append(syllables)
            if options['createclasses']:
                result.append(self.make_classes(syllables))

        # Extra tier: syllabification restricted to the given intervals.
        if options['usesintervals'] is True:
            intervals = trs_input.find(options['tiername'])
            if intervals is not None:
                syllables_int = self.convert(phones, intervals)
                syllables_int.set_name("SyllAlign-Intervals")
                syllables_int.set_meta('syllabification_used_intervals',
                                       intervals.get_name())
                result.append(syllables_int)
                if options['createclasses']:
                    classes_int = self.make_classes(syllables_int)
                    classes_int.set_name("SyllClassAlign-Intervals")
                    result.append(classes_int)
            else:
                # The requested tier of intervals does not exist: warn only.
                self.logfile.print_message(
                    (info(1264, "annotations")).format(
                        tiername=options['tiername']),
                    indent=2,
                    status=annots.warning)

        # Nothing to save: the result is only returned.
        if output_file is None:
            return result

        # An empty result is never written.
        if len(result) == 0:
            raise EmptyOutputError

        sppasRW(output_file).write(result)
        return result
    def run(self, input_file, opt_input_file=None, output_file=None):
        """Normalize the orthographic transcription of a file.

        The transcription tier of the first entry of input_file is
        converted into up to three tiers of tokens (faked, standard and
        custom). Only those that are not None are stored in the result.
        Metadata about the vocabulary and the language are attached to the
        result.

        :param input_file: (list of str) orthographic transcription. Only
        the first entry is used.
        :param opt_input_file: (list of str) ignored
        :param output_file: (str) the output file name. Nothing is written
        if None.
        :returns: (sppasTranscription)
        :raises: EmptyOutputError if an output file is requested but the
        result has no tier

        """
        ortho_filename = input_file[0]

        # Load the file and pick the tier to tokenize.
        trs_input = sppasRW(ortho_filename).read()
        ortho_tier = sppasFindTier.transcription(trs_input)

        # Tokenize: exactly three results are expected.
        faked, standard, custom = self.convert(ortho_tier)

        # Keep the tiers that were really produced, in a fixed order.
        result = sppasTranscription(self.name)
        for produced in (faked, standard, custom):
            if produced is not None:
                result.append(produced)

        # Describe how the result was obtained.
        result.set_meta('text_normalization_result_of', ortho_filename)
        result.set_meta('text_normalization_vocab',
                        self.__normalizer.get_vocab_filename())
        result.set_meta('language_iso', "iso639-3")
        lang_code = self.__normalizer.lang
        result.set_meta('language_code_0', lang_code)
        result.set_meta('language_name_0', "Undetermined")
        result.set_meta('language_url_0',
                        "https://iso639-3.sil.org/code/" + lang_code)

        # Nothing to save: the result is only returned.
        if output_file is None:
            return result

        # An empty result is never written.
        if len(result) == 0:
            raise EmptyOutputError

        sppasRW(output_file).write(result)
        return result
Beispiel #6
0
    def run(self, input_file, opt_input_file=None, output_file=None):
        """Detect the self-repetitions of a speaker.

        The aligned tokens of the first entry of input_file are turned into
        a tier with make_word_strain(), on which self_detection() is
        applied to get a tier of sources and a tier of echoes.

        :param input_file: (list of str) time-aligned tokens. Only the
        first entry is used.
        :param opt_input_file: (list of str) ignored
        :param output_file: (str) the output file name. Nothing is written
        if None.
        :returns: (sppasTranscription)
        :raises: EmptyOutputError if an output file is requested but the
        result has no tier

        """
        tokens_filename = input_file[0]

        # Load the file and build the tier to be analyzed.
        trs_input = sppasRW(tokens_filename).read()
        tokens = sppasFindTier.aligned_tokens(trs_input)
        strain_tier = self.make_word_strain(tokens)

        # Repetition Automatic Detection
        (sources, echoes) = self.self_detection(strain_tier)

        # Fill in the result. The input tier is added only if the list of
        # word strains is not empty.
        result = sppasTranscription(self.name)
        result.set_meta('self_repetition_result_of', tokens_filename)
        if len(self._word_strain) > 0:
            result.append(strain_tier)
        if self._options['stopwords'] is True:
            result.append(self.make_stop_words(strain_tier))
        for detected in (sources, echoes):
            result.append(detected)

        # Nothing to save: the result is only returned.
        if output_file is None:
            return result

        # An empty result is never written.
        if len(result) == 0:
            raise EmptyOutputError

        sppasRW(output_file).write(result)
        return result
Beispiel #7
0
    def run(self, input_file, opt_input_file=None, output_file=None):
        """Phonetize the normalized text of a file.

        The tokenization tier of the first entry of input_file is searched
        with the "std" pattern if the 'usestdtokens' option is True, and
        with an empty pattern otherwise. The tier is then converted into a
        tier of phonetizations.

        :param input_file: (list of str) normalized text. Only the first
        entry is used.
        :param opt_input_file: (list of str) ignored
        :param output_file: (str) the output file name. Nothing is written
        if None.
        :returns: (sppasTranscription)
        :raises: EmptyOutputError if an output file is requested but the
        result has no tier

        """
        # Choose which tier of tokens is expected.
        tier_pattern = "std" if self._options['usestdtokens'] is True else ""

        # Load the file and pick the tier to be phonetized.
        tokens_filename = input_file[0]
        trs_input = sppasRW(tokens_filename).read()
        tokens = sppasFindTier.tokenization(trs_input, tier_pattern)

        # Phonetize. The result is stored only if there is one.
        phones = self.convert(tokens)
        result = sppasTranscription(self.name)
        if phones is not None:
            result.append(phones)

        # Describe how the result was obtained.
        result.set_meta('text_phonetization_result_of', tokens_filename)
        result.set_meta('text_phonetization_dict',
                        self.__phonetizer.get_dict_filename())

        # Nothing to save: the result is only returned.
        if output_file is None:
            return result

        # An empty result is never written.
        if len(result) == 0:
            raise EmptyOutputError

        sppasRW(output_file).write(result)
        return result
Beispiel #8
0
        # Body of a loop whose header is outside this excerpt: it handles one
        # (reference, hypothesis) pair of files, fr and fh.
        logging.info(" * Files: {:s} {:s}".format(fr, fh))

        # Pick one tier in each file. The pair is skipped as soon as one of
        # the two tiers is missing.
        ref_tier, hyp_tier = get_tiers(fr, fh, idxref_tier, idxhyp_tier)
        if ref_tier is None:
            logging.error("No tier with IPUs found in reference file. Nothing to do. ")
            continue
        if hyp_tier is None:
            logging.error("No tier with IPUs found in hypotheses. Nothing to do. ")
            continue
        # Remember the pairs that are really evaluated.
        # NOTE(review): 'f' is defined outside this excerpt; presumably
        # f[0]/f[1] are the same files as fr/fh -- to be confirmed.
        files_ok.append((f[0], f[1]))

        # Set the same radius to both tiers.
        # NOTE(review): presumably a tolerance of 1 ms on the time values --
        # confirm the unit against set_radius().
        ref_tier.set_radius(0.001)
        hyp_tier.set_radius(0.001)

        # Transcription to store the results of this file
        trs = sppasTranscription("eval-searchforipus")

        # ----------------------------------------------------------------------------
        # Number of IPUs in ref and hyp: counted for this file, logged, and
        # added to the running totals.

        nb_ipus_ref = get_nb_ipus(ref_tier)
        nb_ipus_ref_total += nb_ipus_ref
        logging.info('    ==> Number of IPUs in ref: {:d}'.format(nb_ipus_ref))

        nb_ipus_hyp = get_nb_ipus(hyp_tier)
        logging.info('    ==> Number of IPUs in hyp: {:d}'.format(nb_ipus_hyp))
        nb_ipus_hyp_total += nb_ipus_hyp

        # ----------------------------------------------------------------------------
        # Match IPUs of ref in hyp (add, merge)
Beispiel #9
0
          "(i.e. with -palign in its name).")
    sys.exit(1)

# Read to check data content: the input file must contain a tier named
# "TokensAlign" (the name is searched without case sensitivity).
# --------------------------
parser = sppasRW(filename)
# NOTE(review): every other call in this code base is parser.read() with no
# argument, the file name being given to the sppasRW constructor. Confirm
# what the positional argument of read() means before keeping 'filename'.
trs_input = parser.read(filename)
tier = trs_input.find("TokensAlign", case_sensitive=False)
if tier is None:
    print("ERROR: A tier with name TokensAlign is required.")
    sys.exit(1)

# Write as TextGrid: if the input is not already a TextGrid file, the
# TokensAlign tier alone is saved into "<fname>.TextGrid". From this point,
# 'filename' refers to this new file.
# -----------------
if fext.lower().endswith("textgrid") is False:
    trs = sppasTranscription(name="TokensAlign")
    trs.append(tier)
    filename = fname + ".TextGrid"
    parser.set_filename(filename)
    parser.write(trs)

# ---------------------------------------------------------------------------
# Call MarsaTag
# ---------------------------------------------------------------------------

# The command line is built piece by piece. Both the MarsaTag home directory
# (args.m) and the jar file (MARSATAG) are enclosed in double quotes.
command = 'java -Xms300M -Xmx580M -Dortolang.home="' + args.m + '"'
command += ' -jar "' + MARSATAG + '" '
command += ' --cli '
command += ' -tier TokensAlign '
command += ' -reader praat-textgrid '
command += ' -encoding UTF8 '
Beispiel #10
0
    # Fix the level of the messages to be logged, then redirect the logs to
    # the stream handler:
    #   - quiet mode: the quiet level of the application configuration;
    #   - debug mode: level 0;
    #   - otherwise: the default level of the application configuration.
    with sppasAppConfig() as cg:
        if not args.quiet:
            if args.debug:
                log_level = 0
            else:
                log_level = cg.log_level
        else:
            log_level = cg.quiet_log_level
        lgs = sppasLogSetup(log_level)
        lgs.stream_handler()

    # -----------------------------------------------------------------------
    # Read
    # -----------------------------------------------------------------------

    trs_output = sppasTranscription("Merged")

    for file_idx, trs_input_file in enumerate(args.i):

        # Log the file which is currently read. args.i is the whole
        # collection of input files: it can't be formatted with '{:s}'
        # (TypeError with a list) and it is not the relevant information.
        logging.info("Read {:s}".format(trs_input_file))

        # Read the file and measure how long it takes.
        start_time = time.time()
        parser = sppasRW(trs_input_file)
        trs_input = parser.read()
        end_time = time.time()

        # General information
        # -------------------
        logging.debug("Elapsed time for reading: {:f} seconds"
                      "".format(end_time - start_time))
        # Serialized form of the transcription, used after this excerpt.
        pickle_string = pickle.dumps(trs_input)
Beispiel #11
0
    # Size reported by sys.getsizeof() for pickle_string, which is defined
    # outside this excerpt.
    logging.debug("Memory usage of the transcription: {:d} bytes"
                  "".format(sys.getsizeof(pickle_string)))

    # -----------------------------------------------------------------------
    # Select tiers
    # -----------------------------------------------------------------------

    # Take all tiers or specified tiers. Tier numbers start at 1.
    # When args.t is given without args.n, the list remains empty here.
    # NOTE(review): args.t is presumably handled outside this excerpt.
    tier_numbers = []
    if not args.t and not args.n:
        tier_numbers = range(1, (len(trs_input) + 1))
    elif args.n:
        tier_numbers = args.n

    # Select tiers to create output
    trs_output = sppasTranscription("Converted")

    # Add selected tiers into output.
    # A tier number is turned into an index as follows:
    #   - i > 0: 1-based number, so the index is i-1;
    #   - i < 0: used as it is, i.e. an index from the end;
    #   - i = 0: invalid; the index is forced to len(trs_input) so that the
    #     test below fails and the number is reported as wrong.
    # NOTE(review): a negative number lower than -len(trs_input) passes the
    # test below; indexing trs_input would then presumably fail -- confirm.
    for i in tier_numbers:
        if i > 0:
            idx = i - 1
        elif i < 0:
            idx = i
        else:
            idx = len(trs_input)
        if idx < len(trs_input):
            trs_output.append(trs_input[idx])
            logging.info("  - Tier {:d}: {:s}. Selected."
                         "".format(i, trs_input[idx].get_name()))
        else:
            logging.error("  - Tier {:d}: Wrong tier number. Ignored"
Beispiel #12
0
    # Fix the level of the messages to be logged from 'cg', which is defined
    # outside this excerpt: its quiet level in quiet mode, its default level
    # otherwise. The logs are then redirected to the stream handler.
    if not args.quiet:
        log_level = cg.log_level
    else:
        log_level = cg.quiet_log_level
    lgs = sppasLogSetup(log_level)
    lgs.stream_handler()


# ----------------------------------------------------------------------------
# Read the input file; the tiers created below are stored into trs_out.

logging.info("Read input: {:s}".format(args.i))
parser = sppasRW(args.i)
trs_input = parser.read()

trs_out = sppasTranscription()

# ----------------------------------------------------------------------------
# Transform the PhonAlign tier to a Phonetization tier

# An IOError is logged as a missing tier and does not stop the script.
# NOTE(review): the try block also covers unalign(), set_name() and append();
# an IOError raised by any of them would be reported as "tier not found".
try:
    align_tier = sppasFindTier.aligned_phones(trs_input)
    logging.info("PhonAlign tier found.")
    phon_tier = unalign(align_tier)
    phon_tier.set_name("Phones")
    trs_out.append(phon_tier)
except IOError:
    logging.error("PhonAlign tier not found.")

# ----------------------------------------------------------------------------
# Transform the TokensAlign tier to a Tokenization tier