def run(self, input_file, opt_input_file=None, output_file=None):
    """Annotate the pitch anchors of a file and return the result.

    The pitch-anchors tier of the first input file is converted to
    anchors, annotated into tones, and turned back into a tier which is
    stored in a new transcription.

    :param input_file: (list of str) momel anchors; only the first
        entry is read
    :param opt_input_file: (list of str) ignored
    :param output_file: (str) the output file name; nothing is written
        when it is None
    :returns: (sppasTranscription)

    """
    source_name = input_file[0]

    # Load the input and pick the tier holding the pitch anchors.
    anchors_tier = sppasFindTier.pitch_anchors(sppasRW(source_name).read())

    # Anchors -> tones -> tier.
    anchors = sppasIntsint.tier_to_anchors(anchors_tier)
    annotated = self.__intsint.annotate(anchors)
    result_tier = sppasIntsint.tones_to_tier(annotated, anchors_tier)

    # Wrap the resulting tier and remember where it comes from.
    result = sppasTranscription(self.name)
    result.append(result_tier)
    result.set_meta('intsint_result_of', source_name)

    # Writing is optional: without an output name, only return the result.
    if output_file is None:
        return result

    sppasRW(output_file).write(result)
    return result
def run(self, input_file, opt_input_file=None, output_file=None):
    """Search for anchors in the pitch values of a file.

    The number of anchors found is reported in the log file, and the
    anchors tier is returned inside a new transcription.

    :param input_file: (list of str) pitch values; only the first entry
        is used
    :param opt_input_file: (list of str) ignored
    :param output_file: (str) the output file name; nothing is written
        when it is None
    :returns: (sppasTranscription)

    """
    source_name = input_file[0]

    # Pitch values first, then the anchors estimated from them.
    pitch_values = self.fix_pitch(source_name)
    anchors = self.convert(pitch_values)

    message = str(len(anchors)) + " anchors found."
    self.logfile.print_message(message, indent=2, status=annots.info)

    # Wrap the anchors tier and remember where it comes from.
    result = sppasTranscription(self.name)
    result.append(anchors)
    result.set_meta('annotation_result_of', source_name)

    # Writing is optional: without an output name, only return the result.
    if output_file is None:
        return result

    sppasRW(output_file).write(result)
    return result
def run(self, input_file, opt_input_file=None, output_file=None):
    """Detect the repetitions between two files of time-aligned tokens.

    The input must hold 2 file names: the main (source) speaker first,
    then the echoing speaker.

    :param input_file: (list of str) time-aligned tokens
    :param opt_input_file: (list of str) ignored
    :param output_file: (str) the output file name; nothing is written
        when it is None
    :returns: (sppasTranscription)
    :raises: EmptyOutputError if an output file is requested but the
        result holds no tier

    """
    def load_strain_tier(filename, suffix):
        # Read a file, take its aligned tokens, apply the word strain and
        # tag the tier name with the role of the speaker.
        tokens = sppasFindTier.aligned_tokens(sppasRW(filename).read())
        strained = self.make_word_strain(tokens)
        strained.set_name(strained.get_name() + suffix)
        return strained

    self.print_options()
    self.print_diagnosis(input_file[0])
    self.print_diagnosis(input_file[1])
    src_name, echo_name = input_file[0], input_file[1]

    # One tier for each speaker, the source one first.
    strain_src = load_strain_tier(src_name, "-source")
    strain_echo = load_strain_tier(echo_name, "-echo")

    # Repetition Automatic Detection
    detected = self.other_detection(strain_src, strain_echo)
    (src_tier, echo_tier) = detected

    # Fill in the result: metadata first, then the tiers.
    result = sppasTranscription(self.name)
    result.set_meta('other_repetition_result_of_src', src_name)
    result.set_meta('other_repetition_result_of_echo', echo_name)

    if len(self._word_strain) > 0:
        result.append(strain_src)
    if self._options['stopwords'] is True:
        result.append(self.make_stop_words(strain_src))
    result.append(src_tier)
    result.append(echo_tier)
    if len(self._word_strain) > 0:
        result.append(strain_echo)

    # Writing is optional: without an output name, only return the result.
    if output_file is None:
        return result

    # An empty result is never written.
    if len(result) == 0:
        raise EmptyOutputError

    sppasRW(output_file).write(result)
    self.print_filename(output_file)
    return result
def run(self, input_file, opt_input_file=None, output_file=None):
    """Syllabify the time-aligned phonemes of a file.

    Depending on the options, the phonemes tier is syllabified as a
    whole ('usesphons') and/or inside the intervals of the tier named
    by the 'tiername' option ('usesintervals'). With 'createclasses',
    a tier of classes is added after each syllables tier.

    :param input_file: (list of str) time-aligned phonemes; only the
        first entry is read
    :param opt_input_file: (list of str) ignored
    :param output_file: (str) the output file name; nothing is written
        when it is None
    :returns: (sppasTranscription)
    :raises: EmptyOutputError if an output file is requested but the
        result holds no tier

    """
    source_name = input_file[0]

    # Load the input and pick the tier of phonemes.
    source_trs = sppasRW(source_name).read()
    phones_tier = sppasFindTier.aligned_phones(source_trs)

    result = sppasTranscription(self.name)
    result.set_meta('syllabification_result_of', source_name)

    # Main tier: syllables of the whole phonemes tier.
    if self._options['usesphons'] is True:
        syllables = self.convert(phones_tier)
        result.append(syllables)
        if self._options['createclasses']:
            result.append(self.make_classes(syllables))

    # Extra tier: syllables restricted to the given intervals.
    if self._options['usesintervals'] is True:
        intervals = trs_intervals = source_trs.find(self._options['tiername'])
        if trs_intervals is not None:
            syllables_int = self.convert(phones_tier, intervals)
            syllables_int.set_name("SyllAlign-Intervals")
            syllables_int.set_meta('syllabification_used_intervals',
                                   intervals.get_name())
            result.append(syllables_int)
            if self._options['createclasses']:
                classes_int = self.make_classes(syllables_int)
                classes_int.set_name("SyllClassAlign-Intervals")
                result.append(classes_int)
        else:
            # The intervals tier is missing: warn and go on without it.
            warning_msg = (info(1264, "annotations")).format(
                tiername=self._options['tiername'])
            self.logfile.print_message(
                warning_msg, indent=2, status=annots.warning)

    # Writing is optional: without an output name, only return the result.
    if output_file is None:
        return result

    # An empty result is never written.
    if len(result) == 0:
        raise EmptyOutputError

    sppasRW(output_file).write(result)
    return result
def run(self, input_file, opt_input_file=None, output_file=None):
    """Normalize the orthographic transcription of a file.

    The conversion yields three tiers; each one is added to the result
    unless it is None. Metadata about the source file, the vocabulary
    and the language are attached to the result.

    :param input_file: (list of str) orthographic transcription; only
        the first entry is read
    :param opt_input_file: (list of str) ignored
    :param output_file: (str) the output file name; nothing is written
        when it is None
    :returns: (sppasTranscription)
    :raises: EmptyOutputError if an output file is requested but the
        result holds no tier

    """
    source_name = input_file[0]

    # Load the input and pick the tier with the transcription.
    ortho_tier = sppasFindTier.transcription(sppasRW(source_name).read())

    # Tokenize: the three results are kept in this order.
    faked_tokens, std_tokens, custom_tokens = self.convert(ortho_tier)

    result = sppasTranscription(self.name)
    for tokens_tier in (faked_tokens, std_tokens, custom_tokens):
        if tokens_tier is not None:
            result.append(tokens_tier)

    # Where the result comes from and which resources were used.
    result.set_meta('text_normalization_result_of', source_name)
    result.set_meta('text_normalization_vocab',
                    self.__normalizer.get_vocab_filename())

    # Language description.
    result.set_meta('language_iso', "iso639-3")
    result.set_meta('language_code_0', self.__normalizer.lang)
    result.set_meta('language_name_0', "Undetermined")
    result.set_meta('language_url_0',
                    "https://iso639-3.sil.org/code/" + self.__normalizer.lang)

    # Writing is optional: without an output name, only return the result.
    if output_file is None:
        return result

    # An empty result is never written.
    if len(result) == 0:
        raise EmptyOutputError

    sppasRW(output_file).write(result)
    return result
def run(self, input_file, opt_input_file=None, output_file=None):
    """Detect the self-repetitions in the time-aligned tokens of a file.

    :param input_file: (list of str) time-aligned tokens; only the first
        entry is read
    :param opt_input_file: (list of str) ignored
    :param output_file: (str) the output file name; nothing is written
        when it is None
    :returns: (sppasTranscription)
    :raises: EmptyOutputError if an output file is requested but the
        result holds no tier

    """
    source_name = input_file[0]

    # Load the input, pick the aligned tokens and apply the word strain.
    tokens = sppasFindTier.aligned_tokens(sppasRW(source_name).read())
    strained = self.make_word_strain(tokens)

    # Repetition Automatic Detection
    detected = self.self_detection(strained)
    (src_tier, echo_tier) = detected

    # Fill in the result: metadata first, then the tiers.
    result = sppasTranscription(self.name)
    result.set_meta('self_repetition_result_of', source_name)

    if len(self._word_strain) > 0:
        result.append(strained)
    if self._options['stopwords'] is True:
        result.append(self.make_stop_words(strained))
    result.append(src_tier)
    result.append(echo_tier)

    # Writing is optional: without an output name, only return the result.
    if output_file is None:
        return result

    # An empty result is never written.
    if len(result) == 0:
        raise EmptyOutputError

    sppasRW(output_file).write(result)
    return result
def run(self, input_file, opt_input_file=None, output_file=None):
    """Phonetize the normalized text of a file.

    The tokenization tier is searched with the "std" pattern when the
    'usestdtokens' option is True, and with an empty pattern otherwise.

    :param input_file: (list of str) normalized text; only the first
        entry is read
    :param opt_input_file: (list of str) ignored
    :param output_file: (str) the output file name; nothing is written
        when it is None
    :returns: (sppasTranscription)
    :raises: EmptyOutputError if an output file is requested but the
        result holds no tier

    """
    source_name = input_file[0]

    # Which tokenization tier is expected.
    pattern = "std" if self._options['usestdtokens'] is True else ""

    # Load the input and pick the tier of tokens.
    source_trs = sppasRW(source_name).read()
    tokens_tier = sppasFindTier.tokenization(source_trs, pattern)

    # Phonetize; the tier is kept only if there is one.
    phones_tier = self.convert(tokens_tier)

    result = sppasTranscription(self.name)
    if phones_tier is not None:
        result.append(phones_tier)

    # Where the result comes from and which dictionary was used.
    result.set_meta('text_phonetization_result_of', source_name)
    result.set_meta('text_phonetization_dict',
                    self.__phonetizer.get_dict_filename())

    # Writing is optional: without an output name, only return the result.
    if output_file is None:
        return result

    # An empty result is never written.
    if len(result) == 0:
        raise EmptyOutputError

    sppasRW(output_file).write(result)
    return result
logging.info(" * Files: {:s} {:s}".format(fr, fh)) ref_tier, hyp_tier = get_tiers(fr, fh, idxref_tier, idxhyp_tier) if ref_tier is None: logging.error("No tier with IPUs found in reference file. Nothing to do. ") continue if hyp_tier is None: logging.error("No tier with IPUs found in hypotheses. Nothing to do. ") continue files_ok.append((f[0], f[1])) ref_tier.set_radius(0.001) hyp_tier.set_radius(0.001) # transcription to store the results of this file trs = sppasTranscription("eval-searchforipus") # ---------------------------------------------------------------------------- # Number of ipus in ref and hyp nb_ipus_ref = get_nb_ipus(ref_tier) nb_ipus_ref_total += nb_ipus_ref logging.info(' ==> Number of IPUs in ref: {:d}'.format(nb_ipus_ref)) nb_ipus_hyp = get_nb_ipus(hyp_tier) logging.info(' ==> Number of IPUs in hyp: {:d}'.format(nb_ipus_hyp)) nb_ipus_hyp_total += nb_ipus_hyp # ---------------------------------------------------------------------------- # Match ipus of ref in hyp (add, merge)
"(i.e. with -palign in its name).") sys.exit(1) # read to check data content # -------------------------- parser = sppasRW(filename) trs_input = parser.read(filename) tier = trs_input.find("TokensAlign", case_sensitive=False) if tier is None: print("ERROR: A tier with name TokensAlign is required.") sys.exit(1) # write as textgrid # ----------------- if fext.lower().endswith("textgrid") is False: trs = sppasTranscription(name="TokensAlign") trs.append(tier) filename = fname + ".TextGrid" parser.set_filename(filename) parser.write(trs) # --------------------------------------------------------------------------- # Call MarsaTag # --------------------------------------------------------------------------- command = 'java -Xms300M -Xmx580M -Dortolang.home="' + args.m + '"' command += ' -jar "' + MARSATAG + '" ' command += ' --cli ' command += ' -tier TokensAlign ' command += ' -reader praat-textgrid ' command += ' -encoding UTF8 '
# Select the log level from the command-line flags: the quiet level when
# args.quiet is set, 0 when args.debug is set, the configured level
# otherwise. Then set up the logging stream with it.
with sppasAppConfig() as cg:
    if not args.quiet:
        if args.debug:
            log_level = 0
        else:
            log_level = cg.log_level
    else:
        log_level = cg.quiet_log_level
    lgs = sppasLogSetup(log_level)
    lgs.stream_handler()

# -----------------------------------------------------------------------
# Read
# -----------------------------------------------------------------------

trs_output = sppasTranscription("Merged")

for file_idx, trs_input_file in enumerate(args.i):

    # Report the file of the current iteration. args.i is the collection
    # being iterated: formatting it as a whole with {:s} raises a
    # TypeError when it is a list.
    logging.info("Read {:s}".format(trs_input_file))

    # Time the reading of the file.
    start_time = time.time()
    parser = sppasRW(trs_input_file)
    trs_input = parser.read()
    end_time = time.time()

    # General information
    # -------------------
    logging.debug("Elapsed time for reading: {:f} seconds"
                  "".format(end_time - start_time))
    # Serialized form of the transcription that was just read.
    pickle_string = pickle.dumps(trs_input)
logging.debug("Memory usage of the transcription: {:d} bytes" "".format(sys.getsizeof(pickle_string))) # ----------------------------------------------------------------------- # Select tiers # ----------------------------------------------------------------------- # Take all tiers or specified tiers tier_numbers = [] if not args.t and not args.n: tier_numbers = range(1, (len(trs_input) + 1)) elif args.n: tier_numbers = args.n # Select tiers to create output trs_output = sppasTranscription("Converted") # Add selected tiers into output for i in tier_numbers: if i > 0: idx = i - 1 elif i < 0: idx = i else: idx = len(trs_input) if idx < len(trs_input): trs_output.append(trs_input[idx]) logging.info(" - Tier {:d}: {:s}. Selected." "".format(i, trs_input[idx].get_name())) else: logging.error(" - Tier {:d}: Wrong tier number. Ignored"
if not args.quiet: log_level = cg.log_level else: log_level = cg.quiet_log_level lgs = sppasLogSetup(log_level) lgs.stream_handler() # ---------------------------------------------------------------------------- # Read logging.info("Read input: {:s}".format(args.i)) parser = sppasRW(args.i) trs_input = parser.read() trs_out = sppasTranscription() # ---------------------------------------------------------------------------- # Transform the PhonAlign tier to a Phonetization tier try: align_tier = sppasFindTier.aligned_phones(trs_input) logging.info("PhonAlign tier found.") phon_tier = unalign(align_tier) phon_tier.set_name("Phones") trs_out.append(phon_tier) except IOError: logging.error("PhonAlign tier not found.") # ---------------------------------------------------------------------------- # Transform the TokensAlign tier to a Tokenization tier