def run(self, input_filename, output_filename=None):
    """Perform the TGA estimation process on a file.

    :param input_filename: (str) Name of the input file with the aligned
        syllables
    :param output_filename: (str) Name of the resulting file with TGA.
        Nothing is written if it is None.
    :returns: (sppasTranscription) the value returned by convert()
    :raises: EmptyOutputError if a file was requested but the result
        contains nothing to save

    """
    self.print_filename(input_filename)
    self.print_options()
    self.print_diagnosis(input_filename)

    # Get the tier with the time-aligned syllables
    parser = sppasRW(input_filename)
    trs_input = parser.read()
    tier_input = sppasFindTier.aligned_syllables(trs_input)

    # Estimate TGA on the tier. The metadata is set on the object returned
    # by convert(): setting it on a transcription created beforehand is
    # useless because that object is replaced by the result of convert().
    trs_output = self.convert(tier_input)
    trs_output.set_meta('tga_result_of', input_filename)

    # Save in a file
    if output_filename is not None:
        if len(trs_output) == 0:
            raise EmptyOutputError
        parser = sppasRW(output_filename)
        parser.write(trs_output)
        self.print_filename(output_filename, status=0)

    return trs_output
def run(self, input_filename, output_filename=None):
    """Annotate the pitch anchors of a file with INTSINT.

    :param input_filename: (str) name of the input file with momel
    :param output_filename: (str) name of the file to save the INTSINT
        tier into; nothing is saved if None
    :returns: (sppasTranscription)

    """
    self.print_filename(input_filename)

    # Load the input and pick the tier of pitch anchors
    reader = sppasRW(input_filename)
    loaded = reader.read()
    anchors_tier = sppasFindTier.pitch_anchors(loaded)

    # tier -> anchors -> tones -> tier
    anchors = sppasIntsint.tier_to_anchors(anchors_tier)
    tones = self.intsint.annotate(anchors)
    intsint_tier = sppasIntsint.tones_to_tier(tones, anchors_tier)

    # Wrap the new tier into a transcription
    result = sppasTranscription(self.name)
    result.append(intsint_tier)
    result.set_meta('intsint_result_of', input_filename)

    # Nothing to save: we are done
    if output_filename is None:
        return result

    writer = sppasRW(output_filename)
    writer.write(result)
    self.print_filename(output_filename, status=0)
    return result
def run(self, input_file, opt_input_file=None, output_file=None):
    """Run the automatic annotation process on an input.

    :param input_file: (list of str) syllabification; only the first
        entry is read
    :param opt_input_file: (list of str) ignored
    :param output_file: (str) the output file name. Nothing is written
        if it is None.
    :returns: (sppasTranscription) the value returned by convert()
    :raises: EmptyOutputError if a file was requested but the result
        contains nothing to save

    """
    syll_filename = input_file[0]

    # Get the tier with the time-aligned syllables
    parser = sppasRW(syll_filename)
    trs_input = parser.read()
    tier_input = sppasFindTier.aligned_syllables(trs_input)

    # Estimate TGA on the tier. The metadata is set on the object returned
    # by convert(): setting it on a transcription created beforehand is
    # useless because that object is replaced by the result of convert().
    trs_output = self.convert(tier_input)
    trs_output.set_meta('tga_result_of', syll_filename)

    # Save in a file
    if output_file is not None:
        if len(trs_output) == 0:
            raise EmptyOutputError
        parser = sppasRW(output_file)
        parser.write(trs_output)

    return trs_output
def _merge(self):
    """Merge all the annotations of each file into a "-merge.xra" file.

    For each file returned by get_annot_files(), the file in the output
    format and the files of each step with a non-empty pattern are added
    to one transcription. It is saved only if more than one was added.
    Errors while saving are reported in the log file, not raised.

    """
    self._progress.set_header("Merge all annotations in a file")
    self._progress.update(0, "")

    # Files to work on, selected by the audio extensions
    audio_files = self.get_annot_files(
        pattern="",
        extensions=sppas.src.audiodata.aio.extensions)
    total = len(audio_files)
    output_format = self._parameters.get_output_format()

    for rank, f in enumerate(audio_files, 1):
        # Remove the extension, to append patterns to the base name
        basef = os.path.splitext(f)[0]

        self._logfile.print_message("File: " + f, indent=0)
        counter = " (" + str(rank) + "/" + str(total) + ")"
        self._progress.set_text(os.path.basename(f) + counter)

        # Collect the content of the files, in the order of the steps
        trs = sppasTranscription()
        nbfiles = self.__add_trs(trs, basef + output_format)
        for step in range(self._parameters.get_step_numbers()):
            key = self._parameters.get_step_key(step)
            pattern = self._get_instance(key).get_pattern()
            if len(pattern) == 0:
                continue
            nbfiles += self.__add_trs(trs, basef + pattern + output_format)

        # Merging makes sense only with at least two files
        if nbfiles > 1:
            try:
                meta_tier = sppasMetaInfoTier(trs).create_time_tier(
                    trs.get_min_loc().get_midpoint(),
                    trs.get_max_loc().get_midpoint())
                trs.append(meta_tier)
                writer = sppasRW(basef + "-merge.xra")
                writer.write(trs)
                self._logfile.print_message(basef + "-merge.xra",
                                            indent=1, status=0)
            except Exception as e:
                self._logfile.print_message(str(e), indent=1, status=-1)

        self._progress.set_fraction(float(rank) / float(total))
        self._logfile.print_newline()

    self._progress.update(1, "Completed.")
    self._progress.set_header("")
def run(self, input_filename, output_filename=None):
    """Syllabify the time-aligned phonemes of a file.

    :param input_filename: (str) Name of the input file with the aligned
        phonemes
    :param output_filename: (str) Name of the resulting file with
        syllabification; nothing is saved if None
    :returns: (sppasTranscription)
    :raises: EmptyOutputError if a file was requested but there is
        nothing to save

    """
    self.print_filename(input_filename)
    self.print_options()
    self.print_diagnosis(input_filename)

    # Load the input and pick the tier of phonemes
    reader = sppasRW(input_filename)
    trs_input = reader.read()
    phonemes = sppasFindTier.aligned_phones(trs_input)

    # The result to fill in
    result = sppasTranscription("Syllabification")
    result.set_meta('syllabification_result_of', input_filename)

    # Syllables of the phonemes (and their classes, if asked)
    if self._options['usesphons'] is True:
        syllables = self.convert(phonemes)
        result.append(syllables)
        if self._options['createclasses']:
            result.append(self.make_classes(syllables))

    # Syllables inside the intervals of a given tier (and their classes)
    if self._options['usesintervals'] is True:
        intervals = trs_input.find(self._options['tiername'])
        if intervals is not None:
            syll_intervals = self.convert(phonemes, intervals)
            syll_intervals.set_name("SyllAlign-Intervals")
            syll_intervals.set_meta('syllabification_used_intervals',
                                    intervals.get_name())
            result.append(syll_intervals)
            if self._options['createclasses']:
                classes = self.make_classes(syll_intervals)
                classes.set_name("SyllClassAlign-Intervals")
                result.append(classes)
        else:
            self.print_message(
                MSG_NO_TIER.format(tiername=self._options['tiername']),
                indent=2, status=WARNING_ID)

    # Save, but never an empty result
    if output_filename is not None:
        if len(result) == 0:
            raise EmptyOutputError
        writer = sppasRW(output_filename)
        writer.write(result)
        self.print_filename(output_filename, status=0)

    return result
def test_get_tier(self):
    """get_tier() fails without a tokens tier and accepts an empty one."""
    activity = sppasActivity()
    transcription = sppasTranscription()

    # The transcription has no tier at all
    self.assertRaises(IOError, activity.get_tier, transcription)

    # The expected tier exists but has no annotation
    transcription.create_tier('TokensAlign')
    found = activity.get_tier(transcription)
    self.assertEqual(len(found), 0)
def run(self, input_filename, output_filename=None):
    """Phonetize the tokens of a file.

    :param input_filename: (str) Name of the file including a tokenization
    :param output_filename: (str) Name of the resulting file with
        phonetization; nothing is saved if None
    :returns: (sppasTranscription)
    :raises: EmptyOutputError if a file was requested but there is
        nothing to save

    """
    self.print_filename(input_filename)
    self.print_options()
    self.print_diagnosis(input_filename)

    # Which tokenization to search for
    pattern = "std" if self._options['usestdtokens'] is True else ""

    # Load the input and pick the tier of tokens
    reader = sppasRW(input_filename)
    tokens = sppasFindTier.tokenization(reader.read(), pattern)

    # Phonetize
    phonemes = self.convert(tokens)

    # Wrap the new tier, if any, into a transcription
    result = sppasTranscription("Phonetization")
    if tier_is_set(phonemes):
        result.append(phonemes)
    result.set_meta('text_phonetization_result_of', input_filename)
    result.set_meta('text_phonetization_dict',
                    self.phonetizer.get_dict_filename())

    # Save, but never an empty result
    if output_filename is not None:
        if len(result) == 0:
            raise EmptyOutputError
        writer = sppasRW(output_filename)
        writer.write(result)
        self.print_filename(output_filename, status=0)

    return result


def tier_is_set(tier):
    """Return True if the given tier is not None."""
    return tier is not None
def run(self, input_file, opt_input_file=None, output_file=None):
    """Run the automatic annotation process on an audio file.

    :param input_file: (list of str) audio; only the first entry is read
    :param opt_input_file: (list of str) ignored
    :param output_file: (str) the output file name; nothing is saved
        if None
    :returns: (sppasTranscription)
    :raises: IOError if the audio does not have exactly one channel

    """
    audio_filename = input_file[0]

    # Open the audio and check its number of channels
    audio_speech = sppas.src.audiodata.aio.open(audio_filename)
    n = audio_speech.get_nchannels()
    if n != 1:
        raise IOError("An audio file with only one channel is expected. "
                      "Got {:d} channels.".format(n))

    # Work on the only channel
    channel_index = audio_speech.extract_channel(0)
    channel = audio_speech.get_channel(channel_index)
    tier = self.convert(channel)

    # Wrap the tier into a transcription
    result = sppasTranscription(self.name)
    result.set_meta('search_ipus_result_of', audio_filename)
    result.append(tier)

    # Link the tier to its audio; the mime subtype is the lower-cased
    # file extension without the dot
    extension = os.path.splitext(audio_filename)[1]
    mime = "audio/" + extension.lower()[1:]
    tier.set_media(sppasMedia(os.path.abspath(audio_filename),
                              mime_type=mime))

    # Save in a file
    if output_file is not None:
        writer = sppasRW(output_file)
        writer.write(result)

    return result
def run(self, input_file, opt_input_file=None, output_file=None):
    """Run the automatic annotation process on an audio and its text.

    :param input_file: (list of str) (audio, ortho): the audio file name
        then the file name of the raw transcription
    :param opt_input_file: (list of str) ignored
    :param output_file: (str) the output file name; nothing is saved
        if None
    :returns: (sppasTranscription) or None if convert() gave no tier

    """
    audio_name, trans_name = input_file[0], input_file[1]

    tier = self.convert(audio_name, trans_name)
    if tier is None:
        # Report the failure in the log file and give up
        self.logfile.print_message(_info(1296), indent=2, status=-1)
        return None

    # Wrap the tier into a transcription
    result = sppasTranscription(self.name)
    result.set_meta('fill_ipus_result_of', audio_name)
    result.set_meta('fill_ipus_result_of_trs', trans_name)
    result.append(tier)

    # Link the tier to its audio; the mime subtype is the lower-cased
    # file extension without the dot
    extension = os.path.splitext(audio_name)[1]
    mime = "audio/" + extension.lower()[1:]
    tier.set_media(sppasMedia(os.path.abspath(audio_name), mime_type=mime))

    # Save in a file
    if output_file is not None:
        writer = sppasRW(output_file)
        writer.write(result)

    return result
metavar="file", required=True, help='Output annotated file name') parser.add_argument("--quiet", action='store_true', help="Disable the verbosity") if len(sys.argv) <= 1: sys.argv.append('-h') args = parser.parse_args() # ---------------------------------------------------------------------------- trs_output = sppasTranscription("SPPAS Merge") for trs_input_file in args.i: if not args.quiet: print("Read input annotated file:") parser = sppasRW(trs_input_file) trs_input = parser.read() # Take all tiers for i in range(len(trs_input)): if not args.quiet: sys.stdout.write(" -> Tier {:d}: ".format(i + 1)) trs_output.append(trs_input[i]) if not args.quiet: print(" [ OK ]")
# Name of the file to save the selected tiers into
output_filename = 'F_F_B003-P9-selection.TextGrid'

# ----------------------------------------------------------------------------
# Main
# ----------------------------------------------------------------------------

# Read the input file.
parser = sppasRW(filename)
print("Read the file {:s}".format(filename))
trs = parser.read()
print(" Number of tiers: {:d}.".format(len(trs)))

# The selected tiers are gathered into a new Transcription.
new_trs = sppasTranscription("Selected")

# Search for each expected tier; its name is compared case-insensitively.
for name in tier_names:
    tier = trs.find(name, case_sensitive=False)
    if tier is None:
        print(" - Error: the file does not contain a tier with name {:s}".format(name))
    else:
        new_trs.append(tier)
        print(" - Tier {:s} successfully added.".format(tier.get_name()))

# Write the selection, with the same parser but another file name.
parser.set_filename(output_filename)
parser.write(new_trs)
if os.path.exists(output_filename):
    print("The file {:s} was successfully saved.".format(output_filename))
table = os.path.join(os.path.dirname(PROGRAM), "sampa2praat.repl") else: print('Converted with standard-IPA mapping table.') table = os.path.join(os.path.dirname(PROGRAM), 'sampa2ipa.repl') # load table mapping = sppasMappingTier(table) mapping.set_reverse(False) # from sampa to ipa direction mapping.set_keep_miss(True) # keep unknown entries as given mapping.set_miss_symbol("") # not used! mapping.set_delimiters([]) # will use longest matching # ---------------------------------------------------------------------------- # Convert input file trs = sppasTranscription(name=trs_input.get_name()+"-IPA") for n in args.n.split(','): print(" -> Tier {:s}:".format(n)) tier = trs_input.find(n, case_sensitive=False) if tier is not None: new_tier = mapping.map_tier(tier) new_tier.set_name(n+"-IPA") trs.append(new_tier) else: print(" [IGNORED] Wrong tier name.") # ---------------------------------------------------------------------------- # Write converted tiers if len(trs) == 0:
def run(self, input_file, opt_input_file=None, output_file=None):
    """Run the automatic annotation process on an input.

    The working directory given by fix_workingdir() is removed before
    returning or raising if the 'clean' option is True, whatever the
    step that failed.

    :param input_file: (list of str) (audio, phonemes)
    :param opt_input_file: (list of str) (tokens). Optional: if the
        tokens can't be read, a warning is logged and the alignment is
        performed without them.
    :param output_file: (str) the output file name; nothing is saved
        if None
    :returns: (sppasTranscription)
    :raises: NoInputError if no tier with the phonetization is found.
        Any exception raised while aligning or saving is re-raised.

    """
    input_audio_filename = input_file[0]
    input_phon_filename = input_file[1]

    # Get the tier to be time-aligned
    parser = sppasRW(input_phon_filename)
    trs_input = parser.read()
    phon_tier = sppasFindTier.phonetization(trs_input)
    if phon_tier is None:
        raise NoInputError

    # Get the optional tokens. Any failure (no optional file given, file
    # not readable, tier not found...) only disables the tokens, but
    # KeyboardInterrupt/SystemExit are not swallowed anymore.
    try:
        parser = sppasRW(opt_input_file[0])
        trs_input_tok = parser.read()
        tok_tier = sppasFindTier.tokenization(trs_input_tok, "std")
    except Exception:
        tok_tier = None
        self.logfile.print_message(MSG_TOKENS_DISABLED,
                                   indent=2, status=annots.warning)

    # Prepare data
    workdir = sppasAlign.fix_workingdir(input_audio_filename)
    if self._options['clean'] is False:
        # The directory is kept: tell the user where it is
        self.logfile.print_message(MSG_WORKDIR.format(dirname=workdir),
                                   indent=3, status=None)

    # Set media: the mime subtype is the lower-cased file extension
    extm = os.path.splitext(input_audio_filename)[1].lower()[1:]
    media = sppasMedia(input_audio_filename, mime_type="audio/" + extm)

    try:
        # Processing...
        try:
            tier_phn, tier_tok, tier_pron = self.convert(
                phon_tier, tok_tier, input_audio_filename, workdir)
            tier_phn.set_media(media)

            trs_output = sppasTranscription(self.name)
            trs_output.append(tier_phn)

            if tier_tok is not None:
                tier_tok.set_media(media)
                trs_output.append(tier_tok)
                # A missing hierarchy link is not fatal: log and go on
                try:
                    trs_output.add_hierarchy_link(
                        "TimeAlignment", tier_phn, tier_tok)
                except Exception as e:
                    logging.error('No hierarchy was created between '
                                  'phonemes and tokens: %s', e)

            if tier_pron is not None:
                tier_pron.set_media(media)
                trs_output.append(tier_pron)
                try:
                    if tier_tok is not None:
                        trs_output.add_hierarchy_link(
                            "TimeAssociation", tier_tok, tier_pron)
                    else:
                        trs_output.add_hierarchy_link(
                            "TimeAlignment", tier_phn, tier_pron)
                except Exception as e:
                    logging.error('No hierarchy was created for the '
                                  'pronunciation tier: %s', e)

        except Exception as e:
            self.logfile.print_message(str(e))
            raise

        self.append_extra(trs_output)

        # Save results
        if output_file is not None:
            parser = sppasRW(output_file)
            parser.write(trs_output)

    finally:
        # Remove the working directory we created, on success and on
        # failure as well
        if self._options['clean'] is True:
            shutil.rmtree(workdir)

    return trs_output
idx = "{:04d}".format(i + 1) fn = os.path.join(output_dir, idx + "_" + text_ascii) if not args.quiet: print('* track {:s} from {:f} to {:f}'.format(idx, begin, end)) # create audio output extracter = channel.extract_fragment(int(begin * framerate), int(end * framerate)) audio_out = sppasAudioPCM() audio_out.append_channel(extracter) if not args.quiet: print(" - audio: " + fn + ".wav") sppas.src.audiodata.aio.save(fn + ".wav", audio_out) # create text output (copy original label as it!) trs_output = sppasTranscription("TrackSegment") tracks_tier = trs_output.create_tier(tier_name + "-" + idx) tracks_tier.create_annotation( sppasLocation( sppasInterval(sppasPoint(0.), sppasPoint(float(end - begin)))), [l.copy() for l in ann.get_labels()]) parser.set_filename(fn + args.e) if not args.quiet: print(" - text: " + fn + args.e) parser.write(trs_output) nb += 1 # just to do things... properly! if nb == 0: os.remove(output_dir)
def ExportToAnnData(self):
    """Export this transcription to anndata.sppasTranscription().

    Metadata with a value, controlled vocabularies, media, tiers with
    their annotations and hierarchy links are copied. A radius equal to
    0. is exported as None, for begin, end and point localizations.

    :returns: (anndata.sppasTranscription)

    """
    def radius_or_none(point):
        """Return the radius of the point, or None if it is 0."""
        radius = point.GetRadius()
        if radius == 0.:
            return None
        return radius

    trs = anndata.sppasTranscription(self.__name)

    # Metadata: entries without a value are not exported
    for meta_key in self.metadata:
        if self.metadata[meta_key] is not None:
            trs.set_meta(meta_key, self.metadata[meta_key])

    # Controlled vocabularies
    for ctrl_vocab in self.GetCtrlVocab():
        other_cv = anndata.sppasCtrlVocab(ctrl_vocab.id,
                                          ctrl_vocab.GetDescription())
        for entry in ctrl_vocab:
            entry_text = entry.Text
            entry_desc = entry.GetDescription()
            other_cv.add(anndata.sppasTag(entry_text), entry_desc)
        trs.add_ctrl_vocab(other_cv)

    # Media
    for media in self.GetMedia():
        other_m = anndata.sppasMedia(media.url, media.id, media.mime)
        trs.add_media(other_m)

    # Tiers, linked to the exported vocabulary/media (not to ours)
    for tier in self:
        c = tier.GetCtrlVocab()
        if c is not None:
            ctrl_vocab = trs.get_ctrl_vocab_from_name(c.GetName())
        else:
            ctrl_vocab = None

        m = tier.GetMedia()
        if m is not None:
            media = trs.get_media_from_id(m.id)
        else:
            media = None

        other_t = trs.create_tier(tier.GetName(), ctrl_vocab, media)
        is_point = tier.IsPoint()

        for ann in tier:
            text = ann.GetLabel().GetLabel()
            label = anndata.sppasLabel(anndata.sppasTag(text))
            if is_point is True:
                point = ann.GetLocation().GetPoint()
                location = anndata.sppasLocation(
                    anndata.sppasPoint(point.GetValue(),
                                       radius_or_none(point)))
            else:
                begin = ann.GetLocation().GetBegin()
                end = ann.GetLocation().GetEnd()
                # The radius of the end is normalized like the one of
                # the begin (it was left unchanged by a copy/paste error).
                location = anndata.sppasLocation(
                    anndata.sppasInterval(
                        anndata.sppasPoint(begin.GetValue(),
                                           radius_or_none(begin)),
                        anndata.sppasPoint(end.GetValue(),
                                           radius_or_none(end))))
            other_t.create_annotation(location, label)

    # Hierarchy: links are re-created between the exported tiers
    for tier in self:
        parent_tier = self._hierarchy.get_parent(tier)
        if parent_tier is not None:
            link_type = self._hierarchy.get_hierarchy_type(tier)
            new_tier = trs.find(tier.GetName())
            new_parent_tier = trs.find(parent_tier.GetName())
            trs.add_hierarchy_link(link_type, new_parent_tier, new_tier)

    return trs
def convert(self, syllables):
    """Estimate TGA on the given syllables.

    The result contains the time groups, the time segments, then one
    tier for each TGA estimation. Intercept/slope tiers are added
    according to the 'original' and 'annotationpro' options. All the
    tiers are linked to the time groups with a "TimeAssociation".

    :param syllables: (sppasTier)
    :returns: (sppasTranscription)

    """
    trs_out = sppasTranscription("TimeGroupAnalyser")

    def add_linked(new_tier):
        """Append a tier to the result and link it to the time groups."""
        trs_out.append(new_tier)
        trs_out.add_hierarchy_link("TimeAssociation", timegroups, new_tier)

    # Time groups: intervals of consecutive syllables
    timegroups = self.syllables_to_timegroups(syllables)
    timegroups.set_meta('timegroups_of_tier', syllables.get_name())
    trs_out.append(timegroups)

    # Time segments
    add_linked(self.syllables_to_timesegments(syllables))

    # Durations of the syllables, grouped into the time groups.
    # Here, an option could add durations and delta durations into
    # the transcription output.
    durations = self.timegroups_to_durations(syllables, timegroups)

    # Estimate TGA
    ts = TimeGroupAnalysis(durations)

    # Non-optional results: one tier for each estimation
    add_linked(sppasTGA.tga_to_tier(
        ts.len(), timegroups, "TGA-Occurrences", "int"))
    estimations = (
        (ts.total, "TGA-Total"),
        (ts.mean, "TGA-Mean"),
        (ts.median, "TGA-Median"),
        (ts.stdev, "TGA-StdDev"),
        (ts.nPVI, "TGA-nPVI"),
    )
    for estimate, tier_name in estimations:
        add_linked(sppasTGA.tga_to_tier(estimate(), timegroups, tier_name))

    # Optional results: intercept then slope, for each enabled option
    regressions = (
        ('original', ts.intercept_slope_original,
         'TGA-Intercept_original', 'TGA-slope_original'),
        ('annotationpro', ts.intercept_slope,
         'TGA-Intercept_timestamps', 'TGA-slope_timestamps'),
    )
    for option, estimate, intercept_name, slope_name in regressions:
        if self._options[option] is not True:
            continue
        for flag, tier_name in ((True, intercept_name), (False, slope_name)):
            reglin = sppasTGA.tga_to_tier_reglin(estimate(), timegroups, flag)
            reglin.set_name(tier_name)
            add_linked(reglin)

    return trs_out
# ---------------------------------------------------------------------------- # Select tiers if args.quiet is False: print("Tier selection:") # Take all tiers or specified tiers tier_numbers = [] if not args.t and not args.n: tier_numbers = range(1, (len(trs_input) + 1)) elif args.t: tier_numbers = args.t # Select tiers to create output trs_output = sppasTranscription(name=trs_input.get_name()) # Add selected tiers into output for i in tier_numbers: if args.quiet is False: sys.stdout.write(" - Tier " + str(i) + ": ") if i > 0: idx = i - 1 elif i < 0: idx = i else: idx = len(trs_input) if idx < len(trs_input): trs_output.append(trs_input[idx]) if args.quiet is False: print("{:s}.".format(trs_input[idx].get_name()))
help='Tier number (default: 1)') parser.add_argument("-o", metavar="file", help='Output file name') if len(sys.argv) <= 1: sys.argv.append('-h') args = parser.parse_args() # ---------------------------------------------------------------------------- # Extract parameters, load data... file_output = None if args.o: file_output = args.o trs_out = sppasTranscription("PhonemesDensity") n = 3 # n-value of the ngram w = 7 # window size parser = sppasRW(args.i) trs = parser.read() if args.t <= 0 or args.t > len(trs): print('Error: Bad tier number {:d}.\n'.format(args.t)) sys.exit(1) tier = trs[args.t - 1] if len(tier) == 0: print('Empty tier {:s}.\n'.format(tier.get_name())) sys.exit(1)
if verbose: print("{:s} has the following {:d} 'a':" "".format(tier.get_name(), len(phon_set_a))) for ann in phon_set_a: print(' - {}: {}'.format(ann.get_location().get_best(), phon_set_a.get_value(ann))) # convert the data set into a tier tier_phon_a = phon_set_a.to_tier(name="Phon-a") # Apply a filter: Extract phonemes 'a', 'A', 'E' and 'e' # ------------------------------------------------------ phon_set_a_e = f.tag(iexact=u("a")) | f.tag(iexact=u("e")) # convert the data set into a tier tier_phon_a_e = phon_set_a_e.to_tier(name="Phon-a-e") if verbose: print("{:s} has {:d} phonemes 'aeAE'.".format(tier.get_name(), len(tier_phon_a_e))) # Save # ------------------------------------------------------------- t = sppasTranscription() t.append(tier_phon_a) t.append(tier_phon_a_e) parser = sppasRW(output_filename) parser.write(t) if verbose: print("File {:s} saved".format(output_filename))