def __write_trs_track(track_filename, track_content, duration):
    """Write the content of one track into an annotated file.

    The written transcription holds a single tier named "Transcription"
    with one annotation: its label is built from the track content and its
    localization is the interval from 0 to the given duration.

    :param track_filename: Name of the file to write
    :param track_content: Content used to create the label of the annotation
    :param duration: Value of the end time point of the annotation

    """
    whole_track = TimeInterval(TimePoint(0.), TimePoint(duration))
    annotation = Annotation(whole_track, Label(track_content))

    transcription = Transcription()
    transcription.NewTier("Transcription").Append(annotation)

    sppas.src.annotationdata.aio.write(track_filename, transcription)
def __init__(self, name="NoName", mintime=0., maxtime=0.):
    """Create a new SegmentsIn instance.

    :param name: Forwarded as-is to Transcription.__init__
    :param mintime: Forwarded as-is to Transcription.__init__
    :param maxtime: Forwarded as-is to Transcription.__init__

    """
    Transcription.__init__(self, name, mintime, maxtime)

    # Members of this instance
    self._radius = 0.005
    self.alignerio = AlignerIO()
    self._tracknames = TrackNamesGenerator()
def __init__(self, name="NoName", mintime=0., maxtime=0.):
    """Create a new TrackSplitter instance.

    :param name: (str) Forwarded as-is to Transcription.__init__
    :param mintime: Forwarded as-is to Transcription.__init__
    :param maxtime: Forwarded as-is to Transcription.__init__

    """
    Transcription.__init__(self, name, mintime, maxtime)

    # Members of this instance
    self._aligntrack = None
    self._radius = 0.005
    self._tracknames = TrackNamesGenerator()
def __init__(self, name="AnnotationSystemDataSet"):
    """Initialize a new instance.

    :param name: (str) the name of the transcription

    """
    Transcription.__init__(self, name)

    # Private mapping, created empty here.
    self.__id_tier_map = dict()
def set_transcription(self, trs):
    """Set a new Transcription.

    When None is given, a new empty Transcription is stored instead.

    :param trs: (Transcription) Input transcription from which it's
        possible to extract IPUs.

    """
    self._trsinput = Transcription() if trs is None else trs
def run(self, input_filename1, input_filename2, output_filename):
    """Run the Repetition Automatic Detection annotation.

    Self-repetitions are searched when no second file is given;
    other-repetitions are searched otherwise. Both resulting tiers are
    saved into a transcription named "Repetitions".

    :param input_filename1: Name of the file with aligned tokens of
        speaker 1 (the source)
    :param input_filename2: Name of the file with aligned tokens of
        speaker 2 (the echo) if OR, or None for SR
    :param output_filename: Name of the file to save the result
    :raises: Exception if a tier of tokens is empty

    """
    with_echo_file = input_filename2 is not None

    # Messages for the user
    self.print_filename(input_filename1)
    self.print_options()
    self.print_diagnosis(input_filename1)
    if with_echo_file:
        self.print_diagnosis(input_filename2)
    if self.logfile is not None:
        self.logfile.print_message("Span = " + str(self._span), indent=3)
        self.logfile.print_message("Alpha = " + str(self._alpha), indent=3)

    # Get the tiers to be used
    # ---------------------------------------------------------------

    # Tokens of main speaker
    tier1 = sppasSearchTier.aligned_tokens(
        sppas.src.annotationdata.aio.read(input_filename1))
    if tier1.IsEmpty() is True:
        raise Exception("Empty tokens tier (main speaker).\n")

    # Tokens of echoing speaker (if any)
    tier2 = None
    if with_echo_file:
        tier2 = sppasSearchTier.aligned_tokens(
            sppas.src.annotationdata.aio.read(input_filename2))
        if tier2.IsEmpty() is True:
            raise Exception("Empty tokens tier (echoing speaker).\n")

    # Lemmatize input?
    if self._use_lemmatize:
        tier1 = self.lemmatize(tier1)
        if tier2 is not None:
            tier2 = self.lemmatize(tier2)

    # Repetition Automatic Detection
    # ---------------------------------------------------------------
    if tier2 is None:
        detected = self.self_detection(tier1)
    else:
        detected = self.other_detection(tier1, tier2)
    (src_tier, echo_tier) = detected

    # Save results
    # ---------------------------------------------------------------
    trs_output = Transcription("Repetitions")
    for result_tier in (src_tier, echo_tier):
        trs_output.Append(result_tier)
    sppas.src.annotationdata.aio.write(output_filename, trs_output)
def __init__(self, trs):
    """Create a new IPUsTrs instance.

    :param trs: (Transcription) Input transcription from which it's
        possible to extract IPUs. Expected tiers are:

            - first tier: the IPUs content [required]
            - second tier: the IPUs file names [optional]

    """
    super(IPUsTrs, self).__init__()

    self._trsinput = Transcription()
    # List of the content of the units (if any)
    self._units = []
    # List of file names for IPUs (if any)
    self._names = []

    self.set_transcription(trs)
def print_targets(self, targets, output_filename=None, trs=None):
    """Print the set of selected targets and/or store them into a tier.

    - If output_filename equals "STDOUT", targets are printed on the
      standard output.
    - If output_filename ends with "momel", targets are printed into
      that file.
    - If trs is given, a point tier named "Momel" is created in trs with
      one annotation per target. If output_filename also ends with
      ".pitchtier", this tier is written into that file.

    :param targets: List of targets; each one gives its time with get_x()
        and its value with get_y()
    :param output_filename: (str) "STDOUT", a file name, or None
    :param trs: (Transcription) Where to create the "Momel" tier, or None
    :returns: the created tier if trs is given, None otherwise

    """
    if output_filename is not None:
        # Compare by value: an identity test ("is") against a literal only
        # works by accident, when both strings happen to be the same object.
        if output_filename == "STDOUT":
            self.__print_tgts(targets, sys.stdout)
        elif output_filename.lower().endswith('momel') is True:
            # The context manager closes the file even if printing fails.
            with open(output_filename, "w") as output:
                self.__print_tgts(targets, output)

    if trs is None:
        return None

    # Attention: time in targets is in milliseconds!
    tier = trs.NewTier(name="Momel")
    for target in targets:
        _time = target.get_x() * (0.001 * self.PAS_TRAME)
        _label = "%d" % (target.get_y())
        try:
            tier.Append(Annotation(TimePoint(_time), Label(_label)))
        except Exception:
            # Best effort: a target that can't be added is only reported.
            if self.logfile is not None:
                self.logfile.print_message(
                    "Ignore target: time=" + str(_time) +
                    " and value=" + _label,
                    indent=2, status=3)

    if output_filename is not None and \
            output_filename.lower().endswith('.pitchtier'):
        trsp = Transcription()
        trsp.Add(tier)
        try:
            sppas.src.annotationdata.aio.write(output_filename, trsp)
        except Exception:
            # Best effort: the tier is still returned to the caller.
            if self.logfile is not None:
                self.logfile.print_message(
                    "Can't write PitchTier output file.", status=-1)

    return tier
def LoadFile(self, filename):
    """Load a file in memory and show it.

    The transcription is replaced by an empty one named "Empty" if the
    file does not exist, or named "IO-Error" if loading failed.

    :param filename: Name of an annotated file.

    """
    self._filename = filename

    if not os.path.exists(filename):
        self._transcription = Transcription("Empty")
        return

    try:
        self._transcription = sppas.src.annotationdata.aio.read(filename)
        self._dirty = False
        self._boxtitle.SetForegroundColour(FG_FILE_COLOUR)
        self.Refresh()
    except Exception as e:
        logging.info('Error loading file %s: %s', filename, str(e))
        self._transcription = Transcription("IO-Error")
class TrsList(wx.Panel):
    """Show data about transcriptions, in a panel including a list of tiers.

    :author:       Brigitte Bigi
    :organization: Laboratoire Parole et Langage, Aix-en-Provence, France
    :contact:      [email protected]
    :license:      GPL, v3
    :copyright:    Copyright (C) 2011-2018  Brigitte Bigi

    The panel is made of a title (the file name) and a list with one line
    per tier of the transcription. The colour of the title is changed
    when the transcription is modified and not saved.

    """

    def __init__(self, parent, filename, trs=None, multiple=False):
        """Create the panel and fill in the list of tiers.

        :param parent: (wx.Window) Parent window; it receives the
            PanelSelectedEvent posted when an item is clicked
        :param filename: (str) Name of the file of the transcription
        :param trs: (Transcription) Transcription to show. If None, it is
            loaded from filename.
        :param multiple: (bool) Allow to select several tiers in the list

        """
        wx.Panel.__init__(self, parent, -1, size=wx.DefaultSize)

        # initialize the GUI
        self._prefs = Preferences()
        self._filename = filename
        self._dirty = False      # the transcription was changed
        self._selected = False   # the transcription is selected
        self._protected = []     # list of the tiers that are protected (can't be modified)

        if len(filename) == 0:
            self._filename = "Empty"

        boxtitle = self._create_title()
        self.tier_list = self._create_list(multiple)

        # load the Transcription
        if trs is not None:
            self._transcription = trs
        elif len(filename) != 0:
            self.LoadFile(filename)
        else:
            # Neither a transcription nor a file: use an empty transcription
            # instead of None, which the code below can't deal with.
            self._transcription = Transcription("Empty")

        # add Transcription information in the list
        for i in range(self._transcription.GetSize()):
            self.SetTierProperties(i)
        self._checksize()

        # events
        self.Bind(wx.EVT_LIST_ITEM_SELECTED, self.OnListItemSelected, self.tier_list)
        self.Bind(wx.EVT_LIST_COL_CLICK, self.OnListItemSelected, self.tier_list)

        # layout
        sizer = wx.BoxSizer(wx.VERTICAL)
        sizer.Add(boxtitle, 0, wx.EXPAND | wx.ALL, border=4)
        sizer.Add(self.tier_list, 1, wx.EXPAND | wx.ALL, border=4)

        self.SetFont(self._prefs.GetValue('M_FONT'))
        self.SetForegroundColour(self._prefs.GetValue('M_FG_COLOUR'))
        self.SetBackgroundColour(self._prefs.GetValue('M_BG_COLOUR'))
        self._boxtitle.SetForegroundColour(FG_FILE_COLOUR)

        self.SetSizerAndFit(sizer)
        self.SetAutoLayout(True)
        self.Layout()

    # ----------------------------------------------------------------------

    def _create_title(self):
        """Create the title of the panel.

        :returns: a sizer with the "File: " text and the file name

        """
        _sizer = wx.BoxSizer(wx.HORIZONTAL)
        self._static_tx = wx.TextCtrl(self, -1, "File: ",
                                      style=wx.TE_READONLY | wx.NO_BORDER)
        self._boxtitle = wx.TextCtrl(self, -1, self._filename,
                                     style=wx.TE_READONLY | wx.NO_BORDER)
        _sizer.Add(self._static_tx, 0, wx.RIGHT, border=2)
        _sizer.Add(self._boxtitle, 1, wx.EXPAND)
        return _sizer

    # ----------------------------------------------------------------------

    def _create_list(self, multiple=False):
        """Create the list to show information of each tier of a transcription.

        :param multiple: (bool) Allow to select several items
        :returns: the list control, with its columns but without items

        """
        style = wx.LC_REPORT | wx.BORDER_NONE
        if not multiple:
            style = style | wx.LC_SINGLE_SEL
        tier_list = CheckListCtrl(self, -1, style=style)

        # Add all columns
        col_names = [" Nb ", " Name ", " Begin ", " End ", " Type ", " Size "]
        for i, n in enumerate(col_names):
            tier_list.InsertColumn(i, n)

        # Fix column width
        for i in range(len(col_names)):
            tier_list.SetColumnWidth(i, wx.LIST_AUTOSIZE_USEHEADER)
        # Enlarge column with tier name
        tier_list.SetColumnWidth(1, 140)

        return tier_list

    # ----------------------------------------------------------------------

    def SetTierProperties(self, tier_idx):
        """Display tier properties: insert a line in the list for a tier.

        If the properties can't be displayed, a line with the error
        message is inserted instead.

        :param tier_idx: (int) Index of the tier in the transcription; it
            is also the index of the line inserted in the list

        """
        try:
            tier = self._transcription[tier_idx]

            if tier.IsPoint() is True:
                tier_type = "Point"
            elif tier.IsInterval():
                tier_type = "Interval"
            elif tier.IsDisjoint():
                tier_type = "Disjoint"
            else:
                tier_type = "Unknown"

            if tier.IsEmpty() is True:
                begin = " ... "
                end = " ... "
            else:
                begin = str(tier.GetBeginValue())
                end = str(tier.GetEndValue())

            self.tier_list.InsertStringItem(tier_idx, "Tier %d" % (tier_idx + 1))
            self.tier_list.SetStringItem(tier_idx, 1, tier.GetName())
            self.tier_list.SetStringItem(tier_idx, 2, begin)
            self.tier_list.SetStringItem(tier_idx, 3, end)
            self.tier_list.SetStringItem(tier_idx, 4, tier_type)
            self.tier_list.SetStringItem(tier_idx, 5, str(tier.GetSize()))

        except Exception as e:
            self.tier_list.InsertStringItem(1, "Error: " + str(e))

    # ----------------------------------------------------------------------
    # Callbacks...
    # ----------------------------------------------------------------------

    def OnListItemSelected(self, event):
        """An item of this panel was clicked. Inform the parent.

        :param event: (wx.Event) Not used.

        """
        evt = PanelSelectedEvent(panel=self)
        evt.SetEventObject(self)
        wx.PostEvent(self.GetParent(), evt)

    # ----------------------------------------------------------------------
    # GUI
    # ----------------------------------------------------------------------

    def SetPreferences(self, prefs):
        """Set new preferences and apply its colours and font.

        :param prefs: Preferences with the values of "M_BG_COLOUR",
            "M_FG_COLOUR" and "M_FONT"

        """
        self._prefs = prefs
        self.SetBackgroundColour(self._prefs.GetValue("M_BG_COLOUR"))
        self.SetForegroundColour(self._prefs.GetValue("M_FG_COLOUR"))
        self.SetFont(self._prefs.GetValue("M_FONT"))

    # ----------------------------------------------------------------------

    def SetFont(self, font):
        """Set a new font to the panel, the title and the items of the list.

        :param font: (wx.Font)

        """
        wx.Window.SetFont(self, font)
        self.tier_list.SetFont(font)
        for i in range(self._transcription.GetSize()):
            self.tier_list.SetItemFont(i, font)
        self._static_tx.SetFont(font)
        self._boxtitle.SetFont(font)
        self.Layout()  # bigger/smaller font can impact on the layout

    # ----------------------------------------------------------------------

    def SetBackgroundColour(self, color):
        """Set background of the panel, the title and the items of the list.

        :param color: (wx.Colour)

        """
        wx.Window.SetBackgroundColour(self, color)
        self.tier_list.SetBackgroundColour(color)
        for i in range(self._transcription.GetSize()):
            self.tier_list.SetItemBackgroundColour(i, color)
        self._static_tx.SetBackgroundColour(color)
        self._boxtitle.SetBackgroundColour(color)
        self.Refresh()

    # ----------------------------------------------------------------------

    def SetForegroundColour(self, color):
        """Set foreground and items text color.

        The colour of the file name is not changed here: it is set by the
        methods which load, modify or save the transcription.

        :param color: (wx.Colour)

        """
        wx.Window.SetForegroundColour(self, color)
        self.tier_list.SetForegroundColour(color)
        for i in range(self._transcription.GetSize()):
            self.tier_list.SetItemTextColour(i, color)
        self._static_tx.SetForegroundColour(color)
        self.Refresh()

    # ----------------------------------------------------------------------
    # Functions...
    # ----------------------------------------------------------------------

    def Protect(self):
        """Fix the current list of tiers as protected: they won't be changed.

        The text colour of the protected tiers is changed in the list.

        """
        self._protected = []
        for i, t in enumerate(self._transcription):
            self._protected.append(t)
            self.tier_list.SetItemTextColour(i, wx.Colour(140, 10, 10))

    # ----------------------------------------------------------------------

    def Unprotect(self):
        """Erase the list of protected tiers."""
        self._protected = []

    # ----------------------------------------------------------------------

    def IsSelected(self, tiername, case_sensitive=False):
        """Return True if the tier is selected.

        :param tiername: (str) Name of the tier
        :param case_sensitive: (bool) Forwarded to Transcription.GetIndex
        :returns: (bool) False if no tier has this name

        """
        i = self._transcription.GetIndex(tiername, case_sensitive)
        if i != -1:
            return self.tier_list.IsSelected(i)
        return False

    # ----------------------------------------------------------------------

    def Select(self, tiername, case_sensitive=False):
        """Select the tier which name is matching.

        :param tiername: (str) Name of the tier
        :param case_sensitive: (bool) Forwarded to Transcription.GetIndex
        :returns: (bool) True if a tier was selected

        """
        i = self._transcription.GetIndex(tiername, case_sensitive)
        if i != -1:
            self.tier_list.Select(i, on=True)
            return True
        return False

    # ----------------------------------------------------------------------

    def Deselect(self):
        """Deselect all the items of the list."""
        self.tier_list.DeSelectAll()

    # ----------------------------------------------------------------------

    def Rename(self):
        """Rename the selected tier. Dialog with the user to get the new name.

        Nothing is done if no tier is selected, if several tiers are
        selected or if the selected tier is protected.

        """
        if self._transcription.GetSize() == 0:
            return

        # Get the selected tier in the list
        sellist = self.tier_list.GetFirstSelected()
        # Nothing selected
        if sellist == -1:
            return
        # Too many selected items
        if self.tier_list.GetSelectedItemCount() > 1:
            ShowInformation(self, self._prefs,
                            'Only one tier has to be checked to be renamed...',
                            style=wx.ICON_INFORMATION)
            return

        tier = self._transcription[sellist]
        if tier in self._protected:
            ShowInformation(self, self._prefs,
                            "Attempt to rename a protected tier: forbidden!",
                            style=wx.ICON_INFORMATION)
            return

        # Ask the user to enter a new name
        dlg = wx.TextEntryDialog(self,
                                 'Indicate the new tier name',
                                 'Data Roamer',
                                 'Rename a tier.')
        dlg.SetValue(self._transcription[sellist].GetName())
        if dlg.ShowModal() == wx.ID_OK:
            # Update tier name of the transcription
            tier.SetName(dlg.GetValue())
            # Update tier name of the list
            self.tier_list.SetStringItem(sellist, 1, dlg.GetValue())
            self._mark_dirty()
        dlg.Destroy()

    # ----------------------------------------------------------------------

    def Cut(self):
        """Cut the selected tier. Return the clipboard.

        :returns: a copy of the removed tier, or None if nothing was cut
            (no tier selected, several tiers selected or protected tier)

        """
        if self._transcription.GetSize() == 0:
            return

        # Get the selected tier in the list
        sellist = self.tier_list.GetFirstSelected()
        # No tier selected
        if sellist == -1:
            return
        # Too many selected items
        if self.tier_list.GetSelectedItemCount() > 1:
            ShowInformation(self, self._prefs,
                            'One tier must be checked.',
                            style=wx.ICON_INFORMATION)
            return

        # Copy the tier to the clipboard
        tier = self._transcription[sellist]
        if tier in self._protected:
            ShowInformation(self, self._prefs,
                            "Attempt to cut a protected tier: forbidden!",
                            style=wx.ICON_INFORMATION)
            return

        clipboard = tier.Copy()
        # Delete tier of the transcription
        self._transcription.Remove(sellist)
        # Delete tier of the list
        self.tier_list.DeleteItem(sellist)
        # Update tier numbers of next items in the list.
        self._renumber(sellist)

        self.Deselect()
        self._checksize()
        self._mark_dirty()

        return clipboard

    # ----------------------------------------------------------------------

    def Copy(self):
        """Return a copy of the selected tier.

        :returns: a copy of the tier, or None if no tier or several tiers
            are selected

        """
        if self._transcription.GetSize() == 0:
            return

        # Get the selected tier in the list
        sellist = self.tier_list.GetFirstSelected()
        if sellist == -1:
            return
        # Too many selected items
        if self.tier_list.GetSelectedItemCount() > 1:
            ShowInformation(self, self._prefs,
                            "One tier must be checked",
                            style=wx.ICON_INFORMATION)
            return

        # Copy the tier to the clipboard
        tier = self._transcription[sellist]
        return tier.Copy()

    # ----------------------------------------------------------------------

    def Paste(self, clipboard):
        """Paste the clipboard tier to the current page.

        The given tier itself (not a copy) is appended.

        :param clipboard: (Tier) The tier to append, or None

        """
        # Get the clipboard tier
        if clipboard is None:
            return

        # Append clipboard to the transcription
        tier = clipboard
        self.Append(tier)

        # The tier comes from another Transcription... must update infos.
        if not (tier.GetTranscription() is self._transcription):
            # parent transcription
            tier.SetTranscription(self._transcription)
            # And if CtrlVocab...
            # TODO

        self._checksize()

    # ----------------------------------------------------------------------

    def Delete(self):
        """Delete the selected tiers. Dialog with the user to confirm.

        Protected tiers are silently ignored.

        :returns: (int) the number of tiers really deleted

        """
        if self._transcription.GetSize() == 0:
            return 0

        # Get the selected tier in the list of this page
        sellist = self.tier_list.GetFirstSelected()
        if sellist == -1:
            return 0

        # Get Indexes of tiers to remove
        indexes = []
        while sellist != -1:
            indexes.append(sellist)
            sellist = self.tier_list.GetNextSelected(sellist)

        # Ask the user to confirm before deleting
        delete = 0
        message = 'Are you sure you want to definitively delete:\n' \
                  '%d tiers in %s?' % (len(indexes), self._filename)
        dlg = ShowYesNoQuestion(self, self._prefs, message)
        if dlg == wx.ID_YES:
            # From the last to the first: the indexes which are not
            # removed yet are then still valid.
            for sellist in sorted(indexes, reverse=True):
                tier = self._transcription[sellist]
                if tier in self._protected:
                    continue
                # Delete tier of the transcription
                self._transcription.Remove(sellist)
                # Delete tier of the list
                self.tier_list.DeleteItem(sellist)
                delete = delete + 1
                # Update tier numbers of next items in the list.
                self._renumber(sellist)

        # Only a real change makes the transcription "dirty".
        if delete > 0:
            self._mark_dirty()

        self._checksize()
        return delete

    # ----------------------------------------------------------------------

    def Duplicate(self):
        """Duplicate the selected tier: append a copy of it."""
        if self._transcription.GetSize() == 0:
            return

        # Get the selected tier index in the list
        sellist = self.tier_list.GetFirstSelected()
        if sellist == -1:
            return
        # Too many selected items
        if self.tier_list.GetSelectedItemCount() > 1:
            ShowInformation(self, self._prefs,
                            "One tier must be checked",
                            style=wx.ICON_INFORMATION)
            return

        tier = self._transcription[sellist]
        self.Append(tier.Copy())

    # ----------------------------------------------------------------------

    def MoveUp(self):
        """Move up the selected tier (except for the first one).

        Nothing is done if no tier is selected, if several tiers are
        selected or if the selected tier is protected.

        """
        if self._transcription.GetSize() == 0:
            return

        # Get the selected tier in the list
        sellist = self.tier_list.GetFirstSelected()
        if sellist == -1:
            return
        # Too many selected items
        if self.tier_list.GetSelectedItemCount() > 1:
            ShowInformation(self, self._prefs,
                            "One tier must be checked",
                            style=wx.ICON_INFORMATION)
            return

        tier = self._transcription[sellist]
        if tier in self._protected:
            ShowInformation(self, self._prefs,
                            "Attempt to move a protected tier: forbidden!",
                            style=wx.ICON_INFORMATION)
            return

        # Impossible to move up the first tier.
        if sellist == 0:
            return

        # Pop selected tier from transcription.
        try:
            # waiting a better way to work with hierarchy...
            self._transcription._hierarchy.remove_tier(self._transcription[sellist])
        except Exception:
            # Best effort: the tier is moved even if the hierarchy
            # can't be updated.
            pass
        self._transcription.Pop(sellist)

        # Delete old tier of the list
        self.tier_list.DeleteItem(sellist)

        # Add tier to the transcription
        tierindex = self._transcription.Add(tier, sellist - 1)
        # Add tier to the list
        self.SetTierProperties(tierindex)
        # Update tier number of the tier which was moved down
        self.tier_list.SetStringItem(sellist, 0, "Tier %d" % (sellist + 1))

        # Let the item selected
        self.tier_list.Select(sellist - 1, on=True)
        self._mark_dirty()

    # ----------------------------------------------------------------------

    def MoveDown(self):
        """Move down the selected tier (except for the last one).

        Nothing is done if no tier is selected, if several tiers are
        selected or if the selected tier is protected.

        """
        if self._transcription.GetSize() == 0:
            return

        # Get the selected tier in the list
        sellist = self.tier_list.GetFirstSelected()
        if sellist == -1:
            return
        # Too many selected items
        if self.tier_list.GetSelectedItemCount() > 1:
            ShowInformation(self, self._prefs,
                            "One tier must be checked",
                            style=wx.ICON_INFORMATION)
            return

        tier = self._transcription[sellist]
        if tier in self._protected:
            ShowInformation(self, self._prefs,
                            "Attempting to move a protected tier: forbidden!",
                            style=wx.ICON_INFORMATION)
            return

        # Impossible to move down the last tier.
        if (sellist + 1) == self.tier_list.GetItemCount():
            return

        # Pop selected tier from transcription.
        try:
            # waiting a better way to work with hierarchy...
            self._transcription._hierarchy.remove_tier(self._transcription[sellist])
        except Exception:
            # Best effort: the tier is moved even if the hierarchy
            # can't be updated.
            pass
        self._transcription.Pop(sellist)

        # Delete old tier of the list
        self.tier_list.DeleteItem(sellist)

        # Add tier to the transcription
        if (sellist + 1) >= self.tier_list.GetItemCount():
            tierindex = self._transcription.Add(tier)
        else:
            tierindex = self._transcription.Add(tier, sellist + 1)
        # Add tier to the list
        self.SetTierProperties(tierindex)
        # Update tier number
        self.tier_list.SetStringItem(sellist, 0, "Tier " + str(sellist + 1))
        self.tier_list.SetStringItem(sellist + 1, 0, "Tier " + str(tierindex + 1))

        # Let the item selected
        self.tier_list.Select(sellist + 1, on=True)
        self._mark_dirty()

    # ----------------------------------------------------------------------

    def Radius(self):
        """Fix a new radius value to all TimePoint instances of the selected tiers.

        Dialog with the user to get the new value, which must range 0-1.
        Nothing is done if one of the selected tiers is protected.

        """
        if self._transcription.GetSize() == 0:
            return

        # Get the selected tiers in the list
        sellist = self.tier_list.GetFirstSelected()
        if sellist == -1:
            return
        tiers = []
        while sellist != -1:
            tiers.append(self._transcription[sellist])
            sellist = self.tier_list.GetNextSelected(sellist)

        for tier in tiers:
            if tier in self._protected:
                ShowInformation(self, self._prefs,
                                "Attempt to modify a protected tier: forbidden!",
                                style=wx.ICON_INFORMATION)
                return

        # Open a dialog to ask the new radius value
        radius = tiers[0].GetBegin().GetRadius()
        dlg = RadiusChooser(self, self._prefs, radius)
        try:
            if dlg.ShowModal() != wx.ID_OK:
                return

            # Get the value
            value = dlg.GetValue()
            try:
                r = float(value)
                if not 0. <= r <= 1.:
                    raise ValueError('Radius must range 0-1.')
            except (TypeError, ValueError):
                # The value is formatted with %s: it is possibly not a number.
                logging.info('Radius cancelled (can not be applied: %s).' % value)
                return

            # Set the value to each selected tier
            for tier in tiers:
                tier.SetRadius(r)
                logging.debug('Radius fixed to %f' % r)
            self._mark_dirty()

        finally:
            # The dialog is destroyed whatever the way we leave.
            dlg.Destroy()

    # ----------------------------------------------------------------------

    def Preview(self):
        """Open a grid frame with the selected tier content."""
        if self._transcription.GetSize() == 0:
            return

        # Get the selected tier in the list
        sellist = self.tier_list.GetFirstSelected()
        if sellist == -1:
            return
        # Too many selected items
        if self.tier_list.GetSelectedItemCount() > 1:
            ShowInformation(self, self._prefs,
                            "One tier only must be checked",
                            style=wx.ICON_INFORMATION)
            return

        tier = self._transcription[sellist]
        dlg = PreviewTierDialog(self, self._prefs, tiers=[tier])
        dlg.Show()

    # ----------------------------------------------------------------------

    def Append(self, newtier):
        """Append a tier in the transcription and in the list.

        :param newtier: (Tier) The tier to append

        """
        # Append tier to the transcription
        tierindex = self._transcription.Append(newtier)
        # Append tier to the list
        self.SetTierProperties(tierindex)
        # Display information
        self._mark_dirty()

    # ----------------------------------------------------------------------

    def LoadFile(self, filename):
        """Load a file in memory and show it.

        The transcription is replaced by an empty one named "Empty" if the
        file does not exist, or named "IO-Error" if loading failed.

        :param filename: Name of an annotated file.

        """
        self._filename = filename
        if os.path.exists(filename) is False:
            self._transcription = Transcription("Empty")
            return
        try:
            self._transcription = sppas.src.annotationdata.aio.read(filename)
            self._dirty = False
            self._boxtitle.SetForegroundColour(FG_FILE_COLOUR)
            self.Refresh()
        except Exception as e:
            logging.info('Error loading file %s: %s' % (filename, str(e)))
            self._transcription = Transcription("IO-Error")

    # ----------------------------------------------------------------------

    def Save(self):
        """Save the current page content, only if it was modified.

        A message is shown to the user if the file can't be saved.

        """
        if self._dirty is False:
            return

        try:
            sppas.src.annotationdata.aio.write(self._filename, self._transcription)
            self._dirty = False
            self._boxtitle.SetForegroundColour(FG_FILE_COLOUR)
            self.Refresh()
        except Exception as e:
            # give information
            ShowInformation(self, self._prefs,
                            'File not saved: %s' % str(e),
                            style=wx.ICON_ERROR)

    # ----------------------------------------------------------------------

    def SaveAs(self, filename):
        """Save the current page content with another file name.

        Keep everything un-changed in self.

        :param filename: (str) Name of the file to write

        """
        try:
            sppas.src.annotationdata.aio.write(filename, self._transcription)
        except Exception as e:
            # give information
            ShowInformation(self, self._prefs,
                            'File not saved: %s' % str(e),
                            style=wx.ICON_ERROR)

    # ----------------------------------------------------------------------

    def GetTranscription(self):
        """Return the Transcription."""
        return self._transcription

    # ----------------------------------------------------------------------

    def GetTranscriptionName(self):
        """Return the name of the transcription."""
        return self._transcription.GetName()

    # ----------------------------------------------------------------------
    # Private
    # ----------------------------------------------------------------------

    def _mark_dirty(self):
        """Set the transcription as modified and show it with the title colour."""
        self._dirty = True
        self._boxtitle.SetForegroundColour(FG_FILE_DIRTY_COLOUR)
        self.Refresh()

    # ----------------------------------------------------------------------

    def _renumber(self, start):
        """Update the tier number of the items of the list, from a given one.

        :param start: (int) Index of the first item to update

        """
        for i in range(start, self.tier_list.GetItemCount()):
            self.tier_list.SetStringItem(i, 0, "Tier %d" % (i + 1))

    # ----------------------------------------------------------------------

    def _checksize(self):
        """Check the transcription size.

        Append an "empty line" if transcription is empty.
        Remove this empty line if transcription is not empty.

        :returns: (bool) True if something has changed.

        """
        # Append an "empty" line in the ListCtrl
        if self._transcription.GetSize() == 0 and \
                self.tier_list.GetItemCount() == 0:
            self.tier_list.InsertStringItem(0, " ... ")
            if self._transcription.GetName() == "IO-Error":
                self.tier_list.SetStringItem(0, 1, " Error while reading this file ")
            else:
                self.tier_list.SetStringItem(0, 1, " Empty file: no tiers ")
            for i in range(2, 5):
                self.tier_list.SetStringItem(0, i, " ")
            return True

        # Remove the "empty" line of the ListCtrl
        if self._transcription.GetSize() < self.tier_list.GetItemCount():
            self.tier_list.DeleteItem(self.tier_list.GetItemCount() - 1)
            return True

        return False
def tracks2transcription(self, ipustrs, ipusaudio, add_ipu_idx=False):
    """Create a Transcription object from tracks.

    The result has two tiers: "IPUs" with the label "ipu_N" for each
    track, and "Transcription" with the units of ipustrs (if any). In
    both tiers, the label "#" is assigned to the intervals between tracks.

    :param ipustrs: (IPUsTrs) Gives the units and the media
    :param ipusaudio: (IPUsAudio) Gives the framerate, the duration and
        the window length
    :param add_ipu_idx: (bool) Keep the "ipu_N" label in front of the unit
        in the "Transcription" tier
    :returns: Transcription
    :raises: IOError if there are no tracks, Exception if the number of
        tracks and the number of units are different

    """
    if len(self.tracks) == 0:
        raise IOError('No IPUs to write.\n')

    # Extract the info we need from IPUsAudio
    framerate = ipusaudio.get_channel().get_framerate()
    end_time = ipusaudio.get_channel().get_duration()

    # Extract the info we need from ipustrs
    media = None
    try:
        medialist = ipustrs.trsinput.GetMedia()
        if len(medialist) > 0:
            media = medialist[0]
    except Exception:
        media = None

    units = ipustrs.get_units()
    if len(units) != 0 and len(self.tracks) != len(units):
        raise Exception('Inconsistent number of tracks and units. '
                        'Got %d audio tracks, and %d units.\n'
                        % (len(self.tracks), len(units)))

    # Create the transcription and tiers
    trs = Transcription("IPU-Segmentation")
    tieripu = trs.NewTier("IPUs")
    tier = trs.NewTier("Transcription")

    # vagueness is win_length divided by 4
    # (see "refine" method of sppasChannelSilence class)
    # radius is vagueness divided by 2
    radius = ipusaudio.get_win_length() / 8.

    def interval(from_value, to_value):
        """Return the interval between two values, with the radius."""
        return TimeInterval(TimePoint(from_value, radius),
                            TimePoint(to_value, radius))

    # Convert the tracks: from frames to times
    tracks_times = frames2times(self.tracks, framerate)

    to_prec = 0.
    for i, (from_time, to_time) in enumerate(tracks_times):

        # From the previous track to the current track: silence
        if to_prec < from_time:
            silence = Annotation(interval(to_prec, from_time), Label("#"))
            tieripu.Append(silence)
            tier.Append(silence.Copy())

        # New track with speech
        # ... IPU tier
        label = "ipu_%d" % (i + 1)
        tieripu.Append(Annotation(interval(from_time, to_time), Label(label)))

        # ... Transcription tier
        if add_ipu_idx is False:
            label = ""
        if len(units) > 0:
            label = label + " " + units[i]
        tier.Append(Annotation(interval(from_time, to_time), Label(label)))

        # Go to the next
        to_prec = to_time

    # The end is a silence?
    if to_prec < end_time:
        begin = TimePoint(to_prec, radius)
        end = TimePoint(end_time, radius)
        if begin < end:
            silence = Annotation(TimeInterval(begin, end), Label("#"))
            tieripu.Append(silence)
            tier.Append(silence.Copy())

    # Link both tiers: IPU and Transcription
    try:
        trs.GetHierarchy().add_link('TimeAssociation', tieripu, tier)
    except Exception:
        pass

    # Set media
    if media is not None:
        trs.AddMedia(media)
        for each_tier in trs:
            each_tier.SetMedia(media)

    return trs
def get_transcription(self, input_filename, tier_idx=None):
    """Extract transcription from a file, either time-aligned or not.

    If input is a simple text file, it must be formatted like:
        - each line is supposed to be at least one unit;
        - each '#' symbol is considered as a unit boundary.
        - both can be combined.

    If input is a time-aligned file, the expected tier name for the
    transcription are:
        - priority: trans in the tier name;
        - secondary: trs, ortho, toe or ipu in the tier name.

    It also extracts IPUs file names if any, i.e. a tier with name
    "Name" or "File".

    :param input_filename: (str) Name of the input file
    :param tier_idx: (int) Force the tier index for the transcription
    :returns: Transcription
    :raises: IOError if a text file was not read as exactly one tier

    """
    if input_filename is None:
        return Transcription()

    trs_input = sppas.src.annotationdata.aio.read(input_filename)

    # input is a simple text file
    if input_filename.lower().endswith("txt"):
        if trs_input.GetSize() == 1:
            return trs_input
        raise IOError(
            'Error while reading file (expected one tier. Got %d)'
            % trs_input.GetSize())

    # input is a time-aligned file
    trs_tier = (sppasSearchTier.transcription(trs_input)
                if tier_idx is None else trs_input[tier_idx])

    trs_output = Transcription("Output")
    if self.logfile:
        self.logfile.print_message(
            "IPUs+Transcription tier found: %s" % trs_tier.GetName(),
            indent=3, status=INFO_ID)
    trs_tier.SetName('Transcription')
    trs_output.Append(trs_tier)

    # Expected IPUs file names: only the first matching tier is used
    names_tier = None
    for candidate in trs_input:
        lowered = candidate.GetName().lower()
        if "name" in lowered or "file" in lowered:
            names_tier = candidate
            break

    if names_tier is not None:
        if self.logfile:
            self.logfile.print_message(
                "IPUs file names found: %s" % names_tier.GetName(),
                indent=3, status=INFO_ID)
        names_tier.SetName("Name")
        trs_output.Append(names_tier)

    return trs_output
# ----------------------------------------------------------------------------
# Load the mapping table, then the input data

mapping = TierMapping(args.m)
mapping.set_reverse(False)     # from sampa to ipa direction
mapping.set_keep_miss(True)    # keep unknown entries as given
mapping.set_miss_symbol("")    # not used!
mapping.set_delimiters([])     # longest matching

# read content
trs_input = aio.read(args.i)

# ----------------------------------------------------------------------------
# Convert input file: one new tier for each requested tier name

trs = Transcription(name=trs_input.GetName() + "-IPA")

for n in args.n.split(','):
    print(" -> Tier {:s}:".format(n))
    tier = trs_input.Find(n, case_sensitive=False)
    if tier is None:
        print(" [IGNORED] Wrong tier name.")
        continue

    new_tier = mapping.map_tier(tier)
    new_tier.SetName(n + "-IPA")
    new_tier.metadata = tier.metadata
    trs.Append(new_tier)

# Set the other members
trs.metadata = trs_input.metadata
# tieridx is used to index trs: valid values range from 0 to GetSize()-1.
if tieridx < 0 or tieridx >= trs.GetSize():
    print('Error: Bad tier number.\n')
    sys.exit(1)
tier = trs[tieridx]

# Default mode is 0, i.e. 'exact'
if not mode:
    mode.append(0)

# Create one predicate for each couple (mode, pattern).
# The prefix "i" selects the variant used when "CASE_SENSITIVE" is not
# in the options.
d = {0: 'exact', 1: 'contains', 2: 'startswith', 3: 'endswith', 4: 'regexp'}
prefix = "" if "CASE_SENSITIVE" in options else "i"
bools = [Sel(**{prefix + d[key]: p}) for key in mode for p in patterns]

# An annotation is selected if it matches at least one of the predicates,
# or none of them if "REVERSE" is in the options.
pred = functools.reduce(operator.or_, bools)
pred = ~pred if "REVERSE" in options else pred

# A list is required: with Python 3, filter() returns an iterator, which
# is always evaluated to True and which can be browsed only once.
filtered_annotations = list(filter(pred, tier))
if not filtered_annotations:
    print("NO RESULT")
    sys.exit(0)

filteredtier = Tier(tier.Name)
for a in filtered_annotations:
    filteredtier.Add(a)

# Print the selected annotations, or save them into a file
if fileoutput is None:
    for a in filteredtier:
        print(a)
else:
    trs = Transcription()
    trs.Add(filteredtier)
    sppas.src.annotationdata.aio.write(fileoutput, trs)
def __init__(self, name="NoName", mintime=0., maxtime=0.):
    """Create a new Phonedit Transcription instance.

    All the parameters are forwarded as-is to Transcription.__init__.

    :param name: Default is "NoName"
    :param mintime: Default is 0.
    :param maxtime: Default is 0.

    """
    Transcription.__init__(self, name, mintime, maxtime)
def run(self, input_filename, trsoutput=None, outputfile=None):
    """ Apply momel from a pitch file.

    The pitch values are cut into **estimated** Inter-Pausal-Units (IPUs):
    an IPU is closed as soon as the number of consecutive null F0 values,
    multiplied by self.PAS_TRAME, exceeds 299. Momel is estimated on each
    IPU and the x value of each target is shifted by the start of its IPU.
    An IPU on which Momel fails is reported (log file, or standard output
    if there is no log file) and simply gives no target.

    :param input_filename: Name of the pitch file, given to set_pitch()
    :param trsoutput: Name of the annotated file to write the targets tier
    in. If not set, targets are only printed.
    :param outputfile: Name of the file given to print_targets(). If
    neither trsoutput nor outputfile is set, 'STDOUT' is used.

    """
    self.print_filename(input_filename)
    self.print_options()
    self.print_diagnosis(input_filename)

    # Get pitch values from the input
    pitch = self.set_pitch(input_filename)
    # Selected values (Target points) for this set of pitch values
    targets = []
    # List of pitch values of one **estimated** Inter-Pausal-Unit (ipu)
    ipupitch = []
    # Number of consecutive null F0 values
    nbzero = 0
    # Current time value
    curtime = 0

    # For each f0 value of the wav file
    for p in pitch:
        if p == 0:
            nbzero += 1
        else:
            nbzero = 0
        ipupitch.append(p)

        # If the number of null values exceed 300ms,
        # we consider this is a silence and estimate Momel
        # on the recorded list of pitch values of the **estimated** IPU.
        if (nbzero * self.PAS_TRAME) > 299:
            if len(ipupitch) > 0 and (len(ipupitch) > nbzero):
                # Estimates the real start time of the IPU
                ipustarttime = curtime - (len(ipupitch)) + 1
                try:
                    # It is supposed ipupitch starts at time = 0.
                    iputargets = self.momel.annotate(ipupitch)
                except Exception as e:
                    # Best effort: this IPU is reported and skipped.
                    if self.logfile is not None:
                        self.logfile.print_message(
                            'No Momel annotation between time ' +
                            str(ipustarttime * 0.01) +
                            " and " +
                            str(curtime * 0.01) +
                            " due to the following error: " +
                            str(e), indent=2, status=-1)
                    else:
                        print("Momel Error: " + str(e))
                    iputargets = []

                # Adjust time values in the targets
                for target in iputargets:
                    target.set_x(ipustarttime + target.get_x())

                # add this targets to the targets list
                targets = targets + iputargets
                del ipupitch[:]

        curtime += 1

    # last ipu
    if len(ipupitch) > 0 and (len(ipupitch) > nbzero):
        # The start time is fixed BEFORE estimating Momel, like in the loop
        # above: the error message below needs the value of *this* IPU.
        ipustarttime = curtime - (len(ipupitch))
        try:
            iputargets = self.momel.annotate(ipupitch)
        except Exception as e:
            if self.logfile is not None:
                self.logfile.print_message(
                    'No Momel annotation between time ' +
                    str(ipustarttime * 0.01) +
                    " and " +
                    str(curtime * 0.01) +
                    " due to the following error: " +
                    str(e), indent=2, status=-1)
            else:
                print("error: " + str(e))
            iputargets = []

        # Adjust time values
        for target in iputargets:
            target.set_x(ipustarttime + target.get_x())
        targets = targets + iputargets

    # Print results and/or estimate INTSINT (if any)
    if trsoutput:
        trsm = Transcription("TrsMomel")
        if outputfile:
            momeltier = self.print_targets(targets, outputfile, trs=trsm)
        else:
            momeltier = self.print_targets(targets,
                                           output_filename=None,
                                           trs=trsm)
        if self.logfile is not None:
            self.logfile.print_message(str(len(targets)) +
                                       " targets found.",
                                       indent=2, status=3)
        # because one pitch estimation each 10ms...
        momeltier.SetRadius(0.005)
        sppas.src.annotationdata.aio.write(trsoutput, trsm)
    elif outputfile:
        self.print_targets(targets, outputfile, trs=None)
    else:
        self.print_targets(targets, output_filename='STDOUT', trs=None)
# ----------------------------------------------------------------------------
# Read

trsinput = sppas.src.annotationdata.aio.read(args.i)

# Take all tiers or specified tiers.
# Tier numbers start at 1: they are shifted by one when indexing below.
if not args.t:
    tiersnumbs = range(1, (trsinput.GetSize() + 1))
else:
    tiersnumbs = args.t

# ----------------------------------------------------------------------------
# Fill

trsout = Transcription()

for i in tiersnumbs:
    tier = trsinput[i - 1]

    tier = fill_gaps(tier, trsinput.GetMinTime(), trsinput.GetMaxTime())

    # The filler has to be an allowed label if the tier has a
    # controlled vocabulary.
    ctrlvocab = tier.GetCtrlVocab()
    if ctrlvocab is not None:
        if ctrlvocab.Contains(args.f) is False:
            ctrlvocab.Append(args.f, descr="Filler")

    # Messages are printed with a single, already formatted, argument:
    # print() then gives the same output with Python 2 and Python 3.
    print("Tier:  %s" % (tier.GetName(),))
    print("Fill empty intervals with %s (and merge with previous or following if any)" % (args.f,))
    tier = fct_fill(tier, args.f)

    print("Merge intervals during less than %s" % (args.d,))
    tier = fct_clean(tier, args.f, args.d)
def create_chunks(self, inputaudio, phontier, toktier, diralign):
    """ Create time-aligned tiers from raw input tiers.

    Anchors are searched in the first channel of the audio (silences if
    self.SILENCES is True, then several ASR passes with a decreasing
    N-gram and a decreasing window length). Passes stop as soon as a pass
    adds no anchor or anchor_tier.check_holes_ntokens(self.NBT) is not
    False. Anchors are finally exported as two tiers:
    "Chunks-Tokenized" and "Chunks-Phonetized".

    If self.ANCHORS is True, each pass that added anchors also saves the
    intermediate result in diralign ("ANCHORS-<number of anchors>.xra").

    :param inputaudio: (str) Name of the audio file
    :param phontier: (Tier) the tier with phonetization
    :param toktier: (Tier) the tier with tokenization to split
    :param diralign: (str) the working directory to store temporary data.
    :returns: Transcription
    :raises: IOError if the number of pronunciations and tokens differ

    """
    chunks_trs = Transcription("Chunks")

    # First channel of the audio, re-formatted with (16000, 2).
    audio_channel = autils.format_channel(
        autils.extract_audio_channel(inputaudio, 0), 16000, 2)

    # One pronunciation is expected for each token.
    pronunciations = self._tier2raw(phontier, map=True).split()
    tokens = self._tier2raw(toktier, map=False).split()
    if len(pronunciations) != len(tokens):
        raise IOError("Inconsistency between the number of items in "
                      "phonetization %d and tokenization %d."
                      % (len(pronunciations), len(tokens)))

    # Stage 1: find anchors.
    anchors = AnchorTier()
    anchors.set_duration(audio_channel.get_duration())
    anchors.set_ext_delay(1.)
    anchors.set_out_delay(0.5)

    # Silences are the first anchors (if enabled).
    if self.SILENCES is True:
        anchors.append_silences(audio_channel)

    # Speaking rate: amount of tokens/sec. in average.
    self._spkrate.eval_from_duration(audio_channel.get_duration(), len(tokens))

    # Multi-pass ASR
    seen_anchors = -1        # number of anchors in the preceding pass
    ngram_order = self.N     # decreasing N-gram value
    window = self.W          # decreasing window length

    while True:
        # Stop when the last pass found nothing new...
        if seen_anchors == anchors.GetSize():
            break
        # ... or when the holes check does not answer False anymore.
        if anchors.check_holes_ntokens(self.NBT) is not False:
            break

        anchors.set_win_delay(window)
        seen_anchors = anchors.GetSize()

        logging.debug(" =========================================================== ")
        logging.debug(" Number of anchors: %d" % seen_anchors)
        logging.debug(" N-gram: %d" % ngram_order)
        logging.debug(" W-length: %d" % window)

        # New anchors (if any) are appended into the anchor tier.
        self._asr(tokens, pronunciations, anchors, audio_channel, diralign, ngram_order)

        # Save the intermediate result, only if this pass was useful.
        if self.ANCHORS is True and seen_anchors != anchors.GetSize():
            Chunks._append_tier(anchors, chunks_trs)
            out_name = os.path.join(diralign, "ANCHORS-%d.xra" % anchors.GetSize())
            sppas.src.annotationdata.aio.write(out_name, chunks_trs)

        # Next pass: shorter window, lower N-gram (down to their minimum).
        window = max(window-1., self.WMIN)
        ngram_order = max(ngram_order-1, self.NMIN)

    # Stage 2: anchors are exported as tracks.
    tokens_tier = anchors.export(tokens)
    tokens_tier.SetName("Chunks-Tokenized")
    prons_tier = anchors.export(pronunciations)
    prons_tier.SetName("Chunks-Phonetized")

    chunks_trs.Append(tokens_tier)
    chunks_trs.Append(prons_tier)
    return chunks_trs
# Without any argument, behave as if the help was requested.
if len(sys.argv) < 2:
    sys.argv.append('-h')

args = parser.parse_args()

# Logging level is 30 in quiet mode, 0 otherwise.
setup_logging(30 if args.quiet else 0, None)

# ----------------------------------------------------------------------------
# Read

trs_input = sppas.src.annotationdata.aio.read(args.i)
trs_out = Transcription()

# ----------------------------------------------------------------------------
# Transform the PhonAlign tier to a Phonetization tier

# Any IOError raised while searching or un-aligning the tier is reported
# as a missing tier, and the script continues.
try:
    align_tier = sppasSearchTier.aligned_phones(trs_input)
    logging.info("PhonAlign tier found.")
    phon_tier = unalign(align_tier)
    phon_tier.SetName("Phones")
    trs_out.Add(phon_tier)
except IOError:
    logging.info("PhonAlign tier not found.")

# ----------------------------------------------------------------------------
# Transform the TokensAlign tier to a Tokenization tier
# The plugin only works on SPPAS alignment files.
if not fname.endswith("-palign"):
    print("ERROR: MarsaTag plugin requires SPPAS alignment files "
          "(i.e. with -palign in its name).")
    sys.exit(1)

# Any other input than a textgrid one is converted: its TokensAlign tier
# is saved alone into "<fname>.TextGrid", which becomes the file to use.
if fext.lower() != "textgrid":

    # read content
    trs_input = aio.read(filename)
    tier = trs_input.Find("TokensAlign", case_sensitive=False)
    if tier is None:
        print("ERROR: A tier with name TokensAlign is required.")
        sys.exit(1)

    # write as textgrid
    trs = Transcription(name="TokensAlign")
    trs.Append(tier)
    filename = fname + ".TextGrid"
    aio.write(filename, trs)

# ----------------------------------------------------------------------------
# Get MarsaTag path

# The path must be given...
if len(args.p) == 0:
    print("ERROR: No given directory for MarsaTag software tool.")
    sys.exit(1)

# ... and must be an existing directory.
if not os.path.isdir(args.p):
    print("ERROR: {:s} is not a valid directory.".format(args.p))
    sys.exit(1)
if not args.quiet: if len(phones) != len(mappings): sys.stdout.write("%s (ignored) " % phoneme.encode('utf8')) else: sys.stdout.write("%s " % phoneme.encode('utf8')) for name, value in zip(tiernames, phones): mappings[name].add(phoneme, value) if not args.quiet: print("\ndone...") # ---------------------------------------------------------------------------- # Convert input file trs = Transcription(name="PhonemesClassification") if not args.quiet: print("Classifying...") for name in mappings.keys(): if not args.quiet: print(" - {:s}".format(name)) new_tier = mappings[name].map_tier(tier) new_tier.SetName(name) trs.Append(new_tier) print("done...") # ---------------------------------------------------------------------------- # Write converted tiers if not args.quiet:
class IPUsTrs(object):
    """
    :author:       Brigitte Bigi
    :organization: Laboratoire Parole et Langage, Aix-en-Provence, France
    :contact:      [email protected]
    :license:      GPL, v3
    :copyright:    Copyright (C) 2011-2017  Brigitte Bigi
    :summary:      An IPUs segmentation from an already annotated data file.

    The content of the units and (if any) their file names are stored by
    the extract methods and are available with get_units() and get_names().

    """
    def __init__(self, trs):
        """ Creates a new IPUsTrs instance.

        :param trs: (Transcription) Input transcription from which it's
        possible to extract IPUs. Expected tiers are:
            - first tier: the IPUs content [required]
            - second tier: the IPUs file names [optional]

        """
        super(IPUsTrs, self).__init__()

        self._trsinput = Transcription()
        self._units = list()  # List of the content of the units (if any)
        self._names = list()  # List of file names for IPUs (if any)
        self.set_transcription(trs)

    # ------------------------------------------------------------------
    # Getters
    # ------------------------------------------------------------------

    def get_units(self):
        """ Return the list of the IPUs contents. """

        return self._units

    # ------------------------------------------------------------------

    def get_names(self):
        """ Return the list of file names for IPUs. """

        return self._names

    # ------------------------------------------------------------------
    # Manage Transcription
    # ------------------------------------------------------------------

    def set_transcription(self, trs):
        """ Set a new Transcription.

        :param trs: (Transcription) Input transcription from which it's
        possible to extract IPUs. None is replaced by a new Transcription.

        """
        self._trsinput = trs if trs is not None else Transcription()

    # ------------------------------------------------------------------
    # Units search
    # ------------------------------------------------------------------

    def extract_bounds(self):
        """ Return bound values.

        Bound values are boolean to know if we expect a silence at start
        or end of the given transcription. It is relevant only if the
        transcription was created from a non-aligned file.

        False means "unknown", not "there is no silence"; True means that
        there is a silence, for sure. The end bound is True only if the
        tier has more than one annotation.

        :returns: (bound_start, bound_end)
        :raises: IOError if the first tier has no annotation

        """
        starts_with_silence = False
        ends_with_silence = False

        if len(self._trsinput) > 0:
            tier = self._trsinput[0]
            if tier.GetSize() == 0:
                raise IOError('Got no utterances.')

            starts_with_silence = tier[0].GetLabel().IsSilence() is True
            ends_with_silence = (tier[-1].GetLabel().IsSilence() is True
                                 and tier.GetSize() > 1)

        return starts_with_silence, ends_with_silence

    # ------------------------------------------------------------------

    def extract(self):
        """ Extract units and (if any) extract tracks and silences.

        The first tier is the content of the IPUs. The second one is used
        for the file names only if the transcription has exactly two tiers.
        Tracks and silences are extracted only if the first annotation is
        located on a time interval; otherwise only units are extracted.

        :returns: tracks and silences, with time as seconds.
        :raises: IOError if the first tier has no annotation

        """
        self._units = list()
        self._names = list()

        nb_tiers = self._trsinput.GetSize()
        if nb_tiers == 0:
            return [], []

        content_tier = self._trsinput[0]
        names_tier = self._trsinput[1] if nb_tiers == 2 else None

        if content_tier.GetSize() == 0:
            raise IOError('Got no utterances.')

        if content_tier[0].GetLocation().GetValue().IsTimeInterval():
            tracks, silences = self.extract_aligned(content_tier, names_tier)
            return tracks, silences

        self.extract_units()
        return [], []

    # ------------------------------------------------------------------

    def extract_units(self):
        """ Extract IPUs content from a non-aligned transcription file.

        Each annotation which is not a silence gives a unit, with a name
        made of "track_" followed by its rank on 6 digits.

        :raises: IOError if the first tier has no annotation

        """
        self._units = []
        self._names = []

        tier = self._trsinput[0]
        if tier.GetSize() == 0:
            raise IOError('Got no utterances.')

        track_number = 1
        for ann in tier:
            label = ann.GetLabel()
            if label.IsSilence() is False:
                self._units.append(label.GetValue())
                self._names.append("track_%.06d" % track_number)
                track_number += 1

    # ------------------------------------------------------------------

    def extract_aligned(self, trstier, nametier):
        """ Extract from a time-aligned transcription file.

        Consecutive silences are merged into a single [start, end] item.
        Each other annotation is a track. If a tier of names is given, a
        track without any name is ignored, and names are made unique by
        appending "_" and a 6-digits counter starting at 2.

        :param trstier: the tier with the content of the IPUs
        :param nametier: the tier with the file names of the IPUs, or None
        :returns: a tuple with tracks and silences lists

        """
        tracks = []
        silences = []
        self._units = list()
        self._names = list()

        nb_anns = trstier.GetSize()
        pos = 0
        while pos < nb_anns:
            ann = trstier[pos]
            begin = ann.GetLocation().GetBegin().GetMidpoint()
            end = ann.GetLocation().GetEnd().GetMidpoint()

            if ann.GetLabel().IsSilence():
                # Swallow all the silences that follow this one.
                while (pos + 1) < nb_anns and trstier[pos + 1].GetLabel().IsSilence():
                    pos += 1
                    end = trstier[pos].GetLocation().GetEnd().GetMidpoint()
                silences.append([begin, end])

            elif nametier is None:
                tracks.append([begin, end])
                self._units.append(ann.GetLabel().GetValue())

            else:
                found = nametier.Find(begin, end, True)
                # A track without a name is not kept.
                if len(found) > 0:
                    tracks.append([begin, end])
                    self._units.append(ann.GetLabel().GetValue())

                    base_name = sppasFileUtils(found[0].GetLabel().GetValue()).clear_whitespace()
                    if len(base_name) == 0:
                        base_name = "unnamed_track"

                    # We have to take care in case of duplicated names
                    unique_name = base_name
                    suffix = 2
                    while unique_name in self._names:
                        unique_name = u"%s_%.06d" % (base_name, suffix)
                        suffix += 1
                    self._names.append(unique_name)

            pos += 1

        return tracks, silences