# writeliatxt: write the tier at index `t` to `filename`, encoded as ISO-8859-1.
# The output file is opened (and therefore created/truncated) before the tier
# is inspected, so an empty tier leaves an empty file behind.
# Interval tiers are passed through fill_gaps() and
# merge_overlapping_annotations() before being written.
# For each annotation the method writes "<s>", then -- for an interval
# annotation -- the whitespace-separated tokens of its text, one per line,
# and finally "</s>".
# NOTE(review): the whole method is stored on one physical line, so the nesting
# of the statements cannot be determined from here (e.g. whether fp.write(" ")
# runs once per token or once per annotation). Restore the original layout
# before changing this code.
# NOTE(review): fp.close() before `return` looks redundant inside the `with`
# block -- confirm and drop when the layout is restored.
def writeliatxt(self, filename, t=0): """ Write an ascii file, with one tier of the Transcription. This output is the same as the output of LIA_nett script. 1 column file with tokens. Interval separated by "<s>" and "</s>" Parameters: - filename is the output file name - t is the index of the tier to write Exception: IOError Return: None """ encoding='iso8859-1' with codecs.open(filename, 'w', encoding) as fp: tier = self[t] if tier.IsEmpty(): fp.close() return if tier.IsInterval(): tier = fill_gaps(tier) tier = merge_overlapping_annotations(tier) for annotation in tier: fp.write("<s>\n") if annotation.IsInterval(): l = annotation.TextValue l = l.strip() tabl = l.split() for w in tabl: fp.write( w + "\n" ) fp.write(" ") fp.write("</s>\n")
def writecsv(self, filename):
    """
    Write all non-empty tiers to a semicolon-separated text file (UTF-8).

    One line is written per annotation, with double-quoted fields:
        - interval annotation: tier name, begin, end, text
        - point annotation:    tier name, point, (empty field), text
    Field values are written as-is (no quote escaping is performed).
    Interval tiers are passed through fill_gaps() and
    merge_overlapping_annotations() before being written.

    Parameters:
        - filename is the output file name
    Exception:   IOError, Exception
    Return:      None
    """
    separator = '";"'
    with codecs.open(filename, 'w', 'utf-8') as out:
        for tier in self:
            if tier.IsEmpty():
                continue
            if tier.IsInterval():
                tier = merge_overlapping_annotations(fill_gaps(tier))

            for ann in tier:
                fields = [tier.Name]
                if ann.IsInterval():
                    fields.append(str(ann.BeginValue))
                    fields.append(str(ann.EndValue))
                else:
                    # A point has no end time: keep the column, leave it empty.
                    fields.append(str(ann.PointValue))
                    fields.append('')
                fields.append(ann.TextValue)
                out.write(' "' + separator.join(fields) + '"\n')
# writeipulab: write every non-empty tier to `filename` (UTF-8) as label lines.
# Interval tiers are passed through fill_gaps() and
# merge_overlapping_annotations() before being written.
# Times are converted with int(value * 10000000), which matches the 100 ns
# unit named in the docstring if the tier times are in seconds -- TODO confirm.
# Label text is stripped, every '.' is replaced by a space, and the result is
# split into tokens. While `inipu` is true the tokens are written one per line
# with no time; otherwise the start time is written first. An annotation for
# which IsLabel() is false is written as "<start> <end> sil" and resets
# `inipu` to False.
# NOTE(review): the whole method is stored on one physical line, so the nesting
# cannot be determined from here -- in particular whether the IsLabel() test
# sits inside the non-point `else`, and which branch `inipu = True` belongs to.
# On this one-line layout the inline `#fp.write(...)` comment also comments out
# everything that follows it. Restore the original layout before editing.
# NOTE(review): __s / __e are only assigned in the non-point branch, and the
# point branch writes a time and a space without a newline -- presumably point
# tiers are not really supported here; verify against callers.
def writeipulab(self, filename): """ Write an HTK lab file, segmented by IPUs. Time is represented as 100ns. Lab files are used to create MLF files; they use the following specifications: [start1 [end1]] label1 [score] {auxlabel [auxscore]} [comment] where: - [.] are optionals (0 or 1) - {.} possible repetitions (1,2,3...) Parameters: - filename is the output file name Exception: IOError, Exception Return: None """ inipu = False encoding='utf-8' with codecs.open(filename, 'w', encoding) as fp: for tier in self: if tier.IsEmpty(): continue if tier.IsInterval(): tier = fill_gaps(tier) tier = merge_overlapping_annotations(tier) for annotation in tier: if annotation.IsPoint(): __p = int(annotation.PointValue * 10000000) fp.write(str(__p)) fp.write(" ") else: __s = int(annotation.BeginValue * 10000000) __e = int(annotation.EndValue * 10000000) if annotation.IsLabel(): labstr = annotation.TextValue.strip() labstr = labstr.replace('.', ' ') tablab = labstr.split() if inipu == True: for label in tablab: fp.write(label + "\n") else: if len(tablab) < 2: fp.write(str( __s )+" ") #fp.write(str( __e )+" ") fp.write(annotation.TextValue + "\n") else: fp.write(str( __s )+" ") for label in tablab: fp.write(label + "\n") inipu = True else: fp.write(str( __s )+" ") fp.write(str( __e )+" sil\n") inipu = False
def writeinfo(self, filename, t=0):
    """
    Write timing information about one tier to a text file (UTF-8).

    One line is written per interval annotation, with 5 space-separated
    columns:
        begin_time end_time middle_time number duration
    where "number" is the count of whitespace-separated tokens in the
    annotation text. Annotations that are not intervals are skipped.
    An interval tier is passed through fill_gaps() and
    merge_overlapping_annotations() before being written. If the tier is
    empty, the (already created) output file is left empty.

    Parameters:
        - filename is the output file name
        - t is the tier number
    Exception:   IOError, Exception
    Return:      None
    """
    with codecs.open(filename, 'w', 'utf-8') as out:
        tier = self[t]
        if tier.IsEmpty():
            # Redundant with the context manager; kept to preserve behavior.
            out.close()
            return

        if tier.IsInterval():
            tier = merge_overlapping_annotations(fill_gaps(tier))

        for ann in tier:
            if not ann.IsInterval():
                continue
            begin = ann.BeginValue
            end = ann.EndValue
            duration = end - begin
            middle = begin + (duration / 2.0)
            nb_tokens = len(ann.TextValue.strip().split())
            columns = (begin, end, middle, nb_tokens, duration)
            out.write(" ".join([str(c) for c in columns]) + "\n")
def __format_tier(self, tier, number):
    """
    Format a tier from a transcription to the TextGrid format.

    The returned text is the tier header (item number, class, name,
    xmin, xmax, size) followed by one formatted entry per annotation.
    The size line is labelled "intervals" for an interval tier and
    "points" for a point (TextTier) tier, as the TextGrid format expects.

    An empty tier is modified in place: a single annotation spanning
    the transcription's min..max time is appended to it, because
    TextGrid does not support empty tiers. An interval tier is then
    replaced by the result of fill_gaps() and
    merge_overlapping_annotations().

    @param tier: The tier to format.
    @param number: The position of the tier in the list of all tiers.
    @return: The TextGrid text of the tier, as a string.
    """
    # Fill empty tiers because TextGrid does not support empty tiers.
    if tier.IsEmpty():
        tier.Append(Annotation(
            TimeInterval(TimePoint(self.GetMinTime()),
                         TimePoint(self.GetMaxTime()))))

    if tier.IsInterval():
        tier = fill_gaps(tier, self.GetMinTime(), self.GetMaxTime())
        tier = merge_overlapping_annotations(tier)

    # Everything that depends on the tier kind is chosen in one place so
    # the class name, the size label and the entry formatter always agree.
    if tier.IsInterval():
        tier_class = 'IntervalTier'
        size_label = 'intervals'
        format_annotation = TextGrid.__format_interval_annotation
    else:
        tier_class = 'TextTier'
        size_label = 'points'
        format_annotation = TextGrid.__format_point_annotation

    header = (
        ' item [%d]:\n'
        ' class = "%s"\n'
        ' name = "%s"\n'
        ' xmin = %f\n'
        ' xmax = %f\n'
        ' %s: size = %s\n') % (
            number,
            tier_class,
            tier.GetName(),
            tier.GetBeginValue(),
            tier.GetEndValue(),
            size_label,
            tier.GetSize())

    # Collect the pieces and join once instead of growing a string in a loop.
    parts = [header]
    for j, an in enumerate(tier, 1):
        parts.append(format_annotation(an, j))
    return ''.join(parts)