Exemple #1
0
    def writeliatxt(self, filename, t=0):
        """ Write one tier of the Transcription as a one-token-per-line file.

            The output is meant to match the output of the LIA_nett script:
            a 1 column file, encoded in iso8859-1, in which every annotation
            of the tier is enclosed between a "<s>" line and a "</s>" line.
            Tokens are the whitespace-separated pieces of the annotation text.
            Parameters:
                - filename is the output file name
                - t is the index of the tier to write
            Exception:   IOError
            Return:      None
        """
        with codecs.open(filename, 'w', 'iso8859-1') as out:
            selected = self[t]

            # The file has already been created at this point, so an empty
            # tier results in an empty file.
            if selected.IsEmpty():
                return

            if selected.IsInterval():
                selected = merge_overlapping_annotations(fill_gaps(selected))

            for ann in selected:
                out.write("<s>\n")
                if ann.IsInterval():
                    for token in ann.TextValue.strip().split():
                        out.write(token + "\n")
                    # A single space precedes the closing tag of an interval.
                    out.write(" ")
                out.write("</s>\n")
Exemple #2
0
    def writecsv(self,filename):
        """ Write all the tiers in a semicolon-separated text file (utf-8).

            One line is written per annotation, made of 4 double-quoted
            columns: tier name, begin, end and text. For an annotation that
            is not an interval, the point value is written in the begin
            column and the end column is left empty. Empty tiers are skipped.
            Parameters:
                - filename is the output file name
            Exception:   IOError, Exception
            Return:      None
        """
        with codecs.open(filename, 'w', 'utf-8') as out:
            for tier in self:
                if tier.IsEmpty():
                    continue

                if tier.IsInterval():
                    tier = merge_overlapping_annotations(fill_gaps(tier))

                for ann in tier:
                    if ann.IsInterval():
                        begin = str(ann.BeginValue)
                        end = str(ann.EndValue)
                    else:
                        begin = str(ann.PointValue)
                        end = ''
                    columns = [tier.Name, begin, end, ann.TextValue]
                    # Each line starts with a space followed by the opening quote.
                    out.write(' "' + '";"'.join(columns) + '"\n')
Exemple #3
0
    def writeipulab(self, filename):
        """ Write an HTK lab file, segmented by IPUs.

            Time is represented as 100ns.
            Lab files are used to create MLF files; they use the following
            specifications:
            [start1 [end1]] label1 [score] {auxlabel [auxscore]} [comment]
            where:
                - [.] are optionals (0 or 1)
                - {.} possible repetitions (1,2,3...)

            Written content, for each annotation of each non-empty tier:
                - a non-labelled interval is written "start end sil" and
                  closes the current IPU;
                - the first labelled interval after it starts a new IPU: its
                  start time is written, followed by its label(s);
                - the next labelled intervals only add their labels, one per
                  line (dots are treated as label separators).
            Parameters:
                - filename is the output file name
            Exception:   IOError, Exception
            Return:      None
        """
        # Number of HTK time units (100ns) in one second.
        htk_units = 10000000
        # True while the previously written interval was a labelled one.
        # The flag is deliberately not reset between tiers (as before).
        inipu = False

        with codecs.open(filename, 'w', 'utf-8') as fp:
            for tier in self:
                if tier.IsEmpty():
                    continue
                if tier.IsInterval():
                    tier = fill_gaps(tier)
                    tier = merge_overlapping_annotations(tier)

                for annotation in tier:
                    # Times are rounded, not truncated: a float product such
                    # as 1.13 * 10000000 can fall just below the integer.
                    if annotation.IsPoint():
                        # NOTE(review): only the time is written for a point,
                        # without label nor end of line -- confirm that this
                        # is the expected output.
                        point = int(round(annotation.PointValue * htk_units))
                        fp.write(str(point))
                        fp.write(" ")
                        continue

                    start = int(round(annotation.BeginValue * htk_units))

                    if not annotation.IsLabel():
                        end = int(round(annotation.EndValue * htk_units))
                        fp.write(str(start) + " ")
                        fp.write(str(end) + " sil\n")
                        inipu = False
                        continue

                    labels = annotation.TextValue.strip().replace('.', ' ').split()
                    if not inipu:
                        # First labelled interval of an IPU: write its start.
                        fp.write(str(start) + " ")

                    if inipu or len(labels) >= 2:
                        for label in labels:
                            fp.write(label + "\n")
                    else:
                        # Zero or one label opening an IPU: the text is
                        # written unchanged.
                        fp.write(annotation.TextValue + "\n")
                    inipu = True
Exemple #4
0
    def writeinfo(self, filename, t=0):
        """ Write an info file (utf-8) from one tier of the Transcription.

            An info file is a 5 columns file, with space-separated values:
            begin_time end_time middle_time number duration
            where number is the count of whitespace-separated tokens in the
            text of the annotation. Only interval annotations are written.
            Parameters:
                - filename is the output file name
                - t is the tier number
            Exception:   IOError, Exception
            Return:      None
        """
        with codecs.open(filename, 'w', 'utf-8') as out:
            selected = self[t]

            # The file has already been created at this point, so an empty
            # tier results in an empty file.
            if selected.IsEmpty():
                return

            if selected.IsInterval():
                selected = merge_overlapping_annotations(fill_gaps(selected))

            for ann in selected:
                if not ann.IsInterval():
                    continue

                begin = ann.BeginValue
                end = ann.EndValue
                duration = end - begin
                middle = begin + (duration / 2.0)
                nb_tokens = len(ann.TextValue.strip().split())

                columns = (begin, end, middle, nb_tokens, duration)
                out.write(" ".join([str(value) for value in columns]) + "\n")
Exemple #5
0
    def __format_tier(self, tier, number):
        """
        Format a tier from a transcription to the TextGrid format.

        The result is the header of the tier (item number, class, name,
        xmin, xmax and size) followed by each annotation formatted in turn.
        The size line is "intervals: size" for an IntervalTier and
        "points: size" for a TextTier.

        Side effect: an empty tier is modified in place, an annotation
        covering [GetMinTime(), GetMaxTime()] is appended to it.

        @param tier: The tier to format.
        @param number: The position of the tier in the list of all tiers.
        @return: The text of the tier.
        """
        # Fill empty tiers because TextGrid does not support empty tiers.
        if tier.IsEmpty():
            tier.Append(Annotation(
                TimeInterval(TimePoint(self.GetMinTime()),
                             TimePoint(self.GetMaxTime()))))

        if tier.IsInterval():
            tier = fill_gaps(tier, self.GetMinTime(), self.GetMaxTime())
            tier = merge_overlapping_annotations(tier)

        # The class name, the keyword of the size line and the annotation
        # formatter all depend on the kind of tier.
        if tier.IsInterval():
            tier_class = 'IntervalTier'
            size_keyword = 'intervals'
            format_annotation = TextGrid.__format_interval_annotation
        else:
            tier_class = 'TextTier'
            size_keyword = 'points'
            format_annotation = TextGrid.__format_point_annotation

        header = (
            '    item [%d]:\n'
            '        class = "%s"\n'
            '        name = "%s"\n'
            '        xmin = %f\n'
            '        xmax = %f\n'
            '        %s: size = %s\n') % (
                number,
                tier_class,
                tier.GetName(),
                tier.GetBeginValue(),
                tier.GetEndValue(),
                size_keyword,
                tier.GetSize())

        # Annotations are numbered from 1 in a TextGrid file.
        parts = [header]
        for j, an in enumerate(tier, 1):
            parts.append(format_annotation(an, j))
        return ''.join(parts)