Ejemplo n.º 1
0
    def __format_tier(self, tierRoot, tier):
        """ Fill the element ``tierRoot`` with the content of ``tier``.

        Sets the LINGUISTIC_TYPE_REF and TIER_ID attributes, copies the
        DEFAULT_LOCALE and PARTICIPANT metadata entries when the tier has
        them, then appends one ANNOTATION sub-element per annotation.
        A point tier is converted with ``point2interval`` and every tier is
        passed through ``merge_overlapping_annotations`` before writing.

        :param tierRoot: element that receives the attributes and children
        :param tier: the tier to serialize

        """
        tierRoot.set('LINGUISTIC_TYPE_REF', linguistic_type_from_tier(tier))
        tierRoot.set('TIER_ID', tier.GetName())
        for attribute in ('DEFAULT_LOCALE', 'PARTICIPANT'):
            if attribute in tier.metadata.keys():
                tierRoot.set(attribute, tier.metadata[attribute])

        if tier.IsPoint():
            tier = point2interval(tier, ELAN_RADIUS)
        tier = merge_overlapping_annotations(tier)

        # NOTE(review): the lookup is done with the converted/merged tier,
        # not with the object received as argument -- confirm the hierarchy
        # resolves both the same way.
        parent = self._hierarchy.get_parent(tier)

        if parent is None:
            # No parent: write alignable annotations, and drop the
            # sub-element again when the formatter reports False.
            for ann in tier:
                node = ET.SubElement(tierRoot, 'ANNOTATION')
                if self.__format_alignable_annotation(node, ann) is False:
                    tierRoot.remove(node)
            return

        tierRoot.set('PARENT_REF', parent.GetName())

        # Temporary attribute that only exists while the reference
        # annotations of this tier are written (presumably read by
        # __format_ref_annotation -- verify there).
        self.previousRefId = None
        for ann in tier:
            node = ET.SubElement(tierRoot, 'ANNOTATION')
            self.__format_ref_annotation(node, ann, parent)
        del self.previousRefId
Ejemplo n.º 2
0
    def __build_timeslots(self):
        """ Assign a time slot identifier to each annotation boundary.

        Fills ``self.timeSlotIds``, an ordered mapping whose keys are
        ``(time, annotation)`` pairs and whose values are 'ts1', 'ts2', ...
        numbered by increasing time. Each annotation contributes two pairs:
        its begin midpoint and its end midpoint, rounded to 4 decimals.
        Point tiers are converted with ``point2interval`` and all tiers are
        passed through ``merge_overlapping_annotations`` first.

        """
        from operator import itemgetter
        self.timeSlotIds = OrderedDict()

        boundaries = []
        for tier in self:
            if tier.IsPoint():
                tier = point2interval(tier, ELAN_RADIUS)
            tier = merge_overlapping_annotations(tier)

            for annotation in tier:
                location = annotation.GetLocation()
                begin = round(location.GetBeginMidpoint(), 4)
                end = round(location.GetEndMidpoint(), 4)
                boundaries.extend([(begin, annotation), (end, annotation)])

        # Sort on the time value only (the sort is stable), then number
        # the slots starting at 1.
        ordered = sorted(boundaries, key=itemgetter(0))
        for rank, entry in enumerate(ordered, 1):
            self.timeSlotIds[entry] = 'ts%s' % rank
Ejemplo n.º 3
0
    def writeliatxt(self, filename, t=0):
        """ Write one tier of the Transcription as a one-column text file.

            The output mimics the LIA_nett script: one token per line,
            each annotation enclosed between "<s>" and "</s>" lines.
            The file is encoded in iso8859-1. Nothing is written if the
            selected tier is empty.
            Parameters:
                - filename is the output file name
                - t is the index of the tier to write
            Exception:   IOError
            Return:      None
        """
        with codecs.open(filename, 'w', 'iso8859-1') as fp:

            tier = self[t]
            if tier.IsEmpty():
                # Leaving the "with" block closes the (empty) file.
                return

            if tier.IsInterval():
                tier = merge_overlapping_annotations(fill_gaps(tier))

            for annotation in tier:
                fp.write("<s>\n")
                if annotation.IsInterval():
                    for token in annotation.TextValue.strip().split():
                        fp.write(token + "\n")
                    fp.write(" ")
                fp.write("</s>\n")
Ejemplo n.º 4
0
    def writecsv(self, filename):
        """ Write all non-empty tiers in a semicolon-separated text file.

            Each annotation produces one line with four quoted columns:
            tier name, begin, end and text. For a point annotation the
            point value is written in the second column and the third
            column is left empty. The file is encoded in utf-8.
            Parameters:
                - filename is the output file name
            Exception:   IOError, Exception
            Return:      None
        """
        separator = '";"'
        with codecs.open(filename, 'w', 'utf-8') as fp:
            for tier in self:
                if tier.IsEmpty():
                    continue

                if tier.IsInterval():
                    tier = merge_overlapping_annotations(fill_gaps(tier))

                for annotation in tier:
                    pieces = [' "', tier.Name, separator]
                    if annotation.IsInterval():
                        pieces.append(str(annotation.BeginValue))
                        pieces.append(separator)
                        pieces.append(str(annotation.EndValue))
                    else:
                        pieces.append(str(annotation.PointValue))
                        pieces.append(separator)
                    pieces.extend([separator, annotation.TextValue, '"\n'])

                    # Emit the pieces one by one, in column order.
                    for piece in pieces:
                        fp.write(piece)
Ejemplo n.º 5
0
    def writeipulab(self, filename):
        """ Write an HTK lab file, segmented by IPUs.

            Time values are multiplied by 10000000 and truncated to an
            integer (i.e. 100ns units if the values are in seconds).
            Lab files are used to create MLF files; they use the following
            specifications:
            [start1 [end1]] label1 [score] {auxlabel [auxscore]} [comment]
            where:
                - [.] are optionals (0 or 1)
                - {.} possible repetitions (1,2,3...)

            A labelled interval is written as its start time followed by
            its tokens (dots are treated as separators), one per line;
            the start time is omitted when the previous interval was
            labelled too. An unlabelled interval is written as
            "start end sil". The file is encoded in utf-8.
            Parameters:
                - filename is the output file name
            Exception:   IOError, Exception
            Return:      None
        """
        # True while the previously written interval carried a label.
        # The flag is not reset between tiers.
        in_ipu = False
        with codecs.open(filename, 'w', 'utf-8') as fp:
            for tier in self:
                if tier.IsEmpty():
                    continue
                if tier.IsInterval():
                    tier = merge_overlapping_annotations(fill_gaps(tier))

                for annotation in tier:
                    if annotation.IsPoint():
                        # Only the time and a space are written for a point.
                        fp.write(str(int(annotation.PointValue * 10000000)))
                        fp.write(" ")
                        continue

                    start = int(annotation.BeginValue * 10000000)
                    stop = int(annotation.EndValue * 10000000)

                    if not annotation.IsLabel():
                        fp.write(str(start) + " ")
                        fp.write(str(stop) + " sil\n")
                        in_ipu = False
                        continue

                    text = annotation.TextValue.strip()
                    labels = text.replace('.', ' ').split()

                    # The start time is written for the first labelled
                    # interval of a run only.
                    if not in_ipu:
                        fp.write(str(start) + " ")

                    if in_ipu or len(labels) >= 2:
                        for label in labels:
                            fp.write(label + "\n")
                    else:
                        # Zero or one token at the start of a run: the raw
                        # text is written as it is.
                        fp.write(annotation.TextValue + "\n")
                    in_ipu = True
Ejemplo n.º 6
0
    def write(self, filename, encoding='UTF-8'):
        """ Write an Antx file.

        Builds an 'AnnotationSystemDataSet' XML tree holding, in this order,
        the layers (one per tier), the segments (one per annotation), the
        media and the configuration entries, then writes it to a file with
        an XML declaration. Any exception is propagated to the caller.

        :param filename: name of the file to write
        :param encoding: encoding given to the XML writer

        """
        root = ET.Element('AnnotationSystemDataSet')
        root.set('xmlns', 'http://tempuri.org/AnnotationSystemDataSet.xsd')

        # Layers
        for tier in self:
            Antx.__format_tier(root, tier)

        # Segments: point tiers are converted to intervals and overlapping
        # annotations are merged before being written.
        for tier in self:
            if tier.IsPoint():
                tier = point2interval(tier, ANTX_RADIUS)
            tier = merge_overlapping_annotations(tier)

            for ann in tier:
                self.__format_segment(root, tier, ann)

        # Media
        if len(self.GetMedia()) > 0:
            for media in self.GetMedia():
                if not media:
                    continue
                Antx.__format_media(root, media)

        # Configurations: the required ones first (the metadata value if
        # there is one, the default otherwise), then the remaining metadata.
        for key, default in ELT_REQUIRED_Configuration.items():
            Antx.__format_configuration(
                root, key, self.metadata.get(key, default))

        for key, value in self.metadata.items():
            if key in ELT_REQUIRED_Configuration.keys():
                continue
            Antx.__format_configuration(
                root, key, self.metadata.get(key, value))

        indent(root)

        # TODO: add standalone="yes" in the declaration
        # (but not available with ElementTree)
        ET.ElementTree(root).write(filename,
                                   encoding=encoding,
                                   xml_declaration=True,
                                   method="xml")
Ejemplo n.º 7
0
    def writeinfo(self, filename, t=0):
        """ Write one tier of the Transcription as an "info" text file.

            An info file is a 5 columns file, space-separated:
            begin_time end_time middle_time number duration
            where number is the count of whitespace-separated tokens of
            the annotation text. Only interval annotations produce a line.
            The file is encoded in utf-8. Nothing is written if the
            selected tier is empty.
            Parameters:
                - filename is the output file name
                - t is the tier number
            Exception:   IOError, Exception
            Return:      None
        """
        with codecs.open(filename, 'w', 'utf-8') as fp:

            tier = self[t]
            if tier.IsEmpty():
                # Leaving the "with" block closes the (empty) file.
                return

            if tier.IsInterval():
                tier = merge_overlapping_annotations(fill_gaps(tier))

            for annotation in tier:
                if not annotation.IsInterval():
                    continue

                begin = annotation.BeginValue
                end = annotation.EndValue
                duration = end - begin
                middle = begin + (duration / 2.0)
                nb_tokens = len(annotation.TextValue.strip().split())

                columns = [str(begin), str(end), str(middle),
                           str(nb_tokens), str(duration)]
                fp.write(" ".join(columns) + "\n")
Ejemplo n.º 8
0
    def __format_tier(self, tier, number):
        """
        Format a tier from a transcription to the TextGrid format.

        The result is the 'item [number]' header of the tier followed by
        one formatted entry per annotation. The header announces the size
        with 'intervals: size' for an IntervalTier and with 'points: size'
        for a TextTier.

        Side effect: an empty tier receives, in place, one annotation
        spanning the min and max time of the transcription.

        @param tier: The tier to format.
        @param number: The position of the tier in the list of all tiers.
        @return: The text of the tier, as a string.
        """
        # Fill empty tiers because TextGrid does not support empty tiers.
        if tier.IsEmpty():
            tier.Append(Annotation(
                TimeInterval(TimePoint(self.GetMinTime()),
                             TimePoint(self.GetMaxTime()))))

        if tier.IsInterval():
            tier = fill_gaps(tier, self.GetMinTime(), self.GetMaxTime())
            tier = merge_overlapping_annotations(tier)

        # The tier class decides both the keyword used in the size line and
        # the formatter applied to each annotation.
        if tier.IsInterval():
            tier_class = 'IntervalTier'
            items_keyword = 'intervals'
            format_annotation = TextGrid.__format_interval_annotation
        else:
            tier_class = 'TextTier'
            items_keyword = 'points'
            format_annotation = TextGrid.__format_point_annotation

        header = (
            '    item [%d]:\n'
            '        class = "%s"\n'
            '        name = "%s"\n'
            '        xmin = %f\n'
            '        xmax = %f\n'
            '        %s: size = %s\n') % (
                number,
                tier_class,
                tier.GetName(),
                tier.GetBeginValue(),
                tier.GetEndValue(),
                items_keyword,
                tier.GetSize())

        # Collect the pieces and join once instead of growing a string.
        parts = [header]
        for j, an in enumerate(tier, 1):
            parts.append(format_annotation(an, j))
        return ''.join(parts)
Ejemplo n.º 9
0
    def __build_timeslots(self):
        """ Assign a time slot identifier to each distinct time value.

        Fills ``self.timeSlotIds``, a dict mapping each distinct boundary
        time (begin and end midpoints rounded to 4 decimals) to an
        identifier 't0', 't1', ... numbered in order of first appearance.
        The attribute is assigned only once all tiers have been scanned.

        """
        slots = {}

        for tier in self:

            # Point tiers are turned into interval tiers here, so every
            # location handled below has a begin and an end.
            if tier.IsPoint():
                tier = point2interval(tier, ELAN_RADIUS)
            tier = merge_overlapping_annotations(tier)

            for annotation in tier:
                location = annotation.GetLocation()
                begin = round(location.GetBeginMidpoint(), 4)
                end = round(location.GetEndMidpoint(), 4)

                # Dict membership is a constant-time test; the current
                # size of the dict is the index of the next new value.
                for value in (begin, end):
                    if value not in slots:
                        slots[value] = 't%s' % len(slots)

        self.timeSlotIds = slots