Beispiel #1
0
    def __build_timeslots(self):
        """Assign a time-slot identifier to every annotation boundary.

        Resets ``self.timeSlotIds`` to an ordered mapping whose keys are
        ``(time, annotation)`` pairs and whose values are ``'ts1'``,
        ``'ts2'``, ... handed out in ascending time order. Each annotation
        contributes two keys: its begin and its end midpoint, both rounded
        to 4 decimals.

        """
        from operator import itemgetter
        self.timeSlotIds = OrderedDict()
        boundaries = []

        for tier in self:
            # Point tiers go through point2interval() first; every tier is
            # then passed through merge_overlapping_annotations().
            if tier.IsPoint():
                tier = point2interval(tier, ELAN_RADIUS)
            tier = merge_overlapping_annotations(tier)

            for annotation in tier:
                where = annotation.GetLocation()
                boundaries.extend((
                    (round(where.GetBeginMidpoint(), 4), annotation),
                    (round(where.GetEndMidpoint(), 4), annotation),
                ))

        # Sort on the time value only (stable, so ties keep collection
        # order) and number the slots starting from 1.
        ordered = sorted(boundaries, key=itemgetter(0))
        for rank, slot_key in enumerate(ordered, start=1):
            self.timeSlotIds[slot_key] = 'ts%s' % rank
Beispiel #2
0
    def __format_tier(self, tierRoot, tier):
        """Fill the XML element ``tierRoot`` from the content of ``tier``.

        Sets the ``LINGUISTIC_TYPE_REF`` and ``TIER_ID`` attributes, copies
        the ``DEFAULT_LOCALE`` and ``PARTICIPANT`` metadata when present,
        then appends one ``ANNOTATION`` child per annotation. A tier that
        has a parent in ``self._hierarchy`` also gets a ``PARENT_REF``
        attribute and is written with reference annotations; any other
        tier is written with alignable annotations.

        :param tierRoot: XML element that receives attributes and children.
        :param tier: tier to serialize.

        """
        tierRoot.set('LINGUISTIC_TYPE_REF', linguistic_type_from_tier(tier))
        tierRoot.set('TIER_ID', tier.GetName())
        for attribute in ('DEFAULT_LOCALE', 'PARTICIPANT'):
            if attribute in tier.metadata.keys():
                tierRoot.set(attribute, tier.metadata[attribute])

        # Point tiers go through point2interval() first; every tier is then
        # passed through merge_overlapping_annotations().
        if tier.IsPoint():
            tier = point2interval(tier, ELAN_RADIUS)
        tier = merge_overlapping_annotations(tier)

        parentTier = self._hierarchy.get_parent(tier)

        if parentTier is None:
            # No parent: alignable annotations. An element is dropped again
            # when the formatter explicitly returns False.
            for annotation in tier:
                node = ET.SubElement(tierRoot, 'ANNOTATION')
                if self.__format_alignable_annotation(node, annotation) is False:
                    tierRoot.remove(node)
            return

        tierRoot.set('PARENT_REF', parentTier.GetName())
        # previousRefId exists on the instance only while the reference
        # annotations of this tier are being written.
        self.previousRefId = None
        for annotation in tier:
            node = ET.SubElement(tierRoot, 'ANNOTATION')
            self.__format_ref_annotation(node, annotation, parentTier)
        del self.previousRefId
Beispiel #3
0
    def __write_tier(self, tier, filefp):
        """Write one text line per annotation of ``tier`` to ``filefp``.

        Each line holds six space-separated fields: the media url of the
        tier (or ``self.GetName()`` when the tier has no media), the tier
        name, the ``'speaker'`` metadata of the tier (``'none'`` when it is
        missing), the begin midpoint, the end midpoint and the label value.

        :param tier: tier to write; a point tier is first passed through
            ``point2interval`` with ``SCLITE_RADIUS``.
        :param filefp: object with a ``write`` method receiving the lines.

        """
        if tier.IsPoint():
            tier = point2interval(tier, SCLITE_RADIUS)

        for annotation in tier:
            media = tier.GetMedia()
            wavname = self.GetName() if media is None else media.url

            location = annotation.GetLocation()
            begin = location.GetBeginMidpoint()
            end = location.GetEndMidpoint()
            word = annotation.GetLabel().GetValue()

            speaker = tier.metadata['speaker'] if 'speaker' in tier.metadata else 'none'
            channel = tier.GetName()

            fields = (wavname, channel, speaker, begin, end, word)
            filefp.write('%s %s %s %s %s %s\n' % fields)
Beispiel #4
0
    def write(self, filename, encoding='UTF-8'):
        """ Write an Antx file.

        Builds an ``AnnotationSystemDataSet`` XML tree holding, in this
        order, the layers, the segments, the media and the configuration
        entries, then writes it to ``filename`` with an XML declaration.
        Any exception raised while building or writing propagates to the
        caller unchanged.

        :param filename: destination handed to ``ElementTree.write``.
        :param encoding: encoding used for the output file.

        """
        root = ET.Element('AnnotationSystemDataSet')
        root.set('xmlns', 'http://tempuri.org/AnnotationSystemDataSet.xsd')

        # Write layers
        for tier in self:
            Antx.__format_tier(root, tier)

        # Write segments
        for tier in self:
            # Point tiers go through point2interval() first; every tier is
            # then passed through merge_overlapping_annotations().
            if tier.IsPoint():
                tier = point2interval(tier, ANTX_RADIUS)
            tier = merge_overlapping_annotations(tier)

            for ann in tier:
                self.__format_segment(root, tier, ann)

        # Write media (falsy entries are skipped)
        for media in self.GetMedia():
            if media:
                Antx.__format_media(root, media)

        # Write configurations: the required entries first, each falling
        # back to its default when the metadata does not define it...
        for key, default in ELT_REQUIRED_Configuration.items():
            Antx.__format_configuration(root, key, self.metadata.get(key, default))

        # ...then every other metadata entry.
        for key, value in self.metadata.items():
            if key not in ELT_REQUIRED_Configuration:
                Antx.__format_configuration(root, key, value)

        indent(root)

        tree = ET.ElementTree(root)
        tree.write(filename, encoding=encoding, xml_declaration=True, method="xml")
        # TODO: add standalone="yes" in the declaration
        # (but not available with ElementTree)
Beispiel #5
0
    def __write_tier(self, tier, filefp, channel):
        """Write one text line per annotation of ``tier`` to ``filefp``.

        Each line holds six space-separated fields: the media url of the
        tier (or ``self.GetName()`` when the tier has no media), the given
        ``channel``, the begin midpoint, the duration value, the label
        value and the score obtained from the label.

        :param tier: tier to write; a point tier is first passed through
            ``point2interval`` with ``SCLITE_RADIUS``.
        :param filefp: object with a ``write`` method receiving the lines.
        :param channel: value written as the second field of every line.

        """
        if tier.IsPoint():
            tier = point2interval(tier, SCLITE_RADIUS)

        for annotation in tier:
            media = tier.GetMedia()
            wavname = self.GetName() if media is None else media.url

            location = annotation.GetLocation()
            begin = location.GetBeginMidpoint()
            duration = location.GetDuration().GetValue()

            word = annotation.GetLabel().GetValue()
            score = annotation.GetLabel().GetLabel().GetScore()

            fields = (wavname, channel, begin, duration, word, score)
            filefp.write('%s %s %s %s %s %s\n' % fields)
Beispiel #6
0
    def __build_timeslots(self):
        """Map every distinct boundary time to a time-slot identifier.

        Sets ``self.timeSlotIds`` to a dict whose keys are the begin/end
        midpoints of all annotations, rounded to 4 decimals, and whose
        values are ``'t0'``, ``'t1'``, ... numbered in the order each
        distinct time value is first encountered.

        """
        slots = {}

        for tier in self:
            # Point tiers go through point2interval() first; every tier is
            # then passed through merge_overlapping_annotations().
            if tier.IsPoint():
                tier = point2interval(tier, ELAN_RADIUS)
            tier = merge_overlapping_annotations(tier)

            for annotation in tier:
                location = annotation.GetLocation()
                # NOTE(review): an older TODO here asked "what about point
                # tiers?"; they are converted above before their locations
                # are read -- confirm nothing more is needed.
                begin = round(location.GetBeginMidpoint(), 4)
                end = round(location.GetEndMidpoint(), 4)

                # A dict gives constant-time membership tests; len(slots)
                # is the number of distinct values seen so far, i.e. the
                # index the value would have had in an append-only list.
                for value in (begin, end):
                    if value not in slots:
                        slots[value] = 't%s' % len(slots)

        self.timeSlotIds = slots