def read_block(self, stream):
        # Decide which lexical element we're in.
        lexelt_num = bisect.bisect_right(self._lexelt_starts,
                                         stream.tell()) - 1
        lexelt = self._lexelts[lexelt_num]

        instance_lines = []
        in_instance = False
        while True:
            line = stream.readline()
            if line == '':
                assert instance_lines == []
                return []

            # Start of a lexical element?
            if line.lstrip().startswith('<lexelt'):
                lexelt_num += 1
                m = re.search('item=("[^"]+"|\'[^\']+\')', line)
                assert m is not None  # <lexelt> has no 'item=...'
                lexelt = m.group(1)[1:-1]
                if lexelt_num < len(self._lexelts):
                    assert lexelt == self._lexelts[lexelt_num]
                else:
                    self._lexelts.append(lexelt)
                    self._lexelt_starts.append(stream.tell())

            # Start of an instance?
            if line.lstrip().startswith('<instance'):
                assert instance_lines == []
                in_instance = True

            # Body of an instance?
            if in_instance:
                instance_lines.append(line)

            # End of an instance?
            if line.lstrip().startswith('</instance'):
                xml_block = '\n'.join(instance_lines)
                xml_block = _fixXML(xml_block)
                inst = ElementTree.fromstring(xml_block)
                return [self._parse_instance(inst, lexelt)]
Exemple #2
0
    def read_block(self, stream):
        # Decide which lexical element we're in.
        lexelt_num = bisect.bisect_right(self._lexelt_starts, stream.tell())-1
        lexelt = self._lexelts[lexelt_num]
        
        instance_lines = []
        in_instance = False
        while True:
            line = stream.readline()
            if line == '':
                assert instance_lines == []
                return []
            
            # Start of a lexical element?
            if line.lstrip().startswith('<lexelt'):
                lexelt_num += 1
                m = re.search('item=("[^"]+"|\'[^\']+\')', line)
                assert m is not None # <lexelt> has no 'item=...'
                lexelt = m.group(1)[1:-1]
                if lexelt_num < len(self._lexelts):
                    assert lexelt == self._lexelts[lexelt_num]
                else:
                    self._lexelts.append(lexelt)
                    self._lexelt_starts.append(stream.tell())
                
            # Start of an instance?
            if line.lstrip().startswith('<instance'):
                assert instance_lines == []
                in_instance = True

            # Body of an instance?
            if in_instance:
                instance_lines.append(line)

            # End of an instance?
            if line.lstrip().startswith('</instance'):
                xml_block = '\n'.join(instance_lines)
                xml_block = _fixXML(xml_block)
                inst = ElementTree.fromstring(xml_block)
                return [self._parse_instance(inst, lexelt)]
Exemple #3
0
 def parse_from_string(self, string):
     raw = ElementTree.fromstring(string)
     self.parse_tree(raw)
Exemple #4
0
 def fromstring(s):
     return CRFInfo._read(ElementTree.fromstring(s))
Exemple #5
0
 def fromstring(s):
     return CRFInfo._read(ElementTree.fromstring(s))