def parse(self, spec):
    """Read KNP output text and load it into this object.

    The text is split on newlines and handled one line at a time. Lines
    that are empty or whitespace-only are ignored. Reading stops at the
    first non-comment line that matches ``self.pattern``.

    Args:
        spec (str): KNP output.

    Raises:
        Exception: If a line starts with ``;;``; the message carries the
            offending line.
    """
    for line in spec.split('\n'):
        if not line.strip():
            continue

        # "#<TAB>..." records: only the third field of PAS records is kept.
        if line.startswith('#\t'):
            fields = line.split("\t")
            if len(fields) >= 3 and fields[1] == "PAS":
                self._pinfos.append(fields[2])
            continue

        # Every other "#" line is appended to the newline-separated comment.
        if line.startswith('#'):
            if self.comment:
                self.comment += "\n"
            self.comment += line
            # The S-ID pattern is applied to the whole accumulated comment,
            # anchored at its beginning.
            sid_match = re.match(r'# S-ID: ?(\S*)( .+)?$', self.comment)
            if sid_match:
                self.sid = sid_match.group(1)
            if 'KNP++' in line and 'output:KNP' not in line:
                self.juman_format = JUMAN_FORMAT.LATTICE_TOP_ONE  # TODO
            continue

        # Terminator line: nothing after it is read.
        if re.match(self.pattern, line):
            break

        if line.startswith(';;'):
            raise Exception("Error: %s" % line)

        # "*" opens a new Bunsetsu.
        if line.startswith('*'):
            self._bnst.append(Bunsetsu(line, len(self._bnst)))
            continue

        # "+" adds a Tag to the latest Bunsetsu; with a non-default juman
        # format a Bunsetsu is first created from the same line.
        if line.startswith('+'):
            if self.juman_format != JUMAN_FORMAT.DEFAULT:  # TODO
                self._bnst.append(
                    Bunsetsu(line, len(self._bnst), self.juman_format))
            self._bnst[-1].push_tag(
                Tag(line, len(self.tag_list()), self.juman_format))
            continue

        # "!!" must be tested before the single "!" prefix below.
        if line.startswith('!!'):
            nodes = SynNodes(line)
            self._bnst[-1].tag_list().push_synnodes(nodes)
            continue

        # Lines starting with "! ! !" are deliberately not SynNode lines;
        # they fall through to the morpheme handling at the bottom.
        if line.startswith('!') and not line.startswith('! ! !'):
            node = SynNode(line)
            self._bnst[-1].tag_list().push_synnode(node)
            continue

        if line.startswith('EOS'):
            continue

        # Anything else is treated as a morpheme line.
        morpheme = Morpheme(line, len(self.mrph_list()), self.juman_format)
        if not self._bnst:
            # No Bunsetsu seen yet: create a placeholder one to hold it.
            self._bnst.append(Bunsetsu("*", len(self._bnst)))
        self._bnst[-1].push_mrph(morpheme)
def _parse_spec(self, spec):
    """Parse KNP output text and load it into this object.

    The text is split on newlines and handled one line at a time. Lines
    that are empty or whitespace-only are ignored. Reading stops at the
    first non-comment line that matches ``self.pattern``.

    Line kinds, tested in this order:

    * ``#<TAB>...``: for records whose second tab-separated field is
      ``PAS``, the third field is appended to ``self._pinfos``.
    * ``#...``: appended (with a trailing newline) to ``self.comment``;
      the sentence id is read into ``self.sid`` and ``self.newstyle`` is
      switched on when the line contains ``KNP++``.
    * ``;;...``: reported on stderr, then the process exits.
    * ``*...``: a new Bunsetsu is appended.
    * ``+...``: a Tag is pushed onto the latest Bunsetsu (when
      ``self.newstyle`` is set a Bunsetsu is first created from the line).
    * ``!!...`` / ``!...``: SynNodes / SynNode pushed onto the tag list of
      the latest Bunsetsu.
    * ``EOS...``: ignored.
    * anything else: a Morpheme pushed onto the latest Bunsetsu.

    Args:
        spec (str): KNP output.

    Raises:
        SystemExit: With status 1 when a line starts with ``;;``.
    """
    for string in spec.split('\n'):
        if not string.strip():
            continue
        if string.startswith('#\t'):
            items = string.split("\t")
            if len(items) >= 3 and items[1] == "PAS":
                self._pinfos.append(items[2])
        elif string.startswith('#'):
            self.comment += string
            self.comment += "\n"
            # The id is the run of non-whitespace after "# S-ID:", with one
            # optional space tolerated after the colon.  The previous
            # pattern ended in the character class [ $\n], where "$" is a
            # literal dollar sign, so "# S-ID: 123" produced an empty id.
            match = re.match(r'# S-ID: ?(\S*)', self.comment)
            if match:
                self.sid = match.group(1)
            if 'KNP++' in string:
                self.newstyle = True
        elif re.match(self.pattern, string):
            # Terminator line: nothing after it is read.
            break
        elif string.startswith(';;'):
            sys.stderr.write("Error: %s\n" % string)
            # sys.exit rather than quit(): quit is installed by the site
            # module for interactive use and may be missing (e.g. python -S).
            sys.exit(1)
        elif string.startswith('*'):
            bnst = Bunsetsu(string, len(self._bnst))
            self._bnst.append(bnst)
        elif string.startswith('+'):
            if self.newstyle:
                bnst = Bunsetsu(string, len(self._bnst), self.newstyle)
                self._bnst.append(bnst)
            self._bnst[-1].push_tag(
                Tag(string, len(self.tag_list()), self.newstyle))
        elif string.startswith('!!'):
            # Must be tested before the single "!" prefix below.
            synnodes = SynNodes(string)
            self._bnst[-1].tag_list().push_synnodes(synnodes)
        elif string.startswith('!') and not string.startswith('! ! !'):
            # "! ! !" lines are deliberately excluded here and fall through
            # to the morpheme branch.
            synnode = SynNode(string)
            self._bnst[-1].tag_list().push_synnode(synnode)
        elif string.startswith('EOS'):
            pass
        else:
            mrph = Morpheme(string, len(self.mrph_list()), self.newstyle)
            if not self._bnst:
                # No Bunsetsu seen yet: create a placeholder one to hold it.
                bnst = Bunsetsu("*", len(self._bnst))
                self._bnst.append(bnst)
            self._bnst[-1].push_mrph(mrph)