def __init__(self, spec, splitter="><", ignore_first_character=True):
    """Parse a feature string into dictionary entries and ``Rel`` objects.

    The text is cut at every occurrence of ``splitter`` and each piece is
    handled as follows:

    * a piece starting with ``'rel '`` is handed to ``Rel``; the result is
      appended to ``self.rels`` unless its ``ignore`` attribute is truthy;
    * a piece containing ``':'`` is stored as ``self[key] = value``, split
      at the first colon of the piece;
    * any other piece is stored as ``self[piece] = True``.

    The last character of the stripped text is never part of a piece
    (presumably it is the closing ``'>'`` of the final tag).

    Args:
        spec: Text to parse; must be an instance of ``six.text_type``.
            Trailing whitespace is removed before parsing.
        splitter: Separator between two consecutive pieces.
        ignore_first_character: When true, parsing starts at index 1
            instead of index 0.

    Raises:
        AssertionError: If ``spec`` is not a ``six.text_type``.
        ValueError: If there is text to parse and ``splitter`` is empty
            (the scan could never advance).
    """
    assert isinstance(spec, six.text_type)
    self.spec = spec.rstrip()
    self.rels = None
    self._tag = None

    # Test the *stripped* text: a whitespace-only spec has nothing to parse
    # and must not register a spurious '' key.
    if not self.spec:
        return
    if not splitter:
        raise ValueError("splitter must not be empty")

    step = len(splitter)
    tag_start = 1 if ignore_first_character else 0
    while True:
        tag_end = self.spec.find(splitter, tag_start)
        # Offset-based startswith avoids copying the tail on every pass.
        if self.spec.startswith('rel ', tag_start):
            rel = Rel(self.spec[tag_start:tag_end])
            if not rel.ignore:
                if self.rels is None:
                    self.rels = []
                self.rels.append(rel)
        else:
            kv_splitter = self.spec.find(':', tag_start, tag_end)
            if kv_splitter == -1:
                # Bare flag: the value is only a placeholder.
                self[self.spec[tag_start:tag_end]] = True
            else:
                key = self.spec[tag_start:kv_splitter]
                self[key] = self.spec[kv_splitter + 1:tag_end]

        # tag_end == -1 marks the final piece; the slices above then stop
        # one character before the end of the text.
        if tag_end == -1:
            break
        tag_start = tag_end + step
def _extract_rel_tags(tag: Tag) -> List[Rel]:
    """Collect the ``rel`` entries found in ``tag.fstring``.

    The string is scanned from index 1 and cut at every ``"><"``.  Each
    piece that starts with ``'rel '`` is passed to ``Rel``.  A non-empty
    ``target`` is rewritten with ``jaconv.h2z(..., digit=True)``, and the
    object is kept only when its ``atype`` is not ``None``.

    Returns:
        The kept ``Rel`` objects, in order of appearance.
    """
    delimiter = "><"
    step = len(delimiter)
    fstring = tag.fstring
    found = []

    start = 1
    while True:
        end = fstring.find(delimiter, start)
        if fstring.startswith('rel ', start):
            rel = Rel(fstring[start:end])
            if rel.target:
                # Half-width -> full-width, digits included: e.g. a target
                # ending in '1' ends in the full-width digit afterwards.
                rel.target = jaconv.h2z(rel.target, digit=True)
            if rel.atype is not None:
                found.append(rel)
        # end == -1 means the piece just handled was the last one.
        if end == -1:
            break
        start = end + step
    return found
def __init__(self, spec, splitter="><", ignore_first_character=True):
    """Parse a feature string into dictionary entries, rels and a PAS.

    The text is cut at every occurrence of ``splitter`` and each piece is
    handled as follows:

    * a piece starting with ``'rel '`` is handed to ``Rel``; the result is
      appended to ``self.rels`` unless its ``ignore`` attribute is truthy;
    * a piece containing ``':'`` is stored as ``self[key] = value``, split
      at the first colon of the piece; when the key is ``'格解析結果'``
      (case-analysis result) the value is additionally wrapped as
      ``self.pas = Pas(value, knpstyle=True)``;
    * any other piece is stored as ``self[piece] = True``.

    The last character of the stripped text is never part of a piece
    (presumably it is the closing ``'>'`` of the final tag).

    Args:
        spec: Text to parse; must be a ``str``.  Trailing whitespace is
            removed before parsing.
        splitter: Separator between two consecutive pieces.
        ignore_first_character: When true, parsing starts at index 1
            instead of index 0.

    Raises:
        AssertionError: If ``spec`` is not a ``str``.
        ValueError: If there is text to parse and ``splitter`` is empty
            (the scan could never advance).
    """
    dict.__init__(self)
    assert isinstance(spec, str)
    self.spec = spec.rstrip()
    self.pas = None
    self.rels = None

    # Test the *stripped* text: a whitespace-only spec has nothing to parse
    # and must not register a spurious '' key.
    if not self.spec:
        return
    if not splitter:
        raise ValueError("splitter must not be empty")

    step = len(splitter)
    tag_start = 1 if ignore_first_character else 0
    while True:
        tag_end = self.spec.find(splitter, tag_start)
        # Offset-based startswith avoids copying the tail on every pass.
        if self.spec.startswith('rel ', tag_start):
            rel = Rel(self.spec[tag_start:tag_end])
            if not rel.ignore:
                if self.rels is None:
                    self.rels = []
                self.rels.append(rel)
        else:
            kv_splitter = self.spec.find(':', tag_start, tag_end)
            if kv_splitter == -1:
                # Bare flag: the value is only a placeholder.
                self[self.spec[tag_start:tag_end]] = True
            else:
                key = self.spec[tag_start:kv_splitter]
                val = self.spec[kv_splitter + 1:tag_end]
                self[key] = val
                if key == '格解析結果':
                    self.pas = Pas(val, knpstyle=True)

        # tag_end == -1 marks the final piece; the slices above then stop
        # one character before the end of the text.
        if tag_end == -1:
            break
        tag_start = tag_end + step