Exemple #1
0
    def __init__(self, spec, splitter="><", ignore_first_character=True):
        """Parse a feature string into mapping entries and ``rel`` tags.

        Args:
            spec: Feature string (must be ``six.text_type``). Trailing
                whitespace is stripped and the result is kept in
                ``self.spec``.
            splitter: Delimiter separating consecutive tags.
            ignore_first_character: When true, parsing starts at index 1 so
                the first character of the string is skipped.

        Every tag is stored on ``self`` by item assignment: ``key:value``
        tags (split at the first colon inside the tag) map ``key`` to
        ``value``; tags without a colon map to ``True``.  Tags beginning
        with ``rel `` are parsed into ``Rel`` objects instead and appended to
        ``self.rels`` unless the ``Rel`` is flagged ``ignore``; ``self.rels``
        stays ``None`` when no ``Rel`` is kept.
        """
        assert isinstance(spec, six.text_type)

        self.spec = spec.rstrip()
        self.rels = None
        self._tag = None
        # Check the stripped text: a whitespace-only spec must count as empty,
        # otherwise the loop below would register a spurious '' key.
        if not self.spec:
            return

        text = self.spec
        step = len(splitter)
        tag_start = 1 if ignore_first_character else 0
        while True:
            # For the last tag find() yields -1; used as a slice end it drops
            # the final character of the spec (presumably the closing '>' of
            # the default '<a><b>' format -- callers rely on this).
            tag_end = text.find(splitter, tag_start)
            # Offset form of startswith avoids copying the rest of the string
            # on every iteration.
            if text.startswith('rel ', tag_start):
                rel = Rel(text[tag_start:tag_end])
                if not rel.ignore:
                    if self.rels is None:
                        self.rels = []
                    self.rels.append(rel)
            else:
                kv_splitter = text.find(':', tag_start, tag_end)
                if kv_splitter == -1:
                    # Flag-style tag: presence is all that matters.
                    self[text[tag_start:tag_end]] = True
                else:
                    self[text[tag_start:kv_splitter]] = text[kv_splitter + 1:tag_end]

            if tag_end == -1:
                break
            tag_start = tag_end + step
Exemple #2
0
    def _extract_rel_tags(tag: Tag) -> List[Rel]:
        """Collect the ``rel`` entries found in ``tag.fstring``.

        The feature string is walked tag by tag, using ``><`` as the
        delimiter and skipping its first character.  Each tag that begins
        with ``rel `` is parsed into a ``Rel``.  A truthy ``target`` is
        rewritten with ``jaconv.h2z(..., digit=True)``, and the ``Rel`` is
        returned only when its ``atype`` is not ``None``.
        """
        delimiter = "><"
        collected = []
        source = tag.fstring

        cursor = 1
        while True:
            # -1 on the last tag; as a slice end it trims the final character
            # of the string (presumably the closing '>').
            boundary = source.find(delimiter, cursor)
            if source.startswith('rel ', cursor):
                rel = Rel(source[cursor:boundary])
                if rel.target:
                    # Half-width -> full-width digits, e.g. the digit in 不特定:人1.
                    rel.target = jaconv.h2z(rel.target, digit=True)
                if rel.atype is not None:
                    collected.append(rel)

            if boundary == -1:
                break
            cursor = boundary + len(delimiter)
        return collected
Exemple #3
0
    def __init__(self, spec, splitter="><", ignore_first_character=True):
        """Parse a feature string into dict entries, ``rel`` tags and a PAS.

        Args:
            spec: Feature string (must be ``str``). Trailing whitespace is
                stripped and the result is kept in ``self.spec``.
            splitter: Delimiter separating consecutive tags.
            ignore_first_character: When true, parsing starts at index 1 so
                the first character of the string is skipped.

        ``key:value`` tags (split at the first colon inside the tag) are
        stored as ``self[key] = value``; tags without a colon are stored with
        the value ``True``.  Tags beginning with ``rel `` are parsed into
        ``Rel`` objects and appended to ``self.rels`` unless flagged
        ``ignore``.  When a ``格解析結果`` (case analysis result) entry is
        present, its value is additionally parsed into ``self.pas``.
        ``self.rels`` and ``self.pas`` stay ``None`` when nothing applies.
        """
        dict.__init__(self)

        assert isinstance(spec, str)

        self.spec = spec.rstrip()
        self.pas = None
        self.rels = None
        # Check the stripped text: a whitespace-only spec must count as empty,
        # otherwise the loop below would register a spurious '' key.
        if not self.spec:
            return

        text = self.spec
        step = len(splitter)
        tag_start = 1 if ignore_first_character else 0
        while True:
            # For the last tag find() yields -1; used as a slice end it drops
            # the final character of the spec (presumably the closing '>' of
            # the default '<a><b>' format -- callers rely on this).
            tag_end = text.find(splitter, tag_start)
            # Offset form of startswith avoids copying the rest of the string
            # on every iteration.
            if text.startswith('rel ', tag_start):
                rel = Rel(text[tag_start:tag_end])
                if not rel.ignore:
                    if self.rels is None:
                        self.rels = []
                    self.rels.append(rel)
            else:
                kv_splitter = text.find(':', tag_start, tag_end)
                if kv_splitter == -1:
                    # Flag-style tag: presence is all that matters.
                    self[text[tag_start:tag_end]] = True
                else:
                    key = text[tag_start:kv_splitter]
                    val = text[kv_splitter + 1:tag_end]
                    self[key] = val

                    if key == '格解析結果':
                        self.pas = Pas(val, knpstyle=True)

            if tag_end == -1:
                break
            tag_start = tag_end + step