Python render_labeled_spanの例

プログラミング言語: Python

名前空間/パッケージ名: hanlp_common.visualization

メソッド/関数: render_labeled_span

hotexamples.comのコード掲載数: 2

Python render_labeled_span - 2件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのhanlp_common.visualization.render_labeled_spanの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: document.py プロジェクト: lei1993/HanLP

    def to_pretty(self,
                  tok='tok',
                  lem='lem',
                  pos='pos',
                  dep='dep',
                  sdp='sdp',
                  ner='ner',
                  srl='srl',
                  con='con',
                  show_header=True,
                  html=False) -> Union[str, List[str]]:
        """
        Convert to a pretty text representation which can be printed to visualize linguistic structures.

        For every sentence, optional NER, SRL and constituency columns are rendered into a table
        (``extras``). That table is then either attached to the dependency tree drawing, printed on
        its own, or - when no annotation is available - replaced by a plain ``field/field`` line.

        Args:
            tok: Token key.
            lem: Lemma key.
            pos: Part-of-speech key.
            dep: Dependency parse tree key.
            sdp: Semantic dependency tree/graph key. SDP visualization has not been implemented yet.
            ner: Named entity key.
            srl: Semantic role labeling key.
            con: Constituency parsing key.
            show_header: ``True`` to include a header which indicates each field with its name.
            html: ``True`` to output HTML format so that non-ASCII characters can align correctly.
                Cell text is HTML-escaped, so tokens containing ``&``, ``<`` or ``>`` are shown literally.

        Returns:
            A pretty string when ``to_conll`` yields a single ``CoNLLSentence``, otherwise a list of
            pretty strings, one per sentence.

        """
        results = []
        # NOTE(review): prefix_match is defined elsewhere; presumably it resolves each
        # requested key to a key actually present in this document - confirm.
        tok = prefix_match(tok, self)
        pos = prefix_match(pos, self)
        ner = prefix_match(ner, self)
        conlls = self.to_conll(tok, lem, pos, dep, sdp)
        # A single sentence comes back unwrapped; wrap it so the loop below handles both
        # shapes, and remember to unwrap the result at the end.
        flat = isinstance(conlls, CoNLLSentence)
        if flat:
            conlls: List[CoNLLSentence] = [conlls]

        def condense(block_, extras_=None):
            """Render ``block_`` and collapse every column after the first into a single cell.

            Returns a list of ``[first_column, merged_rest]`` rows; when ``extras_`` is given,
            those two cells are also appended to the corresponding rows of ``extras_``.
            """
            text_ = make_table(block_, insert_header=False)
            # Split off the first column only, then drop the remaining tabs so the rest
            # of the line becomes one cell.
            text_ = [x.split('\t', 1) for x in text_.split('\n')]
            text_ = [[x[0], x[1].replace('\t', '')] for x in text_]
            if extras_:
                for r, s in zip(extras_, text_):
                    r.extend(s)
            return text_

        for i, conll in enumerate(conlls):
            conll: CoNLLSentence = conll
            tokens = [x.form for x in conll]
            length = len(conll)
            # One row per token plus one header row; annotation columns are appended to it.
            extras = [[] for _ in range(length + 1)]
            if ner in self:
                ner_samples = self[ner]
                if flat:
                    ner_samples = [ner_samples]
                ner_per_sample = ner_samples[i]
                # For nested NER, use the longest span
                # (among spans starting at the same token, keep the one with the largest end).
                start_offsets = [None] * length
                for ent, label, b, e in ner_per_sample:
                    if not start_offsets[b] or e > start_offsets[b][-1]:
                        start_offsets[b] = (ent, label, b, e)
                ner_per_sample = [y for y in start_offsets if y]
                header = ['Token', 'NER', 'Type']
                block = [[] for _ in range(length + 1)]
                _ner = []
                _type = []
                offset = 0
                for ent, label, b, e in ner_per_sample:
                    render_labeled_span(b, e, _ner, _type, label, offset)
                    offset = e
                # Pad the columns for the tokens after the last entity.
                if offset != length:
                    _ner.extend([''] * (length - offset))
                    _type.extend([''] * (length - offset))
                # Only emit the NER columns if at least one label was rendered.
                if any(_type):
                    block[0].extend(header)
                    for j, (_s, _t) in enumerate(zip(_ner, _type)):
                        block[j + 1].extend((tokens[j], _s, _t))
                    text = condense(block, extras)

            if srl in self:
                srl_samples = self[srl]
                if flat:
                    srl_samples = [srl_samples]
                srl_per_sample = srl_samples[i]
                # One pair of columns per predicate-argument structure (PA1, PA2, ...).
                for k, pas in enumerate(srl_per_sample):
                    if not pas:
                        continue
                    block = [[] for _ in range(length + 1)]
                    header = ['Token', 'SRL', f'PA{k + 1}']
                    _srl = []
                    _type = []
                    offset = 0
                    p_index = None
                    for _, label, b, e in pas:
                        render_labeled_span(b, e, _srl, _type, label, offset)
                        offset = e
                        # Remember where the predicate starts so it can get its own marker.
                        if label == PRED:
                            p_index = b
                    if len(_srl) != length:
                        _srl.extend([''] * (length - offset))
                        _type.extend([''] * (length - offset))
                    if p_index is not None:
                        _srl[p_index] = '╟──►'
                        # _type[j] = 'V'
                        # A column whose length does not match the sentence cannot be drawn
                        # (the original note attributes this to overlapped spans); skip it.
                        if len(block) != len(_srl) + 1:
                            # warnings.warn(f'Unable to visualize overlapped spans: {pas}')
                            continue
                        block[0].extend(header)
                        while len(_srl) < length:
                            _srl.append('')
                        while len(_type) < length:
                            _type.append('')
                        for j, (_s, _t) in enumerate(zip(_srl, _type)):
                            block[j + 1].extend((tokens[j], _s, _t))
                    text = condense(block, extras)
            if con in self:
                con_samples: Tree = self[con]
                if flat:
                    con_samples: List[Tree] = [con_samples]
                tree = con_samples[i]
                block = [[] for _ in range(length + 1)]
                block[0].extend(('Token', 'PoS'))
                # NOTE(review): tree.pos() presumably yields (token, tag) pairs - confirm.
                for j, t in enumerate(tree.pos()):
                    block[j + 1].extend(t)

                # Draw the tree level by level, from the lowest subtrees up.
                for height in range(
                        2,
                        tree.height() + (0 if len(tree) == 1 else 1)):
                    offset = 0
                    spans = []
                    labels = []
                    for k, subtree in enumerate(
                            tree.subtrees(lambda x: x.height() == height)):
                        subtree: Tree = subtree
                        b, e = offset, offset + len(subtree.leaves())
                        # From height 3 upwards the span runs between the centers of the
                        # first and last child, which were stored on a lower level.
                        if height >= 3:
                            b, e = subtree[0].center, subtree[-1].center + 1
                        subtree.center = b + (e - b) // 2
                        render_labeled_span(b,
                                            e,
                                            spans,
                                            labels,
                                            subtree.label(),
                                            offset,
                                            unidirectional=True)
                        offset = e
                    if len(spans) != length:
                        spans.extend([''] * (length - len(spans)))
                    if len(labels) != length:
                        labels.extend([''] * (length - len(labels)))
                    # Height 2 is only needed to compute the centers; its columns are not shown.
                    if height < 3:
                        continue
                    block[0].extend(['', f'{height}'])
                    for j, (_s, _t) in enumerate(zip(spans, labels)):
                        block[j + 1].extend((_s, _t))
                    # check short arrows and increase their length
                    for j, arrow in enumerate(spans):
                        if not arrow:
                            # -1 current tag ; -2 arrow to current tag ; -3 = prev tag ; -4 = arrow to prev tag
                            if block[j + 1][-3] or block[j + 1][-4] == '───►':
                                if height > 3:
                                    if block[j + 1][-3]:
                                        block[j + 1][-1] = block[j + 1][-3]
                                        block[j + 1][-2] = '───►'
                                    else:
                                        block[j + 1][-1] = '────'
                                        block[j + 1][-2] = '────'
                                    block[j + 1][-3] = '────'
                                    if block[j + 1][-4] == '───►':
                                        block[j + 1][-4] = '────'
                                else:
                                    block[j + 1][-1] = '────'
                                if block[j + 1][-1] == '────':
                                    block[j + 1][-2] = '────'
                                if not block[j + 1][-4]:
                                    block[j + 1][-4] = '────'
                # If the root label is shorter than the level number, extend it to the same length
                level_len = len(block[0][-1])
                for row in block[1:]:
                    if row[-1] and len(row[-1]) < level_len:
                        row[-1] = row[-1] + ' ' * (level_len - len(row[-1]))

                text = condense(block)
                # Cosmetic issues
                # (close gaps in horizontal lines and turn crossings into junction characters;
                # the header row is left untouched).
                for row in text[1:]:
                    while '  ─' in row[1]:
                        row[1] = row[1].replace('  ─', ' ──')
                    row[1] = row[1].replace('─ ─', '───')
                    row[1] = re.sub(
                        r'([►─])([\w-]*)(\s+)([│├])', lambda m:
                        f'{m.group(1)}{m.group(2)}{"─" * len(m.group(3))}{"┤" if m.group(4) == "│" else "┼"}',
                        row[1])
                    row[1] = re.sub(r'►(─+)►', r'─\1►', row[1])
                for r, s in zip(extras, text):
                    r.extend(s)
            # warnings.warn('Unable to visualize non-projective trees.')
            if dep in self and conll.projective:
                # The dependency tree drawing embeds the extra columns itself.
                text = conll.to_tree(extras)
                if not show_header:
                    # Drop the first two lines of the drawing, which hold the header.
                    text = text.split('\n')
                    text = '\n'.join(text[2:])
                results.append(text)
            elif any(extras):
                results.append(make_table(extras, insert_header=True))
            else:
                # No structure to draw: fall back to "field/field/..." per token.
                results.append(' '.join([
                    '/'.join(str(f) for f in x.nonempty_fields) for x in conll
                ]))
        if html:
            # The ``html`` parameter shadows the standard library module of the same name,
            # so the escaping helper is imported under its own alias.
            from html import escape as escape_html

            def to_html(pretty_text: str) -> str:
                """Render a tab-separated table as side-by-side ``<pre>`` columns."""
                markup = '<div style="display: table; padding-bottom: 1rem;">'
                cells = [x.split('\t') for x in pretty_text.split('\n') if x]
                if not cells:
                    # Nothing to lay out (e.g. an empty sentence): return an empty container
                    # instead of failing on ``cells[0]``.
                    return markup + '</div>'

                # Transpose rows into columns so that each column becomes one <pre>.
                cols = [[row[c] for row in cells] for c in range(len(cells[0]))]

                for c, each in enumerate(cols):
                    markup += '<pre style="display: table-cell; font-family: SFMono-Regular,Menlo,Monaco,Consolas,' \
                              'Liberation Mono,Courier New,monospace; white-space: nowrap; line-height: 128%; padding: 0;">'
                    if c != len(cols) - 1:
                        # Separate every column but the last from its right neighbour.
                        each = [x + ' ' for x in each]
                    # Escape first, then replace spaces, so the ``&nbsp;`` entities are not
                    # escaped themselves and tokens such as ``<b>`` cannot break the markup.
                    markup += '<br>'.join(
                        [escape_html(x, quote=False).replace(' ', '&nbsp;') for x in each])
                    markup += '</pre>'
                markup += '</div>'
                return markup

            results = [to_html(x) for x in results]
        if flat:
            return results[0]
        return results

コード例 #2

ファイルを表示

    def to_pretty(self, tok='tok', lem='lem', pos='pos', dep='dep', sdp='sdp', ner='ner', srl='srl', con='con',
                  show_header=True) -> str:
        """
        Convert to a pretty text representation which can be printed to visualize linguistic structures.

        For every sentence, optional NER, SRL and constituency columns are rendered into a table
        (``extras``). That table is then either attached to the dependency tree drawing, printed on
        its own, or - when no annotation is available - replaced by a plain ``field/field`` line.

        Args:
            tok: Token key.
            lem: Lemma key.
            pos: Part-of-speech key.
            dep: Dependency parse tree key.
            sdp: Semantic dependency tree/graph key. SDP visualization has not been implemented yet.
            ner: Named entity key.
            srl: Semantic role labeling key.
            con: Constituency parsing key.
            show_header: ``True`` to print a header which indicates each field with its name.

        Returns:
            A pretty string when ``to_conll`` yields a single ``CoNLLSentence``; otherwise a list of
            pretty strings, one per sentence (the ``-> str`` annotation does not reflect the list case).

        """
        results = []
        # NOTE(review): prefix_match is defined elsewhere; presumably it resolves each
        # requested key to a key actually present in this document - confirm.
        tok = prefix_match(tok, self)
        pos = prefix_match(pos, self)
        ner = prefix_match(ner, self)
        conlls = self.to_conll(tok, lem, pos, dep, sdp)
        # A single sentence comes back unwrapped; wrap it so the loop below handles both
        # shapes, and remember to unwrap the result at the end.
        flat = isinstance(conlls, CoNLLSentence)
        if flat:
            conlls: List[CoNLLSentence] = [conlls]

        def condense(block_, extras_=None):
            # Render ``block_`` and collapse every column after the first into a single cell.
            # Returns ``[first_column, merged_rest]`` rows; when ``extras_`` is given, those
            # two cells are also appended to the corresponding rows of ``extras_``.
            text_ = make_table(block_, insert_header=False)
            # Split off the first column only, then drop the remaining tabs.
            text_ = [x.split('\t', 1) for x in text_.split('\n')]
            text_ = [[x[0], x[1].replace('\t', '')] for x in text_]
            if extras_:
                for r, s in zip(extras_, text_):
                    r.extend(s)
            return text_

        for i, conll in enumerate(conlls):
            conll: CoNLLSentence = conll
            tokens = [x.form for x in conll]
            length = len(conll)
            # One row per token plus one header row; annotation columns are appended to it.
            extras = [[] for j in range(length + 1)]
            if ner in self:
                ner_samples = self[ner]
                if flat:
                    ner_samples = [ner_samples]
                ner_per_sample = ner_samples[i]
                # For nested NER, use the longest span
                # (among spans starting at the same token, keep the one with the largest end).
                start_offsets = [None for i in range(length)]
                for ent, label, b, e in ner_per_sample:
                    if not start_offsets[b] or e > start_offsets[b][-1]:
                        start_offsets[b] = (ent, label, b, e)
                ner_per_sample = [y for y in start_offsets if y]
                header = ['Tok', 'NER', 'Type']
                block = [[] for _ in range(length + 1)]
                _ner = []
                _type = []
                offset = 0
                for ent, label, b, e in ner_per_sample:
                    render_labeled_span(b, e, _ner, _type, label, offset)
                    offset = e
                # Pad the columns for the tokens after the last entity.
                if offset != length:
                    _ner.extend([''] * (length - offset))
                    _type.extend([''] * (length - offset))
                # Only emit the NER columns if at least one label was rendered.
                if any(_type):
                    block[0].extend(header)
                    for j, (_s, _t) in enumerate(zip(_ner, _type)):
                        block[j + 1].extend((tokens[j], _s, _t))
                    text = condense(block, extras)

            if srl in self:
                srl_samples = self[srl]
                if flat:
                    srl_samples = [srl_samples]
                srl_per_sample = srl_samples[i]
                # One pair of columns per predicate-argument structure (PA1, PA2, ...).
                for k, pas in enumerate(srl_per_sample):
                    if not pas:
                        continue
                    block = [[] for _ in range(length + 1)]
                    header = ['Tok', 'SRL', f'PA{k + 1}']
                    _srl = []
                    _type = []
                    offset = 0
                    p_index = None
                    for _, label, b, e in pas:
                        render_labeled_span(b, e, _srl, _type, label, offset)
                        offset = e
                        # Remember where the predicate starts so it can get its own marker.
                        if label == PRED:
                            p_index = b
                    if len(_srl) != length:
                        _srl.extend([''] * (length - offset))
                        _type.extend([''] * (length - offset))
                    if p_index is not None:
                        _srl[p_index] = '╟──►'
                        # _type[j] = 'V'
                        # A column whose length does not match the sentence cannot be drawn;
                        # warn and skip this predicate-argument structure.
                        if len(block) != len(_srl) + 1:
                            warnings.warn(f'Unable to visualize overlapped spans: {pas}')
                            continue
                        block[0].extend(header)
                        for j, (_s, _t) in enumerate(zip(_srl, _type)):
                            block[j + 1].extend((tokens[j], _s, _t))
                    text = condense(block, extras)
            if con in self:
                con_samples: Tree = self[con]
                if flat:
                    con_samples: List[Tree] = [con_samples]
                tree = con_samples[i]
                block = [[] for _ in range(length + 1)]
                block[0].extend(('Tok', 'PoS'))
                # NOTE(review): tree.pos() presumably yields (token, tag) pairs - confirm.
                for j, t in enumerate(tree.pos()):
                    block[j + 1].extend(t)

                # Draw the tree level by level, from the lowest subtrees up; the range stops
                # before tree.height(), so subtrees of the full height are not drawn.
                for height in range(2, tree.height()):
                    offset = 0
                    spans = []
                    labels = []
                    for k, subtree in enumerate(tree.subtrees(lambda x: x.height() == height)):
                        subtree: Tree = subtree
                        b, e = offset, offset + len(subtree.leaves())
                        # From height 3 upwards the span runs between the centers of the
                        # first and last child, which were stored on a lower level.
                        if height >= 3:
                            b, e = subtree[0].center, subtree[-1].center + 1
                        subtree.center = b + (e - b) // 2
                        render_labeled_span(b, e, spans, labels, subtree.label(), offset, unidirectional=True)
                        offset = e
                    if len(spans) != length:
                        spans.extend([''] * (length - len(spans)))
                    if len(labels) != length:
                        labels.extend([''] * (length - len(labels)))
                    # Height 2 is only needed to compute the centers; its columns are not shown.
                    if height < 3:
                        continue
                    block[0].extend(['', f'{height}'])
                    for j, (_s, _t) in enumerate(zip(spans, labels)):
                        block[j + 1].extend((_s, _t))
                    # check short arrows and increase their length
                    for j, arrow in enumerate(spans):
                        if not arrow:
                            # -1 current tag ; -2 arrow to current tag ; -3 = prev tag ; -4 = arrow to prev tag
                            if block[j + 1][-3] or block[j + 1][-4] == '───►':
                                if height > 3:
                                    if block[j + 1][-3]:
                                        block[j + 1][-1] = block[j + 1][-3]
                                        block[j + 1][-2] = '───►'
                                    else:
                                        block[j + 1][-1] = '────'
                                        block[j + 1][-2] = '────'
                                    block[j + 1][-3] = '────'
                                    if block[j + 1][-4] == '───►':
                                        block[j + 1][-4] = '────'
                                else:
                                    block[j + 1][-1] = '────'
                                if block[j + 1][-1] == '────':
                                    block[j + 1][-2] = '────'
                                if not block[j + 1][-4]:
                                    block[j + 1][-4] = '────'

                text = condense(block)
                # Cosmetic issues
                # (close gaps in horizontal lines and turn crossings into junction characters).
                for row in text:
                    while '  ─' in row[1]:
                        row[1] = row[1].replace('  ─', ' ──')
                    row[1] = row[1].replace('─  │', '───┤')
                    row[1] = row[1].replace('─  ├', '───┼')
                    row[1] = re.sub(r'►(\w+)(\s+)([│├])', lambda
                        m: f'►{m.group(1)}{"─" * len(m.group(2))}{"┤" if m.group(3) == "│" else "┼"}', row[1])
                    row[1] = re.sub(r'►(─+)►', r'─\1►', row[1])
                for r, s in zip(extras, text):
                    r.extend(s)
            # warnings.warn('Unable to visualize non-projective trees.')
            if dep in self and conll.projective:
                # The dependency tree drawing embeds the extra columns itself.
                text = conll.to_tree(extras)
                if not show_header:
                    # Drop the first two lines of the drawing, which hold the header.
                    text = text.split('\n')
                    text = '\n'.join(text[2:])
                results.append(text)
            elif any(extras):
                results.append(make_table(extras, insert_header=True))
            else:
                # No structure to draw: fall back to "field/field/..." per token.
                results.append(' '.join(['/'.join(str(f) for f in x.nonempty_fields) for x in conll]))
        if flat:
            return results[0]
        return results