Example #1
0
def test_weighted_spans_feature_union():
    """A FeatureUnion over a dict document: each sub-vectorizer yields its own
    DocWeightedSpans (word- and char-level), and per-vectorizer highlight sums
    plus the bias end up in ``other``.
    """
    doc = {'text': 'I see: a leaning lemon tree', 'url': 'http://example.com'}
    union = FeatureUnion([
        ('text',
         CountVectorizer(analyzer='word',
                         preprocessor=lambda x: x['text'].lower())),
        ('url',
         CountVectorizer(analyzer='char',
                         ngram_range=(4, 4),
                         preprocessor=lambda x: x['url'])),
    ])
    union.fit([doc])
    feature_weights = FeatureWeights(
        pos=[
            FW('text__see', 2),
            FW('text__lemon', 4),
            FW('bias', 8),
            FW('url__ampl', 10),
            FW('url__mple', 7),
        ],
        neg=[
            FW('text__tree', -6),
            FW('url__exam', -10),
        ],
        neg_remaining=10,
    )
    result = get_weighted_spans(doc, union, feature_weights)
    expected = WeightedSpans(
        [
            DocWeightedSpans(
                document='i see: a leaning lemon tree',
                spans=[('see', [(2, 5)], 2), ('lemon', [(17, 22)], 4),
                       ('tree', [(23, 27)], -6)],
                preserve_density=False,
                vec_name='text',
            ),
            DocWeightedSpans(
                document='http://example.com',
                spans=[('exam', [(7, 11)], -10), ('ampl', [(9, 13)], 10),
                       ('mple', [(10, 14)], 7)],
                preserve_density=True,
                vec_name='url',
            ),
        ],
        other=FeatureWeights(
            pos=[
                FW('bias', 8),
                FW(FormattedFeatureName('url: Highlighted in text (sum)'), 7),
                FW(FormattedFeatureName('text: Highlighted in text (sum)'), 0),
            ],
            neg=[],
            neg_remaining=10,
        ))
    assert result == expected
Example #2
0
    def get_doc_weighted_spans(
            self,
            doc,  # type: str
            feature_weights,  # type: FeatureWeights
            feature_fn  # type: Callable[[str], str]
    ):
        # type: (...) -> Tuple[Dict[Tuple[str, int], float], DocWeightedSpans]
        """Match this object's tokens against *feature_weights*.

        Returns the mapping of matched feature keys to weights, together
        with a :class:`DocWeightedSpans` covering the matched spans of *doc*.
        """
        weights_by_name = _get_feature_weights_dict(
            feature_weights, feature_fn)
        matched = {}
        span_entries = []
        for token_idx, (span, feature) in enumerate(
                self.text_.spans_and_tokens):
            name = self._featname(token_idx, feature)
            try:
                weight, key = weights_by_name[name]
            except KeyError:
                # Token has no corresponding feature weight; skip it.
                continue
            span_entries.append((feature, [span], weight))
            matched[key] = weight

        return matched, DocWeightedSpans(
            document=doc,
            spans=span_entries,
            preserve_density=False,
        )
def test_render_weighted_spans_word():
    """Word-level rendering, including ngrams whose weight is split over
    several (start, end) spans."""
    weighted_spans = DocWeightedSpans(
        document='i see: a leaning lemon tree',
        spans=[
            ('see', [(2, 5)], 0.2),
            ('tree', [(23, 27)], -0.6),
            ('leaning lemon', [(9, 16), (17, 22)], 0.5),
            ('lemon tree', [(17, 22), (23, 27)], 0.8)],
        preserve_density=False,
    )
    rendered = _render_weighted_spans(weighted_spans)
    print(rendered)
    expected_prefix = (
        '<span style="opacity: 0.80">i </span>'
        '<span'
        ' style="background-color: hsl(120, 100.00%, 89.21%); opacity: 0.83"'
        ' title="0.200">see</span>'
    )
    assert rendered.startswith(expected_prefix)
    # Drop the style attributes so only span structure and titles remain.
    stripped = re.sub('style=".*?"', '', rendered)
    assert stripped == (
        '<span >i </span>'
        '<span  title="0.200">see</span>'
        '<span >: a </span>'
        '<span  title="0.500">leaning</span>'
        '<span > </span>'
        '<span  title="1.300">lemon</span>'
        '<span > </span>'
        '<span  title="0.200">tree</span>'
    )
Example #4
0
def test_unhashed_features_other():
    """ Check that when there are several candidates, they do not appear in "other"
    if at least one is found. If none are found, they should appear in "other"
    together.
    """
    doc = 'I see: a leaning lemon tree'
    vec = CountVectorizer(analyzer='char', ngram_range=(3, 3))
    vec.fit([doc])
    # Each FW carries a list of unhashed feature candidates.
    pos_weights = [
        FW([{'name': 'foo', 'sign': 1},
            {'name': 'see', 'sign': -1}], 2),
        FW([{'name': 'zoo', 'sign': 1},
            {'name': 'bar', 'sign': 1}], 3),
    ]
    neg_weights = [
        FW([{'name': 'ree', 'sign': 1},
            {'name': 'tre', 'sign': 1}], -4),
    ]
    w_spans = get_weighted_spans(
        doc, vec, FeatureWeights(pos=pos_weights, neg=neg_weights))
    expected = WeightedSpans(
        [
            DocWeightedSpans(
                document='i see: a leaning lemon tree',
                spans=[
                    ('see', [(2, 5)], 2),
                    ('tre', [(23, 26)], -4),
                    ('ree', [(24, 27)], -4),
                ],
                preserve_density=True,
            )
        ],
        other=FeatureWeights(
            pos=[
                # Neither 'zoo' nor 'bar' was found, so the whole FW
                # stays in "other".
                FW([{'name': 'zoo', 'sign': 1},
                    {'name': 'bar', 'sign': 1}], 3),
            ],
            neg=[FW(hl_in_text, -2)],
        ))
    assert w_spans == expected
Example #5
0
def test_weighted_spans_word():
    """Word analyzer: matched tokens become spans; 'bias' and the highlight
    sum stay in "other"; neg_remaining is carried through."""
    doc = 'I see: a leaning lemon tree'
    vec = CountVectorizer(analyzer='word')
    vec.fit([doc])
    weights = FeatureWeights(
        pos=[FW('see', 2), FW('lemon', 4), FW('bias', 8)],
        neg=[FW('tree', -6)],
        neg_remaining=10,
    )
    w_spans = get_weighted_spans(doc, vec, weights)
    expected = WeightedSpans(
        [DocWeightedSpans(
            document='i see: a leaning lemon tree',
            spans=[('see', [(2, 5)], 2), ('lemon', [(17, 22)], 4),
                   ('tree', [(23, 27)], -6)],
            preserve_density=False,
        )],
        other=FeatureWeights(
            pos=[FW('bias', 8), FW(hl_in_text, 0)],
            neg=[],
            neg_remaining=10,
        ))
    assert w_spans == expected
Example #6
0
def test_no_weighted_spans():
    """Empty feature weights yield empty spans and an empty "other"."""
    doc = 'I see: a leaning lemon tree'
    vec = CountVectorizer(analyzer='char', ngram_range=(3, 4))
    vec.fit([doc])
    result = get_weighted_spans(doc, vec, FeatureWeights(pos=[], neg=[]))
    expected = WeightedSpans(
        [DocWeightedSpans(
            document='i see: a leaning lemon tree',
            spans=[],
            preserve_density=True,
        )],
        other=FeatureWeights(pos=[], neg=[]))
    assert result == expected
Example #7
0
def test_override_preserve_density():
    """The preserve_density argument to the renderer overrides the flag
    stored on the DocWeightedSpans."""
    spans = DocWeightedSpans(
        document='see',
        spans=[
            ('se', [(0, 2)], 0.2),
            ('ee', [(1, 3)], 0.1),
        ],
        preserve_density=True,
    )
    rendered = _render_weighted_spans(spans, preserve_density=False)
    expected_start = (
        '<span '
        'style="background-color: hsl(120, 100.00%, 69.88%); opacity: 0.93" '
        'title="0.200">s</span>')
    assert rendered.startswith(expected_start)
def test_render_weighted_spans_zero():
    """All-zero weights must render cleanly even with numpy errors raised
    as exceptions (no division-by-zero in the weight scaling)."""
    spans = DocWeightedSpans(
        document='ab',
        spans=[('a', [(0, 1)], 0.0),
               ('b', [(1, 2)], 0.0)],
        preserve_density=False,
    )
    saved_err = np.geterr()
    np.seterr(all='raise')
    try:
        rendered = _render_weighted_spans(spans)
    finally:
        # Always restore the previous numpy error state.
        np.seterr(**saved_err)
    assert rendered == '<span style="opacity: 0.80">ab</span>'
Example #9
0
def test_render_weighted_spans_char():
    """Char ngrams: overlapping spans are rendered per character with
    density preserved."""
    spans = DocWeightedSpans(
        document='see',
        spans=[
            ('se', [(0, 2)], 0.2),
            ('ee', [(1, 3)], 0.1),
        ],
        preserve_density=True,
    )
    rendered = _render_weighted_spans(spans)
    expected = (
        '<span'
        ' style="background-color: hsl(120, 100.00%, 69.88%); opacity: 0.93"'
        ' title="0.100">s</span>'
        '<span'
        ' style="background-color: hsl(120, 100.00%, 60.00%); opacity: 1.00"'
        ' title="0.150">e</span>'
        '<span'
        ' style="background-color: hsl(120, 100.00%, 81.46%); opacity: 0.87"'
        ' title="0.050">e</span>')
    assert rendered == expected
Example #10
0
def test_weighted_spans_char():
    """Char analyzer with mixed ngram sizes: matched ngrams become spans,
    density is preserved, and their sum goes to "other"."""
    doc = 'I see: a leaning lemon tree'
    vec = CountVectorizer(analyzer='char', ngram_range=(3, 4))
    vec.fit([doc])
    weights = FeatureWeights(
        pos=[FW('see', 2), FW('a le', 5), FW('on ', 8)],
        neg=[FW('lem', -6)])
    w_spans = get_weighted_spans(doc, vec, weights)
    expected = WeightedSpans(
        [DocWeightedSpans(
            document='i see: a leaning lemon tree',
            spans=[('see', [(2, 5)], 2), ('lem', [(17, 20)], -6),
                   ('on ', [(20, 23)], 8), ('a le', [(7, 11)], 5)],
            preserve_density=True,
        )],
        other=FeatureWeights(
            pos=[FW(hl_in_text, 9)],
            neg=[],
        ))
    assert w_spans == expected
Example #11
0
def _get_doc_weighted_spans(
        doc,
        vec,
        feature_weights,  # type: FeatureWeights
        feature_fn=None  # type: Callable[[str], str]
):
    # type: (...) -> Optional[Tuple[FoundFeatures, DocWeightedSpans]]
    """Compute highlighted spans for *doc* under vectorizer *vec*.

    Returns ``(found_features, doc_weighted_spans)``, or ``None`` when the
    vectorizer is not supported or no span analyzer can be built for it.
    """
    if isinstance(vec, InvertableHashingVectorizer):
        # Unwrap and work with the underlying vectorizer.
        vec = vec.vec

    if hasattr(vec, 'get_doc_weighted_spans'):
        # The vectorizer can produce spans itself; delegate to it.
        return vec.get_doc_weighted_spans(doc, feature_weights, feature_fn)

    if not isinstance(vec, VectorizerMixin):
        return None

    span_analyzer, preprocessed_doc = build_span_analyzer(doc, vec)
    if span_analyzer is None:
        return None

    weights_by_feature = _get_feature_weights_dict(feature_weights,
                                                   feature_fn)
    found = {}
    collected_spans = []
    for f_spans, feature in span_analyzer(preprocessed_doc):
        try:
            weight, key = weights_by_feature[feature]
        except KeyError:
            continue
        collected_spans.append((feature, f_spans, weight))
        # XXX: this assumes feature names are unique
        found[key] = weight

    return found, DocWeightedSpans(
        document=preprocessed_doc,
        spans=collected_spans,
        preserve_density=vec.analyzer.startswith('char'),
    )
Example #12
0
def test_weighted_spans_word_bigrams():
    """Word bigrams: a single ngram feature maps to multiple highlighted
    spans (one per word)."""
    doc = 'I see: a leaning lemon tree'
    vec = CountVectorizer(analyzer='word', ngram_range=(1, 2))
    vec.fit([doc])
    weights = FeatureWeights(
        pos=[FW('see', 2), FW('leaning lemon', 5), FW('lemon tree', 8)],
        neg=[FW('tree', -6)])
    w_spans = get_weighted_spans(doc, vec, weights)
    expected = WeightedSpans(
        [DocWeightedSpans(
            document='i see: a leaning lemon tree',
            spans=[('see', [(2, 5)], 2), ('tree', [(23, 27)], -6),
                   ('leaning lemon', [(9, 16), (17, 22)], 5),
                   ('lemon tree', [(17, 22), (23, 27)], 8)],
            preserve_density=False,
        )],
        other=FeatureWeights(
            pos=[FW(hl_in_text, 9)],
            neg=[],
        ))
    assert w_spans == expected
def test_prepare_weighted_spans():
    """prepare_weighted_spans produces per-document char weights; the
    asserted weight_range values are shared per document position across
    targets."""
    target_one = TargetExplanation(
        target='one',
        feature_weights=FeatureWeights(pos=[], neg=[]),
        weighted_spans=WeightedSpans(docs_weighted_spans=[
            DocWeightedSpans(
                document='ab',
                spans=[
                    ('a', [(0, 1)], 1.5),
                    ('b', [(1, 2)], 2.5),
                ],
            ),
            DocWeightedSpans(
                document='xy',
                spans=[
                    ('xy', [(0, 2)], -4.5),
                ],
            ),
        ]))
    target_two = TargetExplanation(
        target='two',
        feature_weights=FeatureWeights(pos=[], neg=[]),
        weighted_spans=WeightedSpans(docs_weighted_spans=[
            DocWeightedSpans(
                document='abc',
                spans=[
                    ('a', [(0, 1)], 0.5),
                    ('c', [(2, 3)], 3.5),
                ],
            ),
            DocWeightedSpans(
                document='xz',
                spans=[
                    # char_wb at the start of the document
                    (' xz', [(-1, 2)], 1.5),
                ],
            ),
        ]))
    targets = [target_one, target_two]
    prepared = prepare_weighted_spans(targets, preserve_density=False)
    assert prepared == [
        [
            PreparedWeightedSpans(
                target_one.weighted_spans.docs_weighted_spans[0],
                char_weights=np.array([1.5, 2.5]),
                weight_range=3.5),
            PreparedWeightedSpans(
                target_one.weighted_spans.docs_weighted_spans[1],
                char_weights=np.array([-4.5, -4.5]),
                weight_range=4.5),
        ],
        [
            PreparedWeightedSpans(
                target_two.weighted_spans.docs_weighted_spans[0],
                char_weights=np.array([0.5, 0, 3.5]),
                weight_range=3.5),
            PreparedWeightedSpans(
                target_two.weighted_spans.docs_weighted_spans[1],
                char_weights=np.array([1.5, 1.5]),
                weight_range=4.5),
        ],
    ]