Example #1
0
def test_issue3882(en_vocab):
    """Regression test: parse_deps must not serialize ``Doc.user_data``.

    A ``set`` is planted in ``user_data`` precisely because sets are not
    JSON-serializable; the call should still succeed.
    """
    tokens = ["Hello", "world"]
    doc = Doc(en_vocab, words=tokens, deps=["dep"] * len(tokens))
    doc.user_data["test"] = set()
    displacy.parse_deps(doc)
Example #2
0
def visualize_parser(docs: List[spacy.tokens.Doc],
                     *,
                     title: Optional[str] = None,
                     key: Optional[str] = None) -> None:
    """Render dependency-parse SVGs for *docs* in a Streamlit app.

    Draws two side-by-side controls: a slider capping how many sentences
    are shown and a radio switching between the full displaCy parse and
    the collocation-candidate subset produced by ``my_parser``.
    """
    st.header(title)
    columns = st.beta_columns(2)
    limit = columns[1].select_slider('Number of Sentences to Visualise:',
                                     options=[0, 1, 2, 3, 4],
                                     value=1)
    mode = columns[0].radio('Which Dependencies to Show',
                            ('All', 'Collocation Candidates'))
    if limit < 1:
        return
    for idx, sent in enumerate(docs):
        if idx >= limit:
            break
        full_parse = displacy.parse_deps(sent)
        candidate_parse = my_parser(sent, idx)
        chosen = full_parse if mode == 'All' else candidate_parse
        markup = displacy.render(chosen, style="dep", manual=True)
        # Double newlines seem to mess with the rendering
        markup = markup.replace("\n\n", "\n")
        if len(docs) > 1:
            st.markdown(f"> {sent.text}")
        st.write(get_svg(markup), unsafe_allow_html=True)
Example #3
0
def recognize_named_tag(event, context):
    """AWS Lambda handler: dependency-parse posted text with displaCy.

    Expects ``event['body']`` to be a JSON document with a ``'text'``
    key; returns an API-Gateway-style response whose body is the
    displaCy parse dict plus rendering settings.

    Bug fixed: the original only bound ``parse`` when ``text`` was not
    None, so a null ``text`` crashed with NameError further down. A
    null payload now gets an explicit 400 response instead.
    """
    cors_headers = {
        'Content-Type': 'application/json',
        'Access-Control-Allow-Origin': '*'
    }

    request_body = event['body']
    text = json.loads(request_body)['text']
    print('received text from http post: ', text)

    if text is None:
        # Guard: without this, `parse` below would be unbound (NameError).
        return {
            "statusCode": 400,
            "body": json.dumps({"error": "missing 'text' in request body"}),
            "headers": cors_headers
        }

    doc = MODEL(text)
    parse = displacy.parse_deps(doc)

    # Rendering hints consumed by the displaCy front-end.
    parse['setting'] = {'lang': 'en', 'direction': 'ltr'}

    print('parse after create: ', parse)

    return {
        "statusCode": 200,
        "body": json.dumps(parse),
        "headers": cors_headers
    }
Example #4
0
def test_displacy_parse_deps(en_vocab):
    """Deps and tags on a Doc should convert into displaCy's dict format."""
    words = ["This", "is", "a", "sentence"]
    heads = [1, 1, 3, 1]
    pos = ["DET", "VERB", "DET", "NOUN"]
    tags = ["DT", "VBZ", "DT", "NN"]
    dep_labels = ["nsubj", "ROOT", "det", "attr"]
    doc = Doc(
        en_vocab,
        words=words,
        heads=heads,
        pos=pos,
        tags=tags,
        deps=dep_labels,
    )
    parsed = displacy.parse_deps(doc)
    assert isinstance(parsed, dict)
    # Each word entry carries the coarse POS under "tag" and no lemma.
    assert parsed["words"] == [
        {"lemma": None, "text": word, "tag": coarse}
        for word, coarse in zip(words, pos)
    ]
    assert parsed["arcs"] == [
        {"start": 0, "end": 1, "label": "nsubj", "dir": "left"},
        {"start": 2, "end": 3, "label": "det", "dir": "left"},
        {"start": 1, "end": 3, "label": "attr", "dir": "right"},
    ]
Example #5
0
def test_displacy_parse_deps(en_vocab):
    """A Doc's deps and tags should round-trip into displaCy's format."""
    doc = get_doc(
        en_vocab,
        words=["This", "is", "a", "sentence"],
        heads=[1, 0, 1, -2],
        pos=["DET", "VERB", "DET", "NOUN"],
        tags=["DT", "VBZ", "DT", "NN"],
        deps=["nsubj", "ROOT", "det", "attr"],
    )
    result = displacy.parse_deps(doc)
    assert isinstance(result, dict)
    expected_words = [
        {"text": "This", "tag": "DET"},
        {"text": "is", "tag": "VERB"},
        {"text": "a", "tag": "DET"},
        {"text": "sentence", "tag": "NOUN"},
    ]
    expected_arcs = [
        {"start": 0, "end": 1, "label": "nsubj", "dir": "left"},
        {"start": 2, "end": 3, "label": "det", "dir": "left"},
        {"start": 1, "end": 3, "label": "attr", "dir": "right"},
    ]
    assert result["words"] == expected_words
    assert result["arcs"] == expected_arcs
Example #6
0
def get_spacy_parse(contexts):
    """Re-parse each context's sentences with spaCy and copy per-token
    annotations onto the fresh parses.

    Each context dict is expected to hold parallel, equally-long
    ``"tokens"``, ``"extract_mask"`` and ``"weight_mask"`` entries —
    TODO confirm against the callers that produce them.

    Returns a list of spaCy parses, one per sentence across all contexts.
    """
    all_parses = []
    for context in contexts:
        tokens = context["tokens"]
        # Fold the two masks into each token dict, matched by position.
        for index in range(len(tokens)):
            tokens[index]["extract"] = context["extract_mask"][index]
            tokens[index]["weight"] = context["weight_mask"][index]

        sentences = get_sentence_tokens_from_token_list(tokens)
        for sentence_tokens in sentences:
            sentence_text = get_sentence_text_from_tokens(sentence_tokens)
            parse = en_nlp(sentence_text)
            for index, token in enumerate(parse):
                # assuming same token index, as we'd already parsed it with the same spacy model before
                if token.text.strip().lower(
                ) != sentence_tokens[index]["text"].strip().lower():
                    # Tokenisation drifted from the stored tokens: print a
                    # +/-7-token window from both streams for debugging,
                    # then abandon annotation of this sentence.
                    print(token.text.strip().lower(), "!=",
                          sentence_tokens[index]["text"].strip().lower())
                    print(parse[max(0, index -
                                    7):min(len(parse) - 1, index + 7)])
                    print(" ".join([
                        x["text"] for x in
                        sentence_tokens[max(0, index -
                                            7):min(len(parse) - 1, index + 7)]
                    ]))
                    break
                # Copy stored annotations onto custom token extensions.
                token._.extract = sentence_tokens[index]["extract"]
                token._.weight = sentence_tokens[index]["weight"]
                token._.dist_cit = sentence_tokens[index].get("dist_cit", 0)
                # token._.dist_cit_norm = sentence_tokens[index][0].get("dist_cit_norm", 0)

            # NOTE(review): display_dict is never used below — confirm
            # whether this parse_deps call is still needed at all.
            display_dict = displacy.parse_deps(parse)

            # for index, word in enumerate(display_dict["words"]):
            #     if word["text"] != parse[index].text:
            #         print("ARGh")
            # NOTE(review): the parse is appended even when the mismatch
            # branch above bailed out early — confirm this is intended.
            all_parses.append(parse)

    return all_parses
Example #7
0
def create_dep(text):
    """Parse *text* with the loaded model and return displaCy's dep dict."""
    return displacy.parse_deps(MODEL(text))
Example #8
0
import spacy
import os
from spacy import displacy
import json

nlp = spacy.load("en_core_web_sm")

# Read the uploaded text, parse it, and emit displaCy's dependency
# structure as JSON on stdout (consumed by the calling process).
upload_path = os.getcwd() + "/py/uploads/display-upload.txt"

with open(upload_path, 'r') as infile:
    contents = infile.read()

doc = nlp(contents)

# Force sentence segmentation (result itself is not used further).
sentence_spans = list(doc.sents)

print(json.dumps(displacy.parse_deps(doc)))
Example #9
0
def visualize_text(text):
    """Identify the language of *text*, parse it with the matching
    model, and return displaCy's dependency dict."""
    detected = settings.LANG_ID.classify(text)[0]
    model = settings.LANGUAGE_MODELS[detected]
    return displacy.parse_deps(model(text))
def tag_and_parse(text):
    """Run the shared MODEL over *text* and return the displaCy dep dict."""
    return displacy.parse_deps(MODEL(text))
Example #11
0
def displacy_service(text):
    """Return displaCy's dependency-parse dict for *text*."""
    return displacy.parse_deps(nlp(text))
Example #12
0
def displacy_service(text):
    """Serve the dependency parse of *text* for a local displaCy instance."""
    parsed = nlp(text)
    return displacy.parse_deps(parsed)
Example #13
0
 def displacyService(self, text):
     """Return a displaCy dependency-graph dict for *text*."""
     parsed = self.nlp(text)
     return displacy.parse_deps(parsed)