コード例 #1
0
ファイル: ner_demo.py プロジェクト: sean-dingxu/sciwing
st.markdown("---")
text_citation = st.text_input(label="Enter a citation string", value=citation_selected)
parse_citation_clicked = st.button("Parse Citation")

# Only switch to the typed citation once the user confirms with the button;
# until then keep working with the citation that was already selected.
if parse_citation_clicked:
    citation_selected = text_citation


# Percent-encode the citation before placing it in the URL path. Left raw, a
# "?" or "#" inside the citation would be read as the start of the query
# string / fragment, so the service would only receive a truncated citation.
from urllib.parse import quote

encoded_citation = quote(citation_selected, safe="")
response = requests.get(f"http://localhost:8000/parscit/{encoded_citation}")
# Fail here on a non-2xx reply instead of with a confusing KeyError below.
response.raise_for_status()
# NOTE(review): this name shadows the stdlib ``json`` module for the rest of
# the script; kept as-is because later code may read it - consider renaming.
json = response.json()
text = json["text_tokens"]
# "tags" arrives as one whitespace-separated string; split it into a list.
tags = json["tags"].split()

# Join the tokens into one space-separated string and run it through the
# whitespace tokenizer.
tokenizer = WordTokenizer(tokenizer="spacy-whitespace")
citation_text = " ".join(text)
doc = tokenizer.nlp(citation_text)

# start index of every token
token_indices = [token.idx for token in doc]

# Each span runs from a token's start index to the next token's start index;
# the last token's span ends at the length of the joined string.
next_starts = token_indices[1:] + [len(citation_text)]
start_end_indices = list(zip(token_indices, next_starts))


HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
# One entity dict per (tag, span) pair; zip stops at the shorter of the two.
ents = [
    {"start": start_idx, "end": end_idx, "label": tag}
    for tag, (start_idx, end_idx) in zip(tags, start_end_indices)
]
コード例 #2
0
ファイル: pipeline_demo.py プロジェクト: yyht/sciwing
            f"{header} ({normalized})" for header, normalized in zip(
                section_headers, normalized_section_headers)
        ]

        # List the "header (normalized)" strings, separated by HTML line
        # breaks, inside the HTML_WRAPPER markup.
        st.write("### Sections (Normalized Sections)")
        sections_markup = "<br />".join(header_normalized_header)
        st.write(HTML_WRAPPER.format(sections_markup), unsafe_allow_html=True)

        st.write("### Parsed References. ")

        # The wrapper markup and the tokenizer do not depend on the reference
        # being processed, so set them up once instead of on every iteration.
        HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
        tokenizer = WordTokenizer(tokenizer="spacy-whitespace")

        for reference, tags in zip(references, parsed_reference_strings):
            doc = tokenizer.nlp(reference)

            # start index of every token
            token_indices = [token.idx for token in doc]

            # Pair each token's start index with the next token's start index;
            # the last token's span ends at the length of the reference string.
            start_end_indices = list(
                itertools.zip_longest(
                    token_indices, token_indices[1:], fillvalue=len(reference)
                )
            )

            # One entity dict per (tag, span) pair; ``tags`` is a single
            # whitespace-separated string and zip stops at the shorter input.
            ents = [
                {"start": start_idx, "end": end_idx, "label": tag}
                for tag, (start_idx, end_idx) in zip(tags.split(), start_end_indices)
            ]