def test_section_data_ent(self):
     """Entity-level data carries the section attributes of the first entity."""
     processed = DocConsumer(nlp)(section_doc)
     ent_data = processed._.get_data("ent")
     first_ent = processed.ents[0]
     # The exported column names match the extension attribute names.
     for attr_name in ("section_category", "section_parent"):
         assert ent_data[attr_name][0] == getattr(first_ent._, attr_name)
def write_nlp_db(nlp, df, filepath="./nlp.db"):
    """Run ``nlp`` over ``df["text"]`` and write the results to a SQLite file.

    Every text is processed with ``nlp.pipe``, each resulting doc is passed
    through a ``DocConsumer``, and the docs are then written one by one to
    the "ents" table with a ``DbWriter``. Progress is reported with ``print``.

    Args:
        nlp: Pipeline object; must provide ``pipe(texts)`` and be accepted
            by ``DocConsumer``.
        df: Object indexable by ``"text"`` whose value is a sized iterable
            of texts (presumably a pandas DataFrame -- confirm with callers).
        filepath: Path of the SQLite database file to connect to.

    Note:
        The writer is created with ``create_table=True, drop_existing=True``,
        so an existing "ents" table is presumably replaced -- confirm against
        the ``DbWriter`` documentation.
    """
    import sqlite3
    from datetime import datetime

    # Only the names actually used are imported (DbReader / Pipeline were unused).
    from medspacy.io import DbConnect, DbWriter, DocConsumer

    start = datetime.now()
    texts = df["text"]  # Process all of the texts
    print("Processing {} texts".format(len(texts)))
    docs = list(nlp.pipe(texts))

    # Report a plain number of seconds; formatting the raw timedelta would
    # print e.g. "0:00:01.5 seconds".
    elapsed_seconds = (datetime.now() - start).total_seconds()
    print("Processed {} docs in {} seconds".format(len(docs), elapsed_seconds))

    doc_consumer = DocConsumer(nlp)
    for doc in docs:
        doc_consumer(doc)

    conn = sqlite3.connect(filepath)
    try:
        db_conn = DbConnect(conn=conn)
        writer = DbWriter(db_conn, "ents", create_table=True, drop_existing=True)
        for doc in docs:
            writer.write(doc)
    finally:
        # Release the database handle even when writing fails part-way.
        conn.close()

    print()
    print("Saved output from {} docs to '{}'".format(len(docs), filepath))
 def test_default_cols(self):
     """Default entity data exposes exactly the default entity columns."""
     consumer = DocConsumer(nlp)
     ent_data = consumer(simple_doc)._.get_data("ent")
     assert ent_data is not None
     columns = set(ent_data.keys())
     # Both the consumer's own configuration and the module default must agree.
     assert columns == set(consumer.dtype_attrs["ent"])
     assert columns == set(DEFAULT_ENT_ATTRS)
 def test_get_default_attrs(self):
     """``get_default_attrs`` returns the expected attributes per data type."""
     defaults = DocConsumer.get_default_attrs()
     assert set(defaults.keys()) == {"ent", "context", "section", "doc"}
     expected_by_dtype = (
         ("ent", DEFAULT_ENT_ATTRS),
         ("section", ALLOWED_SECTION_ATTRS),
         ("context", ALLOWED_CONTEXT_ATTRS),
         ("doc", DEFAULT_DOC_ATTRS),
     )
     for dtype, expected in expected_by_dtype:
         assert set(defaults[dtype]) == set(expected)
 def test_default_data(self):
     """Default entity columns hold the values of the first entity."""
     processed = DocConsumer(nlp)(simple_doc)
     ent_data = processed._.get_data("ent")
     first_ent = processed.ents[0]
     # Each exported column is named after the span attribute it mirrors.
     for attr_name in ("text", "label_", "start_char", "end_char"):
         assert ent_data[attr_name][0] == getattr(first_ent, attr_name)
 def test_context_data(self):
     """Entity data carries the context flags of the first entity."""
     processed = DocConsumer(nlp)(context_doc)
     ent_data = processed._.get_data("ent")
     first_ent = processed.ents[0]
     context_flags = (
         "is_family",
         "is_hypothetical",
         "is_historical",
         "is_uncertain",
         "is_negated",
     )
     for flag in context_flags:
         assert ent_data[flag][0] == getattr(first_ent._, flag)
 def test_ten_concepts(self):
     """Every entity column has one value per entity in the doc."""
     consumer = DocConsumer(nlp, dtypes=("ent", ))
     # Consume every doc up front, then inspect them one at a time.
     processed_docs = list(map(consumer, many_concept_docs))
     for processed in processed_docs:
         print(processed)
         expected_len = len(processed.ents)
         ent_data = processed._.get_data("ent")
         for column in ent_data.keys():
             # Debug output: column name, entity count, column values.
             print(column)
             print(expected_len)
             print(ent_data[column])
             assert expected_len == len(ent_data[column])
 def test_section_data_section(self):
     """Section-level data mirrors the attributes of the first section."""
     processed = DocConsumer(nlp, dtypes=("section",))(section_doc)
     section_data = processed._.get_data("section")
     first_section = processed._.sections[0]

     def first_value(column):
         # Value exported for the first section in the given column.
         return section_data[column][0]

     assert first_value("section_category") == first_section.category
     # Title span columns.
     assert first_value("section_title_text") == first_section.title_span.text
     assert first_value("section_title_start_char") == first_section.title_span.start_char
     assert first_value("section_title_end_char") == first_section.title_span.end_char
     # Whole-section span columns.
     assert first_value("section_text") == first_section.section_span.text
     assert first_value("section_text_start_char") == first_section.section_span.start_char
     assert first_value("section_text_end_char") == first_section.section_span.end_char
     assert first_value("section_parent") == first_section.parent
# Example #9
0
from medspacy.io.db_connect import DbConnect
import sqlite3

import medspacy
from medspacy.target_matcher import TargetRule
from medspacy.io import DocConsumer

# Module-level fixtures shared by the tests below.
# NOTE(review): `tempfile` and `os` are not imported in the visible part of
# this file -- confirm they are imported elsewhere.
# `tmpdirname` holds the TemporaryDirectory object itself (not a string); its
# path is read through `.name` when building the database file path.
tmpdirname = tempfile.TemporaryDirectory()
db = os.path.join(tmpdirname.name, "test")

# Load a pipeline with the listed components enabled and register a single
# target rule labelling "pneumonia" as CONDITION.
nlp = medspacy.load(enable=["sentencizer", "target_matcher", "context", "sectionizer"])
nlp.get_pipe("target_matcher").add(TargetRule("pneumonia", "CONDITION"))
doc = nlp("There is no evidence of pneumonia.")

# Run the doc through a DocConsumer so that it is ready to be handed to a
# writer in the tests (the return value is not used here).
doc_consumer = DocConsumer(nlp)
doc_consumer(doc)



class TestDbWriter:
    """Smoke tests for writing a processed doc to SQLite through ``DbWriter``."""

    def test_init_from_sqlite3_conn_defaults(self):
        """Test writing with default values for ent attributes."""
        from medspacy.io.db_writer import DbWriter

        sq_conn = sqlite3.connect(db)
        try:
            db_conn = DbConnect(conn=sq_conn)
            # cols / col_types left as None so the writer falls back to its
            # defaults.
            writer = DbWriter(db_conn, "ents", cols=None, col_types=None,
                              create_table=True, drop_existing=False)
            writer.write(doc)
        finally:
            # Always release the handle on the temporary database file, even
            # when the write raises.
            sq_conn.close()
 def test_all_dtypes(self):
     """Passing ``dtypes="all"`` selects every allowed data type."""
     selected = DocConsumer(nlp, dtypes="all").dtypes
     assert selected == ALLOWED_DATA_TYPES
 def test_section_cols(self):
     """Section data exposes exactly the allowed section columns."""
     section_only = DocConsumer(nlp, dtypes=("section", ))
     section_data = section_only(context_doc)._.get_data("section")
     assert section_data is not None
     columns = set(section_data.keys())
     assert columns == set(ALLOWED_SECTION_ATTRS)
 def test_context_cols(self):
     """Context data exposes exactly the allowed context columns."""
     context_only = DocConsumer(nlp, dtypes=("context", ))
     context_data = context_only(context_doc)._.get_data("context")
     assert context_data is not None
     columns = set(context_data.keys())
     assert columns == set(ALLOWED_CONTEXT_ATTRS)
 def test_init_context(self):
     """An explicit ``dtypes`` tuple is stored unchanged on the consumer."""
     requested = ("context", )
     assert DocConsumer(nlp, dtypes=requested).dtypes == ("context", )
 def test_init_default(self):
     """A consumer built with defaults is truthy and only handles entities."""
     default_consumer = DocConsumer(nlp)
     # A second, independent instance is built just for the truthiness check.
     throwaway = DocConsumer(nlp)
     assert throwaway
     assert default_consumer.dtypes == ("ent", )
 def test_get_data_attrs_not_none(self):
     """``get_data`` returns only the columns named in ``attrs``."""
     processed = DocConsumer(nlp)(simple_doc)
     subset = processed._.get_data("ent", attrs=["label_", "is_negated"])
     returned_columns = set(subset.keys())
     assert returned_columns == {"label_", "is_negated"}
from medspacy.io import DocConsumer

# Module-level fixtures for the database tests.
# NOTE(review): `tempfile`, `os`, `medspacy` and `TargetRule` are not imported
# in the visible part of this file -- confirm they are imported above.
# `tmpdirname` holds the TemporaryDirectory object itself; the database file
# path is built from its `.name`.
tmpdirname = tempfile.TemporaryDirectory()
db = os.path.join(tmpdirname.name, "test.db")

# Set up a simple pipeline which will allow us to write results
# NOTE(review): the component is enabled as "target_matcher" but fetched as
# "medspacy_target_matcher" -- confirm both names are valid for the installed
# medspacy version.
nlp = medspacy.load(
    enable=["pyrush", "target_matcher", "context", "sectionizer"])
nlp.get_pipe("medspacy_target_matcher").add([
    TargetRule("pneumonia", "CONDITION"),
    TargetRule("breast ca", "CONDITION")
])
# `doc` is created here, before the doc consumer component is added to the
# pipeline below, so it has not passed through that component.
doc = nlp("There is no evidence of pneumonia.")

# Standalone consumer restricted to four entity attributes; it is only
# constructed here, not called, in the visible code.
doc_consumer = DocConsumer(
    nlp,
    dtype_attrs={"ent": ["text", "label_", "is_negated", "section_category"]})
# Register the same four-attribute configuration as a pipeline component.
nlp.add_pipe("medspacy_doc_consumer",
             config={
                 "dtype_attrs": {
                     "ent":
                     ["text", "label_", "is_negated", "section_category"]
                 }
             })

# SQL column types; presumably one per entity attribute above, in the same
# order (text, label_, is_negated, section_category) -- confirm against usage.
db_dtypes = [
    "varchar(100)",
    "varchar(100)",
    "int",
    "varchar(100)",
]