Ejemplo n.º 1
0
def mongo_writer(client, pipeline, job, batch, pipeline_config: config.PipelineConfig, val, doc, type):
    """Assemble a result record for ``val`` and pass it to ``config.insert_pipeline_results``.

    Args:
        client: Indexed with ``util.mongo_db`` to obtain the database handle.
        pipeline: Stored under ``pipeline_id``.
        job: Stored under ``job_id``.
        batch: Stored under ``batch``.
        pipeline_config: Supplies ``owner``, ``name`` (stored as
            ``nlpql_feature``) and ``concept_code``.
        val: Object exposing ``sentence``, ``text``, ``start``, ``end``,
            ``label`` and ``description``.
        doc: Mapping providing ``report_type``, ``report_id``, ``subject``
            and ``report_date``.
        type: Stored under ``pipeline_type``.

    Returns:
        Whatever ``config.insert_pipeline_results`` returns.
    """
    database = client[util.mongo_db]

    # Keys are added one by one in the order they should appear in the record.
    record = {}

    # Identification of the run that produced this result.
    record["pipeline_type"] = type
    record["pipeline_id"] = pipeline
    record["job_id"] = job
    record["batch"] = batch
    record["owner"] = pipeline_config.owner

    # Sentence plus report-level fields copied from ``doc``.
    record["sentence"] = val.sentence
    record["report_type"] = doc["report_type"]
    record["nlpql_feature"] = pipeline_config.name
    record["inserted_date"] = datetime.datetime.now()
    record["report_id"] = doc["report_id"]
    record["subject"] = doc["subject"]
    record["report_date"] = doc["report_date"]
    record["section"] = ""  # this writer always stores an empty section
    record["concept_code"] = pipeline_config.concept_code

    # The matched value itself; ``term`` and ``text`` both carry ``val.text``.
    record["term"] = val.text
    record["text"] = val.text
    record["start"] = val.start
    record["end"] = val.end
    record["label"] = val.label
    record["description"] = val.description
    record["phenotype_final"] = False

    return config.insert_pipeline_results(pipeline_config, database, record)
Ejemplo n.º 2
0
def mongo_writer(client, pipeline, job, batch, pipeline_config, term, doc,
                 type):
    """Assemble a result record for ``term`` and pass it to ``config.insert_pipeline_results``.

    Args:
        client: Indexed with ``util.mongo_db`` to obtain the database handle.
        pipeline: Stored under ``pipeline_id``.
        job: Stored under ``job_id``.
        batch: Stored under ``batch``.
        pipeline_config: Supplies ``owner``, ``name`` (stored as
            ``nlpql_feature``) and ``concept_code``.
        term: Object exposing ``sentence``, ``section``, ``term``, ``start``,
            ``end``, ``negex``, ``temporality`` and ``experiencer``.
        doc: Mapping providing ``report_type``, ``report_id``, ``subject``
            and ``report_date``.
        type: Stored under ``pipeline_type``.

    Returns:
        Whatever ``config.insert_pipeline_results`` returns.
    """
    database = client[util.mongo_db]

    # Keys are added one by one in the order they should appear in the record.
    record = {}

    # Identification of the run that produced this result.
    record["pipeline_type"] = type
    record["pipeline_id"] = pipeline
    record["job_id"] = job
    record["batch"] = batch
    record["owner"] = pipeline_config.owner

    # Sentence plus report-level fields copied from ``doc``.
    record["sentence"] = term.sentence
    record["report_type"] = doc["report_type"]
    record["nlpql_feature"] = pipeline_config.name
    record["inserted_date"] = datetime.datetime.now()
    record["report_id"] = doc["report_id"]
    record["subject"] = doc["subject"]
    record["report_date"] = doc["report_date"]

    # Location of the term and its context attributes.
    record["section"] = term.section
    record["term"] = term.term
    record["start"] = term.start
    record["end"] = term.end
    record["concept_code"] = pipeline_config.concept_code
    record["negation"] = term.negex  # attribute is ``negex``, key is ``negation``
    record["temporality"] = term.temporality
    record["experiencer"] = term.experiencer
    record["phenotype_final"] = False

    return config.insert_pipeline_results(pipeline_config, database, record)
Ejemplo n.º 3
0
def mongo_writer(client, pipeline, job, batch, pipeline_config, val, doc, p_type):
    """Assemble a token-level result record and pass it to ``config.insert_pipeline_results``.

    Args:
        client: Indexed with ``util.mongo_db`` to obtain the database handle.
        pipeline: Stored under ``pipeline_id``.
        job: Stored under ``job_id``.
        batch: Stored under ``batch``.
        pipeline_config: Supplies ``owner``, ``name`` (stored as
            ``nlpql_feature``) and ``concept_code``.
        val: Object exposing ``sentence``, ``text``, ``lemma``, ``pos``,
            ``tag``, ``dep``, ``shape``, ``is_alpha``, ``is_stop`` and
            ``description``.
        doc: Mapping providing ``report_type``, ``report_id``, ``subject``
            and ``report_date``.
        p_type: Stored under ``pipeline_type``.

    Returns:
        Whatever ``config.insert_pipeline_results`` returns.
    """
    database = client[util.mongo_db]

    # Keys are added one by one in the order they should appear in the record.
    record = {}

    # Identification of the run that produced this result.
    record["pipeline_type"] = p_type
    record["pipeline_id"] = pipeline
    record["job_id"] = job
    record["batch"] = batch
    record["owner"] = pipeline_config.owner

    # Sentence plus report-level fields copied from ``doc``.
    record["sentence"] = val.sentence
    record["report_type"] = doc["report_type"]
    record["nlpql_feature"] = pipeline_config.name
    record["inserted_date"] = datetime.datetime.now()
    record["report_id"] = doc["report_id"]
    record["subject"] = doc["subject"]
    record["report_date"] = doc["report_date"]
    record["section"] = ""  # this writer always stores an empty section
    record["concept_code"] = pipeline_config.concept_code

    # Per-token attributes; ``term`` and ``text`` both carry ``val.text``.
    record["term"] = val.text
    record["text"] = val.text
    record["lemma"] = val.lemma
    record["pos"] = val.pos
    record["tag"] = val.tag
    record["dep"] = val.dep
    record["shape"] = val.shape
    record["is_alpha"] = val.is_alpha
    record["is_stop"] = val.is_stop
    record["description"] = val.description
    record["phenotype_final"] = False

    return config.insert_pipeline_results(pipeline_config, database, record)
Ejemplo n.º 4
0
def pipeline_mongo_writer(client,
                          pipeline_id,
                          pipeline_type,
                          job,
                          batch,
                          p_config: pipeline_config.PipelineConfig,
                          doc,
                          data_fields: dict,
                          prefix: str = '',
                          phenotype_final: bool = False):
    """Stamp ``data_fields`` with run and report metadata, then pass it on for insertion.

    ``data_fields`` is modified in place and handed to
    ``config.insert_pipeline_results``.

    Args:
        client: Indexed with ``util.mongo_db`` to obtain the database handle.
        pipeline_id: Stored under ``pipeline_id``.
        pipeline_type: Stored under ``pipeline_type``.
        job: Stored under ``job_id``.
        batch: Stored under ``batch``.
        p_config: Supplies ``owner``, ``name``, ``concept_code``,
            ``concept_code_system`` and ``final``.
        doc: Mapping indexed with the ``util.solr_*_field`` names. When it is
            falsy, every name in ``doc_fields`` (defined outside this
            function) that is missing from ``data_fields`` is set to ``''``.
        data_fields: Result-specific fields; must be non-empty.
        prefix: Prepended to ``p_config.name`` to form ``nlpql_feature``.
        phenotype_final: OR-ed with ``p_config.final`` for ``phenotype_final``.

    Returns:
        ``None`` (after printing a message) when ``data_fields`` or
        ``p_config`` is falsy; otherwise whatever
        ``config.insert_pipeline_results`` returns.
    """
    database = client[util.mongo_db]

    # Guard clauses: both inputs are required before anything is written.
    if not data_fields:
        print('must have additional data fields')
        return None
    if not p_config:
        print('must have pipeline config')
        return None

    data_fields.update({
        "pipeline_type": pipeline_type,
        "pipeline_id": pipeline_id,
        "job_id": job,
        "batch": batch,
        "owner": p_config.owner,
        "nlpql_feature": prefix + p_config.name,
        "inserted_date": datetime.datetime.now(),
        "concept_code": p_config.concept_code,
        "concept_code_system": p_config.concept_code_system,
        "phenotype_final": phenotype_final or p_config.final,
    })

    if not doc:
        # No source document: blank out any document field not already set.
        for field_name in doc_fields:
            data_fields.setdefault(field_name, '')
    else:
        # (record key, Solr field name) pairs copied from the document.
        solr_sources = (
            ("report_id", util.solr_report_id_field),
            ("subject", util.solr_subject_field),
            ("report_date", util.solr_report_date_field),
            ("report_type", util.solr_report_type_field),
            ("source", util.solr_source_field),
            ("solr_id", util.solr_id_field),
        )
        for record_key, solr_field in solr_sources:
            data_fields[record_key] = doc[solr_field]

    return config.insert_pipeline_results(p_config, database, data_fields)
Ejemplo n.º 5
0
def mongo_writer(client, pipeline, job, batch,
                 pipeline_config: config.PipelineConfig, meas: Measurement,
                 doc, type):
    """Assemble a measurement result record and pass it to ``config.insert_pipeline_results``.

    Args:
        client: Indexed with ``util.mongo_db`` to obtain the database handle.
        pipeline: Stored under ``pipeline_id``.
        job: Stored under ``job_id``.
        batch: Stored under ``batch``.
        pipeline_config: Supplies ``owner``, ``name`` (stored as
            ``nlpql_feature``) and ``concept_code``.
        meas: Measurement exposing ``sentence``, ``text``, ``start``, ``end``,
            ``subject``, ``X``, ``Y``, ``Z``, ``units``, ``location``,
            ``condition``, ``value1``, ``value2`` and ``temporality``.
        doc: Mapping providing ``report_type``, ``report_id``, ``subject``
            and ``report_date``.
        type: Stored under ``pipeline_type``.

    Returns:
        Whatever ``config.insert_pipeline_results`` returns.
    """
    db = client[util.mongo_db]

    # Read X by attribute, like every other measurement field below (including
    # ``dimension_X``); subscripting only works if Measurement also defines
    # ``__getitem__``.
    value = meas.X

    obj = {
        "pipeline_type": type,
        "pipeline_id": pipeline,
        "job_id": job,
        "batch": batch,
        "owner": pipeline_config.owner,
        "sentence": meas.sentence,
        "report_type": doc["report_type"],
        "nlpql_feature": pipeline_config.name,
        "inserted_date": datetime.datetime.now(),
        "report_id": doc["report_id"],
        "subject": doc["subject"],
        "report_date": doc["report_date"],
        "section": "",  # this writer always stores an empty section
        "concept_code": pipeline_config.concept_code,
        "text": meas.text,
        "start": meas.start,
        "value": value,  # same number that is stored under dimension_X
        "end": meas.end,
        "term": meas.subject,
        "dimension_X": meas.X,
        "dimension_Y": meas.Y,
        "dimension_Z": meas.Z,
        "units": meas.units,
        "location": meas.location,
        "condition": meas.condition,
        "value1": meas.value1,
        "value2": meas.value2,
        "temporality": meas.temporality,
        "phenotype_final": False
    }

    return config.insert_pipeline_results(pipeline_config, db, obj)
Ejemplo n.º 6
0
def pipeline_mongo_writer(client, pipeline_id, pipeline_type, job, batch,
                          p_config: pipeline_config.PipelineConfig, doc,
                          data_fields: dict):
    """Stamp ``data_fields`` with run and report metadata, then pass it on for insertion.

    ``data_fields`` is modified in place and handed to
    ``config.insert_pipeline_results``. No input validation is performed.

    Args:
        client: Indexed with ``util.mongo_db`` to obtain the database handle.
        pipeline_id: Stored under ``pipeline_id``.
        pipeline_type: Stored under ``pipeline_type``.
        job: Stored under ``job_id``.
        batch: Stored under ``batch``.
        p_config: Supplies ``owner``, ``name`` (stored as ``nlpql_feature``)
            and ``concept_code``.
        doc: Mapping indexed with ``util.solr_report_id_field``,
            ``util.solr_subject_field`` and ``util.solr_report_date_field``.
        data_fields: Result-specific fields to be extended.

    Returns:
        Whatever ``config.insert_pipeline_results`` returns.
    """
    database = client[util.mongo_db]

    # Run identification first ...
    data_fields.update({
        "pipeline_type": pipeline_type,
        "pipeline_id": pipeline_id,
        "job_id": job,
        "batch": batch,
        "owner": p_config.owner,
        "nlpql_feature": p_config.name,
        "inserted_date": datetime.datetime.now(),
    })

    # ... then the report fields, as (record key, Solr field name) pairs ...
    solr_sources = (
        ("report_id", util.solr_report_id_field),
        ("subject", util.solr_subject_field),
        ("report_date", util.solr_report_date_field),
    )
    for record_key, solr_field in solr_sources:
        data_fields[record_key] = doc[solr_field]

    # ... and finally the concept code and the fixed phenotype flag.
    data_fields.update({
        "concept_code": p_config.concept_code,
        "phenotype_final": False,
    })

    return config.insert_pipeline_results(p_config, database, data_fields)
Ejemplo n.º 7
0
def _first_highlight(data_fields: dict) -> list:
    """Return ``[value]`` for the first highlight field with a usable value.

    Fields are tried in a fixed priority order. ``None`` and empty values are
    skipped. A value that has no length (for example a numeric ``value``) is
    converted with ``str`` instead of letting ``len`` raise ``TypeError``.
    Returns an empty list when no field qualifies.
    """
    highlight_fields = (
        'term', 'text', 'value', 'units', 'word', 'highlight', 'highlights'
    )
    for name in highlight_fields:
        candidate = data_fields.get(name)
        if candidate is None:
            continue
        if not hasattr(candidate, '__len__'):
            return [str(candidate)]
        if len(candidate) > 0:
            return [candidate]
    return []


def pipeline_mongo_writer(client,
                          pipeline_id,
                          pipeline_type,
                          job,
                          batch,
                          p_config: pipeline_config.PipelineConfig,
                          doc,
                          data_fields: dict,
                          prefix: str = '',
                          phenotype_final: bool = False):
    """Stamp ``data_fields`` with metadata and display info, then pass it on for insertion.

    ``data_fields`` is modified in place and handed to
    ``config.insert_pipeline_results``.

    Args:
        client: Indexed with ``util.mongo_db`` to obtain the database handle.
        pipeline_id: Converted with ``int`` and stored under ``pipeline_id``.
        pipeline_type: Stored under ``pipeline_type``.
        job: Converted with ``int`` and stored under ``job_id``.
        batch: Stored under ``batch``.
        p_config: Supplies ``owner``, ``name``, ``concept_code``,
            ``concept_code_system``, ``final`` and ``display_name``.
        doc: Mapping indexed with the ``util.solr_*_field`` names. When it is
            falsy, every name in ``doc_fields`` (defined outside this
            function) that is missing from ``data_fields`` is set to ``''``.
        data_fields: Result-specific fields; must be non-empty.
        prefix: Prepended to ``p_config.name`` to form ``nlpql_feature``.
        phenotype_final: OR-ed with ``p_config.final`` for ``phenotype_final``.

    Returns:
        ``None`` (after logging an error) when ``data_fields`` or ``p_config``
        is falsy; otherwise the value returned by
        ``config.insert_pipeline_results``.
    """
    db = client[util.mongo_db]

    if not data_fields:
        log('must have additional data fields', ERROR)
        return None

    if not p_config:
        log('must have pipeline config', ERROR)
        return None

    data_fields["pipeline_type"] = pipeline_type
    data_fields["pipeline_id"] = int(pipeline_id)
    data_fields["job_id"] = int(job)
    data_fields["batch"] = batch
    data_fields["owner"] = p_config.owner
    data_fields["nlpql_feature"] = (prefix + p_config.name)
    data_fields["inserted_date"] = datetime.datetime.now()
    data_fields["concept_code"] = p_config.concept_code
    data_fields["concept_code_system"] = p_config.concept_code_system
    data_fields["phenotype_final"] = (phenotype_final or p_config.final)
    data_fields["display_name"] = p_config.display_name

    if doc:
        data_fields["report_id"] = doc[util.solr_report_id_field]
        data_fields["subject"] = doc[util.solr_subject_field]
        data_fields["report_date"] = doc[util.solr_report_date_field]
        data_fields["report_type"] = doc[util.solr_report_type_field]
        data_fields["source"] = doc[util.solr_source_field]
        data_fields["solr_id"] = doc[util.solr_id_field]
    else:
        for df in doc_fields:
            if df not in data_fields:
                data_fields[df] = ''

    if "result_display" not in data_fields:
        # Build a default display entry; missing or falsy offsets become 0.
        data_fields["result_display"] = {
            "date": data_fields.get('report_date'),
            "result_content": data_fields.get('sentence'),
            "highlights": _first_highlight(data_fields),
            "sentence": data_fields.get('sentence'),
            'start': [data_fields.get('start') or 0],
            'end': [data_fields.get('end') or 0]
        }
    else:
        # The caller supplied its own display entry; only fill in highlights
        # when they are absent, empty or None.
        display = data_fields['result_display']
        if not display.get("highlights"):
            display['highlights'] = _first_highlight(data_fields)

    inserted = config.insert_pipeline_results(p_config, db, data_fields)
    log(
        '(job={}; pipeline={}) inserted into mongodb {}'.format(
            job, pipeline_id, repr(inserted.inserted_id)), DEBUG)

    return inserted
Ejemplo n.º 8
0
def pipeline_mongo_writer(client,
                          pipeline_id,
                          pipeline_type,
                          job,
                          batch,
                          p_config: pipeline_config.PipelineConfig,
                          doc,
                          data_fields: dict,
                          prefix: str = '',
                          phenotype_final: bool = False):
    """Stamp ``data_fields`` with metadata and display info, then pass it on for insertion.

    ``data_fields`` is modified in place and handed to
    ``config.insert_pipeline_results``. A ``result_display`` entry is added
    only when the caller has not supplied one.

    Args:
        client: Indexed with ``util.mongo_db`` to obtain the database handle.
        pipeline_id: Stored under ``pipeline_id``.
        pipeline_type: Stored under ``pipeline_type``.
        job: Stored under ``job_id``.
        batch: Stored under ``batch``.
        p_config: Supplies ``owner``, ``name``, ``concept_code``,
            ``concept_code_system``, ``final`` and ``display_name``.
        doc: Mapping indexed with the ``util.solr_*_field`` names. When it is
            falsy, every name in ``doc_fields`` (defined outside this
            function) that is missing from ``data_fields`` is set to ``''``.
        data_fields: Result-specific fields; must be non-empty.
        prefix: Prepended to ``p_config.name`` to form ``nlpql_feature``.
        phenotype_final: OR-ed with ``p_config.final`` for ``phenotype_final``.

    Returns:
        ``None`` (after printing a message) when ``data_fields`` or
        ``p_config`` is falsy; otherwise whatever
        ``config.insert_pipeline_results`` returns.
    """
    database = client[util.mongo_db]

    # Guard clauses: both inputs are required before anything is written.
    if not data_fields:
        print('must have additional data fields')
        return None
    if not p_config:
        print('must have pipeline config')
        return None

    data_fields.update({
        "pipeline_type": pipeline_type,
        "pipeline_id": pipeline_id,
        "job_id": job,
        "batch": batch,
        "owner": p_config.owner,
        "nlpql_feature": prefix + p_config.name,
        "inserted_date": datetime.datetime.now(),
        "concept_code": p_config.concept_code,
        "concept_code_system": p_config.concept_code_system,
        "phenotype_final": phenotype_final or p_config.final,
        "display_name": p_config.display_name,
    })

    if not doc:
        # No source document: blank out any document field not already set.
        for field_name in doc_fields:
            data_fields.setdefault(field_name, '')
    else:
        # (record key, Solr field name) pairs copied from the document.
        solr_sources = (
            ("report_id", util.solr_report_id_field),
            ("subject", util.solr_subject_field),
            ("report_date", util.solr_report_date_field),
            ("report_type", util.solr_report_type_field),
            ("source", util.solr_source_field),
            ("solr_id", util.solr_id_field),
        )
        for record_key, solr_field in solr_sources:
            data_fields[record_key] = doc[solr_field]

    if "result_display" not in data_fields:
        # Missing or falsy offsets fall back to 0.
        span_start = data_fields.get('start') or 0
        span_end = data_fields.get('end') or 0

        # The matched text, when present, is the only highlight.
        matched_text = data_fields.get('text')
        highlight_list = [matched_text] if matched_text else []

        data_fields["result_display"] = {
            "date": data_fields.get('report_date'),
            "result_content": data_fields.get('sentence'),
            "highlights": highlight_list,
            "sentence": data_fields.get('sentence'),
            'start': [span_start],
            'end': [span_end],
        }

    return config.insert_pipeline_results(p_config, database, data_fields)