Example #1
0
def data_entities_to_pipelines(e: PhenotypeEntity, report_tags, all_terms,
                               owner, debug, cohorts):
    if e['named_arguments'] is None:
        e['named_arguments'] = dict()
    if 'value_sets' not in e['named_arguments']:
        e['named_arguments']['value_sets'] = []
    if 'termsets' not in e['named_arguments']:
        e['named_arguments']['termsets'] = []

    if e['library'] == "Clarity":
        # config_type, name, description, terms

        if 'documentsets' in e['named_arguments']:
            doc_sets = e['named_arguments']['documentsets']
        elif 'documentset' in e['named_arguments']:
            doc_sets = e['named_arguments']['documentset']
        else:
            doc_sets = list()

        tags = get_report_tags_by_keys(report_tags, doc_sets)
        if debug:
            limit = DEBUG_LIMIT
        else:
            limit = 0

        if 'termset' in e['named_arguments']:
            terms = e['named_arguments']["termset"]
        elif 'termsets' in e['named_arguments']:
            terms = e['named_arguments']["termsets"]
        else:
            terms = list()

        if 'cohort' in e['named_arguments']:
            cohort = get_cohort_items(e['named_arguments']['cohort'], cohorts)
        elif 'cohorts' in e['named_arguments']:
            cohort = get_cohort_items(e['named_arguments']['cohorts'], cohorts)
        else:
            cohort = list()

        pipeline = PipelineConfig(e['funct'],
                                  e['name'],
                                  get_terms_by_keys(
                                      all_terms, terms,
                                      e['named_arguments']['value_sets']),
                                  owner=owner,
                                  limit=limit,
                                  cohort=cohort,
                                  report_tags=tags,
                                  is_phenotype=True)
        map_arguments(pipeline, e)
        map_arguments(pipeline, e['named_arguments'])
        return pipeline
    else:
        raise ValueError("External pipelines not yet supported")
def map_arguments(pipeline: PipelineConfig, e):
    for k in e.keys():
        if not (
                k == 'owner' or k == 'limit' or k == 'owner' or k == "name" or k == "config_type" or k == "terms" or
                k == "cohort"):
            if k in pipeline_keys:
                try:
                    pipeline[k] = e[k]
                except Exception as ex:
                    traceback.print_exc(file=sys.stdout)
                    print(ex)
            else:
                try:
                    pipeline.custom_arguments[k] = e[k]
                except Exception as ex:
                    traceback.print_exc(file=sys.stdout)
                    print(ex)
def map_arguments(pipeline: PipelineConfig, e, all_terms):
    for k in e.keys():
        if k not in manually_mapped_keys:
            if k in pipeline_keys:
                try:
                    pipeline[k] = e[k]
                except Exception as ex:
                    traceback.print_exc(file=sys.stdout)
                    print(ex)
            else:
                try:
                    term_mappings = get_terms_by_keys(all_terms, e[k], list())
                    if len(term_mappings) > 0:
                        val = term_mappings
                    else:
                        val = e[k]
                    pipeline.custom_arguments[k] = val
                except Exception as ex:
                    traceback.print_exc(file=sys.stdout)
                    print(ex)
Example #4
0
def map_arguments(pipeline: PipelineConfig, e, all_terms):
    for k in e.keys():
        if not (
                k == 'owner' or k == 'limit' or k == "name" or k == "config_type" or k == "terms" or
                k == "cohort" or k == "job_results" or k =="concept_code" or k == "concept_code_system"):
            if k in pipeline_keys:
                try:
                    pipeline[k] = e[k]
                except Exception as ex:
                    traceback.print_exc(file=sys.stdout)
                    print(ex)
            else:
                try:
                    term_mappings = get_terms_by_keys(all_terms, e[k], list())
                    if len(term_mappings) > 0:
                        val = term_mappings
                    else:
                        val = e[k]
                    pipeline.custom_arguments[k] = val
                except Exception as ex:
                    traceback.print_exc(file=sys.stdout)
                    print(ex)
def map_arguments(pipeline: PipelineConfig, e, all_terms):
    for k in e.keys():
        if not (
                k == 'owner' or k == 'limit' or k == 'owner' or k == "name" or k == "config_type" or k == "terms" or
                k == "cohort" or k == "job_results"):
            if k in pipeline_keys:
                try:
                    pipeline[k] = e[k]
                except Exception as ex:
                    traceback.print_exc(file=sys.stdout)
                    print(ex)
            else:
                try:
                    term_mappings = get_terms_by_keys(all_terms, e[k], list())
                    if len(term_mappings) > 0:
                        val = term_mappings
                    else:
                        val = e[k]
                    pipeline.custom_arguments[k] = val
                except Exception as ex:
                    traceback.print_exc(file=sys.stdout)
                    print(ex)
def data_entities_to_pipelines(e: PhenotypeEntity,
                               report_tags,
                               all_terms,
                               owner,
                               debug,
                               cohorts,
                               phenotype_limit=0,
                               report_types: dict = None,
                               custom_query: dict = None,
                               filter_query: dict = None,
                               source: dict = None,
                               job_results: dict = None):
    if report_types is None:
        report_types = dict()
    if custom_query is None:
        custom_query = dict()
    if filter_query is None:
        filter_query = dict()
    if source is None:
        source = dict()
    if job_results is None:
        job_results = dict()

    if e['named_arguments'] is None:
        e['named_arguments'] = dict()
    if 'value_sets' not in e['named_arguments']:
        e['named_arguments']['value_sets'] = []
    if 'termsets' not in e['named_arguments']:
        e['named_arguments']['termsets'] = []

    if e['library'] == "Clarity" or e['library'] == 'ClarityNLP':
        # config_type, name, description, terms

        if 'documentsets' in e['named_arguments']:
            doc_sets = e['named_arguments']['documentsets']
        elif 'documentset' in e['named_arguments']:
            doc_sets = e['named_arguments']['documentset']
        else:
            doc_sets = list()

        if phenotype_limit > 0:
            limit = phenotype_limit
        elif debug:
            limit = DEBUG_LIMIT
        else:
            limit = 0

        if 'termset' in e['named_arguments']:
            terms = e['named_arguments']["termset"]
        elif 'termsets' in e['named_arguments']:
            terms = e['named_arguments']["termsets"]
        else:
            terms = list()

        if 'code' in e['named_arguments']:
            code = e['named_arguments']["code"]
        elif 'concept_code' in e['named_arguments']:
            code = e['named_arguments']["concept_code"]
        else:
            code = ''

        if 'code_system' in e['named_arguments']:
            code_system = e['named_arguments']["code_system"]
        elif 'concept_code_system' in e['named_arguments']:
            code_system = e['named_arguments']["concept_code_system"]
        elif 'codesystem' in e['named_arguments']:
            code_system = e['named_arguments']["codesystem"]
        else:
            code_system = ''

        if 'cohort' in e['named_arguments']:
            cohort, job_results_filter = get_cohort_items(
                e['named_arguments']['cohort'], cohorts, job_results)
        elif 'cohorts' in e['named_arguments']:
            cohort, job_results_filter = get_cohort_items(
                e['named_arguments']['cohorts'], cohorts, job_results)
        else:
            cohort, job_results_filter = list(), dict()

        tags = get_item_list_by_key(report_tags, doc_sets)
        types = get_item_list_by_key(report_types, doc_sets)
        query = get_item_by_key(custom_query, doc_sets)
        fq = get_item_by_key(filter_query, doc_sets)
        sources = get_item_list_by_key(source, doc_sets)

        pipeline = PipelineConfig(e['funct'],
                                  e['name'],
                                  get_terms_by_keys(
                                      all_terms, terms,
                                      e['named_arguments']['value_sets']),
                                  owner=owner,
                                  limit=limit,
                                  cohort=cohort,
                                  job_results=job_results_filter,
                                  report_tags=tags,
                                  report_types=types,
                                  sources=sources,
                                  custom_query=query,
                                  filter_query=fq,
                                  concept_code=code,
                                  concept_code_system=code_system,
                                  is_phenotype=True)
        map_arguments(pipeline, e, all_terms)
        map_arguments(pipeline, e['named_arguments'], all_terms)
        return pipeline
    else:
        raise ValueError("External pipelines not yet supported")
import pandas as pd
import util
from pymongo import MongoClient

from data_access import PhenotypeModel, PipelineConfig, PhenotypeEntity, PhenotypeOperations, results
from data_access import expr_eval, expr_result
from ohdsi import getCohort

#import json
#from bson import json_util, ObjectId

DEBUG_LIMIT = 1000
COL_LIST = ["_id", "report_date", 'report_id', 'subject', 'sentence']

pipeline_keys = PipelineConfig('test', 'test', 'test').__dict__.keys()
numeric_comp_operators = ['==', '=', '>', '<', '<=', '>=']


def get_terms(model: PhenotypeModel):
    terms = dict()
    if model:
        if model.term_sets and len(model.term_sets) > 0:
            for t in model.term_sets:
                terms[t['name']] = t['values']
        # TODO expand concept_sets

    return terms


def get_terms_by_keys(term_dict, term_keys: list, concept_keys: list):