def data_entities_to_pipelines(e: PhenotypeEntity, report_tags, all_terms, owner,
                               debug, cohorts):
    """Build a PipelineConfig from a phenotype data entity.

    Normalizes ``e['named_arguments']`` in place (ensuring the
    ``value_sets`` and ``termsets`` keys exist), resolves document sets,
    term sets and cohorts, and maps any remaining named arguments onto
    the pipeline via ``map_arguments``.

    Raises:
        ValueError: if ``e['library']`` is not ``"Clarity"`` — external
            pipelines are not supported here.
    """
    if e['named_arguments'] is None:
        e['named_arguments'] = dict()
    named = e['named_arguments']
    named.setdefault('value_sets', [])
    named.setdefault('termsets', [])

    # Only Clarity-library entities can be turned into pipelines.
    if e['library'] != "Clarity":
        raise ValueError("External pipelines not yet supported")

    # config_type, name, description, terms
    if 'documentsets' in named:
        doc_sets = named['documentsets']
    elif 'documentset' in named:
        doc_sets = named['documentset']
    else:
        doc_sets = list()

    tags = get_report_tags_by_keys(report_tags, doc_sets)

    # Debug runs are capped so a test job cannot process everything.
    limit = DEBUG_LIMIT if debug else 0

    if 'termset' in named:
        terms = named["termset"]
    elif 'termsets' in named:
        terms = named["termsets"]
    else:
        terms = list()

    if 'cohort' in named:
        cohort = get_cohort_items(named['cohort'], cohorts)
    elif 'cohorts' in named:
        cohort = get_cohort_items(named['cohorts'], cohorts)
    else:
        cohort = list()

    pipeline = PipelineConfig(
        e['funct'],
        e['name'],
        get_terms_by_keys(all_terms, terms, named['value_sets']),
        owner=owner,
        limit=limit,
        cohort=cohort,
        report_tags=tags,
        is_phenotype=True)
    # Copy over any remaining entity-level and named arguments.
    map_arguments(pipeline, e)
    map_arguments(pipeline, named)

    return pipeline
def map_arguments(pipeline: PipelineConfig, e):
    """Copy arguments from mapping *e* onto *pipeline*.

    Keys the caller has already mapped explicitly (owner, limit, name,
    config_type, terms, cohort) are skipped.  A key that is a known
    PipelineConfig attribute (per module-level ``pipeline_keys``) is set
    directly on the pipeline; anything else is stored in
    ``pipeline.custom_arguments``.  Failures are logged and swallowed so
    one bad argument does not abort the mapping.
    """
    # Fix: the original boolean chain tested k == 'owner' twice — a
    # copy/paste slip that the long `or` chain made easy.  A set both
    # removes the duplicate and makes the exclusion list obvious.
    already_mapped = {'owner', 'limit', 'name', 'config_type', 'terms',
                      'cohort'}
    for k in e.keys():
        if k in already_mapped:
            continue
        if k in pipeline_keys:
            try:
                pipeline[k] = e[k]
            except Exception as ex:
                traceback.print_exc(file=sys.stdout)
                print(ex)
        else:
            try:
                pipeline.custom_arguments[k] = e[k]
            except Exception as ex:
                traceback.print_exc(file=sys.stdout)
                print(ex)
def map_arguments(pipeline: PipelineConfig, e, all_terms):
    """Copy arguments from mapping *e* onto *pipeline*.

    Keys listed in the module-level ``manually_mapped_keys`` are skipped
    because the caller has already handled them.  Known PipelineConfig
    attributes (per ``pipeline_keys``) are assigned directly; any other
    key is first expanded against *all_terms* — if the value names one
    or more term sets, the expanded terms are stored instead of the raw
    value — and placed in ``pipeline.custom_arguments``.  Errors are
    printed and swallowed so one bad argument does not abort the rest.
    """
    for key in e.keys():
        if key in manually_mapped_keys:
            continue
        if key in pipeline_keys:
            try:
                pipeline[key] = e[key]
            except Exception as ex:
                traceback.print_exc(file=sys.stdout)
                print(ex)
        else:
            try:
                # Expand term-set references; fall back to the raw value
                # when the lookup yields nothing.
                expanded = get_terms_by_keys(all_terms, e[key], list())
                pipeline.custom_arguments[key] = (
                    expanded if len(expanded) > 0 else e[key])
            except Exception as ex:
                traceback.print_exc(file=sys.stdout)
                print(ex)
def map_arguments(pipeline: PipelineConfig, e, all_terms):
    """Copy arguments from mapping *e* onto *pipeline*.

    Keys the caller maps explicitly (owner, limit, name, config_type,
    terms, cohort, job_results, concept_code, concept_code_system) are
    skipped.  Known PipelineConfig attributes (per the module-level
    ``pipeline_keys``) are assigned directly; any other key is first
    expanded against *all_terms* — when the value names term sets the
    expanded terms replace the raw value — and stored in
    ``pipeline.custom_arguments``.  Errors are printed and swallowed.
    """
    skip = frozenset((
        'owner', 'limit', 'name', 'config_type', 'terms', 'cohort',
        'job_results', 'concept_code', 'concept_code_system'))
    for key in e.keys():
        if key in skip:
            continue
        if key in pipeline_keys:
            try:
                pipeline[key] = e[key]
            except Exception as ex:
                traceback.print_exc(file=sys.stdout)
                print(ex)
        else:
            try:
                # Prefer term-set expansion; keep the raw value when the
                # lookup finds nothing.
                expanded = get_terms_by_keys(all_terms, e[key], list())
                pipeline.custom_arguments[key] = (
                    expanded if len(expanded) > 0 else e[key])
            except Exception as ex:
                traceback.print_exc(file=sys.stdout)
                print(ex)
def map_arguments(pipeline: PipelineConfig, e, all_terms):
    """Copy arguments from mapping *e* onto *pipeline*.

    Keys the caller maps explicitly (owner, limit, name, config_type,
    terms, cohort, job_results) are skipped.  Known PipelineConfig
    attributes (per the module-level ``pipeline_keys``) are assigned
    directly; any other key is first expanded against *all_terms* —
    when the value names term sets the expanded terms replace the raw
    value — and stored in ``pipeline.custom_arguments``.  Errors are
    printed and swallowed so one bad argument does not abort the rest.
    """
    # Fix: the original boolean chain tested k == 'owner' twice — a
    # duplicated clause hidden inside the long `or` chain.  A set
    # removes the duplicate and keeps the exclusion list readable.
    already_mapped = {'owner', 'limit', 'name', 'config_type', 'terms',
                      'cohort', 'job_results'}
    for k in e.keys():
        if k in already_mapped:
            continue
        if k in pipeline_keys:
            try:
                pipeline[k] = e[k]
            except Exception as ex:
                traceback.print_exc(file=sys.stdout)
                print(ex)
        else:
            try:
                term_mappings = get_terms_by_keys(all_terms, e[k], list())
                if len(term_mappings) > 0:
                    val = term_mappings
                else:
                    val = e[k]
                pipeline.custom_arguments[k] = val
            except Exception as ex:
                traceback.print_exc(file=sys.stdout)
                print(ex)
def data_entities_to_pipelines(e: PhenotypeEntity, report_tags, all_terms, owner,
                               debug, cohorts, phenotype_limit=0,
                               report_types: dict = None,
                               custom_query: dict = None,
                               filter_query: dict = None,
                               source: dict = None,
                               job_results: dict = None):
    """Build a PipelineConfig from a phenotype data entity.

    Normalizes ``e['named_arguments']`` in place (ensuring the
    ``value_sets`` and ``termsets`` keys exist), resolves document sets,
    term sets, concept codes, cohorts and per-document-set report
    metadata, then maps any remaining named arguments onto the pipeline
    via ``map_arguments``.

    Raises:
        ValueError: if ``e['library']`` is neither ``"Clarity"`` nor
            ``"ClarityNLP"`` — external pipelines are not supported.
    """

    def _first_present(mapping, keys, default):
        # Value of the first key actually present in *mapping* (even if
        # its value is falsy), else *default*.
        for key in keys:
            if key in mapping:
                return mapping[key]
        return default

    report_types = dict() if report_types is None else report_types
    custom_query = dict() if custom_query is None else custom_query
    filter_query = dict() if filter_query is None else filter_query
    source = dict() if source is None else source
    job_results = dict() if job_results is None else job_results

    if e['named_arguments'] is None:
        e['named_arguments'] = dict()
    named = e['named_arguments']
    named.setdefault('value_sets', [])
    named.setdefault('termsets', [])

    if e['library'] not in ("Clarity", "ClarityNLP"):
        raise ValueError("External pipelines not yet supported")

    # config_type, name, description, terms
    doc_sets = _first_present(named, ('documentsets', 'documentset'), list())

    # Explicit phenotype limit wins; otherwise debug runs are capped.
    if phenotype_limit > 0:
        limit = phenotype_limit
    elif debug:
        limit = DEBUG_LIMIT
    else:
        limit = 0

    terms = _first_present(named, ('termset', 'termsets'), list())
    code = _first_present(named, ('code', 'concept_code'), '')
    code_system = _first_present(
        named, ('code_system', 'concept_code_system', 'codesystem'), '')

    if 'cohort' in named:
        cohort, job_results_filter = get_cohort_items(
            named['cohort'], cohorts, job_results)
    elif 'cohorts' in named:
        cohort, job_results_filter = get_cohort_items(
            named['cohorts'], cohorts, job_results)
    else:
        cohort, job_results_filter = list(), dict()

    # Per-document-set report metadata and query overrides.
    tags = get_item_list_by_key(report_tags, doc_sets)
    types = get_item_list_by_key(report_types, doc_sets)
    query = get_item_by_key(custom_query, doc_sets)
    fq = get_item_by_key(filter_query, doc_sets)
    sources = get_item_list_by_key(source, doc_sets)

    pipeline = PipelineConfig(
        e['funct'],
        e['name'],
        get_terms_by_keys(all_terms, terms, named['value_sets']),
        owner=owner,
        limit=limit,
        cohort=cohort,
        job_results=job_results_filter,
        report_tags=tags,
        report_types=types,
        sources=sources,
        custom_query=query,
        filter_query=fq,
        concept_code=code,
        concept_code_system=code_system,
        is_phenotype=True)
    # Copy over any remaining entity-level and named arguments.
    map_arguments(pipeline, e, all_terms)
    map_arguments(pipeline, named, all_terms)

    return pipeline
import pandas as pd
import util
from pymongo import MongoClient
from data_access import PhenotypeModel, PipelineConfig, PhenotypeEntity, PhenotypeOperations, results
from data_access import expr_eval, expr_result
from ohdsi import getCohort
#import json
#from bson import json_util, ObjectId

# Cap on documents processed per pipeline when running in debug mode
# (see the `limit` selection in data_entities_to_pipelines).
DEBUG_LIMIT = 1000
# Result columns of interest.  NOTE(review): usage is not visible in
# this chunk — presumably the fields projected from result documents;
# confirm against the consumers.
COL_LIST = ["_id", "report_date", 'report_id', 'subject', 'sentence']

# Attribute names of a PipelineConfig instance; map_arguments uses this
# to decide whether a named argument maps to a pipeline field or goes
# into custom_arguments.
pipeline_keys = PipelineConfig('test', 'test', 'test').__dict__.keys()
# Comparison operators recognized for numeric expressions.
# NOTE(review): not referenced within this chunk.
numeric_comp_operators = ['==', '=', '>', '<', '<=', '>=']


def get_terms(model: PhenotypeModel):
    """Collect term-set values from a phenotype model, keyed by set name."""
    terms = dict()
    if model:
        if model.term_sets and len(model.term_sets) > 0:
            for t in model.term_sets:
                terms[t['name']] = t['values']
            # TODO expand concept_sets
    return terms


def get_terms_by_keys(term_dict, term_keys: list, concept_keys: list):