def search_and_filter(self):
    """Run the first configured exclude (a Condition search by code) against
    the base cohort and apply its filter.

    Reads the distinct patient references from the task's base result table,
    fetches Condition resources matching the first exclude's codes into the
    exclude table, then delegates row removal to self.filter().
    """
    base_table = "task_" + self.task_id + "_base"
    cur = psqlDbConnection.get_db_connection().cursor()
    cur.execute("select distinct data->'subject'->>'reference' from " + base_table + " ;")
    # References look like "Patient/<id>"; keep the bare id.
    # (Currently collected but not used further down in this variant.)
    pat_ids = [row[0].split('/')[1] for row in iter(cur.fetchone, None)]
    # TODO: change to "patient.reference" ALSO check if whole url or id
    # TODO: change to filtering all excludes, and prefilter the search by
    # subject/encounter (check performance gain) - might need to send via
    # body not request params
    excl_table = "task_" + self.task_id + "_excl"
    search_params = [{"key": "code", "values": self.exclude[0]['codes']}]
    self.search_resource_by_params("Condition", search_params, excl_table)
    self.filter(self.exclude[0])
def process_search_results(self, search_result, collection):
    """Drain a paginated FHIR search Bundle and persist every entry as jsonb.

    Follows the bundle's "next" links until exhausted, then (re)creates and
    truncates the target table and bulk-inserts all collected resources.
    Returns True on completion.
    """
    # TODO: Check if FHIR can resolve the dependencies, so less calls necessary
    result_list = []
    next_page = True
    while next_page:
        # Collect every resource of the current page as a plain dict.
        for entry_elem in search_result.entry:
            ret_element = entry_elem.resource
            element = ret_element.as_json()
            result_list.append(element)
        # NOTE(review): assumes the "next" link, when present, is always at
        # link[1] -- confirm against the FHIR server's bundle layout.
        if len(search_result.link) < 2 or search_result.link[1].relation != "next":
            next_page = False
            break
        # Fetch the next page and continue with the new bundle.
        res = server.server.request_json(search_result.link[1].url)
        search_result = bundle.Bundle(res)
    connection = psqlDbConnection.get_db_connection()
    pgCur = connection.cursor()
    pgCur.execute("CREATE TABLE IF NOT EXISTS " + collection + " (ID serial NOT NULL PRIMARY KEY, data jsonb NOT NULL);")
    # Re-running a search replaces the previous contents entirely.
    pgCur.execute("TRUNCATE TABLE " + collection + " ;")
    insertSql = "INSERT INTO " + collection + "(data) VALUES %s"
    entry_list = []
    for entry in result_list:
        entry_list.append((json.dumps(entry),))
    execute_values(pgCur, insertSql, entry_list, "(%s::jsonb)")
    connection.commit()
    return True
def search_and_filter(self, exclude):
    """Run one exclude search against the base cohort and apply its filter.

    Looks up the distinct patient references in the base result table, fetches
    the exclude's resources by code into the exclude table, pulls the
    referenced encounters when anything was found, and finally removes
    matching rows from the base table via self.filter().
    """
    base_table = "task_" + self.task_id + "_base"
    cur = psqlDbConnection.get_db_connection().cursor()
    cur.execute("select distinct data->'subject'->>'reference' from " + base_table + " ;")
    # References look like "Patient/<id>"; keep the bare id.
    pat_ids = [row[0].split('/')[1] for row in iter(cur.fetchone, None)]
    # TODO: change to "patient.reference" ALSO check if whole url or id
    # TODO: filter search for relevant encounters (check performance gain) -
    # might need to send via body not request params
    excl_table = "task_" + self.task_id + "_excl"
    search_params = [{"key": "code", "values": exclude['codes']}]
    resources_found = self.search_resource_by_params(exclude['resource'], search_params, excl_table)
    if resources_found:
        # Only fetch encounters when the exclude search actually hit.
        self.get_encs(excl_table, "task_" + self.task_id + "_excl_enc")
    self.filter(exclude)
def delete(self):
    """Delete every filter task row and drop each task's derived tables.

    Returns ("tasks deleted", 200) on success, ("no tasks found", 400) when
    no tasks exist, or the error text on failure.
    """
    try:
        connection = psqlDbConnection.get_db_connection()
        pgCur = connection.cursor()
        pgCur.execute("SELECT data->>'task_id' from filter_tasks")
        tasks = pgCur.fetchall()
        # BUG FIX: fetchall() returns an empty list, never None, so the
        # original `if tasks is None` check could never fire.
        if not tasks:
            return "no tasks found", 400
        tasks = [r[0] for r in tasks]
        for task_id in tasks:
            pgCur.execute("DELETE from filter_tasks where data->>'task_id' = %s", (task_id,))
            for to_drop in ('pat', 'featureset', 'result'):
                # BUG FIX: the DDL has no %s placeholders, so no parameter
                # tuple may be passed -- psycopg2 raises "not all arguments
                # converted during string formatting" otherwise.
                pgCur.execute("DROP table IF EXISTS task_" + task_id + "_" + to_drop)
        connection.commit()
    except Exception as e:
        connection.close()
        return str(e.args[0])
    return "tasks deleted", 200
def execute_task(self):
    """Run the full filter pipeline for this task.

    Marks the task 'running', performs the base Observation search, resolves
    the referenced encounters, applies every configured exclude, runs the
    follow-up feature task, and finally marks the task 'done'.
    """
    print("Task is now being executed enjoy")
    connection = psqlDbConnection.get_db_connection()
    update_sql = "UPDATE filter_tasks set data = %s where data->>'task_id' = %s"

    def _persist_status(status):
        # Write the task's current state back onto its filter_tasks row.
        self.status = status
        cur = connection.cursor()
        cur.execute(update_sql, (json.dumps(self.__dict__), self.task_id))
        connection.commit()

    _persist_status('running')
    # Base search - the Observations (or other resources) to start from.
    base_table = "task_" + self.task_id + "_base"
    self.search_resource_by_params("Observation",
                                   [{"key": "code", "values": self.base_search}],
                                   base_table)
    # Resolve the distinct encounters referenced by the base results.
    self.get_encs(base_table, "task_" + self.task_id + "_enc")
    for exclude in self.exclude:
        self.search_and_filter(exclude)
    feature_task = FeatureTask(str(ObjectId()), "running", self.feature_set, self.task_id)
    feature_task.execute_task()
    _persist_status('done')
def execute_task(self):
    """Fetch the Patient resources referenced by the filter task's base search
    results (in chunks of 1000 ids), then build the feature set."""
    base_table = "task_" + self.filter_task_id + "_base"
    cur = psqlDbConnection.get_db_connection().cursor()
    cur.execute("select distinct data->'subject'->>'reference' from " + base_table + ";")
    # References look like "Patient/<id>"; keep the bare id.
    pat_ids = [row[0].split('/')[1] for row in iter(cur.fetchone, None)]
    patient_table = "task_" + self.filter_task_id + "_pat"
    # Query the FHIR server in id chunks to keep each request bounded.
    chunk_size = 1000
    for start in range(0, len(pat_ids), chunk_size):
        chunk = pat_ids[start:start + chunk_size]
        self.search_resource_by_params("Patient", [{"key": "_id", "values": chunk}], patient_table)
    self.create_feature_set()
    return
def create_feature_set(self):
    """Materialize the configured feature set into task_<id>_featureset.

    Builds one SELECT column fragment per feature via process_feature(),
    joins the task's result table against its patient table on the subject
    reference, and creates the feature-set table from that query.
    """
    task_id = self.filter_task_id
    # Concatenate all feature fragments, then drop the last two characters
    # (the trailing separator emitted by the final fragment).
    select_columns = "".join(self.process_feature(feature) for feature in self.feature_set)
    select_columns = select_columns[0:-2]
    from_clause = (
        "from task_" + task_id + "_result as Observation"
        + " join task_" + task_id + "_pat as Patient"
        + " on Observation.data ->'subject' ->> 'reference' = concat('Patient/',Patient.data ->> 'id')"
    )
    feature_sql = "Select " + select_columns + from_clause
    target_table = "task_" + task_id + "_featureset"
    connection = psqlDbConnection.get_db_connection()
    cur = connection.cursor()
    cur.execute("CREATE TABLE IF NOT EXISTS " + target_table + " as " + feature_sql)
    connection.commit()
def get(self, task_id):
    """Return the stored task document for task_id, or a 400 tuple if absent."""
    cur = psqlDbConnection.get_db_connection().cursor()
    cur.execute("SELECT data from filter_tasks where data->>'task_id' = %s ", (task_id,))
    row = cur.fetchone()
    if row is None:
        return "no task with this task id found", 400
    return row[0]
def create_base_db():
    """Create the bookkeeping tables (filter_tasks, feature_tasks) if missing."""
    connection = psqlDbConnection.get_db_connection()
    cur = connection.cursor()
    table_defs = (
        "filter_tasks (ID serial NOT NULL PRIMARY KEY, data jsonb NOT NULL);",
        "feature_tasks (ID serial NOT NULL PRIMARY KEY, data jsonb NOT NULL);",
    )
    for table_def in table_defs:
        cur.execute("CREATE TABLE IF NOT EXISTS " + table_def)
    connection.commit()
def get(self):
    """Return the list of all filter task documents.

    Returns ("no tasks found", 400) when the table is empty.
    """
    connection = psqlDbConnection.get_db_connection()
    pgCur = connection.cursor()
    pgCur.execute("SELECT data from filter_tasks")
    rows = pgCur.fetchall()
    # BUG FIX: fetchall() returns an empty list, never None, so the original
    # `if task is None` check was dead and an empty table returned [].
    if not rows:
        return "no tasks found", 400
    return [r[0] for r in rows]
def filter(self, exclude):
    """Delete rows from the task's base table that match an exclude criterion.

    With a positive exclude['time_interval'] (months), removes base rows whose
    date field falls within the interval around either the exclude resource's
    encounter period or the exclude resource's own period. With no interval,
    removes every base row whose subject appears in the exclude table at all.
    An optional exclude['value_restriction'] narrows the exclude rows by a
    numeric comparison on valueQuantity.value.
    """
    task_string = "task_" + self.task_id
    base = task_string + "_base"
    excl = task_string + "_excl"
    enc = excl + "_enc"
    interval_m = exclude['time_interval']
    if interval_m > 0:
        sql_delete = "delete from " + base + " where data->>'id' in "
        sql_filter_select = (" (select base.data ->> 'id' "
                             " from " + excl + " as excl left join " + enc + " as enc"
                             " on excl.data ->'encounter' ->> 'reference' = concat('Encounter/',enc.data ->> 'id')"
                             " join " + base + " as base"
                             " on excl.data -> 'subject'->>'reference' = base.data -> 'subject' ->> 'reference'"
                             " WHERE"
                             " (TO_TIMESTAMP(base.data ->> %s, 'YYYY-MM-DD HH24:MI:SS') >="
                             " TO_TIMESTAMP(enc.data -> 'period'->> 'start', 'YYYY-MM-DD HH24:MI:SS') - interval '%s month'"
                             " AND"
                             " TO_TIMESTAMP(base.data ->> %s, 'YYYY-MM-DD HH24:MI:SS') <="
                             " TO_TIMESTAMP(enc.data -> 'period'->> 'end', 'YYYY-MM-DD HH24:MI:SS') + interval '%s month' )"
                             " OR"
                             " (TO_TIMESTAMP(base.data ->> %s, 'YYYY-MM-DD HH24:MI:SS') >="
                             " TO_TIMESTAMP(excl.data -> 'period'->> 'start', 'YYYY-MM-DD HH24:MI:SS') - interval '%s month'"
                             " AND"
                             " TO_TIMESTAMP(base.data ->> %s, 'YYYY-MM-DD HH24:MI:SS') <="
                             " TO_TIMESTAMP(excl.data -> 'period'->> 'end', 'YYYY-MM-DD HH24:MI:SS') + interval '%s month' )"
                             )
    else:
        sql_delete = "delete from " + base + " where data->'subject'->>'reference' in "
        sql_filter_select = (" (select distinct excl.data -> 'subject' ->> 'reference'"
                             " from " + excl + " as excl"
                             " where 1=1"
                             "")
    if 'value_restriction' in exclude:
        # NOTE(review): operator/compare_to are concatenated into the SQL --
        # acceptable only if exclude definitions are trusted input.
        operator = exclude['value_restriction']['operator']
        compare_to = exclude['value_restriction']['compare_to']
        sql_filter_select = sql_filter_select + " and TO_NUMBER(excl.data->'valueQuantity'->>'value', '99.9') " + operator + " " + str(compare_to)
    dateField = exclude['dateField']
    sql = sql_delete + sql_filter_select + " );"
    # BUG FIX: the interval-free branch builds SQL with zero %s placeholders,
    # but the original always passed the 8-value tuple, which makes psycopg2
    # raise "not all arguments converted during string formatting". Only pass
    # parameters when the query actually has placeholders.
    params = (dateField, interval_m) * 4 if interval_m > 0 else None
    connection = psqlDbConnection.get_db_connection()
    pgCur = connection.cursor()
    pgCur.execute(sql, params)
    connection.commit()
def get_encs(self, collection_source, collection_target):
    """Fetch the Encounter resources referenced by collection_source.

    Reads the distinct encounter references from the source table, strips them
    to bare ids, and fetches the Encounters in chunks of 1000 ids into
    collection_target.
    """
    connection = psqlDbConnection.get_db_connection()
    cur = connection.cursor()
    cur.execute("select distinct data->'encounter'->>'reference' from " + collection_source + " ;")
    # References look like "Encounter/<id>"; keep the bare id.
    enc_ids = [row[0].split('/')[1] for row in iter(cur.fetchone, None)]
    chunk_size = 1000
    for start in range(0, len(enc_ids), chunk_size):
        chunk = enc_ids[start:start + chunk_size]
        self.search_resource_by_params("Encounter", [{"key": "_id", "values": chunk}], collection_target)
def execute_task(task):
    """Worker entry point: build a FilterTaskPsql from a queued task dict and
    run it; on failure, record status 'ERROR' and the message on the task row.
    """
    try:
        filter_task = FilterTaskPsql(task['task_id'], "running", task['base_search'],
                                     task['include'], task['exclude'], task['feature_set'])
        log.info("TASK WORKER: Executing task with id: " + task['task_id'])
        filter_task.execute_task()
    except Exception as e:
        log.error("TASK WORKER: ERROR executing task, stack trace: " + str(e))
        connection = psqlDbConnection.get_db_connection()
        log.error("TASK WORKER: after db connection")
        pgCur = connection.cursor()
        sql = "UPDATE filter_tasks set data = %s where data->>'task_id' = %s"
        # BUG FIX: if the constructor itself raised, filter_task is unbound
        # and the original handler died with a NameError, masking the real
        # error. Fall back to the raw task dict so the row is still updated.
        try:
            filter_task.status = 'ERROR'
            filter_task.error_message = str(e)
            data = json.dumps(filter_task.__dict__)
        except NameError:
            task['status'] = 'ERROR'
            task['error_message'] = str(e)
            data = json.dumps(task)
        pgCur.execute(sql, (data, task['task_id']))
        connection.commit()
    return
def delete(self, task_id):
    """Delete the filter task row for task_id and drop its derived tables.

    Returns ("task deleted", 200) on success or the error text on failure.
    """
    connection = psqlDbConnection.get_db_connection()
    pgCur = connection.cursor()
    try:
        pgCur.execute("DELETE FROM filter_tasks WHERE data->>'task_id' = %s ", (task_id,))
        for to_drop in ('pat', 'featureset', 'result'):
            # BUG FIX: the DDL has no %s placeholders, so no parameter tuple
            # may be passed -- psycopg2 raises "not all arguments converted
            # during string formatting" otherwise.
            pgCur.execute("DROP table IF EXISTS task_" + task_id + "_" + to_drop)
        connection.commit()
    except Exception as e:
        connection.close()
        return str(e.args[0])
    return "task deleted", 200
def main():
    """Poll filter_tasks for rows with status 'new', mark them 'queued', and
    hand them off to a worker thread pool. Runs forever."""
    executor = ThreadPoolExecutor(max_workers=4)
    connection = psqlDbConnection.get_db_connection()
    while True:
        pgCur = connection.cursor()
        pgCur.execute("SELECT data FROM filter_tasks WHERE data->>'status' = 'new'")
        task = pgCur.fetchone()
        if task is None:
            # Nothing to do; back off before polling again.
            time.sleep(10)
            continue
        task = task[0]
        sql = "UPDATE filter_tasks set data = %s where data->>'task_id' = %s"
        task['status'] = 'queued'
        pgCur.execute(sql, (json.dumps(task), task['task_id']))
        connection.commit()
        log.debug("Async execution of task = " + str(task))
        # BUG FIX: the original called executor.submit(execute_task(task)),
        # which ran the task synchronously on this thread and submitted its
        # None return value -- defeating the pool entirely. Submit the
        # callable and its argument instead.
        executor.submit(execute_task, task)
def post(self):
    """Create a filter task from the request body.

    execution_type 'sync' runs the task immediately in this request; any
    other value stores it as 'new' for the background worker to pick up.
    Returns {"task_id": <id>} either way.
    """
    args = parser.parse_args()
    run_synchronously = args["execution_type"] == 'sync'
    filter_task_id = str(ObjectId())
    connection = psqlDbConnection.get_db_connection()
    insert_sql = "INSERT INTO filter_tasks (data) VALUES (%s::jsonb)"
    request_vals = request.get_json()
    if run_synchronously:
        filter_task = FilterTaskPsql(filter_task_id, "running", request_vals['loinc'],
                                     request_vals['include'], request_vals['exclude'],
                                     request_vals['feature_set'])
        cur = connection.cursor()
        log.debug("Executing synchronous task, id:" + filter_task_id)
        cur.execute(insert_sql, (json.dumps(filter_task.__dict__),))
        connection.commit()
        filter_task.execute_task()
    else:
        # NOTE(review): a second id is generated here, shadowing the one
        # above -- harmless (both are random) but redundant.
        filter_task_id = str(ObjectId())
        filter_task = FilterTaskPsql(filter_task_id, "new", request_vals['loinc'],
                                     request_vals['include'], request_vals['exclude'],
                                     request_vals['feature_set'])
        filter_task.queued_time = str(datetime.now())
        cur = connection.cursor()
        cur.execute(insert_sql, (json.dumps(filter_task.__dict__),))
        connection.commit()
    return {"task_id": filter_task_id}
def execute_task(self):
    """Execute the filter task directly against the FHIR database.

    Builds one large CTE query (base Observations, Encounters, optional
    include/exclude prefilters), runs it on the FHIR connection, streams the
    matching rows into task_<id>_result, then runs the feature task. Task
    status ('running'/'ERROR'/'done') is persisted on the filter_tasks row
    at each stage.
    """
    log.debug("Task," + self.task_id + ": " + "Begin filter Task")
    connection = psqlDbConnection.get_db_connection()
    pgCur = connection.cursor()
    sql = "UPDATE filter_tasks set data = %s where data->>'task_id' = %s"
    self.status = 'running'
    self.start_time = str(datetime.now())
    pgCur.execute(sql, (json.dumps(self.__dict__), self.task_id))
    connection.commit()
    result_collection = "task_" + self.task_id + "_result"
    # TODO - Check if it should not rather be select * from base
    # NOTE(review): base_search[0] is concatenated straight into the SQL --
    # injection-prone if task definitions can come from untrusted callers.
    prefilter_base = (" WITH base as "
                      " ( SELECT * from resources where type = 'Observation' and resources.data -> 'code' -> 'coding' -> 0 ->> 'code' = '" + self.base_search[0] + "' ),"
                      " enc as (SELECT * from resources where type = 'Encounter'),")
    params = ()
    include_select = ""
    include_where = ""
    exclude_select = ""
    exclude_where = ""
    prefilter_excl = ""
    prefilter_excl_where = ""
    prefilter_incl = ""
    prefilter_incl_where = ""
    first_constraint = True
    if len(self.include) > 0:
        # AND together every include constraint; collect its bind params.
        first_incl = True
        for constraint in self.include:
            cur_constraint = self.get_filter_statement(constraint, "incl")
            params = params + cur_constraint['params']
            if not first_incl:
                include_where = include_where + "AND"
                prefilter_incl_where = prefilter_incl_where + "AND"
            first_incl = False
            include_where = include_where + " (" + cur_constraint['sql'] + ") "
            prefilter_incl_where = prefilter_incl_where + " (" + self.get_filter_prefilter(constraint) + ")"
        include_select = (" select distinct base.fhir_id"
                          " from base"
                          " left join incl on base.data -> 'subject' ->> 'reference' = incl.data -> 'subject' ->> 'reference'"
                          " left join enc on incl.data -> 'encounter' ->> 'reference' = concat('Encounter/', enc.data ->> 'id')"
                          " where")
        prefilter_incl = "incl as ( select * from resources WHERE " + prefilter_incl_where + ")"
        first_constraint = False
    if len(self.exclude) > 0:
        # OR together every exclude constraint; collect its bind params.
        first_exlcude = True
        for exclude in self.exclude:
            cur_exclude = self.get_filter_statement(exclude, "excl")
            params = params + cur_exclude['params']
            if not first_exlcude:
                exclude_where = exclude_where + "OR"
                prefilter_excl_where = prefilter_excl_where + "OR"
            first_exlcude = False
            exclude_where = exclude_where + " (" + cur_exclude['sql'] + ") "
            prefilter_excl_where = prefilter_excl_where + " (" + self.get_filter_prefilter(exclude) + ")"
        exclude_select = (" select distinct base.fhir_id"
                          " from base"
                          " left join excl on base.data -> 'subject' ->> 'reference' = excl.data -> 'subject' ->> 'reference'"
                          " left join enc on excl.data -> 'encounter' ->> 'reference' = concat('Encounter/', enc.data ->> 'id')"
                          " where")
        prefilter_excl = "excl as ( select * from resources WHERE " + prefilter_excl_where + ")"
        if first_constraint is False:
            prefilter_excl = "," + prefilter_excl
    # Assemble the final SELECT over the base CTE.
    # NOTE(review): with no includes and no excludes the WITH clause ends in a
    # dangling comma -- confirm whether a task without constraints is valid.
    base = "SELECT * from base WHERE"
    first = True
    if len(self.include) > 0:
        base = base + " base.fhir_id IN"
        first = False
        base = base + "(" + include_select + include_where + ") "
    if len(self.exclude) > 0:
        if first:
            base = base + " base.fhir_id NOT IN"
            first = False
        else:
            base = base + " AND base.fhir_id NOT IN"
        base = base + "(" + exclude_select + exclude_where + ")"
    if first:
        base_where = " type = 'Observation' and base.data -> 'code' -> 'coding' -> 0 ->> 'code' = '" + self.base_search[0] + "'"
    else:
        base_where = " AND type = 'Observation' and base.data -> 'code' -> 'coding' -> 0 ->> 'code' = '" + self.base_search[0] + "'"
    sql = prefilter_base + prefilter_incl + prefilter_excl + base + base_where
    log.debug("Task," + self.task_id + ":" + "begin execution of filter sql on fhir server")
    fhir_connection = psqlDbConnection.get_fhir_db_connection()
    fhir_pgCur = fhir_connection.cursor()
    sql_statement = fhir_pgCur.mogrify(sql, params)
    self.gen_filter_query = sql_statement.decode("UTF-8")
    log.debug("Task," + self.task_id + " sql params:" + str(params))
    log.debug("Task," + self.task_id + " sql: \n #########\n " + sqlparse.format(sql_statement, reindent=True, keyword_case='upper') + "\n #########\n ")
    try:
        fhir_pgCur.execute(sql, params)
    except Exception as e:
        log.error("TASK WORKER: ERROR executing task, stack trace: " + str(e))
        connection = psqlDbConnection.get_db_connection()
        log.error("TASK WORKER: after db connection")
        pgCur = connection.cursor()
        sql = "UPDATE filter_tasks set data = %s where data->>'task_id' = %s"
        self.status = 'ERROR'
        self.error_message = str(e)
        pgCur.execute(sql, (json.dumps(self.__dict__), self.task_id))
        connection.commit()
        # BUG FIX: the original fell through after recording the error and
        # went on to fetchmany() on the failed cursor; stop here instead.
        return
    item = fhir_pgCur.fetchmany(10000)
    connection = psqlDbConnection.get_db_connection()
    pgCur = connection.cursor()
    sql = "create table " + result_collection
    sql = sql + ''' (
        id SERIAL,
        fhir_id varchar(64) NOT NULL,
        type varchar(64) NOT NULL,
        data jsonb NOT NULL,
        created_at timestamp NOT NULL DEFAULT NOW(),
        last_updated_at timestamp NOT NULL DEFAULT NOW(),
        is_deleted boolean NOT NULL DEFAULT FALSE
    ) '''
    pgCur.execute(sql)
    connection.commit()
    log.debug("Task," + self.task_id + ":" + "Finished execution of filter sql on fhir server")
    connection = psqlDbConnection.get_db_connection()
    pgCur = connection.cursor()
    insertSql = "INSERT INTO " + result_collection + " VALUES %s"
    # Stream the results in 10k-row batches into the local result table.
    while len(item) > 0:
        item_list_insert = []
        for cur_item in item:
            lst = list(cur_item)
            lst[3] = json.dumps(lst[3])  # jsonb column must be serialized text
            cur_item = tuple(lst)
            item_list_insert.append(cur_item)
        execute_values(pgCur, insertSql, item_list_insert, "(%s, %s, %s, %s::jsonb, %s ,%s, %s)")
        item = fhir_pgCur.fetchmany(10000)
    connection.commit()
    feat_task_id = str(ObjectId())
    feature_task = FeatureTaskPsql(feat_task_id, "running", self.feature_set, self.task_id)
    try:
        feature_task.execute_task()
    except Exception as e:
        log.error("TASK WORKER: ERROR executing task, stack trace: " + str(e))
        connection = psqlDbConnection.get_db_connection()
        log.error("TASK WORKER: after db connection")
        pgCur = connection.cursor()
        sql = "UPDATE filter_tasks set data = %s where data->>'task_id' = %s"
        self.status = 'ERROR'
        self.error_message = str(e)
        pgCur.execute(sql, (json.dumps(self.__dict__), self.task_id))
        connection.commit()
        return
    pgCur = connection.cursor()
    sql = "UPDATE filter_tasks set data = %s where data->>'task_id' = %s"
    self.status = 'done'
    pgCur.execute(sql, (json.dumps(self.__dict__), self.task_id))
    connection.commit()
def execute_task(self):
    """Build the per-task patient table for a finished filter task, then
    materialize its feature set.

    Reads the distinct patient references from task_<id>_result, copies the
    referenced Patient resources from the FHIR database into task_<id>_pat
    in 10k-row batches, and finally calls create_feature_set().
    """
    log.debug("Task," + self.filter_task_id + ":" + "Begin feature task for filter task")
    # get patient ids from the filter task's result table
    collection_base = "task_" + self.filter_task_id + "_result"
    sql = "select distinct data->'subject'->>'reference' from " + collection_base + ";"
    pgCur = psqlDbConnection.get_db_connection().cursor()
    pgCur.execute(sql)
    pat_ids = []
    item = pgCur.fetchone()
    while item is not None:
        # References look like "Patient/<id>"; keep the bare id.
        pat_ids.append(item[0].split('/')[1])
        item = pgCur.fetchone()
    # BUG FIX: the table name carried a trailing space ("_pat "), silently
    # relying on SQL whitespace; use the clean name the feature-set join
    # expects.
    collection_patient = "task_" + self.filter_task_id + "_pat"
    connection = psqlDbConnection.get_db_connection()
    pgCur = connection.cursor()
    sql = "create table " + collection_patient
    sql = sql + ''' (
        id SERIAL,
        fhir_id varchar(64) NOT NULL,
        type varchar(64) NOT NULL,
        data jsonb NOT NULL,
        created_at timestamp NOT NULL DEFAULT NOW(),
        last_updated_at timestamp NOT NULL DEFAULT NOW(),
        is_deleted boolean NOT NULL DEFAULT FALSE
    ) '''
    pgCur.execute(sql)
    connection.commit()
    # BUG FIX: patient ids were spliced into the SQL by string concatenation,
    # which is injection-prone and produces invalid SQL ("in ()") when the
    # result table has no patients. Use a parameterized IN with a tuple and
    # skip the copy entirely when there is nothing to fetch.
    if pat_ids:
        select_pats = "SELECT * from resources where type = 'Patient' and fhir_id in %s"
        fhir_connection = psqlDbConnection.get_fhir_db_connection()
        fhir_pgCur = fhir_connection.cursor()
        fhir_pgCur.execute(select_pats, (tuple(pat_ids),))
        item = fhir_pgCur.fetchmany(10000)
        pgCur = connection.cursor()
        insertSql = "INSERT INTO " + collection_patient + " VALUES %s"
        while len(item) > 0:
            item_list_insert = []
            for cur_item in item:
                lst = list(cur_item)
                lst[3] = json.dumps(lst[3])  # jsonb column must be serialized text
                item_list_insert.append(tuple(lst))
            execute_values(pgCur, insertSql, item_list_insert, "(%s, %s, %s, %s::jsonb, %s, %s, %s)")
            item = fhir_pgCur.fetchmany(10000)
        connection.commit()
    log.debug("Task," + self.filter_task_id + ":" + "Begin creation of feature set for filter task")
    self.create_feature_set()
    return