def search_and_filter(self):

        # get distinct pat ids from base search
        collection = "task_" + self.task_id + "_base"
        connection = psqlDbConnection.get_db_connection()
        pgCur = connection.cursor()
        pgCur.execute("select distinct data->'subject'->>'reference' from " +
                      collection + " ;")

        pat_ids = []
        item = pgCur.fetchone()

        while item is not None:
            pat_ids.append(item[0].split('/')[1])
            item = pgCur.fetchone()

        # TODO: change to "patient.reference"   ALSO check if whole url or id

        # get first exclude for base search based on pat_id
        collection = "task_" + self.task_id + "_excl"

        # TODO: change to filtering all
        # params = [{"key":"subject", "values":pat_ids},{"key":"code", "values":self.exclude[0]['codes']} ]
        # TODO: filter search for relevant encounters (check performance gain) - might need to send via body not request params
        params = [{"key": "code", "values": self.exclude[0]['codes']}]
        self.search_resource_by_params("Condition", params, collection)

        self.filter(self.exclude[0])
    def process_search_results(self, search_result, collection):

        # TODO: Check if FHIR can resolve the dependencies, so less calls necessary

        result_list = []

        next_page = True

        while next_page:
            # an empty page has no entries, so guard against entry being None
            for entry_elem in (search_result.entry or []):
                ret_element = entry_elem.resource
                element = ret_element.as_json()
                result_list.append(element)

            # follow pagination via the bundle link whose relation is "next"
            next_link = next((lnk for lnk in (search_result.link or []) if lnk.relation == "next"), None)
            if next_link is None:
                next_page = False
                break

            res = server.server.request_json(next_link.url)
            search_result = bundle.Bundle(res)

        connection = psqlDbConnection.get_db_connection()
        pgCur = connection.cursor()
        pgCur.execute("CREATE TABLE IF NOT EXISTS " + collection + " (ID serial NOT NULL PRIMARY KEY, data jsonb NOT NULL);")
        pgCur.execute("TRUNCATE TABLE " + collection + " ;")

        insertSql = "INSERT INTO " + collection + "(data) VALUES %s"

        entry_list = []
        for entry in result_list:
            entry_list.append((json.dumps(entry),))

        execute_values(pgCur, insertSql, entry_list, "(%s::jsonb)")
        connection.commit()
        return True
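    # Hedged usage sketch (an assumption, not shown in the original source):
    # search_resource_by_params presumably builds a fhirclient search for the
    # requested resource type, fetches the first Bundle page from the FHIR
    # server and hands it to process_search_results together with the target
    # table name, roughly like:
    #
    #   from fhirclient.models import condition as cond
    #   search = cond.Condition.where(struct={"code": ",".join(codes)})
    #   first_page = search.perform(server.server)   # returns a Bundle
    #   self.process_search_results(first_page, collection)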
    def search_and_filter(self, exclude):

        # get distinct pat ids from base search
        collection = "task_" + self.task_id + "_base"
        connection = psqlDbConnection.get_db_connection()
        pgCur = connection.cursor()
        pgCur.execute("select distinct data->'subject'->>'reference' from " + collection + " ;")

        pat_ids = []
        item = pgCur.fetchone()

        while item is not None:
            pat_ids.append(item[0].split('/')[1])
            item = pgCur.fetchone()

        # TODO: change to "patient.reference"   ALSO check if whole url or id

        collection = "task_" + self.task_id + "_excl"

        # TODO: filter search for relevant encounters (check performance gain) - might need to send via body not request params
        params = [{"key": "code", "values": exclude['codes']}]
        resources_found = self.search_resource_by_params(exclude['resource'], params, collection)

        if resources_found:
            collection_source = "task_" + self.task_id + "_excl"
            collection_target = "task_" + self.task_id + "_excl_enc"
            self.get_encs(collection_source, collection_target)
            self.filter(exclude)
    def delete(self):

        try:
            connection = psqlDbConnection.get_db_connection()
            pgCur = connection.cursor()
            sql = "SELECT data->>'task_id' from filter_tasks"
            pgCur.execute(sql)

            tasks = pgCur.fetchall()

            # fetchall() returns an empty list (never None) when there are no rows
            if not tasks:
                return "no tasks found", 400

            tasks = [r[0] for r in tasks]

            for task_id in tasks:

                sql = "DELETE from filter_tasks where data->>'task_id' = %s"
                pgCur.execute(sql, (task_id,))
                
                tables_to_drop = ['pat', 'featureset', 'result']
                for to_drop in tables_to_drop:
                    sql = "DROP table IF EXISTS task_" + task_id + "_" + to_drop
                    pgCur.execute(sql, (task_id,))

            connection.commit()

        except Exception as e:
            connection.close()
            return str(e.args[0]), 500

        return "tasks deleted", 200
    def execute_task(self):

        print("Task is now being executed enjoy")
        connection = psqlDbConnection.get_db_connection()
        pgCur = connection.cursor()
        sql = "UPDATE filter_tasks set data = %s where data->>'task_id' = %s"
        self.status = 'running'
        pgCur.execute(sql, (json.dumps(self.__dict__), self.task_id))
        connection.commit()

        # get base search - as in Observations or resources to start search and filter from
        collection_source = "task_" + self.task_id + "_base"
        params = [{"key": "code", "values": self.base_search}]
        self.search_resource_by_params("Observation", params, collection_source)

        # get distinct enc ids from base search
        collection_target = "task_" + self.task_id + "_enc"
        self.get_encs(collection_source, collection_target)

        for exclude in self.exclude:
            self.search_and_filter(exclude)

        feat_task_id = str(ObjectId())
        feature_task = FeatureTask(feat_task_id, "running", self.feature_set, self.task_id)
        feature_task.execute_task()

        pgCur = connection.cursor()
        sql = "UPDATE filter_tasks set data = %s where data->>'task_id' = %s"
        self.status = 'done'
        pgCur.execute(sql, (json.dumps(self.__dict__), self.task_id))
        connection.commit()
    def execute_task(self):

        # get patients search
        collection_base = "task_" + self.filter_task_id + "_base"

        # get patient ids from observation
        sql = "select distinct data->'subject'->>'reference' from " + collection_base + ";"

        pgCur = psqlDbConnection.get_db_connection().cursor()
        pgCur.execute(sql)

        pat_ids = []
        item = pgCur.fetchone()

        while item is not None:
            pat_ids.append(item[0].split('/')[1])
            item = pgCur.fetchone()

        collection_patient = "task_" + self.filter_task_id + "_pat"

        pat_final = []
        chunk_size = 1000
        for i in range(0, len(pat_ids), chunk_size):
            pat_final.append(pat_ids[i:i + chunk_size])

        for cur_pat_ids in pat_final:
            params = [{"key": "_id", "values": cur_pat_ids}]
            self.search_resource_by_params("Patient", params,
                                           collection_patient)

        self.create_feature_set()

        return
    def create_feature_set(self):

        feature_set = self.feature_set
        task_id = self.filter_task_id

        sqlSelectBegin = "Select "
        # leading space keeps the projection list and the FROM clause separated
        sqlFrom = (
            " from task_" + task_id + "_result as Observation" + " join task_" +
            task_id + "_pat as Patient" +
            " on Observation.data ->'subject' ->> 'reference' = concat('Patient/',Patient.data ->> 'id')"
        )

        sql_features = ""

        for feature in feature_set:
            sql_features = sql_features + self.process_feature(feature)

        sql_features = sql_features[0:-2]

        sql_feature_task = sqlSelectBegin + sql_features + sqlFrom

        collection = "task_" + task_id + "_featureset"
        connection = psqlDbConnection.get_db_connection()
        pgCur = connection.cursor()
        pgCur.execute("CREATE TABLE IF NOT EXISTS " + collection + " as " +
                      sql_feature_task)
        connection.commit()
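    # Hedged note (assumption): process_feature is not shown in this snippet; it
    # is assumed to return one SQL projection fragment per feature terminated by
    # ", " (the slice sql_features[0:-2] above strips that trailing separator),
    # for example something like "Patient.data ->> 'gender' as gender, ".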
    def get(self, task_id):

        connection = psqlDbConnection.get_db_connection()
        pgCur = connection.cursor()
        sql = "SELECT data from filter_tasks where data->>'task_id' = %s  "
        pgCur.execute(sql, (task_id,))
        task = pgCur.fetchone()

        if task is None:
            return "no task with this task id found", 400

        return task[0]
def create_base_db():

    connection = psqlDbConnection.get_db_connection()
    pgCur = connection.cursor()
    tables = [
        "filter_tasks (ID serial NOT NULL PRIMARY KEY, data jsonb NOT NULL);",
        "feature_tasks (ID serial NOT NULL PRIMARY KEY, data jsonb NOT NULL);"
    ]

    for table in tables:
        pgCur.execute("CREATE TABLE IF NOT EXISTS " + table)

    connection.commit()
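# Hedged example of the jsonb document stored in filter_tasks.data (field names
# inferred from the FilterTaskPsql usages elsewhere in this file; the values are
# placeholders, not real data):
#
#   {
#       "task_id": "<str(ObjectId())>",
#       "status": "new",               # new -> queued -> running -> done / ERROR
#       "base_search": ["<code>"],     # e.g. the 'loinc' list from the request
#       "include": [...],
#       "exclude": [...],
#       "feature_set": [...]
#   }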
    def get(self):

        connection = psqlDbConnection.get_db_connection()
        pgCur = connection.cursor()
        sql = "SELECT data from filter_tasks"
        pgCur.execute(sql)
        tasks = pgCur.fetchall()

        # fetchall() returns an empty list (never None) when there are no rows
        if not tasks:
            return "no tasks found", 400

        tasks = [r[0] for r in tasks]

        return tasks
    def filter(self, exclude):

        task_string = "task_" + self.task_id
        base = task_string + "_base"
        excl = task_string + "_excl"
        enc = excl + "_enc"

        interval_m = exclude['time_interval']
        if interval_m > 0:
            sql_delete = "delete from " + base + " where data->>'id' in "
            sql_filter_select = (" (select base.data ->> 'id'   "
                                 " from " + excl + " as excl left join " + enc + " as enc"
                                 " on excl.data ->'encounter' ->> 'reference' = concat('Encounter/',enc.data ->> 'id')"
                                 " join " + base + " as base"
                                 " on excl.data -> 'subject'->>'reference' = base.data -> 'subject' ->> 'reference'"
                                 " WHERE"
                                 " (TO_TIMESTAMP(base.data ->> %s, 'YYYY-MM-DD HH24:MI:SS') >="
                                 " TO_TIMESTAMP(enc.data -> 'period'->> 'start', 'YYYY-MM-DD HH24:MI:SS') - interval '%s month'"
                                 " AND"
                                 " TO_TIMESTAMP(base.data ->> %s, 'YYYY-MM-DD HH24:MI:SS') <="
                                 " TO_TIMESTAMP(enc.data -> 'period'->> 'end', 'YYYY-MM-DD HH24:MI:SS') + interval '%s month' )"
                                 " OR"
                                 " (TO_TIMESTAMP(base.data ->> %s, 'YYYY-MM-DD HH24:MI:SS') >="
                                 " TO_TIMESTAMP(excl.data -> 'period'->> 'start', 'YYYY-MM-DD HH24:MI:SS') - interval '%s month'"
                                 " AND"
                                 " TO_TIMESTAMP(base.data ->> %s, 'YYYY-MM-DD HH24:MI:SS') <="
                                 " TO_TIMESTAMP(excl.data -> 'period'->> 'end', 'YYYY-MM-DD HH24:MI:SS') + interval '%s month' )"
                                 )
        else:
            sql_delete = "delete from " + base + " where data->'subject'->>'reference' in "
            sql_filter_select = (" (select distinct excl.data -> 'subject' ->> 'reference'"
                                 " from " + excl + " as excl"
                                 " where 1=1"
                                 ""
                                 )

        if 'value_restriction' in exclude:
            operator = exclude['value_restriction']['operator']
            compare_to = exclude['value_restriction']['compare_to']
            sql_filter_select = sql_filter_select + " and TO_NUMBER(excl.data->'valueQuantity'->>'value', '99.9') " + operator + " " + str(compare_to)

        dateField = exclude['dateField']

        # only the time-interval variant of the query contains %s placeholders
        if interval_m > 0:
            params = (dateField, interval_m, dateField, interval_m, dateField, interval_m, dateField, interval_m)
        else:
            params = ()

        sql = sql_delete + sql_filter_select + " );"
        connection = psqlDbConnection.get_db_connection()
        pgCur = connection.cursor()
        pgCur.execute(sql, params)

        connection.commit()
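    # Hedged example of an `exclude` entry as consumed by filter() and
    # search_and_filter() above (key names come from the code; the values are
    # purely illustrative):
    #
    #   {
    #       "resource": "Condition",
    #       "codes": ["<code>", ...],
    #       "time_interval": 6,               # months; 0 disables the time window
    #       "dateField": "effectiveDateTime", # date field read from the base resource
    #       "value_restriction": {"operator": "<", "compare_to": 5.7}   # optional
    #   }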
    def get_encs(self, collection_source, collection_target):
        connection = psqlDbConnection.get_db_connection()
        pgCur = connection.cursor()
        pgCur.execute("select distinct data->'encounter'->>'reference' from " + collection_source + " ;")

        enc_ids = []
        item = pgCur.fetchone()

        while item is not None:
            enc_ids.append(item[0].split('/')[1])
            item = pgCur.fetchone()

        enc_final = []
        chunk_size = 1000
        for i in range(0, len(enc_ids), chunk_size):
            enc_final.append(enc_ids[i:i + chunk_size])

        for cur_enc_ids in enc_final:
            params = [{"key": "_id", "values": cur_enc_ids}]
            self.search_resource_by_params("Encounter", params, collection_target)
def execute_task(task):

    try:
        filter_task = FilterTaskPsql(task['task_id'], "running",
                                     task['base_search'], task['include'],
                                     task['exclude'], task['feature_set'])
        log.info("TASK WORKER: Executing task with id: " + task['task_id'])
        filter_task.execute_task()

    except Exception as e:
        log.error("TASK WORKER: ERROR executing task, stack trace: " + str(e))
        connection = psqlDbConnection.get_db_connection()
        log.error("TASK WORKER: after db connection")
        pgCur = connection.cursor()
        sql = "UPDATE filter_tasks set data = %s where data->>'task_id' = %s"
        filter_task.status = 'ERROR'
        filter_task.error_message = str(e)
        pgCur.execute(sql,
                      (json.dumps(filter_task.__dict__), filter_task.task_id))
        connection.commit()
        return
    def delete(self, task_id):

        connection = psqlDbConnection.get_db_connection()
        pgCur = connection.cursor()
        sql = "DELETE FROM filter_tasks WHERE data->>'task_id' = %s  "

        try:
            pgCur.execute(sql, (task_id,))
        
            tables_to_drop = ['pat', 'featureset', 'result']

            for to_drop in tables_to_drop:
                # the DROP statement has no placeholder, so no parameters are passed
                sql = "DROP table IF EXISTS task_" + task_id + "_" + to_drop
                pgCur.execute(sql)

            connection.commit()

        except Exception as e:
            connection.close()
            return str(e.args[0]), 500

        return "task deleted", 200
def main():
    executor = ThreadPoolExecutor(max_workers=4)

    connection = psqlDbConnection.get_db_connection()

    while True:
        pgCur = connection.cursor()
        sql = "SELECT data FROM filter_tasks WHERE data->>'status' = 'new'"
        pgCur.execute(sql)
        task = pgCur.fetchone()

        if task is None:
            time.sleep(10)
            continue

        task = task[0]
        sql = "UPDATE filter_tasks set data = %s where data->>'task_id' = %s"
        task['status'] = 'queued'
        pgCur.execute(sql, (json.dumps(task), task['task_id']))
        connection.commit()

        log.debug("Async execution of task = " + str(task))

        # pass the callable and its argument separately so the task runs on the
        # thread pool instead of being executed inline
        executor.submit(execute_task, task)
    def post(self):

        args = parser.parse_args()
        execution_type = args["execution_type"]
        filter_task_id = str(ObjectId())

        connection = psqlDbConnection.get_db_connection()

        if execution_type == 'sync':
            request_vals = request.get_json()
            filter_task = FilterTaskPsql(filter_task_id, "running", request_vals['loinc'], request_vals['include'],
                                         request_vals['exclude'], request_vals['feature_set'])

            pgCur = connection.cursor()
            insertSql = "INSERT INTO filter_tasks (data) VALUES (%s::jsonb)"

            log.debug("Executing synchronous task, id:" + filter_task_id)
            pgCur.execute(insertSql, (json.dumps(filter_task.__dict__),))
            connection.commit()
            filter_task.execute_task()

        else:
            # reuse the filter_task_id generated above
            request_vals = request.get_json()
            filter_task = FilterTaskPsql(filter_task_id, "new", request_vals['loinc'], request_vals['include'],
                                         request_vals['exclude'], request_vals['feature_set'])

            filter_task.queued_time = str(datetime.now())

            pgCur = connection.cursor()
            insertSql = "INSERT INTO filter_tasks (data) VALUES (%s::jsonb)"
            pgCur.execute(insertSql, (json.dumps(filter_task.__dict__),))
            connection.commit()

        task = {"task_id": filter_task_id}
        return task
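    # Hedged request sketch (the endpoint path is an assumption and where
    # execution_type is read from depends on the reqparse configuration; the
    # body keys come from the handler above):
    #
    #   POST /<filter-task-endpoint>?execution_type=async
    #   {
    #       "loinc": ["<code>", ...],
    #       "include": [...],
    #       "exclude": [...],
    #       "feature_set": [...]
    #   }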
    def execute_task(self):

        log.debug("Task," + self.task_id + ": " + "Begin filter Task")

        connection = psqlDbConnection.get_db_connection()
        pgCur = connection.cursor()
        sql = "UPDATE filter_tasks set data = %s where data->>'task_id' = %s"
        self.status = 'running'
        self.start_time = str(datetime.now())
        pgCur.execute(sql, (json.dumps(self.__dict__), self.task_id))
        connection.commit()

        result_collection = "task_" + self.task_id + "_result"

        # TODO - Check if it should not rather be select * from base
        prefilter_base = (" WITH base as "
                          " ( SELECT * from resources where type = 'Observation' and resources.data -> 'code' -> 'coding' -> 0 ->> 'code' = '" + self.base_search[0] + "' ),"
                          " enc as (SELECT * from resources where type = 'Encounter'),"
                          )

        params = ()
        include_select = ""
        include_where = ""
        exclude_select = ""
        exclude_where = ""
        prefilter_excl = ""
        prefilter_excl_where = ""
        prefilter_incl = ""
        prefilter_incl_where = ""

        first_constraint = True

        if len(self.include) > 0:
            first_incl = True
            for constraint in self.include:
                cur_constraint = self.get_filter_statement(constraint, "incl")
                params = params + cur_constraint['params']

                if not first_incl:
                    include_where = include_where + "AND"
                    prefilter_incl_where = prefilter_incl_where + "AND"

                first_incl = False
                  
                include_where = include_where + " (" + cur_constraint['sql'] + ") "
                prefilter_incl_where = prefilter_incl_where + " (" + self.get_filter_prefilter(constraint) + ")"

            include_select = (" select distinct base.fhir_id"
                          " from base"
                          " left join incl on base.data -> 'subject' ->> 'reference' = incl.data -> 'subject' ->> 'reference'"
                          " left join enc on incl.data -> 'encounter' ->> 'reference' = concat('Encounter/', enc.data ->> 'id')"
                          " where"
                          )

            prefilter_incl = "incl as ( select * from resources WHERE " + prefilter_incl_where + ")"
            first_constraint = False
        
        if len(self.exclude) > 0:
            
            first_exclude = True
            for exclude in self.exclude:
                cur_exclude = self.get_filter_statement(exclude, "excl")
                params = params + cur_exclude['params']

                if not first_exclude:
                    exclude_where = exclude_where + "OR"
                    prefilter_excl_where = prefilter_excl_where + "OR"

                first_exclude = False

                exclude_where = exclude_where + " (" + cur_exclude['sql'] + ") "
                prefilter_excl_where = prefilter_excl_where + " (" + self.get_filter_prefilter(exclude) + ")"

            exclude_select = (" select distinct base.fhir_id"
                          " from base"
                          " left join excl on base.data -> 'subject' ->> 'reference' = excl.data -> 'subject' ->> 'reference'"
                          " left join enc on excl.data -> 'encounter' ->> 'reference' = concat('Encounter/', enc.data ->> 'id')"
                          " where"
                          )

            prefilter_excl = "excl as ( select * from resources WHERE " + prefilter_excl_where + ")"
            if first_constraint is False:
                prefilter_excl = "," + prefilter_excl

        base = "SELECT * from base WHERE"

        first = True

        if len(self.include) > 0:
            base = base + " base.fhir_id IN"
            first = False
            base = base + "(" + include_select + include_where + ") "

        if len(self.exclude) > 0:
            if first:
                base = base + " base.fhir_id NOT IN"
                first = False
            else:
                base = base + " AND base.fhir_id NOT IN"

            base = base + "(" + exclude_select + exclude_where + ")"

        if first:
            base_where = " type = 'Observation' and base.data -> 'code' -> 'coding' -> 0 ->> 'code' = '" + self.base_search[0] + "'"
        else:
            base_where = " AND type = 'Observation' and base.data -> 'code' -> 'coding' -> 0 ->> 'code' = '" + self.base_search[0] + "'"

        sql = prefilter_base + prefilter_incl + prefilter_excl + base + base_where

        log.debug("Task," + self.task_id + ":" + "begin execution of filter sql on fhir server")
        fhir_connection = psqlDbConnection.get_fhir_db_connection()
        fhir_pgCur = fhir_connection.cursor()
        sql_statement = fhir_pgCur.mogrify(sql, params)
        self.gen_filter_query = sql_statement.decode("UTF-8")
        log.debug("Task," + self.task_id + " sql params:" + str(params))
        log.debug("Task," + self.task_id + " sql: \n #########\n " + sqlparse.format(sql_statement, reindent=True, keyword_case='upper') + "\n #########\n ")

        try:
            fhir_pgCur.execute(sql, params)

        except Exception as e:
            log.error("TASK WORKER: ERROR executing task, stack trace: " + str(e))
            connection = psqlDbConnection.get_db_connection()
            log.error("TASK WORKER: after db connection")
            pgCur = connection.cursor()
            sql = "UPDATE filter_tasks set data = %s where data->>'task_id' = %s"
            self.status = 'ERROR'
            self.error_message = str(e)
            pgCur.execute(sql, (json.dumps(self.__dict__), self.task_id))
            connection.commit()
            # abort here; the cursor holds no result set after a failed execute
            return

        item = fhir_pgCur.fetchmany(10000)
        connection = psqlDbConnection.get_db_connection()
        pgCur = connection.cursor()

        sql = "create table " + result_collection
        sql = sql + ''' 
          (
              id         SERIAL,
              fhir_id    varchar(64) NOT NULL,
              type       varchar(64) NOT NULL,
              data       jsonb       NOT NULL,
              created_at timestamp   NOT NULL DEFAULT NOW(),
              last_updated_at timestamp   NOT NULL DEFAULT NOW(),
              is_deleted      boolean     NOT NULL DEFAULT FALSE
          )
        '''

        pgCur.execute(sql)
        connection.commit()
        log.debug("Task," + self.task_id + ":" + "Finished execution of filter sql on fhir server")

        connection = psqlDbConnection.get_db_connection()
        pgCur = connection.cursor()
        insertSql = "INSERT INTO " + result_collection + " VALUES %s"

        while len(item) > 0:
            item_list_insert = []
            for cur_item in item:
                lst = list(cur_item)
                lst[3] = json.dumps(lst[3])
                cur_item = tuple(lst)
                item_list_insert.append(cur_item)

            execute_values(pgCur, insertSql, item_list_insert, "(%s, %s, %s, %s::jsonb, %s ,%s, %s)")
            item = fhir_pgCur.fetchmany(10000)

        connection.commit()

        feat_task_id = str(ObjectId())
        feature_task = FeatureTaskPsql(feat_task_id, "running", self.feature_set, self.task_id)
        try:
            feature_task.execute_task()
        except Exception as e:
            log.error("TASK WORKER: ERROR executing task, stack trace: " + str(e))
            connection = psqlDbConnection.get_db_connection()
            log.error("TASK WORKER: after db connection")
            pgCur = connection.cursor()
            sql = "UPDATE filter_tasks set data = %s where data->>'task_id' = %s"
            self.status = 'ERROR'
            self.error_message = str(e)
            pgCur.execute(sql, (json.dumps(self.__dict__), self.task_id))
            connection.commit()
            return
        
        pgCur = connection.cursor()
        sql = "UPDATE filter_tasks set data = %s where data->>'task_id' = %s"
        self.status = 'done'
        pgCur.execute(sql, (json.dumps(self.__dict__), self.task_id))
        connection.commit()
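    # Hedged sketch of the query assembled above (CTE names are taken from the
    # code; the WHERE details depend on get_filter_statement and
    # get_filter_prefilter, which are not part of this snippet):
    #
    #   WITH base AS (SELECT * FROM resources WHERE type = 'Observation' AND <base code>),
    #        enc  AS (SELECT * FROM resources WHERE type = 'Encounter'),
    #        incl AS (SELECT * FROM resources WHERE <include prefilter>),
    #        excl AS (SELECT * FROM resources WHERE <exclude prefilter>)
    #   SELECT * FROM base
    #   WHERE base.fhir_id IN     (<subselect joining base, incl, enc>)
    #     AND base.fhir_id NOT IN (<subselect joining base, excl, enc>)
    #     AND type = 'Observation' AND <base code>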
    def execute_task(self):

        log.debug("Task," + self.filter_task_id + ":" +
                  "Begin feature task for filter task")

        # get patients search
        collection_base = "task_" + self.filter_task_id + "_result"

        # get patient ids from observation
        sql = "select distinct data->'subject'->>'reference' from " + collection_base + ";"

        pgCur = psqlDbConnection.get_db_connection().cursor()

        pgCur.execute(sql)

        pat_ids = []
        item = pgCur.fetchone()

        while item is not None:
            pat_ids.append(item[0].split('/')[1])
            item = pgCur.fetchone()

        collection_patient = "task_" + self.filter_task_id + "_pat"

        connection = psqlDbConnection.get_db_connection()
        pgCur = connection.cursor()
        sql = "create table " + collection_patient
        sql = sql + ''' 
          (
              id         SERIAL,
              fhir_id    varchar(64) NOT NULL,
              type       varchar(64) NOT NULL,
              data       jsonb       NOT NULL,
              created_at timestamp   NOT NULL DEFAULT NOW(),
              last_updated_at timestamp   NOT NULL DEFAULT NOW(),
              is_deleted      boolean     NOT NULL DEFAULT FALSE
          )
        '''

        pgCur.execute(sql)
        connection.commit()

        pat_ids_final = (",".join(map(lambda x: "'" + x + "'", pat_ids)))

        select_pats = "SELECT * from resources where type = 'Patient' and fhir_id in (" + pat_ids_final + ")"

        fhir_connection = psqlDbConnection.get_fhir_db_connection()
        fhir_pgCur = fhir_connection.cursor()

        fhir_pgCur.execute(select_pats)
        item = fhir_pgCur.fetchmany(10000)

        connection = psqlDbConnection.get_db_connection()
        pgCur = connection.cursor()
        insertSql = "INSERT INTO " + collection_patient + " VALUES %s"

        while len(item) > 0:
            item_list_insert = []
            for cur_item in item:
                lst = list(cur_item)
                lst[3] = json.dumps(lst[3])
                cur_item = tuple(lst)
                item_list_insert.append(cur_item)

            execute_values(pgCur, insertSql, item_list_insert,
                           "(%s, %s, %s, %s::jsonb, %s, %s, %s)")
            item = fhir_pgCur.fetchmany(10000)

        connection.commit()
        #for cur_pat_ids in pat_final:
        #    params = [{"key": "_id", "values": cur_pat_ids}]
        #    self.search_resource_by_params("Patient", params, collection_patient)

        log.debug("Task," + self.filter_task_id + ":" +
                  "Begin creation of feature set for filter task")
        self.create_feature_set()

        return