def conceptPatientCount(self, top: pd.DataFrame, c_fullname: str,
                        lc: LoggedConnection,
                        # Moderate degree to support work on
                        # several tables in parallel.
                        parallel_degree: int=8) -> int:
    """Return the c_totalnum value computed for one metadata term.

    The query selects the non-synonym row of
    ``{i2b2meta}.{table_name}`` whose ``c_fullname`` equals the given
    one and computes ``c_totalnum`` as follows:

    - ``sentinel * 1`` when ``c_visualattributes`` starts with ``C``;
    - ``sentinel * 2`` when the term is not a ``concept_dimension`` /
      ``like`` / ``concept_cd`` term;
    - otherwise the number of distinct ``patient_num`` values in
      ``observation_fact`` for concepts whose ``concept_path`` starts
      with the term's ``c_dimcode``, falling back to ``sentinel * 3``
      via ``coalesce``.

    NOTE(review): ``top`` is annotated as a DataFrame, but
    ``top.c_table_name`` is substituted into the SQL as a table name,
    which suggests a single row (e.g. a Series) -- confirm with callers.

    :param top: object whose ``c_table_name`` names the metadata table.
    :param c_fullname: full name of the term to count.
    :param lc: connection handed to ``read_sql_step``.
    :param parallel_degree: value placed in the ``parallel`` SQL hint.
    :return: the single ``c_totalnum`` value as an ``int``.
    :raises ValueError: when the query does not yield exactly one row
        (the single-element unpacking fails).
    """
    template = (
        " select /*+ parallel({degree}) */"
        " c_fullname, c_hlevel, c_visualattributes, c_name"
        " , case"
        " when upper(meta.c_visualattributes) like 'C%'"
        " then :sentinel * 1"
        " when lower(meta.c_tablename) <> 'concept_dimension'"
        " or lower(meta.c_operator) <> 'like'"
        " or lower(meta.c_facttablecolumn) <> 'concept_cd'"
        " then :sentinel * 2"
        " else coalesce(("
        " select count(distinct obs.patient_num)"
        " from ("
        " select concept_cd"
        " from {i2b2star}.concept_dimension"
        " where concept_path like (meta.c_dimcode || '%')"
        " ) cd"
        " join {i2b2star}.observation_fact obs"
        " on obs.concept_cd = cd.concept_cd), :sentinel * 3)"
        " end c_totalnum"
        " from {i2b2meta}.{table_name} meta"
        " where c_synonym_cd = 'N'"
        " and meta.c_fullname = :c_fullname "
    )
    sql = template.strip().format(
        i2b2star=self.i2b2star,
        i2b2meta=self.i2b2meta,
        degree=parallel_degree,
        table_name=top.c_table_name)
    bind = dict(c_fullname=c_fullname, sentinel=self.sentinel)
    by_fullname = read_sql_step(sql, lc=lc, params=bind).set_index(
        'c_fullname')
    # Exactly one matching row is expected; unpacking enforces that.
    [count] = by_fullname.c_totalnum.values
    return int(count)
def topFolders(i2b2meta: str, lc: LoggedConnection) -> pd.DataFrame:
    """List the active top-level entries of ``table_access``.

    Selects rows of ``{i2b2meta}.table_access`` whose
    ``c_visualattributes`` has ``A`` as its second character
    (case-insensitively), ordered by ``c_name``; ``c_table_name`` is
    upper-cased by the query.

    :param i2b2meta: schema name substituted into the SQL text.
    :param lc: connection handed to ``read_sql_step``.
    :return: the query result indexed by ``c_table_cd``.
    """
    template = (
        " select c_table_cd, c_hlevel, c_visualattributes, c_name,"
        " upper(c_table_name) c_table_name, c_fullname"
        " from {i2b2meta}.table_access ta"
        " where upper(ta.c_visualattributes) like '_A%'"
        " order by ta.c_name "
    )
    sql = template.format(i2b2meta=i2b2meta).strip()
    return read_sql_step(sql, lc, {}).set_index('c_table_cd')
def cohorts(self, lc: et.LoggedConnection) -> pd.DataFrame:
    """Summarize ``site_cohorts`` per task, plus an overall total row.

    The first query returns one row per (site_schema,
    result_instance_id, start_date, task_id) with the number of
    distinct patients, newest ``start_date`` first.  The second query
    returns a single row labelled ``'Total'`` in ``task_id`` with the
    count of distinct site schemas, the maximum result instance id and
    start date, and the distinct patient count over all rows.

    :param lc: connection handed to ``rif_etl.read_sql_step``.
    :return: per-task rows followed by the total row, indexed by
        ``task_id``.
    """
    per_task = rif_etl.read_sql_step(
        " select site_schema, result_instance_id, start_date, task_id,"
        " count(distinct patient_num)"
        " from site_cohorts"
        " group by site_schema, result_instance_id, start_date, task_id"
        " order by start_date desc ",
        lc)
    total = rif_etl.read_sql_step(
        " select count(distinct site_schema) site_schema"
        " , max(result_instance_id) result_instance_id"
        " , max(start_date) start_date"
        " , 'Total' task_id"
        " , count(distinct patient_num)"
        " from site_cohorts",
        lc)
    # DataFrame.append was removed in pandas 2.0; pd.concat with its
    # defaults stacks the rows the same way append did.
    return pd.concat([per_task, total]).set_index('task_id')
def top(self, lc: LoggedConnection) -> pd.Series:
    """Fetch the active ``table_access`` row for ``self.c_table_cd``.

    Queries ``{i2b2meta}.table_access`` for rows whose
    ``c_visualattributes`` has ``A`` as its second character
    (case-insensitively) and whose ``c_table_cd`` matches
    ``self.c_table_cd``; ``c_table_name`` is upper-cased by the query.

    :param lc: connection handed to ``read_sql_step``.
    :return: the first row of the result (indexed by ``c_table_cd``).
        Taking ``.iloc[0]`` fails when no row matches.
    """
    template = (
        " select c_table_cd, c_hlevel, c_visualattributes, c_name"
        " , upper(c_table_name) c_table_name, c_fullname"
        " from {i2b2meta}.table_access ta"
        " where upper(ta.c_visualattributes) like '_A%'"
        " and ta.c_table_cd = :c_table_cd "
    )
    sql = template.format(i2b2meta=self.i2b2meta).strip()
    bind = dict(c_table_cd=self.c_table_cd)
    matches = read_sql_step(sql, lc, bind).set_index('c_table_cd')
    return matches.iloc[0]
def uploads(self, lc: et.LoggedConnection) -> pd.DataFrame:
    """Return selected ``upload_status`` rows, newest load first.

    A row is selected when ``load_status`` starts with ``OK`` and at
    least one of the following holds:

    - ``loaded_record > 0`` and the last 11 characters of
      ``transform_name`` are a ``task_id`` found in ``site_cohorts``;
    - ``upload_label`` starts with ``migrate obs``;
    - ``message`` starts with ``UP#`` and ``upload_label`` contains
      `` #1 of 1``.

    :param lc: connection handed to ``rif_etl.read_sql_step``.
    :return: the matching rows indexed by ``upload_id``.
    """
    sql = (
        " select *"
        " from upload_status up"
        " where load_status like 'OK%'"
        " and (("
        " loaded_record > 0"
        " and substr(transform_name, -11) in ("
        " select distinct task_id from site_cohorts"
        " )"
        " ) or ("
        " upload_label like 'migrate obs%'"
        " ) or ("
        " message like 'UP#%'"
        " and upload_label like '% #1 of 1%'"
        " ))"
        " order by load_date desc "
    )
    status = rif_etl.read_sql_step(sql, lc)
    return status.set_index('upload_id')
def activeDescendants(self, lc: LoggedConnection) -> pd.DataFrame:
    """List the active, non-synonym terms below this object's top row.

    Looks up the top row with ``self.top(lc)``, then queries the
    metadata table named by its ``c_table_name`` for rows that:

    - have ``c_hlevel`` greater than the top row's;
    - have a ``c_fullname`` starting with the top row's ``c_fullname``;
    - have ``A`` as the second character of ``c_visualattributes``
      (case-insensitively);
    - have ``c_synonym_cd = 'N'`` and ``m_applied_path = '@'``.

    Rows are ordered by ``c_hlevel`` and then upper-cased ``c_name``.

    :param lc: connection handed to ``self.top`` and ``read_sql_step``.
    :return: the matching rows indexed by ``c_fullname``.
    """
    root = self.top(lc)
    template = (
        " select c_fullname, c_hlevel, c_visualattributes, c_totalnum, c_name,"
        " c_tooltip"
        " from {i2b2meta}.{meta_table} meta"
        " where meta.c_hlevel > :c_hlevel"
        " and meta.c_fullname like (:c_fullname || '%')"
        " and upper(meta.c_visualattributes) like '_A%'"
        " and c_synonym_cd = 'N'"
        " and m_applied_path = '@'"
        " order by meta.c_hlevel, upper(meta.c_name) "
    )
    sql = template.format(
        i2b2meta=self.i2b2meta, meta_table=root.c_table_name).strip()
    # c_hlevel is passed as a plain int bind value.
    bind = dict(c_fullname=root.c_fullname, c_hlevel=int(root.c_hlevel))
    return read_sql_step(sql, lc=lc, params=bind).set_index('c_fullname')
def inclusion_criteria(self, lc: et.LoggedConnection) -> pd.DataFrame:
    """Look up names for the fixed list of inclusion concept codes.

    Queries ``blueherondata_kumc_calamus.concept_dimension`` for the
    hard-coded concept codes below and returns, per code, the minimum
    ``name_char``, ordered by ``concept_cd``.

    :param lc: connection handed to ``rif_etl.read_sql_step``.
    :return: one row per concept code found, indexed by ``concept_cd``.
    """
    # ISSUE: sync with build_cohort.sql?
    # ISSUE: left out 'NAACCR|400:C509'
    # NOTE(review): 'NAACCR|400:C509' does appear in the list below, so
    # the ISSUE above may be stale or may refer to build_cohort.sql --
    # confirm.
    sql = (
        " select concept_cd, min(name_char) name_char"
        " from blueherondata_kumc_calamus.concept_dimension"
        " where concept_cd in ("
        " 'SEER_SITE:26000',"
        " 'NAACCR|400:C500', 'NAACCR|400:C501', 'NAACCR|400:C502',"
        " 'NAACCR|400:C503', 'NAACCR|400:C504', 'NAACCR|400:C505',"
        " 'NAACCR|400:C506', 'NAACCR|400:C507', 'NAACCR|400:C508',"
        " 'NAACCR|400:C509'"
        " )"
        " group by concept_cd"
        " order by concept_cd "
    )
    criteria = rif_etl.read_sql_step(sql, lc)
    return criteria.set_index('concept_cd')