def do_etl(self): inserts = [] cnt = 0 with hic_covid_session() as session: session.execute( f"TRUNCATE TABLE `{Administration.__tablename__}`;") with hic_covid_session() as session: with uhl_dwh_databases_engine() as conn: rs = conn.execute(COVID_ADMINISTRATION_SQL) for row in rs: v = Administration( uhl_system_number=row['uhl_system_number'], administration_id=row['administration_id'], administration_datetime=row['administration_datetime'], medication_name=row['medication_name'], dose_id=row['dose_id'], dose=row['dose'], dose_unit=row['dose_unit'], form_name=row['form_name'], route_name=row['route_name'], ) inserts.append(v) cnt += 1 if cnt % 1000 == 0: logging.info(f"Saving administration. total = {cnt}") session.add_all(inserts) inserts = [] session.commit() session.add_all(inserts) session.commit()
def do_etl(self): inserts = [] cnt = 0 with hic_covid_session() as session: session.execute(f"TRUNCATE TABLE `{Procedure.__tablename__}`;") with hic_covid_session() as session: with uhl_dwh_databases_engine() as conn: rs = conn.execute(COVID_PROCEDURE_SQL) for row in rs: v = Procedure(procedure_id=row['procedure_id'], ) v.spell_id = row['spell_id'] v.episode_id = row['episode_id'] v.uhl_system_number = row['uhl_system_number'] v.procedure_number = row['procedure_number'] v.procedure_code = row['procedure_code'] v.procedure_name = row['procedure_name'] v.admission_datetime = row['admission_datetime'] inserts.append(v) cnt += 1 if cnt % 1000 == 0: logging.info(f"Saving Procedure batch. total = {cnt}") session.add_all(inserts) inserts = [] session.commit() session.add_all(inserts) session.commit()
def do_etl(self): inserts = [] cnt = 0 with hic_covid_session() as session: session.execute(f"TRUNCATE TABLE `{Transfer.__tablename__}`;") with hic_covid_session() as session: with uhl_dwh_databases_engine() as conn: rs = conn.execute(COVID_TRANSFERS_SQL) for row in rs: v = Transfer(transfer_id=row['transfer_id'], ) v.spell_id = row['spell_id'] v.transfer_type = row['transfer_type'] v.uhl_system_number = row['uhl_system_number'] v.transfer_datetime = row['transfer_datetime'] v.ward_code = row['ward_code'] v.ward_name = row['ward_name'] v.hospital = row['hospital'] inserts.append(v) cnt += 1 if cnt % 1000 == 0: logging.info(f"Saving transfer batch. total = {cnt}") session.add_all(inserts) inserts = [] session.commit() session.add_all(inserts) session.commit()
def do_etl(self): inserts = [] cnt = 0 with hic_covid_session() as session: session.execute(f"TRUNCATE TABLE `{Order.__tablename__}`;") with hic_covid_session() as session: with uhl_dwh_databases_engine() as conn: rs = conn.execute(COVID_ORDERS_SQL, current_datetime=datetime.utcnow()) for row in rs: v = Order( uhl_system_number=row['uhl_system_number'], order_id=row['ORDER_ID'], order_key=row['ORDER_KEY'], scheduled_datetime=row['SCHEDULE_DATE'], request_datetime=row['Request_Date_Time'], examination_code=row['examination_code'], examination_description=row['examination_description'], snomed_code=row['snomed_code'], modality=row['MODALITY'], ) inserts.append(v) cnt += 1 if cnt % 1000 == 0: logging.info(f"Saving Order. total = {cnt}") session.add_all(inserts) session.commit() inserts = [] session.add_all(inserts) session.commit()
def do_etl(self): inserts = [] cnt = 0 with hic_covid_session() as session: session.execute(f"TRUNCATE TABLE {Diagnosis.__tablename__};") with hic_covid_session() as session: with uhl_dwh_databases_engine() as conn: rs = conn.execute(COVID_DIAGNOSIS_SQL) for row in rs: v = Diagnosis(diagnosis_id=row['diagnosis_id'], ) v.spell_id = row['spell_id'] v.episode_id = row['episode_id'] v.uhl_system_number = row['uhl_system_number'] v.diagnosis_number = row['diagnosis_number'] v.diagnosis_code = row['diagnosis_code'] v.diagnosis_name = row['diagnosis_name'] v.admission_datetime = row['admission_datetime'] inserts.append(v) cnt += 1 if cnt % 1000 == 0: logging.info(f"Saving Diagnosis batch. Total = {cnt}") session.add_all(inserts) inserts = [] session.commit() session.add_all(inserts) session.commit()
def do_etl(self): inserts = [] cnt = 0 with hic_covid_session() as session: session.execute(f"TRUNCATE TABLE `{Observation.__tablename__}`;") with hic_covid_session() as session: with uhl_dwh_databases_engine() as conn: rs = conn.execute(COVID_OBSERVATION_SQL) observation_names = [ c[:-4] for c in rs.keys() if c.lower().endswith('_ews') and c[:-4] in rs.keys() ] ews_names = { c[:-4]: c for c in rs.keys() if c.lower().endswith('_ews') } units_names = { c[:-6]: c for c in rs.keys() if c.lower().endswith('_units') } observations = [(o, ews_names[o], units_names[o]) for o in observation_names] for row in rs: observation_id = row['ObsId'] uhl_system_number = row['System Number > Patient ID'] observation_datetime = row['Timestamp'] for o, ews, units in observations: if row[o] is not None or row[ews] is not None: v = Observation( uhl_system_number=uhl_system_number, observation_id=observation_id, observation_datetime=observation_datetime, observation_name=o, observation_value=row[o], observation_ews=row[ews], observation_units=row[units], ) inserts.append(v) cnt += 1 if cnt % 1000 == 0: logging.info( f"Saving observation. total = {cnt}") session.add_all(inserts) session.commit() inserts = [] session.add_all(inserts) session.commit()
def do_etl(self): inserts = [] cnt = 0 with hic_covid_session() as session: session.execute( f"TRUNCATE TABLE `{CriticalCarePeriod.__tablename__}`;") with hic_covid_session() as session: with uhl_dwh_databases_engine() as conn: rs = conn.execute(COVID_CLINICAL_CARE_PERIOD_SQL) for row in rs: v = CriticalCarePeriod( uhl_system_number=row['uhl_system_number'], ccp_id=row['ccp_id'], local_identifier=row['CCP_LOCAL_IDENTIFIER'], treatment_function_code=row['treatment_function_code'], treatment_function_name=row['treatment_function_name'], start_datetime=row['CCP_START_DATE_TIME'], basic_respiratory_support_days=row[ 'BASIC_RESPIRATORY_SUPPORT_DAYS'], advanced_respiratory_support_days=row[ 'ADVANCED_RESPIRATORY_SUPPORT_DAYS'], basic_cardiovascular_support_days=row[ 'BASIC_CARDIOVASCULAR_SUPPORT_DAYS'], advanced_cardiovascular_support_days=row[ 'ADVANCED_CARDIOVASCULAR_SUPPORT_DAYS'], renal_support_days=row['RENAL_SUPPORT_DAYS'], neurological_support_days=row[ 'NEUROLOGICAL_SUPPORT_DAYS'], dermatological_support_days=row[ 'DERMATOLOGICAL_SUPPORT_DAYS'], liver_support_days=row['LIVER_SUPPORT_DAYS'], critical_care_level_2_days=row[ 'CRITICAL_CARE_LEVEL_2_DAYS'], critical_care_level_3_days=row[ 'CRITICAL_CARE_LEVEL_3_DAYS'], discharge_datetime=row['CCP_END_DATE_TIME'], ) inserts.append(v) cnt += 1 if cnt % 1000 == 0: logging.info( f"Saving clinical care period. total = {cnt}") session.add_all(inserts) inserts = [] session.commit() session.add_all(inserts) session.commit()
def do_etl(self): inserts = [] cnt = 0 with hic_covid_session() as session: session.execute(f"TRUNCATE TABLE {Virology.__tablename__};") with hic_covid_session() as session: with uhl_dwh_databases_engine() as conn: rs = conn.execute(COVID_VIROLOGY_SQL) for row in rs: v = Virology( uhl_system_number=row['uhl_system_number'], test_id=row['test_id'], laboratory_code=row['laboratory_code'], order_code=row['order_code'], order_name=row['order_name'], test_code=row['test_code'], test_name=row['test_name'], organism=row['organism'], test_result=row['test_result'], sample_collected_date_time=row[ 'sample_collected_date_time'], sample_received_date_time=row[ 'sample_received_date_time'], sample_available_date_time=row[ 'sample_available_date_time'], order_status=row['order_status'], ) inserts.append(v) cnt += 1 if cnt % 1000 == 0: logging.info(f"Saving Virology batch. Total = {cnt}") session.add_all(inserts) inserts = [] session.commit() session.add_all(inserts) session.commit()
def do_etl(self): inserts = [] cnt = 0 with hic_covid_session() as session: session.execute( f"TRUNCATE TABLE `{MicrobiologyTest.__tablename__}`;") with hic_covid_session() as session: with uhl_dwh_databases_engine() as conn: rs = conn.execute(COVID_MICROBIOLOGY_SQL) for row in rs: v = MicrobiologyTest( uhl_system_number=row['uhl_system_number'], test_id=row['test_id'], order_code=row['order_code'], order_name=row['order_name'], test_code=row['test_code'], test_name=row['test_name'], organism=row['organism'], result=row['test_result'], sample_collected_datetime=row[ 'sample_collected_date_time'], sample_received_datetime=row[ 'sample_received_date_time'], result_datetime=row['result_datetime'], specimen_site=row['specimen_site'], ) inserts.append(v) cnt += 1 if cnt % 1000 == 0: logging.info(f"Saving Test batch. total = {cnt}") session.add_all(inserts) inserts = [] session.commit() session.add_all(inserts) session.commit()
def do_etl(self): inserts = [] updates = [] with hic_covid_session() as session: with uhl_dwh_databases_engine() as conn: rs = conn.execute(COVID_DEMOGRAPHICS_SQL) for row in rs: d = session.query(Demographics).filter_by( uhl_system_number=row['uhl_system_number'] ).one_or_none() if d is None: d = Demographics( uhl_system_number=row['uhl_system_number']) inserts.append(d) else: updates.append(d) d.nhs_number = row['nhs_number'] d.gp_practice = row['gp_practice'] d.age = row['age'] d.date_of_birth = row['date_of_birth'] d.date_of_death = row['date_of_death'] d.postcode = row['postcode'] d.sex = row['sex'] d.ethnic_category = row['ethnic_category'] if len(inserts) > 0: url_parts = urlparse(IDENTITY_HOST) url_parts = url_parts._replace( query=urlencode({'api_key': IDENTITY_API_KEY}), path='api/create_pseudorandom_ids', ) url = urlunparse(url_parts) # Do not verify locally signed certificate ids = requests.post(url, json={ 'prefix': 'HCVPt', 'id_count': len(inserts) }, verify=False) for id, d in zip(ids.json()['ids'], inserts): d.participant_identifier = id session.add_all(inserts) session.add_all(updates) session.commit()
def do_etl(self): inserts = [] cnt = 0 with hic_covid_session() as session: session.execute(f"TRUNCATE TABLE {Episode.__tablename__};") with hic_covid_session() as session: with uhl_dwh_databases_engine() as conn: rs = conn.execute(COVID_EPISODE_SQL) for row in rs: v = Episode(episode_id=row['episode_id'], ) v.spell_id = row['spell_id'] v.uhl_system_number = row['uhl_system_number'] v.admission_datetime = row['admission_datetime'] v.discharge_datetime = row['discharge_datetime'] v.order_no_of_episode = row['order_no_of_episode'] v.admission_method_code = row['admission_method_code'] v.admission_method_name = row['admission_method_name'] v.admission_source_code = row['admission_source_code'] v.admission_source_name = row['admission_source_name'] v.discharge_method_code = row['discharge_method_code'] v.discharge_method_name = row['discharge_method_name'] v.treatment_function_code = row['treatment_function_code'] v.treatment_function_name = row['treatment_function_name'] inserts.append(v) cnt += 1 if cnt % 1000 == 0: logging.info(f"Saving Episode batch. Total = {cnt}") session.add_all(inserts) inserts = [] session.commit() session.add_all(inserts) session.commit()
def do_etl(self): inserts = [] cnt = 0 with hic_covid_session() as session: session.execute(f"TRUNCATE TABLE `{Prescribing.__tablename__}`;") with hic_covid_session() as session: with uhl_dwh_databases_engine() as conn: rs = conn.execute(COVID_PRESCRIBING_SQL) for row in rs: v = Prescribing( uhl_system_number=row['uhl_system_number'], order_id=row['order_id'], method_name=row['method_name'], order_type=row['order_type'], medication_name=row['medication_name'], min_dose=row['min_dose'], max_does=row['max_dose'], frequency=row['frequency'], form=['form'], does_units=row['dose_units'], route=row['route'], ordered_datetime=row['ordered_datetime'], ) inserts.append(v) cnt += 1 if cnt % 1000 == 0: logging.info(f"Saving medication. total = {cnt}") session.add_all(inserts) inserts = [] session.commit() session.add_all(inserts) session.commit()
def do_etl(self): inserts = [] cnt = 0 with hic_covid_session() as session: session.execute(f"TRUNCATE TABLE `{BloodTest.__tablename__}`;") with hic_covid_session() as session: with uhl_dwh_databases_engine() as conn: rs = conn.execute(COVID_BLOODS_SQL) for row in rs: v = BloodTest(test_id=row['test_id'], ) v.uhl_system_number = row['uhl_system_number'] v.test_code = row['test_code'] v.test_name = row['test_name'] v.result = row['result'] v.result_expansion = row['result_expansion'] v.result_units = row['result_units'] v.sample_collected_datetime = row[ 'sample_collected_datetime'] v.result_datetime = row['result_datetime'] v.lower_range = row['lower_range'] v.higher_range = row['higher_range'] v.receive_datetime = row['receive_datetime'] inserts.append(v) cnt += 1 if cnt % 1000 == 0: logging.info(f"Saving Test batch. total = {cnt}") session.add_all(inserts) inserts = [] session.commit() session.add_all(inserts) session.commit()
def do_etl(self): inserts = [] cnt = 0 with hic_covid_session() as session: session.execute(f"TRUNCATE TABLE {Emergency.__tablename__};") with hic_covid_session() as session: with uhl_dwh_databases_engine() as conn: rs = conn.execute(COVID_EMERGENCY_SQL) for row in rs: v = Emergency(visitid=row['visitid'], ) v.uhl_system_number = row['PP_IDENTIFIER'] v.arrival_datetime = self._date_and_time( row['ARRIVAL_DATE'], row['ARRIVAL_TIME']) v.departure_datetime = self._date_and_time( row['DISCHARGE_DATE'], row['DISCHARGE_TIME']) v.arrival_mode_code = row['PP_ARRIVAL_TRANS_MODE_CODE'] v.arrival_mode_text = row['PP_ARRIVAL_TRANS_MODE_NAME'] v.departure_code = row['PP_DEP_DEST_ID'] v.departure_text = row['PP_DEP_DEST'] v.complaint_code = row['PP_PRESENTING_PROBLEM_CODE'] v.complaint_text = row['PP_PRESENTING_PROBLEM'] inserts.append(v) cnt += 1 if cnt % 1000 == 0: logging.info(f"Saving Emergency batch. Total = {cnt}") session.add_all(inserts) inserts = [] session.commit() session.add_all(inserts) session.commit()
def do_etl(self): count = 0 with hic_covid_session() as session: for x in session.query(self.cls_).yield_per(1000): count += 1 if count % 1000 == 0: self.log(f'{count:,} records checked') for c in self.columns: value = getattr(x, c) self.contains_uhl_system_number(c, value) self.contains_postcode(c, value) self.contains_nhs_number(c, value) self.contains_dob(c, value) self.contains_name(c, value) self.log(f'{count:,} records checked') report = '' for c in self.columns: if self.errors[c]['UHL System Number']: report += f'Column "{c}" may contain a UHL System Number\n' if self.errors[c]['postcode']: report += f'Column "{c}" may contain a postcode\n' if self.errors[c]['NHS Number']: report += f'Column "{c}" may contain an NHS Number\n' if self.errors[c]['Date of Birth']: report += f'Column "{c}" may contain a Date of Birth\n' if len(self.errors[c]['Name']) > 0: report += f'Column "{c}" may contain the names: {", ".join(self.errors[c]["Name"])}\n' self.log( message='Validation report', attachment=report, )