Example #1
0
    def do_etl(self):
        inserts = []
        cnt = 0

        with hic_covid_session() as session:
            session.execute(
                f"TRUNCATE TABLE `{Administration.__tablename__}`;")

        with hic_covid_session() as session:
            with uhl_dwh_databases_engine() as conn:
                rs = conn.execute(COVID_ADMINISTRATION_SQL)
                for row in rs:
                    v = Administration(
                        uhl_system_number=row['uhl_system_number'],
                        administration_id=row['administration_id'],
                        administration_datetime=row['administration_datetime'],
                        medication_name=row['medication_name'],
                        dose_id=row['dose_id'],
                        dose=row['dose'],
                        dose_unit=row['dose_unit'],
                        form_name=row['form_name'],
                        route_name=row['route_name'],
                    )

                    inserts.append(v)
                    cnt += 1

                    if cnt % 1000 == 0:
                        logging.info(f"Saving administration.  total = {cnt}")
                        session.add_all(inserts)
                        inserts = []
                        session.commit()

            session.add_all(inserts)
            session.commit()
Example #2
0
    def do_etl(self):
        inserts = []
        cnt = 0

        with hic_covid_session() as session:
            session.execute(f"TRUNCATE TABLE `{Procedure.__tablename__}`;")

        with hic_covid_session() as session:
            with uhl_dwh_databases_engine() as conn:
                rs = conn.execute(COVID_PROCEDURE_SQL)
                for row in rs:
                    v = Procedure(procedure_id=row['procedure_id'], )

                    v.spell_id = row['spell_id']
                    v.episode_id = row['episode_id']
                    v.uhl_system_number = row['uhl_system_number']
                    v.procedure_number = row['procedure_number']
                    v.procedure_code = row['procedure_code']
                    v.procedure_name = row['procedure_name']
                    v.admission_datetime = row['admission_datetime']

                    inserts.append(v)
                    cnt += 1

                    if cnt % 1000 == 0:
                        logging.info(f"Saving Procedure batch.  total = {cnt}")
                        session.add_all(inserts)
                        inserts = []
                        session.commit()

            session.add_all(inserts)
            session.commit()
Example #3
0
    def do_etl(self):
        inserts = []
        cnt = 0

        with hic_covid_session() as session:
            session.execute(f"TRUNCATE TABLE `{Transfer.__tablename__}`;")

        with hic_covid_session() as session:
            with uhl_dwh_databases_engine() as conn:
                rs = conn.execute(COVID_TRANSFERS_SQL)
                for row in rs:
                    v = Transfer(transfer_id=row['transfer_id'], )

                    v.spell_id = row['spell_id']
                    v.transfer_type = row['transfer_type']
                    v.uhl_system_number = row['uhl_system_number']
                    v.transfer_datetime = row['transfer_datetime']
                    v.ward_code = row['ward_code']
                    v.ward_name = row['ward_name']
                    v.hospital = row['hospital']

                    inserts.append(v)
                    cnt += 1

                    if cnt % 1000 == 0:
                        logging.info(f"Saving transfer batch.  total = {cnt}")
                        session.add_all(inserts)
                        inserts = []
                        session.commit()

            session.add_all(inserts)
            session.commit()
Example #4
0
    def do_etl(self):
        inserts = []
        cnt = 0

        with hic_covid_session() as session:
            session.execute(f"TRUNCATE TABLE `{Order.__tablename__}`;")

        with hic_covid_session() as session:
            with uhl_dwh_databases_engine() as conn:
                rs = conn.execute(COVID_ORDERS_SQL,
                                  current_datetime=datetime.utcnow())
                for row in rs:
                    v = Order(
                        uhl_system_number=row['uhl_system_number'],
                        order_id=row['ORDER_ID'],
                        order_key=row['ORDER_KEY'],
                        scheduled_datetime=row['SCHEDULE_DATE'],
                        request_datetime=row['Request_Date_Time'],
                        examination_code=row['examination_code'],
                        examination_description=row['examination_description'],
                        snomed_code=row['snomed_code'],
                        modality=row['MODALITY'],
                    )

                    inserts.append(v)
                    cnt += 1

                    if cnt % 1000 == 0:
                        logging.info(f"Saving Order.  total = {cnt}")
                        session.add_all(inserts)
                        session.commit()
                        inserts = []

            session.add_all(inserts)
            session.commit()
Example #5
0
    def do_etl(self):
        inserts = []
        cnt = 0

        with hic_covid_session() as session:
            session.execute(f"TRUNCATE TABLE {Diagnosis.__tablename__};")

        with hic_covid_session() as session:
            with uhl_dwh_databases_engine() as conn:
                rs = conn.execute(COVID_DIAGNOSIS_SQL)
                for row in rs:
                    v = Diagnosis(diagnosis_id=row['diagnosis_id'], )

                    v.spell_id = row['spell_id']
                    v.episode_id = row['episode_id']
                    v.uhl_system_number = row['uhl_system_number']
                    v.diagnosis_number = row['diagnosis_number']
                    v.diagnosis_code = row['diagnosis_code']
                    v.diagnosis_name = row['diagnosis_name']
                    v.admission_datetime = row['admission_datetime']

                    inserts.append(v)
                    cnt += 1

                    if cnt % 1000 == 0:
                        logging.info(f"Saving Diagnosis batch. Total = {cnt}")
                        session.add_all(inserts)
                        inserts = []
                        session.commit()

            session.add_all(inserts)
            session.commit()
Example #6
0
    def do_etl(self):
        inserts = []
        cnt = 0

        with hic_covid_session() as session:
            session.execute(f"TRUNCATE TABLE `{Observation.__tablename__}`;")

        with hic_covid_session() as session:
            with uhl_dwh_databases_engine() as conn:
                rs = conn.execute(COVID_OBSERVATION_SQL)

                observation_names = [
                    c[:-4] for c in rs.keys()
                    if c.lower().endswith('_ews') and c[:-4] in rs.keys()
                ]
                ews_names = {
                    c[:-4]: c
                    for c in rs.keys() if c.lower().endswith('_ews')
                }
                units_names = {
                    c[:-6]: c
                    for c in rs.keys() if c.lower().endswith('_units')
                }

                observations = [(o, ews_names[o], units_names[o])
                                for o in observation_names]

                for row in rs:

                    observation_id = row['ObsId']
                    uhl_system_number = row['System Number > Patient ID']
                    observation_datetime = row['Timestamp']

                    for o, ews, units in observations:
                        if row[o] is not None or row[ews] is not None:
                            v = Observation(
                                uhl_system_number=uhl_system_number,
                                observation_id=observation_id,
                                observation_datetime=observation_datetime,
                                observation_name=o,
                                observation_value=row[o],
                                observation_ews=row[ews],
                                observation_units=row[units],
                            )

                            inserts.append(v)
                            cnt += 1

                            if cnt % 1000 == 0:
                                logging.info(
                                    f"Saving observation.  total = {cnt}")
                                session.add_all(inserts)
                                session.commit()
                                inserts = []

            session.add_all(inserts)
            session.commit()
Example #7
0
    def do_etl(self):
        inserts = []
        cnt = 0

        with hic_covid_session() as session:
            session.execute(
                f"TRUNCATE TABLE `{CriticalCarePeriod.__tablename__}`;")

        with hic_covid_session() as session:
            with uhl_dwh_databases_engine() as conn:
                rs = conn.execute(COVID_CLINICAL_CARE_PERIOD_SQL)
                for row in rs:
                    v = CriticalCarePeriod(
                        uhl_system_number=row['uhl_system_number'],
                        ccp_id=row['ccp_id'],
                        local_identifier=row['CCP_LOCAL_IDENTIFIER'],
                        treatment_function_code=row['treatment_function_code'],
                        treatment_function_name=row['treatment_function_name'],
                        start_datetime=row['CCP_START_DATE_TIME'],
                        basic_respiratory_support_days=row[
                            'BASIC_RESPIRATORY_SUPPORT_DAYS'],
                        advanced_respiratory_support_days=row[
                            'ADVANCED_RESPIRATORY_SUPPORT_DAYS'],
                        basic_cardiovascular_support_days=row[
                            'BASIC_CARDIOVASCULAR_SUPPORT_DAYS'],
                        advanced_cardiovascular_support_days=row[
                            'ADVANCED_CARDIOVASCULAR_SUPPORT_DAYS'],
                        renal_support_days=row['RENAL_SUPPORT_DAYS'],
                        neurological_support_days=row[
                            'NEUROLOGICAL_SUPPORT_DAYS'],
                        dermatological_support_days=row[
                            'DERMATOLOGICAL_SUPPORT_DAYS'],
                        liver_support_days=row['LIVER_SUPPORT_DAYS'],
                        critical_care_level_2_days=row[
                            'CRITICAL_CARE_LEVEL_2_DAYS'],
                        critical_care_level_3_days=row[
                            'CRITICAL_CARE_LEVEL_3_DAYS'],
                        discharge_datetime=row['CCP_END_DATE_TIME'],
                    )

                    inserts.append(v)
                    cnt += 1

                    if cnt % 1000 == 0:
                        logging.info(
                            f"Saving clinical care period.  total = {cnt}")
                        session.add_all(inserts)
                        inserts = []
                        session.commit()

            session.add_all(inserts)
            session.commit()
Example #8
0
    def do_etl(self):
        inserts = []
        cnt = 0

        with hic_covid_session() as session:
            session.execute(f"TRUNCATE TABLE {Virology.__tablename__};")

        with hic_covid_session() as session:
            with uhl_dwh_databases_engine() as conn:
                rs = conn.execute(COVID_VIROLOGY_SQL)
                for row in rs:
                    v = Virology(
                        uhl_system_number=row['uhl_system_number'],
                        test_id=row['test_id'],
                        laboratory_code=row['laboratory_code'],
                        order_code=row['order_code'],
                        order_name=row['order_name'],
                        test_code=row['test_code'],
                        test_name=row['test_name'],
                        organism=row['organism'],
                        test_result=row['test_result'],
                        sample_collected_date_time=row[
                            'sample_collected_date_time'],
                        sample_received_date_time=row[
                            'sample_received_date_time'],
                        sample_available_date_time=row[
                            'sample_available_date_time'],
                        order_status=row['order_status'],
                    )

                    inserts.append(v)
                    cnt += 1

                    if cnt % 1000 == 0:
                        logging.info(f"Saving Virology batch. Total = {cnt}")
                        session.add_all(inserts)
                        inserts = []
                        session.commit()

            session.add_all(inserts)
            session.commit()
Example #9
0
    def do_etl(self):
        inserts = []
        cnt = 0

        with hic_covid_session() as session:
            session.execute(
                f"TRUNCATE TABLE `{MicrobiologyTest.__tablename__}`;")

        with hic_covid_session() as session:
            with uhl_dwh_databases_engine() as conn:
                rs = conn.execute(COVID_MICROBIOLOGY_SQL)
                for row in rs:
                    v = MicrobiologyTest(
                        uhl_system_number=row['uhl_system_number'],
                        test_id=row['test_id'],
                        order_code=row['order_code'],
                        order_name=row['order_name'],
                        test_code=row['test_code'],
                        test_name=row['test_name'],
                        organism=row['organism'],
                        result=row['test_result'],
                        sample_collected_datetime=row[
                            'sample_collected_date_time'],
                        sample_received_datetime=row[
                            'sample_received_date_time'],
                        result_datetime=row['result_datetime'],
                        specimen_site=row['specimen_site'],
                    )

                    inserts.append(v)
                    cnt += 1

                    if cnt % 1000 == 0:
                        logging.info(f"Saving Test batch.  total = {cnt}")
                        session.add_all(inserts)
                        inserts = []
                        session.commit()

            session.add_all(inserts)
            session.commit()
Example #10
0
    def do_etl(self):
        inserts = []
        updates = []

        with hic_covid_session() as session:

            with uhl_dwh_databases_engine() as conn:
                rs = conn.execute(COVID_DEMOGRAPHICS_SQL)
                for row in rs:
                    d = session.query(Demographics).filter_by(
                        uhl_system_number=row['uhl_system_number']
                    ).one_or_none()

                    if d is None:
                        d = Demographics(
                            uhl_system_number=row['uhl_system_number'])
                        inserts.append(d)
                    else:
                        updates.append(d)

                    d.nhs_number = row['nhs_number']
                    d.gp_practice = row['gp_practice']
                    d.age = row['age']
                    d.date_of_birth = row['date_of_birth']
                    d.date_of_death = row['date_of_death']
                    d.postcode = row['postcode']
                    d.sex = row['sex']
                    d.ethnic_category = row['ethnic_category']

            if len(inserts) > 0:
                url_parts = urlparse(IDENTITY_HOST)
                url_parts = url_parts._replace(
                    query=urlencode({'api_key': IDENTITY_API_KEY}),
                    path='api/create_pseudorandom_ids',
                )
                url = urlunparse(url_parts)

                # Do not verify locally signed certificate
                ids = requests.post(url,
                                    json={
                                        'prefix': 'HCVPt',
                                        'id_count': len(inserts)
                                    },
                                    verify=False)

                for id, d in zip(ids.json()['ids'], inserts):
                    d.participant_identifier = id

            session.add_all(inserts)
            session.add_all(updates)
            session.commit()
Example #11
0
    def do_etl(self):
        inserts = []
        cnt = 0

        with hic_covid_session() as session:
            session.execute(f"TRUNCATE TABLE {Episode.__tablename__};")

        with hic_covid_session() as session:
            with uhl_dwh_databases_engine() as conn:
                rs = conn.execute(COVID_EPISODE_SQL)
                for row in rs:
                    v = Episode(episode_id=row['episode_id'], )

                    v.spell_id = row['spell_id']
                    v.uhl_system_number = row['uhl_system_number']
                    v.admission_datetime = row['admission_datetime']
                    v.discharge_datetime = row['discharge_datetime']
                    v.order_no_of_episode = row['order_no_of_episode']
                    v.admission_method_code = row['admission_method_code']
                    v.admission_method_name = row['admission_method_name']
                    v.admission_source_code = row['admission_source_code']
                    v.admission_source_name = row['admission_source_name']
                    v.discharge_method_code = row['discharge_method_code']
                    v.discharge_method_name = row['discharge_method_name']
                    v.treatment_function_code = row['treatment_function_code']
                    v.treatment_function_name = row['treatment_function_name']

                    inserts.append(v)
                    cnt += 1

                    if cnt % 1000 == 0:
                        logging.info(f"Saving Episode batch. Total = {cnt}")
                        session.add_all(inserts)
                        inserts = []
                        session.commit()

            session.add_all(inserts)
            session.commit()
Example #12
0
    def do_etl(self):
        inserts = []
        cnt = 0

        with hic_covid_session() as session:
            session.execute(f"TRUNCATE TABLE `{Prescribing.__tablename__}`;")

        with hic_covid_session() as session:
            with uhl_dwh_databases_engine() as conn:
                rs = conn.execute(COVID_PRESCRIBING_SQL)
                for row in rs:
                    v = Prescribing(
                        uhl_system_number=row['uhl_system_number'],
                        order_id=row['order_id'],
                        method_name=row['method_name'],
                        order_type=row['order_type'],
                        medication_name=row['medication_name'],
                        min_dose=row['min_dose'],
                        max_does=row['max_dose'],
                        frequency=row['frequency'],
                        form=['form'],
                        does_units=row['dose_units'],
                        route=row['route'],
                        ordered_datetime=row['ordered_datetime'],
                    )

                    inserts.append(v)
                    cnt += 1

                    if cnt % 1000 == 0:
                        logging.info(f"Saving medication.  total = {cnt}")
                        session.add_all(inserts)
                        inserts = []
                        session.commit()

            session.add_all(inserts)
            session.commit()
Example #13
0
    def do_etl(self):
        inserts = []
        cnt = 0

        with hic_covid_session() as session:
            session.execute(f"TRUNCATE TABLE `{BloodTest.__tablename__}`;")

        with hic_covid_session() as session:
            with uhl_dwh_databases_engine() as conn:
                rs = conn.execute(COVID_BLOODS_SQL)
                for row in rs:
                    v = BloodTest(test_id=row['test_id'], )

                    v.uhl_system_number = row['uhl_system_number']
                    v.test_code = row['test_code']
                    v.test_name = row['test_name']
                    v.result = row['result']
                    v.result_expansion = row['result_expansion']
                    v.result_units = row['result_units']
                    v.sample_collected_datetime = row[
                        'sample_collected_datetime']
                    v.result_datetime = row['result_datetime']
                    v.lower_range = row['lower_range']
                    v.higher_range = row['higher_range']
                    v.receive_datetime = row['receive_datetime']

                    inserts.append(v)
                    cnt += 1

                    if cnt % 1000 == 0:
                        logging.info(f"Saving Test batch.  total = {cnt}")
                        session.add_all(inserts)
                        inserts = []
                        session.commit()

            session.add_all(inserts)
            session.commit()
Example #14
0
    def do_etl(self):
        inserts = []
        cnt = 0

        with hic_covid_session() as session:
            session.execute(f"TRUNCATE TABLE {Emergency.__tablename__};")

        with hic_covid_session() as session:
            with uhl_dwh_databases_engine() as conn:
                rs = conn.execute(COVID_EMERGENCY_SQL)
                for row in rs:
                    v = Emergency(visitid=row['visitid'], )

                    v.uhl_system_number = row['PP_IDENTIFIER']
                    v.arrival_datetime = self._date_and_time(
                        row['ARRIVAL_DATE'], row['ARRIVAL_TIME'])
                    v.departure_datetime = self._date_and_time(
                        row['DISCHARGE_DATE'], row['DISCHARGE_TIME'])
                    v.arrival_mode_code = row['PP_ARRIVAL_TRANS_MODE_CODE']
                    v.arrival_mode_text = row['PP_ARRIVAL_TRANS_MODE_NAME']
                    v.departure_code = row['PP_DEP_DEST_ID']
                    v.departure_text = row['PP_DEP_DEST']
                    v.complaint_code = row['PP_PRESENTING_PROBLEM_CODE']
                    v.complaint_text = row['PP_PRESENTING_PROBLEM']

                    inserts.append(v)
                    cnt += 1

                    if cnt % 1000 == 0:
                        logging.info(f"Saving Emergency batch. Total = {cnt}")
                        session.add_all(inserts)
                        inserts = []
                        session.commit()

            session.add_all(inserts)
            session.commit()
Example #15
0
    def do_etl(self):
        count = 0

        with hic_covid_session() as session:
            for x in session.query(self.cls_).yield_per(1000):
                count += 1

                if count % 1000 == 0:
                    self.log(f'{count:,} records checked')

                for c in self.columns:
                    value = getattr(x, c)

                    self.contains_uhl_system_number(c, value)
                    self.contains_postcode(c, value)
                    self.contains_nhs_number(c, value)
                    self.contains_dob(c, value)
                    self.contains_name(c, value)

        self.log(f'{count:,} records checked')

        report = ''
        for c in self.columns:
            if self.errors[c]['UHL System Number']:
                report += f'Column "{c}" may contain a UHL System Number\n'
            if self.errors[c]['postcode']:
                report += f'Column "{c}" may contain a postcode\n'
            if self.errors[c]['NHS Number']:
                report += f'Column "{c}" may contain an NHS Number\n'
            if self.errors[c]['Date of Birth']:
                report += f'Column "{c}" may contain a Date of Birth\n'
            if len(self.errors[c]['Name']) > 0:
                report += f'Column "{c}" may contain the names: {", ".join(self.errors[c]["Name"])}\n'

        self.log(
            message='Validation report',
            attachment=report,
        )