Example #1
def init_runtime_config():
    """
    初始化运行时配置文件
    """
    runrc_path = os.path.join(ENV['home'], RUNTIME_CONFIG_FILE_NAME)
    if not os.path.exists(runrc_path):
        FileHelper.create_empty_file(runrc_path)
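
FileHelper's implementation is not shown on this page. A minimal sketch of what a create_empty_file helper like the one above might look like, assuming it only needs to touch the file (the real FileHelper may differ):

from pathlib import Path

class FileHelper:
    @staticmethod
    def create_empty_file(path):
        # Create the file if it does not already exist; keep existing content
        Path(path).touch(exist_ok=True)
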
Example #2
    def get_chartevents_by_pthadmicu(self,
                                     subject_ids=None,
                                     hadm_ids=None,
                                     icustay_ids=None):
        """ Retrieve CHARTEVENTS matching the give hospital admission
        """

        ### Conditions
        criteria = {}
        if subject_ids is not None:
            criteria[self.config['PREFIX_CHEV'] + 'SUBJECT_ID'] = subject_ids
        if hadm_ids is not None:
            criteria[self.config['PREFIX_CHEV'] + 'HADM_ID'] = hadm_ids
        if icustay_ids is not None:
            criteria[self.config['PREFIX_CHEV'] + 'ICUSTAY_ID'] = icustay_ids

        ### Cap the number of rows read, if a limit is configured
        if self.config['PARAM']['LIMIT_NUM_CHARTEVENTS'] > 0:
            criteria[self.config['CONST']['N_ROWS']] = self.config['PARAM'][
                'LIMIT_NUM_CHARTEVENTS']

        chartevent = ChartEvent(**self.config)
        df_chartevs = chartevent.get_chartevents_by_phadmicu(criteria)

        ### Save chartevents to file
        filename = self.config['OUT_DIR_S1'] + self.config['OUT_FNAME'][
            'CHARTEVENTS']
        FileHelper.save_to_csv(df_chartevs, filename)

        return df_chartevs
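
The ChartEvent loader itself is not shown; presumably it filters the raw table by the criteria mapping built above. A hedged sketch of that filtering pattern with pandas (the column names and the row-cap key are assumptions inferred from this snippet):

import pandas as pd

def filter_by_criteria(df, criteria, nrows_key='nrows'):
    criteria = dict(criteria)          # do not mutate the caller's dict
    nrows = criteria.pop(nrows_key, None)
    if nrows:
        df = df.head(nrows)            # apply the optional row cap first
    for column, values in criteria.items():
        df = df[df[column].isin(values)]   # keep rows matching each id list
    return df
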
Example #3
    def __init__(self, base_url, access_token, s3_bucket):
        super().__init__(base_url, access_token, s3_bucket)

        self.program_name = "open"
        self.project_code = "ChestX-ray8"

        self.metadata_helper = MetadataHelper(
            base_url=self.base_url,
            program_name=self.program_name,
            project_code=self.project_code,
            access_token=access_token,
        )

        self.file_helper = FileHelper(
            base_url=self.base_url,
            program_name=self.program_name,
            project_code=self.project_code,
            access_token=access_token,
        )

        self.cmc_submitter_id = format_submitter_id("cmc_chestxray8", {})
        self.core_metadata_collection = [{
            "submitter_id": self.cmc_submitter_id,
            "projects": [{"code": self.project_code}],
        }]
        self.imaging_file = []
Example #4
def generate_triage_history_data(db_conn, project_name, file_path):
    # triage_history_sql = "SELECT * FROM `automation_case_results` where triage_result is not NULL and error_type_id in (select id from error_types where name in ('Product Error', 'Product Change')) and automation_script_result_id in (select id from automation_script_results where triage_result is not NULL and automation_script_id in (select id from automation_scripts where project_id=2))"
    # triage_history_sql = "SELECT * FROM `automation_case_results` where error_type_id in (select id from error_types) and automation_script_result_id in (select id from automation_script_results where automation_script_id in (select id from automation_scripts where project_id=2))"
    triage_history_sql = """
        SELECT tr.id as round_id, acr.automation_case_id, asr.automation_script_id, te.name as env, b.name as browser, et.name as triage_type, acr.error_message, (UNIX_TIMESTAMP(asr.end_time)-UNIX_TIMESTAMP(asr.start_time)) as script_duration FROM `automation_case_results` as acr
        left join `automation_script_results` as asr on acr.automation_script_result_id=asr.id
        left join `test_rounds` as tr on asr.test_round_id=tr.id
        left join `test_environments` as te on tr.test_environment_id=te.id
        left join `browsers` as b on tr.browser_id=b.id
        left join `projects` as p on p.id=tr.project_id
        left join `error_types` as et on et.id=acr.error_type_id
        where p.name='%s' and et.name is not NULL  
        ORDER BY `round_id`  ASC
    """ % project_name
    print("generate triage history data of project: %s" % project_name)
    triage_history_data = db_conn.get_all_results_from_database(
        triage_history_sql)
    if len(triage_history_data) == 0:
        print("no triage history in project: %s" % project_name)
        return False
    else:
        FileHelper.save_db_query_result_to_csv(triage_history_data, file_path)
        print(
            "there are %d rows in database when querying the triage history of project: %s\n"
            % (len(triage_history_data), project_name))
        return True
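
Interpolating project_name into the SQL with % is injection-prone and breaks on names containing quotes. If db_conn wraps a DB-API-style connection (an assumption; its interface is not shown here), the value can be bound as a parameter instead:

def fetch_triage_history(connection, project_name):
    sql = """
        SELECT tr.id AS round_id, et.name AS triage_type, acr.error_message
        FROM automation_case_results AS acr
        LEFT JOIN automation_script_results AS asr
               ON acr.automation_script_result_id = asr.id
        LEFT JOIN test_rounds AS tr ON asr.test_round_id = tr.id
        LEFT JOIN projects AS p ON p.id = tr.project_id
        LEFT JOIN error_types AS et ON et.id = acr.error_type_id
        WHERE p.name = %s AND et.name IS NOT NULL
        ORDER BY round_id ASC
    """
    with connection.cursor() as cursor:
        # the driver escapes the bound value, unlike Python %-formatting
        cursor.execute(sql, (project_name,))
        return cursor.fetchall()
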
Example #5
    def __init__(self, base_url, access_token, s3_bucket):
        super().__init__(base_url, access_token, s3_bucket)

        self.program_name = "open"
        self.project_code = "COXRAY"
        self.metadata_helper = MetadataHelper(
            base_url=self.base_url,
            program_name=self.program_name,
            project_code=self.project_code,
            access_token=access_token,
        )

        self.file_helper = FileHelper(
            base_url=self.base_url,
            program_name=self.program_name,
            project_code=self.project_code,
            access_token=access_token,
        )

        self.nodes = {
            "core_metadata_collection": [],
            "study": [],
            "subject": [],
            "observation": [],
            "follow_up": [],
            "demographic": [],
            "imaging_file": [],
        }
Example #6
    def get_outputevents_by_pthadmicu(self, subject_ids=None, hadm_ids=None, icustay_ids=None):
        """ Retrieve OUTPUTEVENTS matching the give hospital admission

        Pararmeters
        -----------
            subject_ids : the list of patient id
            hamd_ids : the list hospital admission stay id

        Return
        ------

        """

        ### Conditions
        criteria = {}
        if subject_ids is not None:
            criteria[self.config['PREFIX_OUEV'] + 'SUBJECT_ID'] = subject_ids
        if hadm_ids is not None:
            criteria[self.config['PREFIX_OUEV'] + 'HADM_ID'] = hadm_ids
        if icustay_ids is not None:
            criteria[self.config['PREFIX_OUEV'] + 'ICUSTAY_ID'] = icustay_ids

        ### Optionally cap the number of rows read, e.g.:
        # criteria['nrows'] = 4500000
        outputevs = OutputEvent(**self.config)
        df_outputevs = outputevs.get_outputevents_by_pthadmicu(criteria)

        ### Save OUTPUTEVENTS to file
        filename = self.config['OUT_DIR_S1'] + self.config['OUT_FNAME']['OUTPUTEVENTS']
        FileHelper.save_to_csv(df_outputevs, filename)

        return df_outputevs
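
The commented-out criteria['nrows'] suggests the event loaders forward a row cap straight to the CSV reader. A minimal sketch of that pattern, assuming the MIMIC tables are read with pandas (the actual OutputEvent loader is not shown):

import pandas as pd

def read_events(csv_path, criteria):
    # Forward an optional row cap to the reader; None means read everything
    return pd.read_csv(csv_path, nrows=criteria.get('nrows'))
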
Example #7
class COXRAY_FILE(base.BaseETL):
    def __init__(self, base_url, access_token, s3_bucket):
        super().__init__(base_url, access_token, s3_bucket)

        self.program_name = "open"
        self.project_code = "COXRAY"

        self.file_helper = FileHelper(
            base_url=self.base_url,
            program_name=self.program_name,
            project_code=self.project_code,
            access_token=access_token,
        )

    def files_to_submissions(self):
        for image_filepath in Path(COXRAY_DATA_PATH).joinpath(
                "images").iterdir():
            did, rev, md5, size = self.file_helper.find_by_name(
                image_filepath.name)
            if not did:
                guid = self.file_helper.upload_file(image_filepath)
                print(f"file {image_filepath.name} uploaded with guid: {guid}")
            else:
                print(
                    f"file {image_filepath.name} exists in indexd... skipping..."
                )

    def submit_metadata(self):
        pass
Example #8
def generate_test_round_results_data(db_conn,
                                     file_path,
                                     round_id=None,
                                     script_id=None,
                                     case_id=None):
    # test_round_results_sql = "SELECT * FROM automation_case_results where automation_script_result_id in (2609677, 2609831, 2609879, 2609971, 2610080, 2610095, 2610333, 2610366, 2610380, 2610415, 2609629, 2609636, 2609638, 2609644, 2609651, 2609663);"
    if case_id:
        test_round_results_sql = "SELECT * FROM automation_case_results where id=%d;" % int(
            case_id)
    elif script_id:
        test_round_results_sql = "SELECT * FROM automation_case_results where automation_script_result_id=%d;" % int(
            script_id)
    else:
        test_round_results_sql = "SELECT * FROM automation_case_results where automation_script_result_id in (select id from automation_script_results where test_round_id=%d);" % int(
            round_id)
    print("generate test round all results data")
    test_round_results = db_conn.get_all_results_from_database(
        test_round_results_sql)
    if len(test_round_results) == 0:
        print("no result for this query (round_id: %s, script_id: %s, case_id: %s)"
              % (round_id, script_id, case_id))
        return False
    else:
        FileHelper.save_db_query_result_to_csv(test_round_results, file_path)
        print(
            "there are %d rows in database when querying all results of the round\n"
            % len(test_round_results))
        return True
Example #9
    def __output_num_patient_by_limit(self, df_pt_adm_icu_outevs_charevs):

        if self.config['PARAM']['LIMIT_NUM_PATIENT'] > 0:
            num_patients = self.config['PARAM']['LIMIT_NUM_PATIENT']

            # Get unique subject_id from columns SUBJECT_ID
            list_unique_subject_id = df_pt_adm_icu_outevs_charevs[
                'SUBJECT_ID'].unique().tolist()
            # Sample num_patients unique SUBJECT_IDs without replacement
            ran_idx = np.random.choice(len(list_unique_subject_id),
                                       num_patients,
                                       replace=False)
            # Map the sampled indices back to SUBJECT_ID values via the
            # unique-id list
            ran_subject_id = [list_unique_subject_id[idx] for idx in ran_idx]
            # Filter only matching subject_id in the list
            mask = df_pt_adm_icu_outevs_charevs['SUBJECT_ID'].isin(
                ran_subject_id)
            df_events_by_patient = df_pt_adm_icu_outevs_charevs[mask]

            # Save to File
            filename = self.config['OUT_DIR_S2'] + self.config['OUT_FNAME'][
                'OUT_LIMIT_NUM_EVENTS_WINSIZE_24H']
            FileHelper.save_to_csv(df_events_by_patient, filename)
Example #10
    def __init__(self, base_url, access_token, s3_bucket):
        super().__init__(base_url, access_token, s3_bucket)

        self.program_name = "open"
        self.project_code = "COXRAY"

        self.file_helper = FileHelper(
            base_url=self.base_url,
            program_name=self.program_name,
            project_code=self.project_code,
            access_token=access_token,
        )
Example #11
def generate_regression_history_data(db_conn, project_id, file_path):
    generate_flag = Config.load_env("generate_regression_history")
    if not os.path.exists(file_path):
        generate_flag = True
    if generate_flag:
        print("generate history regression data")
        # select 12 months of history data for reference
        period_regression_sql = "select * from test_rounds where project_id=%d and DATE_SUB(CURDATE(), INTERVAL 12 MONTH) <= date(start_time) and end_time is not NULL;" % int(project_id)
        period_regression_history = db_conn.get_all_results_from_database(period_regression_sql)
        FileHelper.save_db_query_result_to_csv(period_regression_history, file_path)
        print("there are %d rows in database when querying the history\n" % len(period_regression_history))
    else:
        print("skip generating history regression data\n")
Example #12
def convert_code():
    args = arg_parser.parse_args()
    source_filename = args.source
    destination_filename = args.dest
    result = inputs.json_file(source_filename, converters.to_html)

    if destination_filename is None:
        print(result)
    else:
        FileHelper.write_to_file(destination_filename, result)
        print(
            f'OK. JSON File `{source_filename}` converted to `{destination_filename}`'
        )
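
convert_code relies on an arg_parser defined elsewhere in the module. A plausible minimal definition, with the option names inferred from args.source and args.dest (the real parser may differ):

import argparse

arg_parser = argparse.ArgumentParser(description='Convert a JSON file to HTML.')
arg_parser.add_argument('--source', required=True,
                        help='path of the JSON file to convert')
arg_parser.add_argument('--dest', default=None,
                        help='output file; result is printed when omitted')
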
Example #13
    def get_patients_by_hamd(self, ids):
        """
            Get Patients based on list of SUBJECT ID
        """

        patient = Patient(**self.config)
        df_pts = patient.get_patients_by_ids(ids)

        ### Save Patients to file
        filename = self.config['OUT_DIR_S1'] + self.config['OUT_FNAME']['PATIENTS']
        FileHelper.save_to_csv(df_pts, filename)

        return df_pts
Example #14
def generate_test_round_errors_data(db_conn, round_id, file_path):
    test_round_errors_sql = "SELECT * FROM automation_case_results where automation_script_result_id in (select id from automation_script_results where test_round_id=%d) and result = 'failed';" % int(
        round_id)
    print("generate test round errors data")
    test_round_errors = db_conn.get_all_results_from_database(
        test_round_errors_sql)
    if len(test_round_errors) == 0:
        print("no errors in this test round with id: %d" % int(round_id))
        return False
    else:
        FileHelper.save_db_query_result_to_csv(test_round_errors, file_path)
        print("there are %d rows in database when query the round error\n" %
              len(test_round_errors))
        return True
Example #15
def generate_triage_history_data(db_conn, project_name, file_path):
    # triage_history_sql = "SELECT * FROM `automation_case_results` where triage_result is not NULL and error_type_id in (select id from error_types where name in ('Product Error', 'Product Change')) and automation_script_result_id in (select id from automation_script_results where triage_result is not NULL and automation_script_id in (select id from automation_scripts where project_id=2))"
    # triage_history_sql = "SELECT * FROM `automation_case_results` where error_type_id in (select id from error_types) and automation_script_result_id in (select id from automation_script_results where automation_script_id in (select id from automation_scripts where project_id=2))"
    triage_history_sql = "select * from prejudge_seeds where project_name='%s'" % project_name
    print("generate triage history data")
    triage_history = db_conn.get_all_results_from_database(triage_history_sql)
    if len(triage_history) == 0:
        print("no triage history in project: %s" % project_name)
        return False
    else:
        FileHelper.save_db_query_result_to_csv(triage_history, file_path)
        print(
            "there are %d rows in database when querying the triage history of project: %s\n"
            % (len(triage_history), project_name))
        return True
Example #16
    def merge_df(self, df_left, df_right, left, right, how, out_filename=None):
        """ Merge dataframe
        """

        ### Merge 2 tables Patients, Admissions and ICU Stays
        result = pd.merge(df_left, df_right, left_on=left, right_on=right, how=how)

        ### Save Admissions to file
        if out_filename is not None:
            ### filename = self.config['OUT_DIR_S1'] + out_filename
            FileHelper.save_to_csv(result, out_filename)
        else:
            filename = self.config['OUT_DIR_S1'] + 'merged_df.csv'
            FileHelper.save_to_csv(result, filename)

        return result
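
A hypothetical call inside the same class, merging a patients table with an admissions table through this helper (key and file names are illustrative only):

df_pts_adms = self.merge_df(
    df_pts, df_adms,
    left=['SUBJECT_ID'],
    right=[self.config['PREFIX_HADM'] + 'SUBJECT_ID'],
    how='inner',
    out_filename=self.config['OUT_DIR_S1'] + 'pts_adms.csv')
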
Example #17
    def get_adms(self, criteria=None):
        """ Read admissions groupby date and
        Choose admissions of the year during which contains biggest number of admission
        """
         ### criteria = {'nrows':10}
        admission = Admission(**self.config)

        ### Read admissions grouped by date and choose the admissions from
        ### the year containing the largest number of admissions
        # df_adms = admission.get_admissions_by_year(criteria)
        df_adms = admission.get_admissions(criteria)
        ### Limit number of patients based on condition LIMIT_NUM_PATIENT
        df_adms = self.__shape_num_patient_by_limit(df_adms)
        filename = self.config['OUT_DIR_S1'] + self.config['OUT_FNAME']['ADMISSIONS']
        FileHelper.save_to_csv(df_adms, filename)

        return df_adms
Example #18
    def get_cptevents_by_phamd(self, subject_ids, hadm_ids):
        """
            Get CPTEVENTS by SUBJECT_ID AND HADM_ID
        """

        criteria = {}
        criteria['SUBJECT_ID'] = subject_ids
        criteria['HADM_ID'] = hadm_ids

        cptevent = CPTEvent(**self.config)
        df_cptevents = cptevent.get_cptevents_by_subject_hamd(criteria)

        ### Save CPTEVENTS to file (note: the config reuses the ICUSTAYS output filename)
        filename = self.config['OUT_DIR_S1'] + self.config['OUT_FNAME']['ICUSTAYS']
        FileHelper.save_to_csv(df_cptevents, filename)

        return df_cptevents
Example #19
    def get_icustays_by_pthamd(self, subject_ids, hadm_ids):
        """
            Get CPTEVENTS by SUBJECT_ID AND HADM_ID
        """

        ### Conditions
        criteria = {}
        criteria[self.config['PREFIX_ICU'] + 'SUBJECT_ID'] = subject_ids
        criteria[self.config['PREFIX_ICU'] + 'HADM_ID'] = hadm_ids

        icustay = ICUStay(**self.config)
        df_icustays = icustay.get_icustays_by_subject_hamd(criteria)

        ### Save ICUStays filtered by Patients and Admissions
        filename = self.config['OUT_DIR_S1'] + self.config['OUT_FNAME']['ICUSTAYS']
        FileHelper.save_to_csv(df_icustays, filename)

        return df_icustays
Example #20
    def movefile_ins1_to_outs2(self):
        """ Move files from Output Step 1 to Input Step 2
            Three files to move: OUT_PTS_ADMS_ICUS, OUT_PTS_ADMS_ICUS, OUT_CHARTEVENTS
        """

        files_to_move = []
        files_to_move.append(self.config['OUT_DIR_S1'] + self.config['OUT_FNAME']['PTS_ADMS_ICU'])
        files_to_move.append(self.config['OUT_DIR_S1'] + self.config['OUT_FNAME']['CHARTEVENTS'])
        files_to_move.append(self.config['OUT_DIR_S1'] + self.config['OUT_FNAME']['OUTPUTEVENTS'])

        move_to = []
        move_to.append(self.config['FILE_DIR_S2'] + self.config['IN_FNAME']['CSV_OUT_PTS_ADMS_ICUS'])
        move_to.append(self.config['FILE_DIR_S2'] + self.config['IN_FNAME']['CSV_OUT_CHARTEVENTS'])
        move_to.append(self.config['FILE_DIR_S2'] + self.config['IN_FNAME']['CSV_OUT_OUTPUTEVENTS'])

        ### Move from Output Step 1 to Input Step 2
        for idx, src_file in enumerate(files_to_move):
            FileHelper.move_file(src_file, move_to[idx])
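
FileHelper.move_file is not shown on this page; a minimal sketch, assuming shutil semantics and that the destination directory may need creating:

import os
import shutil

def move_file(src, dst):
    # Ensure the destination directory exists, then move the file
    os.makedirs(os.path.dirname(dst), exist_ok=True)
    shutil.move(src, dst)
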
Example #21
    def get_ditems_outevents_by_itemid(self, item_id):
        """ Retrieve D_ITEMS of outputevents by item_id
        """

        ### Conditions
        criteria = {}
        criteria[self.config['PREFIX_DITEM'] + 'ITEMID'] = item_id

        ### Optionally cap the number of rows read, e.g.:
        ### criteria['nrows'] = 4500000

        ditem = DItem(**self.config)
        df_ditems = ditem.get_ditems_outevents_by_itemid(criteria)

        ### Save D_ITEMS to file
        filename = self.config['OUT_DIR_S1'] + self.config['OUT_FNAME']['D_ITEMS']
        FileHelper.save_to_csv(df_ditems, filename)

        return df_ditems
Example #22
    def execute(self, cmd):
        if len(cmd.cmd_simple_args) != 1:
            print("alias: wrong number of command arguments")
            return ShellStatus.RUN

        # Split on the first '=' only, so values may themselves contain '='
        arg = cmd.cmd_simple_args[0].split('=', 1)
        alias_cmd = arg[0]

        if '"' in cmd.raw_cmd:
            raw_cmd = cmd.raw_cmd[cmd.raw_cmd.index('"') +
                                  1:cmd.raw_cmd.rfind('"')]
        else:
            raw_cmd = arg[1]

        # If the 'w' option is present, write the alias to the runtime config (.yashrc)
        if 'w' in cmd.cmd_options:
            content = '\nalias %s="%s"\n' % (alias_cmd, raw_cmd)
            FileHelper.write_file_from_string(
                cmd.env['runtime_config_file_name'], content, 'w+')

        cmd.env['alias'][alias_cmd] = raw_cmd

        return ShellStatus.RUN
Example #23
class COXRAY(base.BaseETL):
    def __init__(self, base_url, access_token, s3_bucket):
        super().__init__(base_url, access_token, s3_bucket)

        self.program_name = "open"
        self.project_code = "COXRAY"
        self.metadata_helper = MetadataHelper(
            base_url=self.base_url,
            program_name=self.program_name,
            project_code=self.project_code,
            access_token=access_token,
        )

        self.file_helper = FileHelper(
            base_url=self.base_url,
            program_name=self.program_name,
            project_code=self.project_code,
            access_token=access_token,
        )

        self.nodes = {
            "core_metadata_collection": [],
            "study": [],
            "subject": [],
            "observation": [],
            "follow_up": [],
            "demographic": [],
            "imaging_file": [],
        }

    def files_to_submissions(self):
        with open(Path(COXRAY_DATA_PATH).joinpath("metadata.csv")) as f:
            reader = csv.reader(f, delimiter=",", quotechar='"')
            headers = next(reader)
            for row in reader:
                row_nodes = self.parse_row(headers, row)
                for k, v in row_nodes.items():
                    self.nodes[k].append(v)

    def parse_row(self, headers, row):
        cmc_submitter_id = format_submitter_id("cmc_coxray", {})
        subject_submitter_id = format_submitter_id(
            "subject_coxray", {"patientid": row[headers.index("patientid")]})
        observation_submitter_id = derived_submitter_id(
            subject_submitter_id, "subject_coxray", "observation_coxray", {})
        follow_up_submitter_id = derived_submitter_id(
            subject_submitter_id,
            "subject_coxray",
            "follow_up_coxray",
            {"offset": row[headers.index("offset")]},
        )
        demographic_submitter_id = derived_submitter_id(
            subject_submitter_id, "subject_coxray", "demographic_coxray", {})
        imaging_file_submitter_id = format_submitter_id(
            "imaging_file_coxray",
            {"filename": row[headers.index("filename")]})
        study_submitter_id = format_submitter_id(
            "study_coxray", {"doi": row[headers.index("doi")]})

        filename = row[headers.index("filename")]
        filename = Path(filename)
        filepath = Path(COXRAY_DATA_PATH).joinpath("images", filename)
        filepath_exist = filepath.exists()

        nodes = {
            "core_metadata_collection": {
                "submitter_id": cmc_submitter_id,
                "projects": [{
                    "code": self.project_code
                }],
            },
            "study": {
                "submitter_id": study_submitter_id,
                "projects": [{
                    "code": self.project_code
                }],
            },
            "subject": {
                "submitter_id": subject_submitter_id,
                "projects": [{
                    "code": self.project_code
                }],
                "studies": [{
                    "submitter_id": study_submitter_id
                }],
            },
            "observation": {
                "submitter_id": observation_submitter_id,
                "subjects": [{
                    "submitter_id": subject_submitter_id
                }],
            },
            "follow_up": {
                "submitter_id": follow_up_submitter_id,
                "subjects": [{
                    "submitter_id": subject_submitter_id
                }],
            },
            "demographic": {
                "submitter_id": demographic_submitter_id,
                "subjects": [{
                    "submitter_id": subject_submitter_id
                }],
            },
        }

        if filepath_exist:
            data_type = "".join(filename.suffixes)
            did, rev, md5sum, filesize = self.file_helper.find_by_name(
                filename=filename)
            assert did, f"file {filename} does not exist in the index, rerun COXRAY_FILE ETL"
            self.file_helper.update_authz(did=did, rev=rev)

            nodes["imaging_file"] = {
                "submitter_id": imaging_file_submitter_id,
                "subjects": [{
                    "submitter_id": subject_submitter_id
                }],
                "follow_ups": [{
                    "submitter_id": follow_up_submitter_id
                }],
                "core_metadata_collections": [{
                    "submitter_id": cmc_submitter_id
                }],
                "data_type": data_type,
                "data_format": "Image File",
                "data_category": "X-Ray Image",
                "file_size": filesize,
                "md5sum": md5sum,
                "object_id": did,
            }
        else:
            print(f"subject references a file that does not exist: {filepath}")

        for k, (node, field, converter) in fields_mapping.items():
            value = row[headers.index(k)]
            if node in nodes and value:
                if converter:
                    nodes[node][field] = converter(value)
                else:
                    nodes[node][field] = value

        return nodes

    def submit_metadata(self):
        print("Submitting data...")

        for k, v in self.nodes.items():
            submitter_id_exist = []
            print(f"Submitting {k} data...")
            for node in v:
                node_record = {"type": k}
                node_record.update(node)
                submitter_id = node_record["submitter_id"]
                if submitter_id not in submitter_id_exist:
                    submitter_id_exist.append(submitter_id)
                    self.metadata_helper.add_record_to_submit(node_record)
            self.metadata_helper.batch_submit_records()
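
parse_row looks every value up via headers.index(k), which rescans the header list on each access. csv.DictReader yields the same rows keyed by column name; a sketch of reading the metadata that way, using column names that appear above (patientid, filename, offset):

import csv
from pathlib import Path

def read_metadata(data_path):
    with open(Path(data_path).joinpath("metadata.csv")) as f:
        # each row is a dict keyed by the CSV header names
        for row in csv.DictReader(f, delimiter=",", quotechar='"'):
            yield row["patientid"], row["filename"], row["offset"]
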
Example #24
class CHESTXRAY8(base.BaseETL):
    def __init__(self, base_url, access_token, s3_bucket):
        super().__init__(base_url, access_token, s3_bucket)

        self.program_name = "open"
        self.project_code = "ChestX-ray8"

        self.metadata_helper = MetadataHelper(
            base_url=self.base_url,
            program_name=self.program_name,
            project_code=self.project_code,
            access_token=access_token,
        )

        self.file_helper = FileHelper(
            base_url=self.base_url,
            program_name=self.program_name,
            project_code=self.project_code,
            access_token=access_token,
        )

        self.cmc_submitter_id = format_submitter_id("cmc_chestxray8", {})
        self.core_metadata_collection = [{
            "submitter_id": self.cmc_submitter_id,
            "projects": [{"code": self.project_code}],
        }]
        self.imaging_file = []

    def files_to_submissions(self):
        for image_type in ("No_findings", "Pneumonia"):
            for image_filepath in (
                    Path(CHESTXRAY8_DATA_PATH).joinpath("COVID-19").joinpath(
                        "X-Ray Image DataSet").joinpath(image_type).iterdir()):
                did, rev, md5, size = self.file_helper.find_by_name(
                    image_filepath.name)
                if not did:
                    guid = self.file_helper.upload_file(image_filepath)
                    print(
                        f"file {image_filepath.name} uploaded with guid: {guid}"
                    )
                else:
                    print(
                        f"file {image_filepath.name} exists in indexd... skipping..."
                    )

                imaging_file_submitter_id = format_submitter_id(
                    "imaging_file_chestxray8",
                    {"filename": image_filepath.name})
                uploaded_imaging_file = {
                    "submitter_id": imaging_file_submitter_id,
                    "core_metadata_collections": [{
                        "submitter_id": self.cmc_submitter_id
                    }],
                    "data_type": "PNG",
                    "data_format": "Image File",
                    "data_category": "X-Ray Image",
                    "file_name": image_filepath.name,
                    "file_size": size,
                    "md5sum": md5,
                    "object_id": did,
                    "clinical_notes": image_type,
                }

                self.imaging_file.append(uploaded_imaging_file)

    def submit_metadata(self):
        print("Submitting data...")

        print("Submitting core_metadata_collection data")
        for cmc in self.core_metadata_collection:
            cmc_record = {"type": "core_metadata_collection"}
            cmc_record.update(cmc)
            self.metadata_helper.add_record_to_submit(cmc_record)
        self.metadata_helper.batch_submit_records()

        print("Submitting imaging_file data")
        for ifile in self.imaging_file:
            if_record = {"type": "imaging_file"}
            if_record.update(ifile)
            self.metadata_helper.add_record_to_submit(if_record)
        self.metadata_helper.batch_submit_records()
Example #25
    def start_process(self):
        """
            Read data from tables: PATIENTS, ADMISSIONS, CPTEVENT
        """

        # Read the merged patients/admissions/ICU-stays table and the output events
        df_pt_adm_icus = self.get_pt_hamd_icus()
        df_output_evs = self.get_outputevents()

        # Group output events by time window, then convert the dict to a dataframe
        pros_by_window = self.__grouppros_by_interval(
            df_pt_adm_icus,
            df_output_evs,
            ev_unit=self.config['CONST']['HUNIT_ICU'])
        dico_pros_dt, index = self.__pros_todico(pros_by_window)
        df_temp = pd.DataFrame(dico_pros_dt, index=index)
        # Save the output events grouped by date interval to file
        filename = self.config['OUT_DIR_S2'] + self.config['OUT_FNAME'][
            'OE_BY_DATE_INTERVAL']
        FileHelper.save_to_csv(df_temp, filename)

        # Merge the PATIENTS/ADMISSIONS/ICUSTAYS table with the grouped output events
        # Key columns:
        #   SUBJECT_ID : patient's ID
        #   HADM_SUBJECT_ID : patient's ID as named in the ADMISSIONS table
        col_subject_id = self.config['PREFIX_HADM'] + 'SUBJECT_ID'
        col_hadm_id = self.config['PREFIX_HADM'] + 'HADM_ID'
        col_icustay_id = self.config['PREFIX_ICU'] + 'ICUSTAY_ID'

        # Retrieve Outputevents
        # Conditions:
        # ----------
        #   col_subject_id, col_hadm_id, col_icustay_id
        left = [col_subject_id, col_hadm_id, col_icustay_id]
        right = ['subject_id', 'hadm_id', 'icustay_id']
        filename = self.config['OUT_DIR_S2'] + self.config['OUT_FNAME'][
            'PT_ADM_ICU_OUTEVENT']
        df_pt_adm_icu_outevent = self.merge_df(df_pt_adm_icus, df_temp, left=left, right=right,\
            how='right', out_filename=filename)

        df_chart_evs = self.get_chartevents()
        # Group chart events by time window, then convert the dict to a dataframe
        pros_by_window_2 = self.__grouppros_by_interval(df_pt_adm_icus, df_chart_evs,\
            ev_unit=self.config['CONST']['HUNIT_CHAREV'], outevent=False)
        dico_pros_dt2, index2 = self.__pros_todico(pros_by_window_2)
        df_temp2 = pd.DataFrame(dico_pros_dt2, index=index2)
        # Save the chart events grouped by date interval to file
        filename = self.config['OUT_DIR_S2'] + self.config['OUT_FNAME'][
            'CHEV_BY_DATE_INTERVAL']
        FileHelper.save_to_csv(df_temp2, filename)

        # Retrieve Chartevents
        # Conditions:
        # ----------
        #   col_subject_id, col_hadm_id, col_icustay_id
        left = [col_subject_id, col_hadm_id, col_icustay_id]
        right = ['subject_id', 'hadm_id', 'icustay_id']
        filename = self.config['OUT_DIR_S2'] + self.config['OUT_FNAME'][
            'PT_ADM_ICUS_CHAREVS']
        df_pt_adm_icu_charevs = self.merge_df(df_pt_adm_icus, df_temp2, left=left, right=right,\
            how='right', out_filename=filename)

        # Append the adm-icustay-outputevents dataframe to adm-icustay-chartevents
        # Conditions:
        # ----------
        #   OutputEvents (filtered by subject_id, hadm_id, icustay_id)
        #   & ChartEvents (filtered by subject_id, hadm_id, icustay_id)
        df_pt_adm_icu_outevs_charevs = df_pt_adm_icu_outevent.append(
            df_pt_adm_icu_charevs, sort=True)
        # Save df_pt_adm_icu_outevs_charevs to file
        filename = self.config['OUT_DIR_S2'] + self.config['OUT_FNAME'][
            'OUTEVENT_CHAREVS']
        FileHelper.save_to_csv(df_pt_adm_icu_outevs_charevs, filename)

        # Retrieve all rows in Admissions where (subject_id, hadm_id, icustay_id)
        # do not match those of U(outputevents, chartevents)
        # left = [col_subject_id, col_hadm_id, col_icustay_id]
        # right = ['subject_id', 'hadm_id', 'icustay_id']
        temp_df = df_pt_adm_icus[~df_pt_adm_icus[col_subject_id].isin(df_pt_adm_icu_outevs_charevs['subject_id'])\
            & ~df_pt_adm_icus[col_hadm_id].isin(df_pt_adm_icu_outevs_charevs['hadm_id'])\
            & ~df_pt_adm_icus[col_icustay_id].isin(df_pt_adm_icu_outevs_charevs['icustay_id'])]

        # Add columns to temp_df
        # Conditions
        # ---------
        # subject_id	hadm_id	icustay_id	unit	procedure
        # temp_df.loc[:, 'subject_id_1'] = temp_df[col_subject_id]
        # temp_df.loc[:, 'hadm_id_1'] = temp_df[col_hadm_id]
        # temp_df.loc[:, 'icustay_id_1'] = temp_df[col_icustay_id]
        # temp_df.loc[:, 'unit_1'] = ''
        # temp_df.loc[:, 'procedure_1'] = ''

        # Save the unmatched admission rows (temp_df) to file
        filename = self.config['OUT_DIR_S2'] + self.config['OUT_FNAME'][
            'TEMP_DF']
        FileHelper.save_to_csv(temp_df, filename)

        ##########
        # Final Merge
        ##########
        #
        df_pt_adm_icu_outevs_charevs = df_pt_adm_icu_outevs_charevs.append(
            temp_df, sort=True)
        #df_pt_adm_icu_outevs_charevs = temp_df.append(df_pt_adm_icu_outevs_charevs, sort=True)

        ### Count number of records
        self.config['PARAM']['NUM_ROWS'] = len(
            df_pt_adm_icu_outevs_charevs.index)

        # Save NUM_EVENTS_WINSIZE_24H to file
        # filename = self.config['OUT_DIR_S2'] + self.config['OUT_FNAME']['OUT_NUM_EVENTS_WINSIZE_24H']
        filename = self.config['OUT_DIR_S2'] + str(self.config['PARAM']['LIMIT_NUM_PATIENT']) + '_' +\
            self.config['OUT_FNAME']['OUT_NUM_EVENTS_WINSIZE_24H']
        FileHelper.save_to_csv(df_pt_adm_icu_outevs_charevs, filename)

        ### Shape the number of patients in the output based on the configured limit
        self.__output_num_patient_by_limit(df_pt_adm_icu_outevs_charevs)
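
Note that DataFrame.append, used several times above, was deprecated in pandas 1.4 and removed in 2.0. On current pandas the same appends can be written with pd.concat, for example:

import pandas as pd

df_pt_adm_icu_outevs_charevs = pd.concat(
    [df_pt_adm_icu_outevent, df_pt_adm_icu_charevs], sort=True)
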
Example #26
    def predict_youtubeauto(self):
        """
            Make predictiion for youtube auto
        """

        # Initialize decision tree
        dt = DecisionTreeYoutubeAuto(**self.config)

        # load json from file (feature data)
        filename = "youtube.dash.json"
        file_uri = FileHelper.dataset_path(self.config, filename)
        json_model = FileHelper.load_model_json(file_uri)

        # Generate X_test dataset
        X_test, df_userfbs = self.dataset_youtubeauto()

        # # Test with Mockup data
        # import random

        # dj_max = 124326180.9
        # uj_max = 128804961.5
        # ul_max = 1
        # dl_max = 1
        # uth_max = 29335149.8038362
        # dth_max = 18033919.2661197
        # rtt_max = 1000 #14226774236.96

        # X_test = []
        # number_X_test = 5
        # for i in np.arange(number_X_test):
        #     features_1 = None
        #     features_1 = {'idx': i,'RTT': random.uniform(0,rtt_max), 'DJ': random.uniform(0,dj_max), 'UJ':random.uniform(0, uj_max), 'DL': random.uniform(0,dl_max), 'UL': random.uniform(0, ul_max), 'DTH': random.uniform(0,dth_max), 'UTH': random.uniform(0,uth_max)}

        #     for k, v in features_1.items():
        #         if not k == 'idx':
        #             features_1[k] = round(v, 2)

        #     features_1['Userfeedback'] = 0
        #     features_1['Youtube_720P'] = 0

        #     # features_2 = {'DTH': randint(30000,4000000) ,'RTT': randint(500,300000), 'DJ': randint(0,1000), 'DL': randint(0,1000), 'UJ':randint(0,1000), 'UL': rand(), 'UTH': randint(1000,200000)}
        #     # features_3 = {'DTH': randint(1000000,4000000) ,'RTT': randint(1000,240000), 'DJ': randint(0,1000), 'DL': randint(0,1000), 'UJ':randint(0,1000), 'UL': rand(), 'UTH': randint(1000000,4000000)}
        #     X_test.append(features_1)

        # features_1 = {'idx': number_X_test,'RTT': 172211718, 'DL': 0.25, 'UL': 0.0, 'DJ': 1462940.373, 'UJ':967358.4, \
        #     'DTH': 6904033.241, 'UTH': 6688152.991, 'Userfeedback': 1, 'Youtube_720P': 1}
        # features_2 = {'idx': number_X_test+1,'RTT': 329342189.4, 'DL': 0.0, 'UL': 0.0, 'DJ': 2217979.68, 'UJ':4026196.84, \
        #     'DTH': 206766.1582, 'UTH':1152765.337, 'Userfeedback': 1, 'Youtube_720P': 1}
        # features_3 = {'idx': number_X_test+2,'RTT': 14226774237, 'DL': 0.0, 'UL': 0.0, 'DJ': 78439389.08, 'UJ':124548859.2, \
        # 'DTH': 41369.68321, 'UTH':34202.56526, 'Userfeedback': 1, 'Youtube_720P': 1}

        # features_4 = {'idx': number_X_test+3,'RTT': 14226774237, 'DL': 0.0, 'UL': 0.0, 'DJ': 78439389.08, 'UJ':124548859.2, \
        # 'DTH': 41369.68321, 'UTH':34202.56526, 'Userfeedback': 1, 'Youtube_720P': 1}

        # features_5 = {'idx': number_X_test+4, 'RTT': 3303547790, 'DL': 0.0, 'UL': 0.0, 'DJ': 11294340.32, 'UJ': 27797712.96, \
        # 'DTH': 110344.9385, 'UTH': 32443.88377, 'Userfeedback': 2, 'Youtube_720P': 2}

        # X_test.append(features_1)
        # X_test.append(features_2)
        # X_test.append(features_3)
        # X_test.append(features_4)
        # X_test.append(features_5)

        # Transform the feature list into a dataframe (replaces df_userfbs)
        df_userfbs = pd.DataFrame(X_test)

        # Run the prediction
        estimated_mos = dt.predict(json_model, X_test)

        # Create dataframe from estimated_mos
        # Save for testing purpose
        df_estimated_qoe = pd.DataFrame(estimated_mos)
        filename = self.config['OUT_DIR'] + 'Estimated_MOS_YoutubeAuto_New.csv'
        df_estimated_qoe.to_csv(filename)

        # Merge the X_test features with the estimated QoE and save for testing
        df_merged = pd.merge(df_userfbs,
                             df_estimated_qoe,
                             left_index=True,
                             right_index=True)
        df_merged = df_merged.drop(['idx_x', 'idx_y'], axis='columns')
        filename = self.config[
            'OUT_DIR'] + 'DATASET_MOSUSERFEEDBACK_QOE_YOUTUBEAUTO.csv'
        df_merged.to_csv(filename)

        # Print the results
        dt.print(estimated_mos)
        return estimated_mos
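
FileHelper.load_model_json presumably just deserializes the stored decision-tree model; a minimal sketch under that assumption:

import json

def load_model_json(file_uri):
    # Read the serialized decision-tree model from disk
    with open(file_uri) as f:
        return json.load(f)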