Esempio n. 1
0
def run_retraction_cron():
    """Run the retraction job against BigQuery and then the GCS buckets.

    The project id, HPO id and the names of the two id files are obtained
    from ``bq_utils``. Ids are read from each file with
    ``retract_data_bq.extract_pids_from_file``. BigQuery retraction runs
    first for the research ids (``deid_flag=True``) and then for the
    person ids (``deid_flag=False``). Finally the person ids are retracted
    from the DRC bucket and the HPO bucket via ``retract_data_gcs``.

    :return: the literal string ``'retraction-complete'``
    """
    app_id = bq_utils.app_identity.get_application_id()
    site_id = bq_utils.get_retraction_hpo_id()
    pid_file_name = bq_utils.get_retraction_person_ids_file_name()
    rid_file_name = bq_utils.get_retraction_research_ids_file_name()
    pids = retract_data_bq.extract_pids_from_file(pid_file_name)
    rids = retract_data_bq.extract_pids_from_file(rid_file_name)

    # Each entry: (start message, ids to retract, deid flag, done message).
    # Research ids are processed before person ids.
    bq_passes = (
        ('Running retraction on research_ids', rids, True,
         'Completed retraction on research_ids'),
        ('Running retraction on person_ids', pids, False,
         'Completed retraction on person_ids'),
    )
    for start_msg, ids, is_deid, done_msg in bq_passes:
        logging.info(start_msg)
        retract_data_bq.run_retraction(app_id,
                                       ids,
                                       site_id,
                                       deid_flag=is_deid)
        logging.info(done_msg)

    # Bucket handles are only looked up once the BigQuery passes are done.
    drc_bucket = gcs_utils.get_drc_bucket()
    site_bucket = gcs_utils.get_hpo_bucket(site_id)
    logging.info('Running retraction from bucket folders')
    retract_data_gcs.run_retraction(pids,
                                    drc_bucket,
                                    site_id,
                                    site_bucket,
                                    folder=None,
                                    force_flag=True)
    logging.info('Completed retraction from bucket folders')
    return 'retraction-complete'
Esempio n. 2
0
def run_retraction_cron():
    """Run retraction on the BigQuery datasets and internal bucket folders.

    All settings (project ids, HPO id, pid table id, sandbox dataset id,
    target dataset ids and submission folder) are obtained from
    ``bq_utils``. BigQuery retraction is run twice: once against the output
    project and once against the application's own project. GCS retraction
    follows, with ``force_flag=True``.

    :return: the literal string ``'retraction-complete'``
    """
    project_id = bq_utils.app_identity.get_application_id()
    output_project_id = bq_utils.get_output_project_id()
    hpo_id = bq_utils.get_retraction_hpo_id()
    pid_table_id = bq_utils.get_retraction_pid_table_id()
    sandbox_dataset_id = bq_utils.get_retraction_sandbox_dataset_id()

    # retract from bq
    dataset_ids = bq_utils.get_retraction_dataset_ids()
    # Pass the value as a logging argument instead of using the `%`
    # operator: `'%s' % value` misbehaves when the value is a tuple, and
    # logging defers the formatting until the record is actually emitted.
    logging.info('Dataset id/s to target from env variable: %s', dataset_ids)
    logging.info('Running retraction on BQ datasets')
    # retract from output dataset
    retract_data_bq.run_bq_retraction(output_project_id, sandbox_dataset_id,
                                      project_id, pid_table_id, hpo_id,
                                      dataset_ids)
    # retract from default dataset
    retract_data_bq.run_bq_retraction(project_id, sandbox_dataset_id,
                                      project_id, pid_table_id, hpo_id,
                                      dataset_ids)
    logging.info('Completed retraction on BQ datasets')

    # retract from gcs
    folder = bq_utils.get_retraction_submission_folder()
    logging.info('Submission folder/s to target from env variable: %s',
                 folder)
    logging.info('Running retraction from internal bucket folders')
    retract_data_gcs.run_gcs_retraction(project_id,
                                        sandbox_dataset_id,
                                        pid_table_id,
                                        hpo_id,
                                        folder,
                                        force_flag=True)
    logging.info('Completed retraction from internal bucket folders')
    return 'retraction-complete'
Esempio n. 3
0
def run_retraction_cron():
    """Run BigQuery retraction for the configured research and person ids.

    The project id, HPO id and the names of the two id files are obtained
    from ``bq_utils``. Ids are read from each file with
    ``retract_data_bq.extract_pids_from_file``. Research ids are retracted
    first with ``deid_flag=True``, then person ids with ``deid_flag=False``.

    :return: the literal string ``'retraction-complete'``
    """
    app_id = bq_utils.app_identity.get_application_id()
    site_id = bq_utils.get_retraction_hpo_id()
    pid_file_name = bq_utils.get_retraction_person_ids_file_name()
    rid_file_name = bq_utils.get_retraction_research_ids_file_name()
    pids = retract_data_bq.extract_pids_from_file(pid_file_name)
    rids = retract_data_bq.extract_pids_from_file(rid_file_name)

    def _retract(ids, is_deid, start_msg, done_msg):
        # One retraction pass, bracketed by its start/finish log lines.
        logging.info(start_msg)
        retract_data_bq.run_retraction(app_id,
                                       ids,
                                       site_id,
                                       deid_flag=is_deid)
        logging.info(done_msg)

    _retract(rids, True,
             'Running retraction on research_ids',
             'Completed retraction on research_ids')
    _retract(pids, False,
             'Running retraction on person_ids',
             'Completed retraction on person_ids')
    return 'retraction-complete'