def run_retraction_cron():
    """
    Run retraction against BigQuery and then against the bucket folders.

    The ids to retract are read from two files whose names come from
    bq_utils: one for person ids and one for research ids. BigQuery
    retraction runs first for the research ids (deid_flag=True) and then for
    the person ids (deid_flag=False). Afterwards the person ids are retracted
    from the bucket folders with folder=None and force_flag=True.

    :return: the string 'retraction-complete'
    """
    app_id = bq_utils.app_identity.get_application_id()
    site_id = bq_utils.get_retraction_hpo_id()

    # Resolve both file names first, then read the ids out of each file.
    pid_file_name = bq_utils.get_retraction_person_ids_file_name()
    rid_file_name = bq_utils.get_retraction_research_ids_file_name()
    pids = retract_data_bq.extract_pids_from_file(pid_file_name)
    rids = retract_data_bq.extract_pids_from_file(rid_file_name)

    # BigQuery pass 1: research ids, with the deid flag set.
    logging.info('Running retraction on research_ids')
    retract_data_bq.run_retraction(app_id, rids, site_id, deid_flag=True)
    logging.info('Completed retraction on research_ids')

    # BigQuery pass 2: person ids, with the deid flag cleared.
    logging.info('Running retraction on person_ids')
    retract_data_bq.run_retraction(app_id, pids, site_id, deid_flag=False)
    logging.info('Completed retraction on person_ids')

    # Bucket pass: only the person ids are handed to the GCS retraction.
    drc_bucket = gcs_utils.get_drc_bucket()
    site_bucket = gcs_utils.get_hpo_bucket(site_id)
    logging.info('Running retraction from bucket folders')
    retract_data_gcs.run_retraction(pids,
                                    drc_bucket,
                                    site_id,
                                    site_bucket,
                                    folder=None,
                                    force_flag=True)
    logging.info('Completed retraction from bucket folders')

    return 'retraction-complete'
def run_retraction_cron():
    """
    Run retraction against BigQuery datasets and then internal bucket folders.

    All settings (project ids, hpo id, pid table id, sandbox dataset id,
    target dataset ids and submission folder) are read through bq_utils.
    BigQuery retraction is run twice with the same arguments apart from the
    first one: once with the output project id and once with the application
    (default) project id. GCS retraction then runs with force_flag=True.

    :return: the string 'retraction-complete'
    """
    project_id = bq_utils.app_identity.get_application_id()
    output_project_id = bq_utils.get_output_project_id()
    hpo_id = bq_utils.get_retraction_hpo_id()
    pid_table_id = bq_utils.get_retraction_pid_table_id()
    sandbox_dataset_id = bq_utils.get_retraction_sandbox_dataset_id()

    # retract from bq
    dataset_ids = bq_utils.get_retraction_dataset_ids()
    # Pass the value as a logging argument instead of pre-formatting with
    # `%`: formatting is deferred to the logging module, and a tuple value
    # cannot be mistaken for multiple format arguments.
    logging.info('Dataset id/s to target from env variable: %s', dataset_ids)
    logging.info('Running retraction on BQ datasets')

    # NOTE(review): the third positional argument is project_id in both calls
    # below -- presumably the project holding the sandbox dataset / pid
    # table; confirm against run_bq_retraction's signature.

    # retract from output dataset
    retract_data_bq.run_bq_retraction(output_project_id, sandbox_dataset_id,
                                      project_id, pid_table_id, hpo_id,
                                      dataset_ids)
    # retract from default dataset
    retract_data_bq.run_bq_retraction(project_id, sandbox_dataset_id,
                                      project_id, pid_table_id, hpo_id,
                                      dataset_ids)
    logging.info('Completed retraction on BQ datasets')

    # retract from gcs
    folder = bq_utils.get_retraction_submission_folder()
    logging.info('Submission folder/s to target from env variable: %s', folder)
    logging.info('Running retraction from internal bucket folders')
    retract_data_gcs.run_gcs_retraction(project_id,
                                        sandbox_dataset_id,
                                        pid_table_id,
                                        hpo_id,
                                        folder,
                                        force_flag=True)
    logging.info('Completed retraction from internal bucket folders')

    return 'retraction-complete'
def run_retraction_cron():
    """
    Run BigQuery retraction for research ids and then for person ids.

    The ids are read from two files whose names come from bq_utils. Both
    files are read before any retraction starts. Research ids are retracted
    with deid_flag=True, person ids with deid_flag=False.

    :return: the string 'retraction-complete'
    """
    app_id = bq_utils.app_identity.get_application_id()
    site_id = bq_utils.get_retraction_hpo_id()

    # Resolve both file names first, then read the ids out of each file.
    pid_file_name = bq_utils.get_retraction_person_ids_file_name()
    rid_file_name = bq_utils.get_retraction_research_ids_file_name()
    pids = retract_data_bq.extract_pids_from_file(pid_file_name)
    rids = retract_data_bq.extract_pids_from_file(rid_file_name)

    # One row per pass, in execution order:
    # (start message, ids, deid flag, finish message)
    passes = (
        ('Running retraction on research_ids', rids, True,
         'Completed retraction on research_ids'),
        ('Running retraction on person_ids', pids, False,
         'Completed retraction on person_ids'),
    )
    for start_msg, ids, deid, done_msg in passes:
        logging.info(start_msg)
        retract_data_bq.run_retraction(app_id, ids, site_id, deid_flag=deid)
        logging.info(done_msg)

    return 'retraction-complete'