def main(user, google_cloud_credentials_file_path, pipeline_configuration_file_path, raw_data_dir):
    """
    Fetches the raw data for every source listed in the pipeline configuration.

    Loads the pipeline configuration, sets the logger's project name from it, initialises the Firestore
    phone number <-> uuid table using credentials downloaded via `google_cloud_utils`, then hands each
    configured raw data source to the fetch function that matches its type.

    :param user: Passed through unchanged to the Rapid Pro and recovery CSV fetchers.
    :param google_cloud_credentials_file_path: Path to the Google Cloud credentials file, used to download the
                                               Firestore credentials and passed on to every fetcher.
    :param pipeline_configuration_file_path: Path to the pipeline configuration file to load.
    :param raw_data_dir: Passed through to every fetcher (presumably the directory the raw data is
                         written to - confirm against the fetch functions).
    :raises AssertionError: If a raw data source is not a RapidProSource, GCloudBucketSource or
                            RecoveryCSVSource.
    """
    # Read the settings from the configuration file
    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    log.info("Downloading Firestore UUID Table credentials...")
    uuid_table_configuration = pipeline_configuration.phone_number_uuid_table
    firestore_uuid_table_credentials = json.loads(
        google_cloud_utils.download_blob_to_string(
            google_cloud_credentials_file_path,
            uuid_table_configuration.firebase_credentials_file_url
        )
    )
    phone_number_uuid_table = FirestoreUuidTable(
        uuid_table_configuration.table_name,
        firestore_uuid_table_credentials,
        "avf-phone-uuid-"
    )
    log.info("Initialised the Firestore UUID table")

    raw_data_sources = pipeline_configuration.raw_data_sources
    total_sources = len(raw_data_sources)
    log.info(f"Fetching data from {total_sources} sources...")
    for source_number, raw_data_source in enumerate(raw_data_sources, start=1):
        log.info(f"Fetching from source {source_number}/{total_sources}...")
        if isinstance(raw_data_source, RapidProSource):
            fetch_from_rapid_pro(user, google_cloud_credentials_file_path, raw_data_dir,
                                 phone_number_uuid_table, raw_data_source)
        elif isinstance(raw_data_source, GCloudBucketSource):
            fetch_from_gcloud_bucket(google_cloud_credentials_file_path, raw_data_dir, raw_data_source)
        elif isinstance(raw_data_source, RecoveryCSVSource):
            fetch_from_recovery_csv(user, google_cloud_credentials_file_path, raw_data_dir,
                                    phone_number_uuid_table, raw_data_source)
        else:
            # Raise explicitly instead of `assert False`: assert statements are removed when Python runs
            # with -O, which would let an unknown source type be skipped without any error.
            raise AssertionError(f"Unknown raw_data_source type {type(raw_data_source)}")
def main(user, google_cloud_credentials_file_path, pipeline_configuration_file_path, raw_data_dir):
    """
    Fetches the raw data for every source listed in the pipeline configuration, then the listening group CSVs.

    Loads the pipeline configuration, initialises the Firestore phone number <-> uuid table using credentials
    downloaded via `google_cloud_utils`, fetches from each configured Rapid Pro source, and finally calls
    `fetch_listening_groups_csvs` once.

    :param user: Passed through unchanged to the Rapid Pro fetcher.
    :param google_cloud_credentials_file_path: Path to the Google Cloud credentials file, used to download the
                                               Firestore credentials and passed on to the fetchers.
    :param pipeline_configuration_file_path: Path to the pipeline configuration file to load.
    :param raw_data_dir: Passed through to the fetchers (presumably the directory the raw data is
                         written to - confirm against the fetch functions).
    :raises AssertionError: If a raw data source is not a RapidProSource.
    """
    # Read the settings from the configuration file
    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)

    log.info("Downloading Firestore UUID Table credentials...")
    uuid_table_configuration = pipeline_configuration.phone_number_uuid_table
    firestore_uuid_table_credentials = json.loads(
        google_cloud_utils.download_blob_to_string(
            google_cloud_credentials_file_path,
            uuid_table_configuration.firebase_credentials_file_url
        )
    )
    phone_number_uuid_table = FirestoreUuidTable(
        uuid_table_configuration.table_name,
        firestore_uuid_table_credentials,
        "avf-phone-uuid-"
    )
    log.info("Initialised the Firestore UUID table")

    raw_data_sources = pipeline_configuration.raw_data_sources
    total_sources = len(raw_data_sources)
    log.info(f"Fetching data from {total_sources} sources...")
    for source_number, raw_data_source in enumerate(raw_data_sources, start=1):
        log.info(f"Fetching from source {source_number}/{total_sources}...")
        if isinstance(raw_data_source, RapidProSource):
            fetch_from_rapid_pro(user, google_cloud_credentials_file_path, raw_data_dir,
                                 phone_number_uuid_table, raw_data_source)
        else:
            # Raise explicitly instead of `assert False`: assert statements are removed when Python runs
            # with -O, which would let an unknown source type be skipped without any error.
            raise AssertionError(f"Unknown raw_data_source type {type(raw_data_source)}")

    # Fetch de-identified listening group CSVs
    log.info("Fetching listening group CSVs")
    fetch_listening_groups_csvs(google_cloud_credentials_file_path, pipeline_configuration, raw_data_dir)
log.info("Loading Pipeline Configuration File...") with open(pipeline_configuration_file_path) as f: pipeline_configuration = PipelineConfiguration.from_configuration_file( f) Logger.set_project_name(pipeline_configuration.pipeline_name) log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}") log.info("Downloading Firestore UUID Table credentials...") firestore_uuid_table_credentials = json.loads( google_cloud_utils.download_blob_to_string( google_cloud_credentials_file_path, pipeline_configuration. phone_number_uuid_table.firebase_credentials_file_url)) phone_number_uuid_table = FirestoreUuidTable( pipeline_configuration.phone_number_uuid_table.table_name, firestore_uuid_table_credentials, "avf-phone-uuid-") log.info("Initialised the Firestore UUID table") uuids = set() skipped_nr = 0 for path in traced_data_paths: # Load the traced data log.info(f"Loading previous traced data from file '{path}'...") with open(path) as f: data = TracedDataJsonIO.import_jsonl_to_traced_data_iterable(f) log.info(f"Loaded {len(data)} traced data objects") for td in data: if td["consent_withdrawn"] == Codes.TRUE: continue
log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}") log.info("Downloading Rapid Pro access token...") rapid_pro_token = google_cloud_utils.download_blob_to_string( google_cloud_credentials_file_path, rapid_pro_token_url).strip() rapid_pro = RapidProClient(rapid_pro_domain, rapid_pro_token) log.info("Downloading Firestore UUID Table credentials...") firestore_uuid_table_credentials = json.loads( google_cloud_utils.download_blob_to_string( google_cloud_credentials_file_path, pipeline_configuration. phone_number_uuid_table.firebase_credentials_file_url)) phone_number_uuid_table = FirestoreUuidTable( pipeline_configuration.phone_number_uuid_table.table_name, firestore_uuid_table_credentials, "avf-phone-uuid-") log.info("Initialised the Firestore UUID table") log.info(f"Loading the uuids that are safe to send to") with open(avf_uuid_file_path) as f: safe_uuids = json.load(f) log.info(f"Loaded {len(safe_uuids)} uuids") log.info(f"Re-identifying the uuids") safe_numbers = phone_number_uuid_table.uuid_to_data_batch( safe_uuids).values() safe_urns = {f"tel:+{number}" for number in safe_numbers} log.info(f"Re-identified {len(safe_urns)} uuids") log.info("Downloading the latest contacts fields from Rapid Pro")
csv_by_individual_output_path = args.csv_by_individual_output_path production_csv_output_path = args.production_csv_output_path # Load the pipeline configuration file log.info("Loading Pipeline Configuration File...") with open(pipeline_configuration_file_path) as f: pipeline_configuration = PipelineConfiguration.from_configuration_file( f) log.info("Downloading Firestore Uuid Table credentials...") firestore_uuid_table_credentials = json.loads( google_cloud_utils.download_blob_to_string( google_cloud_credentials_file_path, pipeline_configuration. phone_number_uuid_table.firebase_credentials_file_url)) phone_number_uuid_table = FirestoreUuidTable( pipeline_configuration.phone_number_uuid_table.table_name, firestore_uuid_table_credentials, "avf-phone-uuid-") if pipeline_configuration.drive_upload is not None: log.info(f"Downloading Google Drive service account credentials...") credentials_info = json.loads( google_cloud_utils.download_blob_to_string( google_cloud_credentials_file_path, pipeline_configuration. drive_upload.drive_credentials_file_url)) drive_client_wrapper.init_client_from_info(credentials_info) # Load messages messages_datasets = [] for i, activation_flow_name in enumerate( pipeline_configuration.activation_flow_names): raw_activation_path = f"{raw_data_dir}/{activation_flow_name}.json"
baidoa_output_path = args.baidoa_output_path # Read the settings from the configuration file log.info("Loading Pipeline Configuration File...") with open(pipeline_configuration_file_path) as f: pipeline_configuration = PipelineConfiguration.from_configuration_file( f) log.info("Downloading Firestore UUID Table credentials...") firestore_uuid_table_credentials = json.loads( google_cloud_utils.download_blob_to_string( google_cloud_credentials_file_path, pipeline_configuration. phone_number_uuid_table.firebase_credentials_file_url)) phone_number_uuid_table = FirestoreUuidTable( pipeline_configuration.phone_number_uuid_table.table_name, firestore_uuid_table_credentials, "avf-phone-uuid-") log.info("Initialised the Firestore UUID table") log.info(f"Loading UNDP-RCO traced data from file '{traced_data_path}'...") with open(traced_data_path, "r") as f: data = TracedDataJsonIO.import_jsonl_to_traced_data_iterable(f) log.info(f"Loaded {len(data)} traced data objects") # Search the TracedData for the bossaso/baidoa contacts bossaso_uuids = set() baidoa_uuids = set() log.info("Searching for participants from Bossaso or Baidoa") for td in data: if td["district_coded"] == "STOP": continue