# Re-identify the surviving uuids as phone numbers and export them as a
# Rapid Pro contacts CSV. Uuids that cannot be re-identified are counted
# and reported, not exported.
log.info(f"Removed {removed} uuids; {len(uuids)} remain")

# Convert the uuids to phone numbers
log.info(f"Converting {len(uuids)} uuids to phone numbers...")
uuid_phone_number_lut = phone_number_uuid_table.uuid_to_data_batch(uuids)

# Some uuids are no longer re-identifiable due to a uuid table consistency
# issue between OCHA and WorldBank-PLR, so partition the uuids into the
# converted and the skipped.
phone_numbers = {f"+{uuid_phone_number_lut[uuid]}"
                 for uuid in uuids if uuid in uuid_phone_number_lut}
skipped_uuids = {uuid for uuid in uuids if uuid not in uuid_phone_number_lut}

log.info(f"Successfully converted {len(phone_numbers)} uuids to phone numbers.")
log.warning(f"Unable to re-identify {len(skipped_uuids)} uuids")

# Export contacts CSV
log.warning(f"Exporting {len(phone_numbers)} phone numbers to {csv_output_file_path}...")
with open(csv_output_file_path, "w") as f:
    writer = csv.DictWriter(f, fieldnames=["URN:Tel", "Name"], lineterminator="\n")
    writer.writeheader()
    for phone_number in phone_numbers:
        # "Name" is intentionally left blank; DictWriter fills it with "".
        writer.writerow({"URN:Tel": phone_number})
log.info(f"Wrote {len(phone_numbers)} contacts to {csv_output_file_path}")
# Continuation of a px.bar(...) call begun above this chunk: finish sizing the
# participants-per-episode chart, then export it as a PNG.
title="Participants/Episode",
width=len(engagement_counts) * 20 + 150)
fig.update_xaxes(tickangle=-60)  # slant the episode labels so they don't overlap
fig.write_image(
    f"{automated_analysis_output_dir}/graphs/participants_per_episode.png",
    scale=IMG_SCALE_FACTOR)

log.info("Graphing the demographic distributions...")
# One bar chart per demographic coding configuration that has an analysis key.
for plan in PipelineConfiguration.DEMOG_CODING_PLANS:
    for cc in plan.coding_configurations:
        if cc.analysis_file_key is None:
            continue

        # Very wide charts are skipped rather than rendered: cap at 200 codes.
        if len(cc.code_scheme.codes) > 200:
            log.warning(
                f"Skipping graphing the distribution of codes for {cc.analysis_file_key}, because it "
                f"contains too many columns to graph (has {len(cc.code_scheme.codes)} columns; "
                f"limit is 200).")
            continue

        log.info(
            f"Graphing the distribution of codes for {cc.analysis_file_key}..."
        )
        # Plot participant counts for every code in the scheme except STOP.
        # (Call is cut off at the end of this chunk; remaining px.bar kwargs
        # follow outside this view.)
        fig = px.bar([{
            "Label": code.string_value,
            "Number of Participants":
            demographic_distributions[cc.analysis_file_key][code.code_id]
        } for code in cc.code_scheme.codes if code.control_code != Codes.STOP],
            x="Label",
            y="Number of Participants",
# List the memory-profile and data-archive blobs already uploaded to GCS, so
# the matching local files can be cleaned up before uploading anything new.
uploaded_memory_logs = google_cloud_utils.list_blobs(
    google_cloud_credentials_file_path,
    pipeline_configuration.memory_profile_upload_bucket,
    pipeline_configuration.bucket_dir_path,
)
uploaded_memory_log_dates = get_uploaded_file_dates(
    uploaded_memory_logs, date_pattern)

uploaded_data_archives = google_cloud_utils.list_blobs(
    google_cloud_credentials_file_path,
    pipeline_configuration.data_archive_upload_bucket,
    pipeline_configuration.bucket_dir_path)
uploaded_data_archives_dates = get_uploaded_file_dates(
    uploaded_data_archives, date_pattern)

# Presumably removes local files whose dates are already covered by the
# uploaded sets — TODO confirm against delete_old_log_files.
log.warning(f"Deleting old memory profile files from local disk...")
delete_old_log_files(memory_profile_dir_path, uploaded_memory_log_dates)
log.warning(f"Deleting old data archives files from local disk...")
delete_old_log_files(data_archive_dir_path, uploaded_data_archives_dates)

log.info(f"Uploading memory profile files...")
memory_log_files_by_date = get_files_by_date(memory_profile_dir_path,
                                             uploaded_memory_log_dates)
# For each date, upload only the most recently modified log file.
# (Loop body continues beyond this chunk.)
for file_date in memory_log_files_by_date:
    latest_memory_log_file_path = max(memory_log_files_by_date[file_date],
                                      key=os.path.getmtime)
    memory_profile_upload_location = f"{pipeline_configuration.memory_profile_upload_bucket}/" \
        f"{pipeline_configuration.bucket_dir_path}/{os.path.basename(latest_memory_log_file_path)}"
    log.info(
        f"Uploading memory profile from {latest_memory_log_file_path} to {memory_profile_upload_location}..."
    )
# Continuation of a px.bar(...) call begun above this chunk: finish the
# messages-per-episode chart, then export it as a PNG.
x="Episode",
y="Total Messages with Opt-Ins",
template="plotly_white",
title="Messages/Episode",
width=len(engagement_counts) * 20 + 150)
fig.update_xaxes(tickangle=-60)  # slant the episode labels so they don't overlap
fig.write_image(f"{output_dir}/graphs/messages_per_episode.png", scale=IMG_SCALE_FACTOR)

# Graph the number of participants in each episode
# ("Total" is a summary row, so exclude it from the per-episode chart).
fig = px.bar([x for x in engagement_counts.values() if x["Episode"] != "Total"],
             x="Episode", y="Total Participants with Opt-Ins", template="plotly_white",
             title="Participants/Episode", width=len(engagement_counts) * 20 + 150)
fig.update_xaxes(tickangle=-60)
fig.write_image(f"{output_dir}/graphs/participants_per_episode.png", scale=IMG_SCALE_FACTOR)

log.info("Graphing the demographic distributions...")
# One season-distribution bar chart per demographic.
for demographic, counts in demographic_distributions.items():
    # Very wide charts are skipped rather than rendered: cap at 200 codes.
    if len(counts) > 200:
        # NOTE(review): "but is contains too many" is garbled wording in this
        # user-visible log message — confirm and fix the literal.
        log.warning(f"Skipping graphing the distribution of codes for {demographic}, but is contains too many "
                    f"columns to graph (has {len(counts)} columns; limit is 200).")
        continue
    log.info(f"Graphing the distribution of codes for {demographic}...")
    fig = px.bar([{"Label": code_string_value, "Number of Participants": number_of_participants}
                  for code_string_value, number_of_participants in counts.items()],
                 x="Label", y="Number of Participants", template="plotly_white",
                 title=f"Season Distribution: {demographic}", width=len(counts) * 20 + 150)
    fig.update_xaxes(type="category", tickangle=-60, dtick=1)
    fig.write_image(f"{output_dir}/graphs/season_distribution_{demographic}.png", scale=IMG_SCALE_FACTOR)

# Plot the per-season distribution of responses for each survey question, per individual
# (Loop body continues beyond this chunk.)
for plan in PipelineConfiguration.RQA_CODING_PLANS + PipelineConfiguration.SURVEY_CODING_PLANS:
    for cc in plan.coding_configurations:
        if cc.analysis_file_key is None:
            continue
# Export the unique phone numbers of everyone who has sent an inbound message
# to a Rapid Pro workspace, as a Rapid Pro-importable contacts CSV.
google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
rapid_pro_domain = args.rapid_pro_domain
rapid_pro_token_file_url = args.rapid_pro_token_file_url
output_file_path = args.output_file_path

log.info("Downloading the Rapid Pro access token...")
rapid_pro_token = google_cloud_utils.download_blob_to_string(
    google_cloud_credentials_file_path, rapid_pro_token_file_url).strip()
rapid_pro = RapidProClient(rapid_pro_domain, rapid_pro_token)

# Fetch every message in the workspace, then keep only the inbound ones.
all_messages = rapid_pro.get_raw_messages()
inbound_messages = [msg for msg in all_messages if msg.direction == "in"]

# Collect the unique phone numbers behind the inbound messages.
# Non-telephone URNs (e.g. telegram) are skipped with a warning.
inbound_phone_numbers = set()
for msg in inbound_messages:
    if msg.urn.startswith("tel:"):
        phone_number = msg.urn.split(":")[1]
        inbound_phone_numbers.add(phone_number)
    else:
        log.warning(f"Skipped non-telephone URN type {msg.urn.split(':')[0]}")

# Export contacts CSV.
log.warning(f"Exporting {len(inbound_phone_numbers)} inbound phone numbers to {output_file_path}...")
with open(output_file_path, "w") as f:
    # lineterminator="\n" matches the other contact exporters in this project
    # and avoids the csv default of "\r\n", which becomes "\r\r\n" on Windows
    # because the file isn't opened with newline="".
    writer = csv.DictWriter(f, fieldnames=["URN:Tel", "Name"], lineterminator="\n")
    writer.writeheader()
    for number in inbound_phone_numbers:
        writer.writerow({"URN:Tel": number, "Name": ""})

log.info(f"Done. Wrote {len(inbound_phone_numbers)} inbound phone numbers to {output_file_path}")
# Copy contacts from the source Rapid Pro instance into the target instance.
log.info("Fetching all contacts from the source instance...")
contacts = source_instance.get_raw_contacts()
log.info(f"Fetched {len(contacts)} contacts")

log.info("Updating contacts in the target instance...")
# Update each contact's name and fields.
# Language, groups, blocked, and stopped properties are not touched.
multiple_urns_count = 0                   # skipped: contact has more than one URN
telephone_with_no_country_code_count = 0  # skipped: tel URN without a '+' country code
updated_count = 0                         # contacts actually written to the target
# (Loop body ends this chunk; any summary reporting follows outside this view.)
for i, contact in enumerate(contacts):
    log.debug(f"Updating contact {i + 1}/{len(contacts)}...")

    # Only single-URN contacts are synced; a multi-URN contact is ambiguous.
    if len(contact.urns) != 1:
        log.warning(
            f"Found a contact in the source instance with multiple URNS. "
            f"The RapidPro UUID is '{contact.uuid}'")
        multiple_urns_count += 1
        continue

    # A 'tel:' URN that does not continue with '+' carries no country code.
    if contact.urns[0].startswith(
            "tel:") and not contact.urns[0].startswith("tel:+"):
        log.warning(
            f"Found a contact in the source instance with a telephone number that has no country "
            f"code; skipping. The RapidPro UUID is '{contact.uuid}'")
        telephone_with_no_country_code_count += 1
        continue

    # Normalise empty names to None — presumably so the target clears the
    # name instead of storing ""; TODO confirm against the Rapid Pro client.
    if contact.name == "":
        contact.name = None

    target_instance.update_contact(contact.urns[0], contact.name,
                                   contact.fields)
    updated_count += 1
# Load the REACH traced data
log.info(f"Loading REACH traced data from file '{traced_data_path}'...")
with open(traced_data_path, "r") as f:
    data = TracedDataJsonIO.import_json_to_traced_data_iterable(f)
log.info(f"Loaded {len(data)} traced data objects")

# Search the TracedData for contacts who have not withdrawn consent.
log.info("Searching for consenting uuids...")
consenting_uuids = {
    td["UID"] for td in data if td["withdrawn_consent"] != Codes.TRUE
}
log.info(f"Found {len(consenting_uuids)} consenting uuids")

# Re-identify each consenting uuid as an international-format phone number.
log.info("Converting the uuids to phone numbers...")
phone_numbers = []
for uuid in consenting_uuids:
    phone_numbers.append(f"+{phone_number_uuid_table.get_phone(uuid)}")

# Export a Rapid Pro-importable contacts CSV.
log.warning(f"Exporting {len(phone_numbers)} phone numbers to {output_path}...")
with open(output_path, "w") as f:
    writer = csv.DictWriter(f, fieldnames=["URN:Tel", "Name"], lineterminator="\n")
    writer.writeheader()
    writer.writerows({"URN:Tel": phone_number} for phone_number in phone_numbers)
log.info(f"Wrote {len(phone_numbers)} contacts to {output_path}")
# Compare this URN's contact in each instance and reconcile any differences.
contact_v1 = instance_1_contacts_lut[urn]
contact_v2 = instance_2_contacts_lut[urn]

# Contacts whose name and fields already match need no work.
if contact_v1.name == contact_v2.name and contact_v1.fields == contact_v2.fields:
    log.info(
        f"Synchronising contacts in both instances {i + 1}/{len(urns_in_both_instances)}: "
        f"Contacts identical."
        # NOTE(review): no space after "identical." — renders as
        # "identical.Rapid Pro UUIDs"; confirm and fix the literal.
        f"Rapid Pro UUIDs are '{contact_v1.uuid}' in instance 1; '{contact_v2.uuid}' in instance 2"
    )
    continue

# Contacts differ
if not force_update:
    log.warning(
        f"Synchronising contacts in both instances {i + 1}/{len(urns_in_both_instances)}: "
        f"Contacts differ, but not overwriting. Use --force to write the latest everywhere. "
        f"Rapid Pro UUIDs are '{contact_v1.uuid}' in instance 1; '{contact_v2.uuid}' in instance 2"
    )
    continue

# Assume the most recent contact is correct
# IMPORTANT: If the same contact has been changed on both Rapid Pro instances since the last sync was performed,
# the older changes will be overwritten.
# (The else-branch body continues beyond this chunk.)
if contact_v1.modified_on > contact_v2.modified_on:
    log.info(
        f"Synchronising contacts in both instances {i + 1}/{len(urns_in_both_instances)}: "
        f"Contacts differ, overwriting the contact in instance 2 with the more recent one in instance 1. "
        f"Rapid Pro UUIDs are '{contact_v1.uuid}' in instance 1; '{contact_v2.uuid}' in instance 2"
    )
    instance_2.update_contact(urn, contact_v1.name, contact_v1.fields)
else:
# Load individuals from a jsonl file handle ('f' is opened by a 'with'
# statement above this chunk).
individuals = TracedDataJsonIO.import_jsonl_to_traced_data_iterable(
    f)
log.info(f"Loaded {len(individuals)} individuals")

# Collect the uid of everyone who has not withdrawn consent.
for ind in individuals:
    if ind["consent_withdrawn"] == Codes.TRUE:
        continue
    advert_uids.add(ind['uid'])

# Convert the uuids to phone numbers
log.info("Converting the uuids to phone numbers...")
uuids_to_phone_numbers = phone_number_uuid_table.uuid_to_data_batch(
    list(advert_uids))
# NOTE(review): this indexes the batch result directly, so a uid missing from
# the uuid table raises KeyError (other exporters in this project skip such
# uids with a warning) — confirm all advert uids are re-identifiable.
advert_contacts = [
    f"+{uuids_to_phone_numbers[uuid]}" for uuid in advert_uids
]

# Export contacts CSV
log.warning(
    f"Exporting {len(advert_contacts)} phone numbers to {csv_output_file_path}..."
)
with open(csv_output_file_path, "w") as f:
    writer = csv.DictWriter(f,
                            fieldnames=["URN:Tel", "Name"],
                            lineterminator="\n")
    writer.writeheader()
    for n in advert_contacts:
        # "Name" is intentionally left blank; DictWriter fills it with "".
        writer.writerow({"URN:Tel": n})
log.info(
    f"Wrote {len(advert_contacts)} contacts to {csv_output_file_path}")
# Compare this URN's contact in each workspace ('contact_v1' is looked up
# above this chunk) and reconcile any differences.
contact_v2 = workspace_2_contacts_lut[urn]

# Contacts whose name and fields already match need no work.
if contact_v1.name == contact_v2.name and contact_v1.fields == contact_v2.fields:
    log.debug(
        f"Synchronising contacts in both workspaces {i + 1}/{len(urns_in_both_workspaces)}: "
        f"Contacts identical. "
        f"(Rapid Pro UUIDs are '{contact_v1.uuid}' in {workspace_1_name}; "
        f"'{contact_v2.uuid}' in {workspace_2_name})")
    identical_contacts += 1
    continue

# Contacts differ
if not force_update:
    log.warning(
        f"Synchronising contacts in both workspaces {i + 1}/{len(urns_in_both_workspaces)}: "
        f"Contacts differ, but not overwriting. Use --force to write the latest everywhere. "
        f"(Rapid Pro UUIDs are '{contact_v1.uuid}' in {workspace_1_name}; "
        f"'{contact_v2.uuid}' in {workspace_2_name})")
    skipped_contacts += 1
    continue

# Assume the most recent contact is correct
# IMPORTANT: If the same contact has been changed on both Rapid Pro workspaces since the last sync was
# performed, the older changes will be overwritten.
# (This log.info call is cut off at the end of this chunk; its remaining
# arguments and the update itself follow outside this view.)
if contact_v1.modified_on > contact_v2.modified_on:
    if workspaces_to_update in {"2", "both"}:
        log.info(
            f"Synchronising contacts in both workspaces {i + 1}/{len(urns_in_both_workspaces)}: "
            f"Contacts differ, overwriting the contact in {workspace_2_name} with the more recent one in "
            f"{workspace_1_name}. "
            f"(Rapid Pro UUIDs are '{contact_v1.uuid}' in {workspace_1_name}; "
# Export the requested Firestore uuid tables as gzipped json, writing the
# result to local disk and/or GCS depending on which paths were provided.
id_tables = FirestoreUuidInfrastructure.init_from_credentials(firestore_uuid_table_credentials)

# An empty table_names argument means "export every table".
if not table_names:
    table_names = id_tables.list_table_names()
log.info(f"Found {len(table_names)} uuid tables to export")

# Build the export document: table_name -> {"mappings": {data -> uuid}}.
export = {}
for table_number, table_name in enumerate(table_names, start=1):
    log.info(f"Fetching mappings from table {table_number}/{len(table_names)}: {table_name}...")
    mappings = id_tables.get_table(table_name, None).get_all_mappings()
    export[table_name] = {"mappings": mappings}
    log.info(f"Fetched {len(mappings)} mappings")

log.info("Converting fetched data to zipped json for export...")
export_compressed = gzip.compress(json.dumps(export).encode("utf-8"))

if gzip_export_file_path is not None:
    log.warning(f"Writing mappings to local disk at '{gzip_export_file_path}'...")
    with open(gzip_export_file_path, "wb") as f:
        f.write(export_compressed)

if gcs_upload_path is not None:
    log.info(f"Uploading the mappings to {gcs_upload_path}...")
    google_cloud_utils.upload_string_to_blob(google_cloud_credentials_file_path, gcs_upload_path,
                                             export_compressed)

log.info(f"Export complete ({len(table_names)} table(s))")
# Register every advert contact together with the language Rapid Pro should
# use to message them. Ordering matches the original somali/english/swahili
# passes, so a number present in several lists keeps the last language seen.
for numbers, language in ((somali_phone_numbers, 'som'),
                          (english_phone_numbers, 'eng'),
                          (swahili_phone_numbers, 'swh')):
    for number in numbers:
        advert_contacts[number] = {
            "URN:Tel": number,
            "Name": None,
            "Language": language
        }

# Export the contacts CSV.
log.warning(
    f"Exporting {len(advert_contacts)} contacts to {contacts_csv_path}")
with open(contacts_csv_path, "w") as f:
    writer = csv.DictWriter(f,
                            fieldnames=["URN:Tel", "Name", "Language"],
                            lineterminator="\n")
    writer.writeheader()
    writer.writerows(advert_contacts.values())
log.info(
    f"Wrote {len(advert_contacts)} contacts to {contacts_csv_path}")