automated_analysis_output_dir = args.automated_analysis_output_dir

IOUtils.ensure_dirs_exist(automated_analysis_output_dir)
IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/maps/regions")
IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/maps/districts")
IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/maps/mogadishu")
IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/graphs")

log.info("Loading Pipeline Configuration File...")
with open(pipeline_configuration_file_path) as f:
    pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
Logger.set_project_name(pipeline_configuration.pipeline_name)
log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

sys.setrecursionlimit(30000)

# Read the messages dataset
log.info(f"Loading the messages dataset from {messages_json_input_path}...")
with open(messages_json_input_path) as f:
    messages = TracedDataJsonIO.import_jsonl_to_traced_data_iterable(f)
    for i in range(len(messages)):
        messages[i] = dict(messages[i].items())
log.info(f"Loaded {len(messages)} messages")

# Read the individuals dataset
log.info(f"Loading the individuals dataset from {individuals_json_input_path}...")
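# A hedged sketch of how the individuals load likely continues, mirroring the
# messages load above. Only `individuals_json_input_path` and the log line appear
# in the original; the `individuals` variable and the dict conversion are
# assumptions, not the script's confirmed continuation:
with open(individuals_json_input_path) as f:
    individuals = TracedDataJsonIO.import_jsonl_to_traced_data_iterable(f)
    for i in range(len(individuals)):
        # Convert each TracedData object to a plain dict, as done for messages above.
        individuals[i] = dict(individuals[i].items())
log.info(f"Loaded {len(individuals)} individuals")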
for msg in messages:
    if msg.urn.startswith("tel:"):
        operator = PhoneCleaner.clean_operator(msg.urn.split(":")[1])
    else:
        operator = msg.urn.split(":")[0]

    if operator == target_operator and msg.direction == target_message_direction:
        msg_sent_on_timestamps.append(msg.sent_on)
msg_sent_on_timestamps.append(end_date)

computed_windows_of_downtime = []
# Compute the time difference between each pair of consecutive messages, i.e. between
# `PreviousMessageTimestamp` and `NextMessageTimestamp`, to get the window of time without
# a message, and relate each time difference to the operator and the message direction.
for index, time_in_range in enumerate(msg_sent_on_timestamps):
    log.debug(f"Computing window of time without messages {index + 1}/{len(msg_sent_on_timestamps)}...")

    # The last timestamp has no next message to compare against, so skip it.
    max_allowable_index = len(msg_sent_on_timestamps) - 1
    if index < max_allowable_index:
        next_index = index + 1
    else:
        continue

    time_diff = msg_sent_on_timestamps[next_index] - msg_sent_on_timestamps[index]
    computed_windows_of_downtime.append({
        "Operator": target_operator,
        "MessageDirection": target_message_direction,
        # The original excerpt truncates here; the remaining keys below are a hedged
        # completion inferred from the comment above this loop.
        "PreviousMessageTimestamp": msg_sent_on_timestamps[index],
        "NextMessageTimestamp": msg_sent_on_timestamps[next_index],
        "TimeDifference": time_diff,
    })
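# The loop above walks consecutive timestamp pairs by index. A minimal standalone
# sketch of the same pairwise-difference technique, zipping the list against itself
# offset by one (hypothetical helper, not part of this pipeline):
def compute_downtime_windows(timestamps, operator, direction):
    """Returns one record per gap between consecutive timestamps."""
    return [
        {
            "Operator": operator,
            "MessageDirection": direction,
            "PreviousMessageTimestamp": previous_ts,
            "NextMessageTimestamp": next_ts,
            "TimeDifference": next_ts - previous_ts,
        }
        for previous_ts, next_ts in zip(timestamps, timestamps[1:])
    ]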
    if field.key not in target_field_keys:
        target_instance.create_field(field.label)
log.info("Contact fields copied")

log.info("Fetching all contacts from the source instance...")
contacts = source_instance.get_raw_contacts()
log.info(f"Fetched {len(contacts)} contacts")

log.info("Updating contacts in the target instance...")
# Update each contact's name and fields.
# Language, groups, blocked, and stopped properties are not touched.
multiple_urns_count = 0
telephone_with_no_country_code_count = 0
updated_count = 0
for i, contact in enumerate(contacts):
    log.debug(f"Updating contact {i + 1}/{len(contacts)}...")

    if len(contact.urns) != 1:
        log.warning(f"Found a contact in the source instance with multiple URNs; skipping. "
                    f"The RapidPro UUID is '{contact.uuid}'")
        multiple_urns_count += 1
        continue

    if contact.urns[0].startswith("tel:") and not contact.urns[0].startswith("tel:+"):
        log.warning(f"Found a contact in the source instance with a telephone number that has no country "
                    f"code; skipping. The RapidPro UUID is '{contact.uuid}'")
        telephone_with_no_country_code_count += 1
        continue

    if contact.name == "":
        contact.name = None
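# The two skip conditions above can be read as a single URN validity check. A minimal
# standalone restatement (hypothetical helper, not part of the original script):
def contact_urn_is_updatable(urns):
    """True if the contact has exactly one URN and, for telephone URNs,
    the number carries a country code (i.e. starts with 'tel:+')."""
    if len(urns) != 1:
        return False
    if urns[0].startswith("tel:") and not urns[0].startswith("tel:+"):
        return False
    return True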
drive_client_wrapper.init_client_from_info(credentials_info)

# Load phone number <-> UUID table
print("Loading Phone Number <-> UUID Table...")
with open(phone_number_uuid_table_path, "r") as f:
    phone_number_uuid_table = PhoneNumberUuidTable.load(f)

# Load messages
messages_datasets = []
for i, activation_flow_name in enumerate(pipeline_configuration.activation_flow_names):
    raw_activation_path = f"{raw_data_dir}/{activation_flow_name}.jsonl"
    log.info(f"Loading {raw_activation_path}...")
    with open(raw_activation_path, "r") as f:
        messages = TracedDataJsonIO.import_jsonl_to_traced_data_iterable(f)
        log.debug(f"Loaded {len(messages)} messages")
    messages_datasets.append(messages)

# Load surveys
survey_datasets = []
for i, survey_flow_name in enumerate(pipeline_configuration.survey_flow_names):
    raw_survey_path = f"{raw_data_dir}/{survey_flow_name}.jsonl"
    log.info(f"Loading {raw_survey_path}...")
    with open(raw_survey_path, "r") as f:
        messages = TracedDataJsonIO.import_jsonl_to_traced_data_iterable(f)
        log.debug(f"Loaded {len(messages)} messages")
    survey_datasets.append(messages)

# Add survey data to the messages
print("Combining Datasets...")
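# The activation and survey loops above share one load pattern; a hedged
# refactoring sketch (hypothetical helper name, not in the original source):
def load_traced_data_jsonl(raw_data_dir, flow_name):
    path = f"{raw_data_dir}/{flow_name}.jsonl"
    log.info(f"Loading {path}...")
    with open(path, "r") as f:
        data = TracedDataJsonIO.import_jsonl_to_traced_data_iterable(f)
    log.debug(f"Loaded {len(data)} messages")
    return data

# e.g. messages_datasets = [load_traced_data_jsonl(raw_data_dir, name)
#                           for name in pipeline_configuration.activation_flow_names]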
# Load the pipeline configuration file
print("Loading Pipeline Configuration File...")
with open(pipeline_configuration_file_path) as f:
    pipeline_configuration = PipelineConfiguration.from_configuration_file(f)

# Load phone number <-> UUID table
print("Loading Phone Number <-> UUID Table...")
with open(phone_number_uuid_table_path, "r") as f:
    phone_number_uuid_table = PhoneNumberUuidTable.load(f)

# Load demographics
log.info("Loading Demographics...")
with open(demog_input_path, "r") as f:
    demographics = TracedDataJsonIO.import_json_to_traced_data_iterable(f)
log.debug(f"Loaded {len(demographics)} contacts")

# Load messages
messages_datasets = []
for i, path in enumerate(message_paths):
    print("Loading Episode {}/{}...".format(i + 1, len(message_paths)))
    with open(path, "r") as f:
        messages_datasets.append(TracedDataJsonIO.import_json_to_traced_data_iterable(f))

# Add survey data to the messages
print("Combining Datasets...")
# The "demographics" dataset is the original contact dataset, so it contains the full
# list of participants. Confusingly, it is therefore passed as the "messages" parameter
# and will be updated with the messages.
data = CombineRawDatasets.combine_raw_datasets(user, [demographics], messages_datasets)

print("Translating Rapid Pro Keys...")
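# Note: this script reads whole-file JSON exports (import_json_to_traced_data_iterable),
# whereas the loader shown earlier reads line-delimited .jsonl files
# (import_jsonl_to_traced_data_iterable). The two serialisations are not interchangeable,
# so the importer must match how each dataset was exported.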
# Update contacts present in both workspaces
identical_contacts = 0
skipped_contacts = 0
updated_contacts_in_workspace_1 = 0
updated_contacts_in_workspace_2 = 0
urns_in_both_workspaces = workspace_1_contacts_lut.keys() & workspace_2_contacts_lut.keys()
for i, urn in enumerate(sorted(urns_in_both_workspaces)):
    contact_v1 = workspace_1_contacts_lut[urn]
    contact_v2 = workspace_2_contacts_lut[urn]

    if contact_v1.name == contact_v2.name and contact_v1.fields == contact_v2.fields:
        log.debug(f"Synchronising contacts in both workspaces {i + 1}/{len(urns_in_both_workspaces)}: "
                  f"Contacts identical. "
                  f"(Rapid Pro UUIDs are '{contact_v1.uuid}' in {workspace_1_name}; "
                  f"'{contact_v2.uuid}' in {workspace_2_name})")
        identical_contacts += 1
        continue

    # Contacts differ
    if not force_update:
        log.warning(f"Synchronising contacts in both workspaces {i + 1}/{len(urns_in_both_workspaces)}: "
                    f"Contacts differ, but not overwriting. Use --force to write the latest everywhere. "
                    f"(Rapid Pro UUIDs are '{contact_v1.uuid}' in {workspace_1_name}; "
                    f"'{contact_v2.uuid}' in {workspace_2_name})")
        skipped_contacts += 1
        continue
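# The `&` between the two dict key views above is a set intersection; a minimal
# standalone illustration with hypothetical data:
lut_a = {"tel:+252700000001": "contact-a", "tel:+252700000002": "contact-b"}
lut_b = {"tel:+252700000002": "contact-c"}
shared_urns = lut_a.keys() & lut_b.keys()  # -> {"tel:+252700000002"}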