Code example #1
    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    pipeline_configuration_file_path = args.pipeline_configuration_file_path

    raw_data_dir = args.raw_data_dir
    prev_coded_dir_path = args.prev_coded_dir_path

    messages_json_output_path = args.messages_json_output_path
    individuals_json_output_path = args.individuals_json_output_path
    icr_output_dir = args.icr_output_dir
    coded_dir_path = args.coded_dir_path
    csv_by_message_output_path = args.csv_by_message_output_path
    csv_by_individual_output_path = args.csv_by_individual_output_path
    production_csv_output_path = args.production_csv_output_path

    # Load the pipeline configuration file
    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)

    if pipeline_configuration.drive_upload is not None:
        log.info(f"Downloading Google Drive service account credentials...")
        credentials_info = json.loads(google_cloud_utils.download_blob_to_string(
            google_cloud_credentials_file_path, pipeline_configuration.drive_upload.drive_credentials_file_url))
        drive_client_wrapper.init_client_from_info(credentials_info)

    # Load the input datasets
    def load_datasets(flow_names):
        datasets = []
        for i, flow_name in enumerate(flow_names):
            raw_flow_path = f"{raw_data_dir}/{flow_name}.jsonl"
            log.info(f"Loading {i + 1}/{len(flow_names)}: {raw_flow_path}...")
Code example #2
    args = parser.parse_args()

    user = args.user
    pipeline_configuration_file_path = args.pipeline_configuration_file_path

    messages_json_input_path = args.messages_json_input_path
    individuals_json_input_path = args.individuals_json_input_path
    automated_analysis_output_dir = args.automated_analysis_output_dir

    IOUtils.ensure_dirs_exist(automated_analysis_output_dir)
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/maps/counties")
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/maps/constituencies")
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/graphs")

    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    sys.setrecursionlimit(30000)
    # Read the messages dataset
    log.info(f"Loading the messages dataset from {messages_json_input_path}...")
    with open(messages_json_input_path) as f:
        messages = TracedDataJsonIO.import_jsonl_to_traced_data_iterable(f)
        for i in range(len(messages)):
            messages[i] = dict(messages[i].items())
    log.info(f"Loaded {len(messages)} messages")

    # Read the individuals dataset
Code example #3
    parser.add_argument("-t", "--time_frame", metavar="time-frame", type=lambda s: datetime.strptime(s, '%H:%M:%S'),
                        default="00:00:10", help="The time frame (HH:MM:SS) to generate dates in intervals between the start and end date")

    args = parser.parse_args()

    raw_messages_input_file_path = args.raw_messages_input_file_path
    messages_difference_per_two_firebase_time_period_output_file_path = args.messages_difference_per_two_firebase_time_period_output_file_path
    target_operator = args.target_operator
    target_message_direction = args.target_message_direction
    start_date = args.start_date
    end_date = args.end_date
    time_frame = args.time_frame  # argparse always sets this, since a default is provided

    with open(raw_messages_input_file_path, mode="r") as f:
        log.info(f"Loading messages from {raw_messages_input_file_path}...")
        raw_messages_data = json.load(f)  # avoid shadowing the built-in `input`
        messages = [Message.deserialize(val) for val in raw_messages_data]
        log.info(f"Loaded {len(messages)} messages")

    # Filter messages based on the target operator and target direction of the message
    log.info(f"Filtering messages based on {target_operator} and "
             f"message direction as '{target_message_direction}' from {len(messages)} total messages ")
    filtered_messages = []
    for msg in messages:
        if msg.urn.startswith("tel:"):
            operator = PhoneCleaner.clean_operator(msg.urn.split(":")[1])
        else:
            operator = msg.urn.split(":")[0]
        if operator == target_operator and msg.direction == target_message_direction:
            msg_direction = msg.direction
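
The excerpt above stops inside the filtering loop. The operator lookup it repeats could be factored into a small helper; this is a sketch for illustration only, and urn_to_operator is not a name used in the original script:

    def urn_to_operator(urn):
        # Rapid Pro URNs look like "tel:+252..." or "telegram:12345".
        # For phone URNs, derive the operator from the number; otherwise fall back to the URN scheme.
        if urn.startswith("tel:"):
            return PhoneCleaner.clean_operator(urn.split(":")[1])
        return urn.split(":")[0]
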
Code example #4
    user = args.user
    pipeline_configuration_file_path = args.pipeline_configuration_file_path
    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path

    raw_data_dir = args.raw_data_dir
    prev_coded_dir_path = args.prev_coded_dir_path

    json_output_path = args.json_output_path
    icr_output_dir = args.icr_output_dir
    coded_dir_path = args.coded_dir_path
    csv_by_message_output_path = args.csv_by_message_output_path
    csv_by_individual_output_path = args.csv_by_individual_output_path
    production_csv_output_path = args.production_csv_output_path

    # Load the pipeline configuration file
    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)

    log.info("Downloading Firestore Uuid Table credentials...")
    firestore_uuid_table_credentials = json.loads(google_cloud_utils.download_blob_to_string(
        google_cloud_credentials_file_path,
        pipeline_configuration.phone_number_uuid_table.firebase_credentials_file_url))
    phone_number_uuid_table = FirestoreUuidTable(
        pipeline_configuration.phone_number_uuid_table.table_name,
        firestore_uuid_table_credentials, "avf-phone-uuid-")

    if pipeline_configuration.drive_upload is not None:
        log.info(f"Downloading Google Drive service account credentials...")
Code example #5
                        metavar="data-archive-file-path",
                        help="Path to the data archive file to upload")

    args = parser.parse_args()

    user = args.user
    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    pipeline_configuration_file_path = args.pipeline_configuration_file_path
    run_id = args.run_id
    production_csv_input_path = args.production_csv_input_path
    messages_csv_input_path = args.messages_csv_input_path
    individuals_csv_input_path = args.individuals_csv_input_path
    memory_profile_file_path = args.memory_profile_file_path
    data_archive_file_path = args.data_archive_file_path

    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    # Upload to Google Drive, if requested.
    if pipeline_configuration.drive_upload is not None:
        log.info(f"Downloading Google Drive service account credentials...")
        credentials_info = json.loads(
            google_cloud_utils.download_blob_to_string(
                google_cloud_credentials_file_path, pipeline_configuration.
                drive_upload.drive_credentials_file_url))
        drive_client_wrapper.init_client_from_info(credentials_info)
Code example #6
    parser.add_argument("output_dir", metavar="output-dir",
                        help="Directory to write the output graphs to")

    args = parser.parse_args()

    user = args.user
    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    pipeline_configuration_file_path = args.pipeline_configuration_file_path

    messages_json_input_path = args.messages_json_input_path
    individuals_json_input_path = args.individuals_json_input_path
    output_dir = args.output_dir

    IOUtils.ensure_dirs_exist(output_dir)

    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    if pipeline_configuration.drive_upload is not None:
        log.info(f"Downloading Google Drive service account credentials...")
        credentials_info = json.loads(google_cloud_utils.download_blob_to_string(
            google_cloud_credentials_file_path, pipeline_configuration.drive_upload.drive_credentials_file_url))
        drive_client_wrapper.init_client_from_info(credentials_info)

    # Read the messages dataset
    log.info(f"Loading the messages dataset from {messages_json_input_path}...")
    with open(messages_json_input_path) as f:
        messages = TracedDataJsonIO.import_jsonl_to_traced_data_iterable(f)
Code example #7
        type=isoparse,
        help="The end date, as an ISO 8601 string, at which the window of downtime computation ends"
    )

    args = parser.parse_args()

    raw_messages_file_path = args.raw_messages_file_path
    window_of_downtimes_output_file_path = args.window_of_downtimes_output_file_path
    target_operator = args.target_operator
    target_message_direction = args.target_message_direction
    start_date = args.start_date
    end_date = args.end_date

    with open(raw_messages_file_path, mode="r") as f:
        log.info(f"Loading messages from {raw_messages_file_path}...")
        raw_messages_data = json.load(f)
        messages = [Message.deserialize(val) for val in raw_messages_data]
        log.info(f"Loaded {len(messages)} messages")

    msg_sent_on_timestamps = [start_date]
    # Append `sent_on` timestamps to `msg_sent_on_timestamps` list
    # based on the target operator and target direction of the message
    for msg in messages:
        if msg.urn.startswith("tel:"):
            operator = PhoneCleaner.clean_operator(msg.urn.split(":")[1])
        else:
            operator = msg.urn.split(":")[0]
        if operator == target_operator and msg.direction == target_message_direction:
            msg_sent_on_timestamps.append(msg.sent_on)
Code example #8
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
    parser.add_argument("rapid_pro_domain", help="URL of the Rapid Pro server to download data from")
    parser.add_argument("rapid_pro_token_file_url", metavar="rapid-pro-token-file-url",
                        help="GS URLs of a text file containing the authorisation token for the Rapid Pro server")
    parser.add_argument("output_file_path", metavar="output-file-path",
                        help="Output CSV file to write the phone numbers to")

    args = parser.parse_args()

    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    rapid_pro_domain = args.rapid_pro_domain
    rapid_pro_token_file_url = args.rapid_pro_token_file_url
    output_file_path = args.output_file_path

    log.info("Downloading the Rapid Pro access token...")
    rapid_pro_token = google_cloud_utils.download_blob_to_string(
        google_cloud_credentials_file_path, rapid_pro_token_file_url).strip()

    rapid_pro = RapidProClient(rapid_pro_domain, rapid_pro_token)

    all_messages = rapid_pro.get_raw_messages()
    inbound_messages = [msg for msg in all_messages if msg.direction == "in"]

    inbound_phone_numbers = set()
    for msg in inbound_messages:
        if msg.urn.startswith("tel:"):
            phone_number = msg.urn.split(":")[1]
            inbound_phone_numbers.add(phone_number)
        else:
            log.warning(f"Skipped non-telephone URN type {msg.urn.split(':')[0]}")
Code example #9
        help="CSV file to write the ADSS contacts from Bossaso to")
    parser.add_argument(
        "baidoa_output_path",
        metavar="baidoa-output-path",
        help="CSV file to write the ADSS contacts from Baidoa to")

    args = parser.parse_args()

    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    pipeline_configuration_file_path = args.pipeline_configuration_file_path
    traced_data_path = args.traced_data_path
    bossaso_output_path = args.bossaso_output_path
    baidoa_output_path = args.baidoa_output_path

    # Read the settings from the configuration file
    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)

    log.info("Downloading Firestore UUID Table credentials...")
    firestore_uuid_table_credentials = json.loads(google_cloud_utils.download_blob_to_string(
        google_cloud_credentials_file_path,
        pipeline_configuration.phone_number_uuid_table.firebase_credentials_file_url))

    phone_number_uuid_table = FirestoreUuidTable(
        pipeline_configuration.phone_number_uuid_table.table_name,
        firestore_uuid_table_credentials, "avf-phone-uuid-")
    log.info("Initialised the Firestore UUID table")
Code example #10
    args = parser.parse_args()

    user = args.user
    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    pipeline_configuration_file_path = args.pipeline_configuration_file_path

    messages_json_input_path = args.messages_json_input_path
    individuals_json_input_path = args.individuals_json_input_path
    output_dir = args.output_dir

    IOUtils.ensure_dirs_exist(output_dir)
    IOUtils.ensure_dirs_exist(f"{output_dir}/maps")
    IOUtils.ensure_dirs_exist(f"{output_dir}/graphs")

    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    if pipeline_configuration.drive_upload is not None:
        log.info(f"Downloading Google Drive service account credentials...")
        credentials_info = json.loads(google_cloud_utils.download_blob_to_string(
            google_cloud_credentials_file_path, pipeline_configuration.drive_upload.drive_credentials_file_url))
        drive_client_wrapper.init_client_from_info(credentials_info)

    # Read the messages dataset
    log.info(f"Loading the messages dataset from {messages_json_input_path}...")
    with open(messages_json_input_path) as f:
        messages = TracedDataJsonIO.import_jsonl_to_traced_data_iterable(f)
Code example #11
        help="Path to a Google Cloud service account credentials file to use to access the "
        "credentials bucket")
    parser.add_argument(
        "firestore_credentials_url",
        metavar="firestore-credentials-url",
        help="GS URL to the credentials file to use to access the Firestore instance containing "
        "the operations statistics")

    args = parser.parse_args()

    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    firestore_credentials_url = args.firestore_credentials_url

    log.info("Initialising the Firestore client...")
    firestore_credentials = json.loads(
        google_cloud_utils.download_blob_to_string(
            google_cloud_credentials_file_path, firestore_credentials_url))
    firestore_wrapper = FirestoreWrapper(firestore_credentials)

    log.info("Loading the active project details...")
    active_projects = firestore_wrapper.get_active_projects()
    log.info(f"Loaded the details for {len(active_projects)} active projects")

    for project in active_projects:
        if project.flow_definitions_upload_url_prefix is None:
            log.info(
                f"Not archiving flow definitions for project {project.project_name} because its "
                f"'flow_definitions_upload_url_prefix' is unspecified.")
            continue
Code example #12
    parser.add_argument("phone_number_uuid_table_path", metavar="phone-number-uuid-table-path",
                        help="JSON file containing the phone number <-> UUID lookup table for the messages/surveys "
                             "datasets")
    parser.add_argument("output_path", metavar="output-path",
                        help="CSV file to write the REACH contacts to")

    args = parser.parse_args()

    traced_data_path = args.traced_data_path
    phone_number_uuid_table_path = args.phone_number_uuid_table_path
    output_path = args.output_path

    sys.setrecursionlimit(15000)

    # Load the phone number <-> uuid table
    log.info(f"Loading the phone number <-> uuid table from file '{phone_number_uuid_table_path}'...")
    with open(phone_number_uuid_table_path, "r") as f:
        phone_number_uuid_table = PhoneNumberUuidTable.load(f)
    log.info(f"Loaded {len(phone_number_uuid_table.numbers())} contacts")
    
    # Load the REACH traced data
    log.info(f"Loading REACH traced data from file '{traced_data_path}'...")
    with open(traced_data_path, "r") as f:
        data = TracedDataJsonIO.import_json_to_traced_data_iterable(f)
    log.info(f"Loaded {len(data)} traced data objects")

    # Search the TracedData for consenting contacts
    log.info("Searching for consenting uuids...")
    consenting_uuids = set()
    for td in data:
        if td["withdrawn_consent"] == Codes.TRUE:
Code example #13
    user = args.user
    pipeline_configuration_file_path = args.pipeline_configuration_file_path

    messages_json_input_path = args.messages_json_input_path
    individuals_json_input_path = args.individuals_json_input_path
    automated_analysis_output_dir = args.automated_analysis_output_dir

    IOUtils.ensure_dirs_exist(automated_analysis_output_dir)
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/maps/regions")
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/maps/districts")
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/maps/mogadishu")
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/graphs")

    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    # Read the messages dataset
    log.info(f"Loading the messages dataset from {messages_json_input_path}...")
    with open(messages_json_input_path) as f:
        messages = TracedDataJsonIO.import_jsonl_to_traced_data_iterable(f)
    log.info(f"Loaded {len(messages)} messages")

    # Read the individuals dataset
    log.info(
Code example #14
        help="Path to analysis dataset CSV where respondents are the unit for analysis (i.e. one "
        "respondent per row, with all their messages joined into a single cell)"
    )

    args = parser.parse_args()

    user = args.user
    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    pipeline_configuration_file_path = args.pipeline_configuration_file_path
    run_id = args.run_id
    production_csv_input_path = args.production_csv_input_path
    messages_csv_input_path = args.messages_csv_input_path
    individuals_csv_input_path = args.individuals_csv_input_path

    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    # Upload to Google Drive, if requested.
    if pipeline_configuration.drive_upload is not None:
        log.info(f"Downloading Google Drive service account credentials...")
        credentials_info = json.loads(
            google_cloud_utils.download_blob_to_string(
                google_cloud_credentials_file_path, pipeline_configuration.
                drive_upload.drive_credentials_file_url))
        drive_client_wrapper.init_client_from_info(credentials_info)
Code example #15
    parser.add_argument(
        "data_archive_dir_path",
        metavar="data-archive-dir-path",
        help="Path to the data archive directory with file to upload")

    args = parser.parse_args()

    user = args.user
    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    pipeline_configuration_file_path = args.pipeline_configuration_file_path
    memory_profile_dir_path = args.memory_profile_dir_path
    data_archive_dir_path = args.data_archive_dir_path

    date_pattern = r'\d{4}-\d{2}-\d{2}'

    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    uploaded_memory_logs = google_cloud_utils.list_blobs(
        google_cloud_credentials_file_path,
        pipeline_configuration.memory_profile_upload_bucket,
        pipeline_configuration.bucket_dir_path,
    )
    uploaded_memory_log_dates = get_uploaded_file_dates(uploaded_memory_logs, date_pattern)

    uploaded_data_archives = google_cloud_utils.list_blobs(
Code example #16
            raise
        doc_count += 1

    time_end = time.perf_counter_ns()
    ms_elapsed = (time_end - time_start) / (1000 * 1000)
    log.info(f"validated {doc_count} ids in {ms_elapsed} ms")


def usage():
    print("Usage python validate_firebase.py crypto_token")


if len(sys.argv) != 2:
    usage()
    exit(1)

crypto_token_path = sys.argv[1]
if not os.path.isfile(crypto_token_path):
    print(f"Expected crypto token file {crypto_token_path}")
    usage()
    exit(1)

firebase_client = firebase_util.init_firebase_client(crypto_token_path)
validate_documents("systemMessages", model.validate_SystemMessage_doc)
validate_documents("suggestedReplies", model.validate_SuggestedReply_doc)
validate_documents("conversationTags", custom.validate_ConversationTag)
validate_documents("messageTags", custom.validate_MessageTag)
validate_documents("nook_conversations", custom.validate_Conversation)

log.info(f"Validation complete")
Code example #17
        help="Directory to read the automated analysis outputs from")

    args = parser.parse_args()

    user = args.user
    pipeline_run_mode = args.pipeline_run_mode
    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    pipeline_configuration_file_path = args.pipeline_configuration_file_path
    run_id = args.run_id

    production_csv_input_path = args.production_csv_input_path
    messages_csv_input_path = args.messages_csv_input_path
    individuals_csv_input_path = args.individuals_csv_input_path
    automated_analysis_input_dir = args.automated_analysis_input_dir

    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    # Upload to Google Drive, if requested.
    if pipeline_configuration.drive_upload is not None:
        log.info(f"Downloading Google Drive service account credentials...")
        credentials_info = json.loads(
            google_cloud_utils.download_blob_to_string(
                google_cloud_credentials_file_path, pipeline_configuration.
                drive_upload.drive_credentials_file_url))
        drive_client_wrapper.init_client_from_info(credentials_info)
Code example #18
                        help="Identifier of this pipeline run")
    parser.add_argument("memory_profile_file_path", metavar="memory-profile-file-path",
                        help="Path to the memory profile log file to upload")
    parser.add_argument("data_archive_file_path", metavar="data-archive-file-path",
                        help="Path to the data archive file to upload")

    args = parser.parse_args()

    user = args.user
    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    pipeline_configuration_file_path = args.pipeline_configuration_file_path
    run_id = args.run_id
    memory_profile_file_path = args.memory_profile_file_path
    data_archive_file_path = args.data_archive_file_path

    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
        
    memory_profile_upload_location = f"{pipeline_configuration.memory_profile_upload_url_prefix}{run_id}.profile"
    log.info(f"Uploading the memory profile from {memory_profile_file_path} to "
             f"{memory_profile_upload_location}...")
    with open(memory_profile_file_path, "rb") as f:
        google_cloud_utils.upload_file_to_blob(
            google_cloud_credentials_file_path, memory_profile_upload_location, f
        )

    data_archive_upload_location = f"{pipeline_configuration.data_archive_upload_url_prefix}{run_id}.tar.gzip"
    log.info(f"Uploading the data archive from {data_archive_file_path} to "
             f"{data_archive_upload_location}...")
    with open(data_archive_file_path, "rb") as f:
Code example #19
    args = parser.parse_args()

    user = args.user
    pipeline_configuration_file_path = args.pipeline_configuration_file_path

    messages_json_input_path = args.messages_json_input_path
    individuals_json_input_path = args.individuals_json_input_path
    automated_analysis_output_dir = args.automated_analysis_output_dir

    IOUtils.ensure_dirs_exist(automated_analysis_output_dir)
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/maps/regions")
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/maps/districts")
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/maps/mogadishu")
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/graphs")

    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    sys.setrecursionlimit(30000)
    # Read the messages dataset
    log.info(f"Loading the messages dataset from {messages_json_input_path}...")
    with open(messages_json_input_path) as f:
        messages = TracedDataJsonIO.import_jsonl_to_traced_data_iterable(f)
        for i in range(len(messages)):
            messages[i] = dict(messages[i].items())
    log.info(f"Loaded {len(messages)} messages")

    # Read the individuals dataset
Code example #20
                                                   IGNORE_STOP)
    # prepare for writing to a json file that can be uploaded to firebase
    daily_metrics_list = []
    for day in daily_metrics:
        day_metrics = daily_metrics[day]
        day_metrics["__id"] = day
        day_metrics["__reference_path"] = f"{DAILY_TAG_METRICS_COLLECTION_KEY}/{day}"
        day_metrics["__subcollections"] = []
        daily_metrics_list.append(day_metrics)
    daily_metrics_json = {DAILY_TAG_METRICS_COLLECTION_KEY: daily_metrics_list}

    daily_metrics_file = f"{OUTPUT_FOLDER}/nook-analysis-daily_metrics.json"
    with open(daily_metrics_file, mode="w", encoding='utf-8') as output_file:
        json.dump(daily_metrics_json, output_file, indent=2)
        log.info(f"compute_daily_tag_distribution saved to {daily_metrics_file}")

    total_counts = compute_total_counts(nook_conversations, IGNORE_STOP)
    # prepare for writing to a json file that can be uploaded to firebase
    total_counts["__id"] = TOTAL_COUNTS_METRICS_COLLECTION_KEY
    total_counts["__reference_path"] = f"{TOTAL_COUNTS_METRICS_COLLECTION_KEY}/{TOTAL_COUNTS_METRICS_COLLECTION_KEY}"
    total_counts["__subcollections"] = []
    total_counts_json = {TOTAL_COUNTS_METRICS_COLLECTION_KEY: [total_counts]}

    total_counts_file = f"{OUTPUT_FOLDER}/nook-analysis-total_counts.json"
    with open(total_counts_file, mode="w", encoding='utf-8') as output_file:
        json.dump(total_counts_json, output_file, indent=2)
        log.info(f"compute_total_counts saved to {total_counts_file}")

    needs_reply_metrics = compute_needs_reply_metrics(nook_conversations)
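
The __id / __reference_path / __subcollections preparation above is repeated for every metrics collection that gets uploaded. A hypothetical helper (not part of the original script) that captures the pattern:

    def as_firebase_doc(collection_key, doc_id, metrics):
        # Wrap a plain metrics dict in the bookkeeping fields the Firebase uploader expects.
        doc = dict(metrics)
        doc["__id"] = doc_id
        doc["__reference_path"] = f"{collection_key}/{doc_id}"
        doc["__subcollections"] = []
        return doc
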
Code example #21
File: migrate_firebase.py, Project: larksystems/nook
    if not reset_flag and not replay_flag:
        print(f"Unknown flag: {sys.argv[3]}")
        usage()
        exit(1)

if len(sys.argv) > 4:
    print(f"Unexpected argument {sys.argv[4]}")
    usage()
    exit(1)

doc_count = 0
skip_count = 0
migration_count = 0
firebase_client = firebase_util.init_firebase_client(crypto_token_path)
# migrate_collection("suggestedReplies",   migrate_nook_model.migrate_SuggestedReply)
migrate_collection("nook_conversations",
                   migrate_nook_model.migrate_Conversation)
migrate_collection("conversationTags", migrate_nook_model.migrate_Tag)
migrate_collection("messageTags", migrate_nook_model.migrate_Tag)

log.info(f"Migration complete")
log.info(f"  {migration_count} documents migrated")
log.info(f"  {skip_count} documents already migrated")
log.info(f"  {doc_count - skip_count - migration_count} documents unchanged")
if migrate_nook_model.warning_count == 0:
    log.info(f"  no warnings")
else:
    log.info(f"")
    log.info(f"  {migrate_nook_model.warning_count} WARNINGS")
    log.info(f"")
Code example #22
    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    pipeline_configuration_file_path = args.pipeline_configuration_file_path

    raw_data_dir = args.raw_data_dir
    prev_coded_dir_path = args.prev_coded_dir_path

    messages_json_output_path = args.messages_json_output_path
    individuals_json_output_path = args.individuals_json_output_path
    icr_output_dir = args.icr_output_dir
    coded_dir_path = args.coded_dir_path
    csv_by_message_output_path = args.csv_by_message_output_path
    csv_by_individual_output_path = args.csv_by_individual_output_path
    production_csv_output_path = args.production_csv_output_path

    # Load the pipeline configuration file
    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
        log.info(f"Running {pipeline_configuration.pipeline_name}")

    if pipeline_configuration.drive_upload is not None:
        log.info(f"Downloading Google Drive service account credentials...")
        credentials_info = json.loads(
            google_cloud_utils.download_blob_to_string(
                google_cloud_credentials_file_path, pipeline_configuration.
                drive_upload.drive_credentials_file_url))
        drive_client_wrapper.init_client_from_info(credentials_info)

    log.info("Loading the raw data...")
    data = LoadData.load_raw_data(user, raw_data_dir, pipeline_configuration)
Code example #23
                        metavar="pipeline-configuration-file",
                        help="Path to the pipeline configuration json file")
    parser.add_argument(
        "avf_uuid_file_path",
        metavar="avf-uuid-file-path",
        help="Path to a JSON file containing a list of AVF UUIDs that it is safe to trigger "
        "the demog flow to")

    args = parser.parse_args()

    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    pipeline_configuration_file_path = args.pipeline_configuration_file_path
    avf_uuid_file_path = args.avf_uuid_file_path

    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    log.info("Downloading Rapid Pro access token...")
    rapid_pro_token = google_cloud_utils.download_blob_to_string(
        google_cloud_credentials_file_path, rapid_pro_token_url).strip()

    rapid_pro = RapidProClient(rapid_pro_domain, rapid_pro_token)

    log.info("Downloading Firestore UUID Table credentials...")
    firestore_uuid_table_credentials = json.loads(
        google_cloud_utils.download_blob_to_string(
Code example #24
File: code_merge.py, Project: lukechurch/CodaV2
                    metavar="merged-code-id",
                    help="Id of the code to merge the source codes to")
parser.add_argument(
    "messages_output_file_path",
    metavar="messages-output-file-path",
    help="Path to the Coda messages file to write the messages to after performing the code merge")

args = parser.parse_args()
messages_input_file_path = args.messages_input_file_path
code_ids_to_merge = args.code_ids_to_merge
merged_code_id = args.merged_code_id
messages_output_file_path = args.messages_output_file_path

log.info(f"Loading Coda messages from '{messages_input_file_path}'...")
with open(messages_input_file_path) as f:
    messages = [Message.from_firebase_map(d) for d in json.load(f)]
log.info(f"Loaded {len(messages)} messages")

log.info(f"Performing merge ({code_ids_to_merge} -> '{merged_code_id}')...")
merged_count = 0  # A count of the number of labels that were remapped to the merged value, for sense-check logging
for msg in messages:
    processed_scheme_ids = set()
    for label in list(msg.labels):
        # Skip labels that are not the latest assignment under each scheme
        if label.scheme_id in processed_scheme_ids:
            continue
        processed_scheme_ids.add(label.scheme_id)

        if label.code_id in code_ids_to_merge:
Code example #25
    pipeline_configuration_file_path = args.pipeline_configuration_file_path

    raw_data_dir = args.raw_data_dir
    prev_coded_dir_path = args.prev_coded_dir_path

    auto_coding_json_output_path = args.auto_coding_json_output_path
    messages_json_output_path = args.messages_json_output_path
    individuals_json_output_path = args.individuals_json_output_path
    icr_output_dir = args.icr_output_dir
    coded_dir_path = args.coded_dir_path
    csv_by_message_output_path = args.csv_by_message_output_path
    csv_by_individual_output_path = args.csv_by_individual_output_path
    production_csv_output_path = args.production_csv_output_path

    # Load the pipeline configuration file
    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    log.info("Loading the raw data...")
    data = LoadData.load_raw_data(user, raw_data_dir, pipeline_configuration)

    log.info("Translating source Keys...")
    data = TranslateSourceKeys.translate_source_keys(user, data,
                                                     pipeline_configuration)

    if pipeline_configuration.move_ws_messages:
        log.info("Pre-filtering empty message objects...")
Code example #26
    user = args.user
    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    pipeline_configuration_file_path = args.pipeline_configuration_file_path

    messages_json_input_path = args.messages_json_input_path
    individuals_json_input_path = args.individuals_json_input_path
    automated_analysis_output_dir = args.automated_analysis_output_dir

    IOUtils.ensure_dirs_exist(automated_analysis_output_dir)
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/maps/counties")
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/maps/constituencies")
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/maps/urban")
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/graphs")

    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    # Read the messages dataset
    log.info(f"Loading the messages dataset from {messages_json_input_path}...")
    with open(messages_json_input_path) as f:
        messages = TracedDataJsonIO.import_jsonl_to_traced_data_iterable(f)
    log.info(f"Loaded {len(messages)} messages")

    # Read the individuals dataset
    log.info(
Code example #27
    parser.add_argument(
        "csv_output_file_path",
        metavar="csv-output-file-path",
        help="Path to a CSV file to write the contacts from the locations of interest to. "
        "Exported file is in a format suitable for direct upload to Rapid Pro")

    args = parser.parse_args()

    exclusion_list_file_path = args.exclusion_list_file_path
    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    pipeline_configuration_file_path = args.pipeline_configuration_file_path
    traced_data_paths = args.traced_data_paths
    csv_output_file_path = args.csv_output_file_path

    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    log.info("Downloading Firestore UUID Table credentials...")
    firestore_uuid_table_credentials = json.loads(google_cloud_utils.download_blob_to_string(
        google_cloud_credentials_file_path,
        pipeline_configuration.phone_number_uuid_table.firebase_credentials_file_url))

    phone_number_uuid_table = FirestoreUuidTable(
        pipeline_configuration.phone_number_uuid_table.table_name,
        firestore_uuid_table_credentials, "avf-phone-uuid-")
Code example #28
                        help="Path to the pipeline configuration json file")
    parser.add_argument("column_to_de_identify", metavar="column-to-de-identify",
                        help="Name of the column containing phone numbers to be de-identified")
    parser.add_argument("de_identified_csv_output_path", metavar="de-identified-csv-output-path",
                        help="Path to write the de-identified CSV to")

    args = parser.parse_args()

    csv_input_path = args.csv_input_path
    pipeline_configuration_file_path = args.pipeline_configuration_file_path
    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    column_to_de_identify = args.column_to_de_identify
    de_identified_csv_output_path = args.de_identified_csv_output_path

    # Read the settings from the configuration file
    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)

    log.info("Downloading Firestore UUID Table credentials...")
    firestore_uuid_table_credentials = json.loads(google_cloud_utils.download_blob_to_string(
        google_cloud_credentials_file_path,
        pipeline_configuration.phone_number_uuid_table.firebase_credentials_file_url
    ))

    phone_number_uuid_table = FirestoreUuidTable(
        pipeline_configuration.phone_number_uuid_table.table_name,
        firestore_uuid_table_credentials,
        "avf-phone-uuid-"
    )
    log.info("Initialised the Firestore UUID table")
Code example #29
    user = args.user
    pipeline_configuration_file_path = args.pipeline_configuration_file_path

    messages_json_input_path = args.messages_json_input_path
    individuals_json_input_path = args.individuals_json_input_path
    automated_analysis_output_dir = args.automated_analysis_output_dir

    IOUtils.ensure_dirs_exist(automated_analysis_output_dir)
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/maps/regions")
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/maps/districts")
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/maps/mogadishu")
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/graphs")

    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    sys.setrecursionlimit(30000)
    # Read the messages dataset
    log.info(f"Loading the messages dataset from {messages_json_input_path}...")
    with open(messages_json_input_path) as f:
        messages = TracedDataJsonIO.import_jsonl_to_traced_data_iterable(f)
        for i in range(len(messages)):
            messages[i] = dict(messages[i].items())
    log.info(f"Loaded {len(messages)} messages")
Code example #30
                        help="Directory to write the output graphs to")

    args = parser.parse_args()

    user = args.user
    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    pipeline_configuration_file_path = args.pipeline_configuration_file_path

    messages_json_input_path = args.messages_json_input_path
    individuals_json_input_path = args.individuals_json_input_path
    output_dir = args.output_dir

    IOUtils.ensure_dirs_exist(output_dir)
    IOUtils.ensure_dirs_exist(f"{output_dir}/graphs")

    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)

    if pipeline_configuration.drive_upload is not None:
        log.info(f"Downloading Google Drive service account credentials...")
        credentials_info = json.loads(
            google_cloud_utils.download_blob_to_string(
                google_cloud_credentials_file_path, pipeline_configuration.
                drive_upload.drive_credentials_file_url))
        drive_client_wrapper.init_client_from_info(credentials_info)

    # Read the messages dataset
    log.info(f"Loading the messages dataset from {messages_json_input_path}...")