def fetch_from_recovery_csv(user, google_cloud_credentials_file_path,
                            raw_data_dir, phone_number_uuid_table,
                            recovery_csv_source):
    log.info("Fetching data from a Recovery CSV...")
    for blob_url in recovery_csv_source.activation_flow_urls + recovery_csv_source.survey_flow_urls:
        # Take the name between the last '/' and the '.csv' extension.
        flow_name = blob_url.split('/')[-1].split('.')[0]
        traced_runs_output_path = f"{raw_data_dir}/{flow_name}.jsonl"
        if os.path.exists(traced_runs_output_path):
            log.info(
                f"File '{traced_runs_output_path}' for blob '{blob_url}' already exists; skipping download"
            )
            continue

        log.info(f"Downloading recovered data from '{blob_url}'...")
        raw_csv_string = StringIO(
            google_cloud_utils.download_blob_to_string(
                google_cloud_credentials_file_path, blob_url))
        raw_data = list(csv.DictReader(raw_csv_string))
        log.info(f"Downloaded {len(raw_data)} recovered messages")

        log.info("Converting the recovered messages to TracedData...")
        traced_runs = []
        for i, row in enumerate(raw_data):
            # Timestamps in recovery CSVs appear both with and without a
            # seconds component, so choose the parse format by length.
            raw_date = row["ReceivedOn"]
            if len(raw_date) == len("dd/mm/YYYY HH:MM"):
                parsed_raw_date = datetime.strptime(raw_date, "%d/%m/%Y %H:%M")
            else:
                parsed_raw_date = datetime.strptime(raw_date, "%d/%m/%Y %H:%M:%S")
            localized_date = pytz.timezone("Africa/Mogadishu").localize(parsed_raw_date)

            assert row["Sender"].startswith("avf-phone-uuid-"), \
                f"The 'Sender' column for '{blob_url}' contains an item that has not been de-identified " \
                f"into Africa's Voices Foundation's de-identification format. De-identification can be " \
                f"performed with de_identify_csv.py."

            d = {
                "avf_phone_id": row["Sender"],
                "message": row["Message"],
                "received_on": localized_date.isoformat(),
                "run_id": SHAUtils.sha_dict(row)
            }
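            # run_id is derived by hashing the whole row, so the same CSV row
            # always maps to the same id and can be de-duplicated downstream.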

            traced_runs.append(
                TracedData(
                    d,
                    Metadata(user, Metadata.get_call_location(),
                             TimeUtils.utc_now_as_iso_string())))
        log.info("Converted the recovered messages to TracedData")

        log.info(
            f"Exporting {len(traced_runs)} TracedData items to {traced_runs_output_path}..."
        )
        IOUtils.ensure_dirs_exist_for_file(traced_runs_output_path)
        with open(traced_runs_output_path, "w") as f:
            TracedDataJsonIO.export_traced_data_iterable_to_jsonl(
                traced_runs, f)
        log.info(f"Exported TracedData")
Example #2
    def test_ensure_dirs_exist_for_file(self):
        IOUtils.ensure_dirs_exist_for_file(path.join(self.test_dir, "x/y/test.txt"))
        self.assertTrue(path.exists(path.join(self.test_dir, "x/y")))
        self.assertFalse(path.exists(path.join(self.test_dir, "x/y/test.txt")))

        # Test method doesn't fail if no parent directories provided
        IOUtils.ensure_dirs_exist_for_file(path.join(self.test_dir, "test.txt"))
        IOUtils.ensure_dirs_exist_for_file("test.txt")
def fetch_from_rapid_pro(user, google_cloud_credentials_file_path,
                         raw_data_dir, phone_number_uuid_table,
                         rapid_pro_source):
    log.info("Fetching data from Rapid Pro...")
    log.info("Downloading Rapid Pro access token...")
    rapid_pro_token = google_cloud_utils.download_blob_to_string(
        google_cloud_credentials_file_path,
        rapid_pro_source.token_file_url).strip()

    rapid_pro = RapidProClient(rapid_pro_source.domain, rapid_pro_token)

    # Load the previous export of contacts if it exists, otherwise fetch all contacts from Rapid Pro.
    raw_contacts_path = f"{raw_data_dir}/{rapid_pro_source.contacts_file_name}_raw.json"
    contacts_log_path = f"{raw_data_dir}/{rapid_pro_source.contacts_file_name}_log.jsonl"
    try:
        log.info(f"Loading raw contacts from file '{raw_contacts_path}'...")
        with open(raw_contacts_path) as raw_contacts_file:
            raw_contacts = [
                Contact.deserialize(contact_json)
                for contact_json in json.load(raw_contacts_file)
            ]
        log.info(f"Loaded {len(raw_contacts)} contacts")
    except FileNotFoundError:
        log.info(
            f"File '{raw_contacts_path}' not found, will fetch all contacts from the Rapid Pro server"
        )
        with open(contacts_log_path, "a") as contacts_log_file:
            raw_contacts = rapid_pro.get_raw_contacts(
                raw_export_log_file=contacts_log_file)

    # Download all the runs for each of the radio shows
    for flow in rapid_pro_source.activation_flow_names + rapid_pro_source.survey_flow_names:
        runs_log_path = f"{raw_data_dir}/{flow}_log.jsonl"
        raw_runs_path = f"{raw_data_dir}/{flow}_raw.json"
        traced_runs_output_path = f"{raw_data_dir}/{flow}.jsonl"
        log.info(f"Exporting flow '{flow}' to '{traced_runs_output_path}'...")

        flow_id = rapid_pro.get_flow_id(flow)

        # Load the previous export of runs for this flow, and update them with the newest runs.
        # If there is no previous export for this flow, fetch all the runs from Rapid Pro.
        with open(runs_log_path, "a") as raw_runs_log_file:
            try:
                log.info(f"Loading raw runs from file '{raw_runs_path}'...")
                with open(raw_runs_path) as raw_runs_file:
                    raw_runs = [
                        Run.deserialize(run_json)
                        for run_json in json.load(raw_runs_file)
                    ]
                log.info(f"Loaded {len(raw_runs)} runs")
                raw_runs = rapid_pro.update_raw_runs_with_latest_modified(
                    flow_id,
                    raw_runs,
                    raw_export_log_file=raw_runs_log_file,
                    ignore_archives=True)
            except FileNotFoundError:
                log.info(
                    f"File '{raw_runs_path}' not found, will fetch all runs from the Rapid Pro server for flow '{flow}'"
                )
                raw_runs = rapid_pro.get_raw_runs_for_flow_id(
                    flow_id, raw_export_log_file=raw_runs_log_file)

        # Fetch the latest contacts from Rapid Pro.
        with open(contacts_log_path, "a") as raw_contacts_log_file:
            raw_contacts = rapid_pro.update_raw_contacts_with_latest_modified(
                raw_contacts, raw_export_log_file=raw_contacts_log_file)

        # Convert the runs to TracedData.
        traced_runs = rapid_pro.convert_runs_to_traced_data(
            user, raw_runs, raw_contacts, phone_number_uuid_table,
            rapid_pro_source.test_contact_uuids)

        log.info(f"Saving {len(raw_runs)} raw runs to {raw_runs_path}...")
        with open(raw_runs_path, "w") as raw_runs_file:
            json.dump([run.serialize() for run in raw_runs], raw_runs_file)
        log.info(f"Saved {len(raw_runs)} raw runs")

        log.info(
            f"Saving {len(traced_runs)} traced runs to {traced_runs_output_path}..."
        )
        IOUtils.ensure_dirs_exist_for_file(traced_runs_output_path)
        with open(traced_runs_output_path, "w") as traced_runs_output_file:
            TracedDataJsonIO.export_traced_data_iterable_to_jsonl(
                traced_runs, traced_runs_output_file)
        log.info(f"Saved {len(traced_runs)} traced runs")

    log.info(
        f"Saving {len(raw_contacts)} raw contacts to file '{raw_contacts_path}'..."
    )
    with open(raw_contacts_path, "w") as raw_contacts_file:
        json.dump([contact.serialize() for contact in raw_contacts],
                  raw_contacts_file)
    log.info(f"Saved {len(raw_contacts)} contacts")
Example #4
        td.append_data({eat_key: eat_time},
                       Metadata(user, Metadata.get_call_location(),
                                time.time()))

        if START_TIME <= utc_time <= END_TIME:
            inside_time_window.append(td)
        else:
            print("Dropping: {}".format(utc_time))

    print("{}:{} Dropped as outside time/Total".format(
        len(show_messages) - len(inside_time_window), len(show_messages)))
    show_messages = inside_time_window

    # Output messages to a CSV file
    IOUtils.ensure_dirs_exist_for_file(csv_output_path)
    run_id_key = "{} (Run ID) - {}".format(variable_name, flow_name)
    raw_text_key = "{} (Text) - {}".format(variable_name, flow_name)
    with open(csv_output_path, "w") as f:
        TracedDataCSVIO.export_traced_data_iterable_to_csv(
            show_messages,
            f,
            headers=["avf_phone_id", run_id_key, raw_text_key])

    # Output messages to Coda
    IOUtils.ensure_dirs_exist_for_file(coda_output_path)
    if os.path.exists(prev_coda_path):
        # TODO: Modifying this line once the coding frame has been developed to include lots of Nones feels a bit
        # TODO: cumbersome. We could instead modify export_traced_data_iterable_to_coda to support a prev_f argument.
        # TODO: Modify by adding code scheme keys once they are ready
        scheme_keys = {
    data = ProductionFile.generate(data, production_csv_output_path)

    if pipeline_run_mode == "all-stages":
        log.info("Running post labelling pipeline stages...")

        log.info("Applying Manual Codes from Coda...")
        data = ApplyManualCodes.apply_manual_codes(user, data,
                                                   prev_coded_dir_path)

        log.info("Generating Analysis CSVs...")
        messages_data, individuals_data = AnalysisFile.generate(
            user, data, csv_by_message_output_path,
            csv_by_individual_output_path)

        log.info("Writing messages TracedData to file...")
        IOUtils.ensure_dirs_exist_for_file(messages_json_output_path)
        with open(messages_json_output_path, "w") as f:
            TracedDataJsonIO.export_traced_data_iterable_to_jsonl(
                messages_data, f)

        log.info("Writing individuals TracedData to file...")
        IOUtils.ensure_dirs_exist_for_file(individuals_json_output_path)
        with open(individuals_json_output_path, "w") as f:
            TracedDataJsonIO.export_traced_data_iterable_to_jsonl(
                individuals_data, f)
    else:
        assert pipeline_run_mode == "auto-code-only", "pipeline run mode must be either auto-code-only or all-stages"
        log.info("Writing Auto-Coding TracedData to file...")
        IOUtils.ensure_dirs_exist_for_file(auto_coding_json_output_path)
        with open(auto_coding_json_output_path, "w") as f:
            TracedDataJsonIO.export_traced_data_iterable_to_jsonl(data, f)
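
Since pipeline_run_mode only ever takes the values all-stages or auto-code-only, the constraint could also be enforced at argument-parsing time instead of with a late assert. A hedged sketch using argparse (the argument name is assumed to match the variable above):

import argparse

parser = argparse.ArgumentParser(description="Run the pipeline")
parser.add_argument("pipeline_run_mode", choices=["all-stages", "auto-code-only"],
                    help="Whether to run every stage or stop after auto-coding")
pipeline_run_mode = parser.parse_args().pipeline_run_mode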
Example #6
    data = ProductionFile.generate(data, production_csv_output_path)

    log.info("Auto Coding Surveys...")
    data = AutoCodeSurveys.auto_code_surveys(user, data,
                                             phone_number_uuid_table,
                                             coded_dir_path)

    log.info("Applying Manual Codes from Coda...")
    data = ApplyManualCodes.apply_manual_codes(user, data, prev_coded_dir_path)

    log.info("Generating Analysis CSVs...")
    data = AnalysisFile.generate(user, data, csv_by_message_output_path,
                                 csv_by_individual_output_path)

    log.info("Writing TracedData to file...")
    IOUtils.ensure_dirs_exist_for_file(json_output_path)
    with open(json_output_path, "w") as f:
        TracedDataJsonIO.export_traced_data_iterable_to_json(data,
                                                             f,
                                                             pretty_print=True)

    # Upload to Google Drive, if requested.
    # Note: This should happen as late as possible in order to reduce the risk of the remainder of the pipeline failing
    # after a Drive upload has occurred. Failures could result in inconsistent outputs or outputs with no
    # traced data log.
    if pipeline_configuration.drive_upload is not None:
        log.info("Uploading CSVs to Google Drive...")

        production_csv_drive_dir = os.path.dirname(
            pipeline_configuration.drive_upload.production_upload_path)
        production_csv_drive_file_name = os.path.basename(
def export_participation_maps(individuals, consent_withdrawn_field, theme_configurations, admin_region_configuration,
                              mapper, file_prefix, export_by_theme=True):
    """
    Computes and exports a map showing participation by administrative region.

    Optionally exports maps showing the participation broken down by theme.

    :param individuals: Individuals to export participation maps for.
    :type individuals: iterable of core_data_modules.traced_data.TracedData
    :param consent_withdrawn_field: Field in each individual's TracedData which records whether consent was withdrawn.
    :type consent_withdrawn_field: str
    :param theme_configurations: Configurations for the theme datasets.
    :type theme_configurations: iterable of core_data_modules.analysis.AnalysisConfiguration
    :param admin_region_configuration: Configuration for the administrative region labels, used to count engagement
                                       by admin region for each map.
    :type admin_region_configuration: core_data_modules.analysis.AnalysisConfiguration
    :param mapper: A function which, given participation frequencies and a file name to export to, renders a map
                   of those frequencies to disk. For standard maps, see the mapper functions provided in
                   `core_data_modules.analysis.mapping`.
    :type mapper: func of (dict of str -> int, str) -> void
    :param file_prefix: The prefix of the path to write the files to, e.g. "/data/maps/mogadishu_"
    :type file_prefix: str
    :param export_by_theme: Whether to export a map of participation for each theme.
    :type export_by_theme: bool
    """
    IOUtils.ensure_dirs_exist_for_file(file_prefix)

    # Export a map showing the total participations
    log.info(f"Exporting map to '{file_prefix}total_participants.png'...")
    region_distributions = theme_distributions.compute_theme_distributions(
        individuals,
        consent_withdrawn_field,
        [admin_region_configuration],
        []
    )[admin_region_configuration.dataset_name]

    total_frequencies = dict()
    for region_code in _normal_codes(admin_region_configuration.code_scheme.codes):
        total_frequencies[region_code.string_value] = region_distributions[region_code.string_value]["Total Participants"]

    mapper(total_frequencies, f"{file_prefix}total_participants.png")

    if not export_by_theme:
        return

    # For each theme_configuration, export:
    #  1. A map showing the totals for individuals relevant to that episode.
    #  2. A map showing the totals for each theme.
    distributions = theme_distributions.compute_theme_distributions(
        individuals, consent_withdrawn_field,
        theme_configurations,
        [admin_region_configuration]
    )

    for config in theme_configurations:
        map_index = 1
        log.info(f"Exporting map to '{file_prefix}{config.dataset_name}_{map_index}_total_relevant.png'...")
        config_total_frequencies = dict()
        for region_code in _normal_codes(admin_region_configuration.code_scheme.codes):
            config_total_frequencies[region_code.string_value] = distributions[config.dataset_name][
                "Total Relevant Participants"][f"{admin_region_configuration.dataset_name}:{region_code.string_value}"]

        mapper(config_total_frequencies, f"{file_prefix}{config.dataset_name}_{map_index}_total_relevant.png")

        for theme in _normal_codes(config.code_scheme.codes):
            map_index += 1
            log.info(f"Exporting map to '{file_prefix}{config.dataset_name}_{map_index}_{theme.string_value}.png'...")
            theme_frequencies = dict()
            for region_code in _normal_codes(admin_region_configuration.code_scheme.codes):
                theme_frequencies[region_code.string_value] = distributions[config.dataset_name][theme.string_value][
                    f"{admin_region_configuration.dataset_name}:{region_code.string_value}"]

            mapper(theme_frequencies, f"{file_prefix}{config.dataset_name}_{map_index}_{theme.string_value}.png")
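
Any callable with the documented (dict of str -> int, str) -> void signature can serve as the mapper. As an illustrative stand-in, the hypothetical csv_mapper below writes the frequencies to a CSV instead of rendering an image; note the file names passed in end in .png, so a real mapper should draw a map:

import csv


def csv_mapper(frequencies, file_path):
    # Same signature as the mapping functions in core_data_modules.analysis.mapping,
    # but writes a region -> participants table instead of rendering an image.
    with open(file_path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["region", "participants"])
        for region, participants in frequencies.items():
            writer.writerow([region, participants])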
Example #8
def fetch_from_rapid_pro(user, google_cloud_credentials_file_path, raw_data_dir, phone_number_uuid_table,
                         rapid_pro_source):
    log.info("Fetching data from Rapid Pro...")
    log.info("Downloading Rapid Pro access token...")
    rapid_pro_token = google_cloud_utils.download_blob_to_string(
        google_cloud_credentials_file_path, rapid_pro_source.token_file_url).strip()

    rapid_pro = RapidProClient(rapid_pro_source.domain, rapid_pro_token)

    # Load the previous export of contacts if it exists, otherwise fetch all contacts from Rapid Pro.
    raw_contacts_path = f"{raw_data_dir}/{rapid_pro_source.contacts_file_name}_raw.json"
    contacts_log_path = f"{raw_data_dir}/{rapid_pro_source.contacts_file_name}_log.jsonl"
    try:
        log.info(f"Loading raw contacts from file '{raw_contacts_path}'...")
        with open(raw_contacts_path) as raw_contacts_file:
            raw_contacts = [Contact.deserialize(contact_json) for contact_json in json.load(raw_contacts_file)]
        log.info(f"Loaded {len(raw_contacts)} contacts")
    except FileNotFoundError:
        log.info(f"File '{raw_contacts_path}' not found, will fetch all contacts from the Rapid Pro server")
        with open(contacts_log_path, "a") as contacts_log_file:
            raw_contacts = rapid_pro.get_raw_contacts(raw_export_log_file=contacts_log_file)

    # Download all the runs for each of the radio shows
    for flow in rapid_pro_source.activation_flow_names + rapid_pro_source.survey_flow_names:
        runs_log_path = f"{raw_data_dir}/{flow}_log.jsonl"
        raw_runs_path = f"{raw_data_dir}/{flow}_raw.json"
        traced_runs_output_path = f"{raw_data_dir}/{flow}.jsonl"
        log.info(f"Exporting flow '{flow}' to '{traced_runs_output_path}'...")

        flow_id = rapid_pro.get_flow_id(flow)

        # Load the previous export of runs for this flow, and update them with the newest runs.
        # If there is no previous export for this flow, fetch all the runs from Rapid Pro.
        with open(runs_log_path, "a") as raw_runs_log_file:
            try:
                log.info(f"Loading raw runs from file '{raw_runs_path}'...")
                with open(raw_runs_path) as raw_runs_file:
                    raw_runs = [Run.deserialize(run_json) for run_json in json.load(raw_runs_file)]
                log.info(f"Loaded {len(raw_runs)} runs")
                raw_runs = rapid_pro.update_raw_runs_with_latest_modified(
                    flow_id, raw_runs, raw_export_log_file=raw_runs_log_file, ignore_archives=True)
            except FileNotFoundError:
                log.info(f"File '{raw_runs_path}' not found, will fetch all runs from the Rapid Pro server for flow '{flow}'")
                raw_runs = rapid_pro.get_raw_runs_for_flow_id(flow_id, raw_export_log_file=raw_runs_log_file)

        # Fetch the latest contacts from Rapid Pro.
        with open(contacts_log_path, "a") as raw_contacts_log_file:
            raw_contacts = rapid_pro.update_raw_contacts_with_latest_modified(raw_contacts,
                                                                              raw_export_log_file=raw_contacts_log_file)

        # Convert the runs to TracedData.
        traced_runs = rapid_pro.convert_runs_to_traced_data(
            user, raw_runs, raw_contacts, phone_number_uuid_table, rapid_pro_source.test_contact_uuids)

        if flow in rapid_pro_source.activation_flow_names:
            # Append the Rapid Pro source name to each run.
            # Only do this for activation flows, because that is the only place where the source is interesting.
            # Also, demographics may come from either instance, which would cause problems downstream.
            for td in traced_runs:
                td.append_data({
                    "source_raw": rapid_pro_source.source_name,
                    "source_coded": CleaningUtils.make_label_from_cleaner_code(
                        CodeSchemes.SOURCE, CodeSchemes.SOURCE.get_code_with_match_value(rapid_pro_source.source_name),
                        Metadata.get_call_location()
                    ).to_dict()
                }, Metadata(user, Metadata.get_call_location(), TimeUtils.utc_now_as_iso_string()))

        log.info(f"Saving {len(raw_runs)} raw runs to {raw_runs_path}...")
        with open(raw_runs_path, "w") as raw_runs_file:
            json.dump([run.serialize() for run in raw_runs], raw_runs_file)
        log.info(f"Saved {len(raw_runs)} raw runs")

        log.info(f"Saving {len(traced_runs)} traced runs to {traced_runs_output_path}...")
        IOUtils.ensure_dirs_exist_for_file(traced_runs_output_path)
        with open(traced_runs_output_path, "w") as traced_runs_output_file:
            TracedDataJsonIO.export_traced_data_iterable_to_jsonl(traced_runs, traced_runs_output_file)
        log.info(f"Saved {len(traced_runs)} traced runs")

    log.info(f"Saving {len(raw_contacts)} raw contacts to file '{raw_contacts_path}'...")
    with open(raw_contacts_path, "w") as raw_contacts_file:
        json.dump([contact.serialize() for contact in raw_contacts], raw_contacts_file)
    log.info(f"Saved {len(raw_contacts)} contacts")
def fetch_from_facebook(user, google_cloud_credentials_file_path, raw_data_dir,
                        facebook_uuid_table, facebook_source):
    log.info("Fetching data from Facebook...")
    log.info("Downloading Facebook access token...")
    facebook_token = google_cloud_utils.download_blob_to_string(
        google_cloud_credentials_file_path,
        facebook_source.token_file_url).strip()

    facebook = FacebookClient(facebook_token)

    for dataset in facebook_source.datasets:
        log.info(f"Exporting comments for dataset {dataset.name}...")
        raw_comments_output_path = f"{raw_data_dir}/{dataset.name}_raw.json"
        traced_comments_output_path = f"{raw_data_dir}/{dataset.name}.jsonl"

        # Download all the comments on all the posts in this dataset, logging the raw data returned by Facebook.
        raw_comments = []
        for post_id in dataset.post_ids:
            comments_log_path = f"{raw_data_dir}/{post_id}_comments_log.jsonl"
            with open(comments_log_path, "a") as raw_comments_log_file:
                post_comments = facebook.get_all_comments_on_post(
                    post_id,
                    raw_export_log_file=raw_comments_log_file,
                    fields=[
                        "from{id}", "parent", "attachments", "created_time",
                        "message"
                    ])

            # Download the post and add it as context to all the comments. Adding a reference to the post under
            # which a comment was made enables downstream features such as post-type labelling and comment context
            # in Coda, as well as allowing us to track how many comments were made on each post.
            post = facebook.get_post(post_id, fields=["attachments"])
            for comment in post_comments:
                comment["post"] = post

            raw_comments.extend(post_comments)

        # Facebook only returns a parent if the comment is a reply to another comment.
        # If there is no parent, set it to an empty dict so downstream code can rely on the key existing.
        for comment in raw_comments:
            if "parent" not in comment:
                comment["parent"] = {}

        # Convert the comments to TracedData.
        traced_comments = facebook.convert_facebook_comments_to_traced_data(
            user, dataset.name, raw_comments, facebook_uuid_table)

        # Export to disk.
        log.info(
            f"Saving {len(raw_comments)} raw comments to {raw_comments_output_path}..."
        )
        IOUtils.ensure_dirs_exist_for_file(raw_comments_output_path)
        with open(raw_comments_output_path, "w") as raw_comments_output_file:
            json.dump(raw_comments, raw_comments_output_file)
        log.info(f"Saved {len(raw_comments)} raw comments")

        log.info(
            f"Saving {len(traced_comments)} traced comments to {traced_comments_output_path}..."
        )
        IOUtils.ensure_dirs_exist_for_file(traced_comments_output_path)
        with open(traced_comments_output_path, "w") as traced_comments_output_file:
            TracedDataJsonIO.export_traced_data_iterable_to_jsonl(
                traced_comments, traced_comments_output_file)
        log.info(f"Saved {len(traced_comments)} traced comments")
Example #10
                raw_runs = rapid_pro.update_raw_runs_with_latest_modified(
                    flow_id, raw_runs, raw_export_log_file=raw_runs_log_file)
            except FileNotFoundError:
                log.info(f"File '{raw_runs_path}' not found, will fetch all runs from the Rapid Pro server for flow '{flow}'")
                raw_runs = rapid_pro.get_raw_runs_for_flow_id(flow_id, raw_export_log_file=raw_runs_log_file)

        # Fetch the latest contacts from Rapid Pro.
        with open(contacts_log_path, "a") as raw_contacts_log_file:
            raw_contacts = rapid_pro.update_raw_contacts_with_latest_modified(raw_contacts,
                                                                              raw_export_log_file=raw_contacts_log_file)

        # Convert the runs to TracedData.
        traced_runs = rapid_pro.convert_runs_to_traced_data(
            user, raw_runs, raw_contacts, phone_number_uuid_table, pipeline_configuration.rapid_pro_test_contact_uuids)

        log.info(f"Saving {len(raw_runs)} raw runs to {raw_runs_path}...")
        with open(raw_runs_path, "w") as raw_runs_file:
            json.dump([run.serialize() for run in raw_runs], raw_runs_file)
        log.info(f"Saved {len(raw_runs)} raw runs")

        log.info(f"Saving {len(traced_runs)} traced runs to {traced_runs_output_path}...")
        IOUtils.ensure_dirs_exist_for_file(traced_runs_output_path)
        with open(traced_runs_output_path, "w") as traced_runs_output_file:
            TracedDataJsonIO.export_traced_data_iterable_to_json(traced_runs, traced_runs_output_file, pretty_print=True)
        log.info(f"Saved {len(traced_runs)} traced runs")

    log.info(f"Saving {len(raw_contacts)} raw contacts to file '{raw_contacts_path}'...")
    with open(raw_contacts_path, "w") as raw_contacts_file:
        json.dump([contact.serialize() for contact in raw_contacts], raw_contacts_file)
    log.info(f"Saved {len(raw_contacts)} contacts")