Example 1
            "Code 4": None
        }
        with open(coda_output_path, "w") as f, open(prev_coda_path,
                                                    "r") as prev_f:
            TracedDataCodaIO.export_traced_data_iterable_to_coda_with_scheme(
                show_messages, show_message_key, scheme_keys, f, prev_f=prev_f)
    else:
        with open(coda_output_path, "w") as f:
            TracedDataCodaIO.export_traced_data_iterable_to_coda(
                show_messages, show_message_key, f)

    # Randomly select some messages to export for ICR
    random.seed(0)
    random.shuffle(show_messages)
    icr_messages = show_messages[:ICR_MESSAGES_COUNT]

    # Output ICR data to a CSV file
    run_id_key = "{} (Run ID) - {}".format(variable_name, flow_name)
    raw_text_key = "{} (Text) - {}".format(variable_name, flow_name)
    IOUtils.ensure_dirs_exist_for_file(icr_output_path)
    with open(icr_output_path, "w") as f:
        TracedDataCSVIO.export_traced_data_iterable_to_csv(
            icr_messages, f, headers=[run_id_key, raw_text_key])

    # Output to JSON
    IOUtils.ensure_dirs_exist_for_file(json_output_path)
    with open(json_output_path, "w") as f:
        TracedDataJsonIO.export_traced_data_iterable_to_json(show_messages,
                                                             f,
                                                             pretty_print=True)
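The ICR step above seeds the random number generator before shuffling, so the same messages are selected on every run. A minimal, self-contained sketch of that sampling pattern, using plain dicts as hypothetical stand-ins for the TracedData objects and an illustrative ICR_MESSAGES_COUNT:

import random

ICR_MESSAGES_COUNT = 2  # hypothetical count, for illustration only
show_messages = [{"Text": "first"}, {"Text": "second"}, {"Text": "third"}]

# Seeding before shuffling keeps the ICR sample reproducible across runs.
random.seed(0)
random.shuffle(show_messages)
icr_messages = show_messages[:ICR_MESSAGES_COUNT]
print(icr_messages)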
Example 2
    # Load data from JSON file
    with open(input_path_messages, "r") as f:
        messages_data = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

    # Load data from JSON file
    with open(input_path_adverts, "r") as f:
        adverts_data = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

    # Map "QUESTION_R" => "Message"
    for td in adverts_data:
        assert "QUESTION_R" in td.keys()
        td.append_data(
            {
                "Message": td["QUESTION_R"],
                "Date": td["start_date"],
                "Group": group
            }, Metadata(user, Metadata.get_call_location(), time.time()))

    merged_data = list(messages_data)
    merged_data.extend(adverts_data)

    # Write json output
    if os.path.dirname(json_output_path) != "" and not os.path.exists(
            os.path.dirname(json_output_path)):
        os.makedirs(os.path.dirname(json_output_path))
    with open(json_output_path, "w") as f:
        TracedDataJsonIO.export_traced_data_iterable_to_json(merged_data,
                                                             f,
                                                             pretty_print=True)
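The directory-creation check at the end of this example can be written more compactly with os.makedirs(..., exist_ok=True). A small sketch, assuming json_output_path is a file path as above (the path shown is hypothetical):

import os

json_output_path = "output/merged_messages.json"  # hypothetical path
# The "or '.'" guards against a bare filename with no directory component.
os.makedirs(os.path.dirname(json_output_path) or ".", exist_ok=True)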
Example 3
                raw_runs = rapid_pro.update_raw_runs_with_latest_modified(
                    flow_id, raw_runs, raw_export_log_file=raw_runs_log_file)
            except FileNotFoundError:
                log.info(f"File '{raw_runs_path}' not found, will fetch all runs from the Rapid Pro server for flow '{flow}'")
                raw_runs = rapid_pro.get_raw_runs_for_flow_id(flow_id, raw_export_log_file=raw_runs_log_file)

        # Fetch the latest contacts from Rapid Pro.
        with open(contacts_log_path, "a") as raw_contacts_log_file:
            raw_contacts = rapid_pro.update_raw_contacts_with_latest_modified(raw_contacts,
                                                                              raw_export_log_file=raw_contacts_log_file)

        # Convert the runs to TracedData.
        traced_runs = rapid_pro.convert_runs_to_traced_data(
            user, raw_runs, raw_contacts, phone_number_uuid_table, pipeline_configuration.rapid_pro_test_contact_uuids)

        log.info(f"Saving {len(raw_runs)} raw runs to {raw_runs_path}...")
        with open(raw_runs_path, "w") as raw_runs_file:
            json.dump([run.serialize() for run in raw_runs], raw_runs_file)
        log.info(f"Saved {len(raw_runs)} raw runs")

        log.info(f"Saving {len(traced_runs)} traced runs to {traced_runs_output_path}...")
        IOUtils.ensure_dirs_exist_for_file(traced_runs_output_path)
        with open(traced_runs_output_path, "w") as traced_runs_output_file:
            TracedDataJsonIO.export_traced_data_iterable_to_json(traced_runs, traced_runs_output_file, pretty_print=True)
        log.info(f"Saved {len(traced_runs)} traced runs")

    log.info(f"Saving {len(raw_contacts)} raw contacts to file '{raw_contacts_path}'...")
    with open(raw_contacts_path, "w") as raw_contacts_file:
        json.dump([contact.serialize() for contact in raw_contacts], raw_contacts_file)
    log.info(f"Saved {len(raw_contacts)} contacts")
            # Create a set of all the keys appearing in the data
            keys = {key for message in messages for key in message.keys()}
            keys = list(keys)
            keys.remove("avf_phone_id")

            # Add group name to each key
            group_name = filename.split("_with_id.json")[0]
            create_unique_keys(messages, keys, group_name)

        # Output updated td-s
        message_output_path = os.path.join(
            messages_traced_json_output_path,
            "{}_updated_keys.json".format(group_name))
        with open(message_output_path, "w") as f:
            TracedDataJsonIO.export_traced_data_iterable_to_json(
                messages, f, pretty_print=True)

    # Load surveys
    for filename in os.listdir(surveys_input_path):
        with open(os.path.join(surveys_input_path, filename)) as f:
            surveys = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

            # Create a set of all the keys appearing in the data
            keys = {key for survey in surveys for key in survey.keys()}
            keys = list(keys)
            keys.remove("avf_phone_id")

            # Add group name to each key
            group_name = filename.split("_with_id.json")[0]
            create_unique_keys(surveys, keys, group_name)
                               Metadata(user, Metadata.get_call_location(),
                                        time.time()))

    # Mark missing entries in the raw data as true missing
    for td in all_survey_data:
        for key in cleaning_plan:
            if key not in td:
                td.append_data({key: Codes.TRUE_MISSING},
                               Metadata(user, Metadata.get_call_location(),
                                        time.time()))

    # Write json output
    IOUtils.ensure_dirs_exist_for_file(json_output_path)
    with open(json_output_path, "w") as f:
        TracedDataJsonIO.export_traced_data_iterable_to_json(all_survey_data,
                                                             f,
                                                             pretty_print=True)

    # Output for manual verification + coding
    IOUtils.ensure_dirs_exist(coded_output_path)
    # TODO: Tidy up the usage of keys here once the format of the keys has been updated.
    for key in cleaning_plan.keys():
        coded_output_file_path = path.join(coded_output_path,
                                           "{}.csv".format(key.split(" ")[0]))
        prev_coded_output_file_path = path.join(
            prev_coded_path, "{}_coded.csv".format(key.split(" ")[0]))

        if os.path.exists(prev_coded_output_file_path):
            with open(coded_output_file_path,
                      "w") as f, open(prev_coded_output_file_path,
                                      "r") as prev_f: