"json_output",
        metavar="json-output",
        help=
        "Path to write results of merging to, as a serialised TracedData JSON file",
        nargs=1)

    args = parser.parse_args()

    # argparse with nargs=1 stores each value as a single-element list,
    # so unpack the first element of each argument.
    user = args.user[0]
    input_path_messages = args.input_messages[0]
    group = args.group[0]
    input_path_adverts = args.input_adverts[0]
    json_output_path = args.json_output[0]

    # Load the messages dataset from its serialised TracedData JSON file
    with open(input_path_messages, "r") as f:
        messages_data = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

    # Load the adverts dataset from its serialised TracedData JSON file
    with open(input_path_adverts, "r") as f:
        adverts_data = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

    # Normalise the adverts data to the messages schema: map
    # "QUESTION_R" => "Message" and "start_date" => "Date", and tag each item
    # with the group name, recording provenance via Metadata.
    for td in adverts_data:
        # Membership test directly on the mapping, not on .keys()
        assert "QUESTION_R" in td, "advert item missing 'QUESTION_R' key"
        td.append_data(
            {
                "Message": td["QUESTION_R"],
                "Date": td["start_date"],
                "Group": group
            }, Metadata(user, Metadata.get_call_location(), time.time()))
    # Load the fixup table. The original `json.load(open(...))` never closed
    # the file handle; a `with` block releases it promptly.
    with open(fixup_table_path, 'r') as f:
        fixup_table = json.load(f)

    # Collect the paths of every code scheme file in the input folder
    code_scheme_paths_list = [
        os.path.join(code_schemes_in_folder, f)
        for f in os.listdir(code_schemes_in_folder)
        if os.path.isfile(os.path.join(code_schemes_in_folder, f))
    ]

    # Load each code scheme, keyed by its "SchemeID" field
    code_schemes = {}
    for code_scheme_path in code_scheme_paths_list:
        with open(code_scheme_path, 'r') as scheme_file:
            scheme = json.load(scheme_file)
        code_schemes[scheme["SchemeID"]] = scheme

    # Load the demographics survey TracedData. The original open(...) was
    # never closed; elsewhere in this pipeline the import function's result
    # is used after the file is closed, so a `with` block is safe here too.
    with open(os.path.join(demogs_in_folder, "Demog_survey_with_id.json"),
              'r') as f:
        demog_td = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

    # Remap each demog response into its Coda dataset
    for msg in demog_td:
        for demog_map in demog_maps:
            # `message_id` renamed from `id` to avoid shadowing the builtin
            message_id = msg[demog_map["MessageId"]]
            dataset_in_principle = demog_map["Coda-Dataset"]
            remap(message_id, dataset_in_principle, fixup_table, code_schemes)

    # Remap the messages: load each message file named in message_maps.
    # NOTE(review): the open(...) here is never closed — consider a `with`
    # block. (The loop body continues beyond this excerpt, so the code is
    # left untouched.)
    for message_map in message_maps:
        # print ("Loading message_map: {}".format(message_map["FileName"]))
        messages_td = TracedDataJsonIO.import_json_to_traced_data_iterable(
            open(os.path.join(messages_in_folder, message_map["FileName"]),
                 'r'))
    # Fixed typos in the help text ("updted" -> "updated",
    # "TraceData" -> "TracedData")
    parser.add_argument("demogs_traced_json_output_path",
                        help="Path to updated survey TracedData JSON")

    args = parser.parse_args()
    # Unpack the command-line arguments (plain strings here, not nargs lists)
    user = args.user
    messages_input_path = args.messages_input_path
    surveys_input_path = args.surveys_input_path
    demogs_input_path = args.demogs_input_path
    messages_traced_json_output_path = args.messages_traced_json_output_path
    surveys_traced_json_output_path = args.surveys_traced_json_output_path
    demogs_traced_json_output_path = args.demogs_traced_json_output_path

    # Load each messages file and rewrite its keys per radio-show group
    for filename in os.listdir(messages_input_path):
        with open(os.path.join(messages_input_path, filename)) as f:
            messages = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

            # Create a set of all the keys appearing in the data.
            # "avf_phone_id" is excluded: it is the join key shared by every
            # dataset and must keep its common name.
            keys = {key for message in messages for key in message.keys()}
            keys = list(keys)
            keys.remove("avf_phone_id")

            # Derive the group name from the file name
            # (e.g. "<group>_with_id.json" -> "<group>"), then rename the
            # listed keys for this group via create_unique_keys.
            group_name = filename.split("_with_id.json")[0]
            create_unique_keys(messages, keys, group_name)

        # Output updated td-s to "<group>_updated_keys.json"
        message_output_path = os.path.join(
            messages_traced_json_output_path,
            "{}_updated_keys.json".format(group_name))
        with open(message_output_path, "w") as f:
    # Initialise the Google Drive client only when drive upload is configured
    if pipeline_configuration.drive_upload is not None:
        # (dropped the pointless f-prefix on a literal with no placeholders)
        log.info("Downloading Google Drive service account credentials...")
        credentials_info = json.loads(
            google_cloud_utils.download_blob_to_string(
                google_cloud_credentials_file_path,
                pipeline_configuration.drive_upload.drive_credentials_file_url))
        drive_client_wrapper.init_client_from_info(credentials_info)

    # Load one messages dataset per activation flow.
    # (The enumerate index was unused in both loops, so it has been removed.)
    messages_datasets = []
    for activation_flow_name in pipeline_configuration.activation_flow_names:
        raw_activation_path = f"{raw_data_dir}/{activation_flow_name}.json"
        log.info(f"Loading {raw_activation_path}...")
        with open(raw_activation_path, "r") as f:
            messages = TracedDataJsonIO.import_json_to_traced_data_iterable(f)
        log.info(f"Loaded {len(messages)} messages")
        messages_datasets.append(messages)

    log.info("Loading surveys datasets:")
    # Load one contacts dataset per survey flow
    surveys_datasets = []
    for survey_flow_name in pipeline_configuration.survey_flow_names:
        raw_survey_path = f"{raw_data_dir}/{survey_flow_name}.json"
        log.info(f"Loading {raw_survey_path}...")
        with open(raw_survey_path, "r") as f:
            contacts = TracedDataJsonIO.import_json_to_traced_data_iterable(f)
        log.info(f"Loaded {len(contacts)} contacts")
        surveys_datasets.append(contacts)
    # Add survey data to the messages
# ---- Example #5 ----
        "Path to input file to concatenate, containing a list of TracedData objects as JSON",
        nargs=1)
    parser.add_argument("json_output",
                        metavar="json-output",
                        help="Path to write results of cleaning to",
                        nargs=1)

    args = parser.parse_args()
    # nargs=1 wraps each value in a single-element list; unpack it
    user = args.user[0]
    json_input_path_1 = args.json_input_1[0]
    json_input_path_2 = args.json_input_2[0]
    json_output_path = args.json_output[0]

    # Load both TracedData inputs from their JSON files
    with open(json_input_path_1, "r") as f:
        input_data_1 = TracedDataJsonIO.import_json_to_traced_data_iterable(f)
    with open(json_input_path_2, "r") as f:
        input_data_2 = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

    # Concatenate the two datasets, first file's items first
    output_data = list(input_data_1)
    output_data.extend(input_data_2)

    # Ensure the output directory exists, then write the JSON output.
    # Fixed: the original used `is not ""`, which tests object identity
    # rather than string equality (and is a SyntaxWarning on Python 3.8+).
    output_dir = os.path.dirname(json_output_path)
    if output_dir != "" and not os.path.exists(output_dir):
        os.makedirs(output_dir)
    with open(json_output_path, "w") as f:
        TracedDataJsonIO.export_traced_data_iterable_to_json(output_data,
                                                             f,
                                                             pretty_print=True)
# ---- Example #6 ----
        metavar="json-input-path",
        help=
        "Path to the input JSON file, containing a list of serialized TracedData objects"
    )
    parser.add_argument(
        "json_output_path",
        metavar="json-output-path",
        help="Path to a JSON file to write processed TracedData messages to")

    args = parser.parse_args()
    user = args.user
    json_input_path = args.json_input_path
    json_output_path = args.json_output_path

    # Load data from the serialised TracedData JSON file
    with open(json_input_path, "r") as f:
        data = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

    # FIXME: Clean/filter messages

    # Ensure the output directory exists, then write the JSON output.
    # Fixed: the original used `is not ""`, which tests object identity
    # rather than string equality (and is a SyntaxWarning on Python 3.8+).
    output_dir = os.path.dirname(json_output_path)
    if output_dir != "" and not os.path.exists(output_dir):
        os.makedirs(output_dir)
    with open(json_output_path, "w") as f:
        TracedDataJsonIO.export_traced_data_iterable_to_json(data,
                                                             f,
                                                             pretty_print=True)

    # FIXME: Output to other formats for analysis, using the TracedData exporters in core_data_modules.traced_data.io
# ---- Example #7 ----
    # Episode message files to process (currently only s02e01)
    message_paths = [s02e01_input_path]

    # Load the pipeline configuration file
    print("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)

    # Load the phone number <-> UUID de-identification table
    print("Loading Phone Number <-> UUID Table...")
    with open(phone_number_uuid_table_path, "r") as f:
        phone_number_uuid_table = PhoneNumberUuidTable.load(f)

    # Load demographics
    print("Loading Demographics 1/1...")
    with open(demog_input_path, "r") as f:
        demographics = TracedDataJsonIO.import_json_to_traced_data_iterable(f)
    print(f"Loaded {len(demographics)} contacts")

    # Load one TracedData messages dataset per episode path
    messages_datasets = []
    for i, path in enumerate(message_paths):
        print("Loading Episode {}/{}...".format(i + 1, len(message_paths)))
        with open(path, "r") as f:
            messages_datasets.append(TracedDataJsonIO.import_json_to_traced_data_iterable(f))
    
    # Add survey data to the messages
    print("Combining Datasets...")
    import argparse
import os
import random
    json_path = args.json_path

    # Connect to the Rapid Pro server
    rapid_pro = TembaClient(server, token)

    # Load the existing phone number <-> UUID table; fail with instructions
    # for creating a new, empty table if the file doesn't exist yet.
    if not os.path.exists(phone_uuid_path):
        raise FileNotFoundError(
            "No such phone uuid table file '{}'. "
            "To create a new, empty UUID table, "
            "run $ echo \"{{}}\" > <target-json-file>".format(phone_uuid_path))
    with open(phone_uuid_path, "r") as f:
        phone_uuids = PhoneNumberUuidTable.load(f)

    # Load the existing messages for week 4
    with open(json_path, "r") as f:
        week_4_messages = TracedDataJsonIO.import_json_to_traced_data_iterable(
            f)

    # Fetch all messages carrying the "S06e04_resend" label from Rapid Pro,
    # retrying if the API rate limit is exceeded
    mislocated_messages = rapid_pro.get_messages(label="S06e04_resend").all(
        retry_on_rate_exceed=True)
    print("Number of mislocated messages found: {}".format(
        len(mislocated_messages)))

    # Reverse the fetched list to get ascending order of modification date
    # (presumably the API returns newest-first — TODO confirm)
    mislocated_messages = list(mislocated_messages)
    mislocated_messages.reverse()

    # Convert the mislocated messages to de-identified TracedData
    traced_messages = []
    for message in mislocated_messages:
        traced_messages.append(
    json_output_path = args.json_output_path
    fgd_csv_output_path = args.fgd_csv_output_path
    cc_csv_output_path = args.cc_csv_output_path
    prev_exports_path = args.prev_exports_path

    # Selection parameters: minimum participant age and target contact counts
    # for community conversations (CC) and focus group discussions (FGD)
    MINIMUM_AGE = 18
    TOTAL_CC_CONTACTS = 160
    TOTAL_FGD_CONTACTS = 100

    # Load phone uuid table
    with open(phone_uuid_table_path, "r") as f:
        phone_uuids = PhoneNumberUuidTable.load(f)

    # Load FGD/CC survey responses
    with open(fgd_cc_input_path, "r") as f:
        fgd_cc_data = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

    # Load the previous export, if one was provided, so contacts selected in
    # earlier runs can be taken into account
    prev_exports = []
    if prev_exports_path is not None:
        with open(prev_exports_path, "r") as f:
            prev_exports = list(
                TracedDataCSVIO.import_csv_to_traced_data_iterable(user, f))

    # Load coded demog surveys
    with open(demog_surveys_input_path, "r") as f:
        surveys = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

    # Keep only respondents who answered the FGD/CC consent question
    fgd_cc_consent_key = "Response_1 (Category) - wt_fgd_cc"
    fgd_cc_data = [td for td in fgd_cc_data if fgd_cc_consent_key in td]
# ---- Example #10 ----
        metavar="json-output-path",
        help="Path to a JSON file to write processed messages to")

    args = parser.parse_args()
    user = args.user
    messages_input_path = args.messages_input_path
    survey_input_path = args.survey_input_path
    demog_input_path = args.demog_input_path
    json_output_path = args.json_output_path

    # Load every file in the messages directory as a TracedData dataset
    messages_datasets = []
    for filename in os.listdir(messages_input_path):
        with open(os.path.join(messages_input_path, filename)) as f:
            messages_datasets.append(
                TracedDataJsonIO.import_json_to_traced_data_iterable(f))

    # Load every file in the survey directory as a follow-up survey dataset
    survey_datasets = []
    for filename in os.listdir(survey_input_path):
        with open(os.path.join(survey_input_path, filename)) as f:
            survey_datasets.append(
                TracedDataJsonIO.import_json_to_traced_data_iterable(f))

    # Load demogs
    print("Loading Demographics...")
    with open(demog_input_path, "r") as f:
        demographics = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

    # Add survey data to the messages
    print("Combining Datasets...")
        "Radio_Station (Text) - wt_demog_2": None,
        "Age (Text) - wt_demog_2": somali.DemographicCleaner.clean_age,
        "Education_Level (Text) - wt_demog_2": None,
        "Idp (Text) - wt_demog_2": somali.DemographicCleaner.clean_yes_no,
        "Origin_District (Text) - wt_demog_2":
        somali.DemographicCleaner.clean_somalia_district,
        "Household_Sickness (Text) - wt_practice":
        somali.DemographicCleaner.clean_yes_no,
        "Cholera_Vaccination (Text) - wt_practice":
        somali.DemographicCleaner.clean_yes_no,
        "Trustworthy_Advisors (Text) - wt_practice": None
    }

    # Load the three survey datasets from their serialised TracedData JSON
    # files
    with open(demog_1_input_path, "r") as f:
        demog_1_data = TracedDataJsonIO.import_json_to_traced_data_iterable(f)
    with open(demog_2_input_path, "r") as f:
        demog_2_data = TracedDataJsonIO.import_json_to_traced_data_iterable(f)
    with open(practice_input_path, "r") as f:
        practice_data = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

    # Join the survey data on the respondent key "avf_phone_id": first the
    # two demog surveys, then that result with the practice survey
    demog_data = TracedData.join_iterables(user, "avf_phone_id", demog_1_data,
                                           demog_2_data, "wt_demog_2")
    all_survey_data = TracedData.join_iterables(user, "avf_phone_id",
                                                demog_data, practice_data,
                                                "wt_practice")

    # Clean the survey responses
    for td in all_survey_data:
        for key, cleaner in cleaning_plan.items():