Example #1
0
    def test_update_iterable(self):
        """Verify that update_iterable copies each update onto every record with the same "id"."""

        def wrap(raw_dicts):
            # Wrap every plain dict in a TracedData carrying freshly timestamped metadata.
            wrapped = []
            for raw in raw_dicts:
                metadata = Metadata("test_user", "data_generator", time.time())
                wrapped.append(TracedData(raw, metadata))
            return wrapped

        # Two of the records share id "A", so the single "A" update must reach both of them.
        data = wrap([
            {"id": "A", "message": "hello"},
            {"id": "B", "message": "hello"},
            {"id": "A", "message": "hi"}
        ])
        updates = wrap([
            {"id": "A", "gender": "male"},
            {"id": "B", "gender": "female", "age": 20}
        ])

        TracedData.update_iterable("test_user", "id", data, updates, "demographics")

        expected_dicts = [
            {"id": "A", "message": "hello", "gender": "male"},
            {"id": "B", "message": "hello", "gender": "female", "age": 20},
            {"id": "A", "message": "hi", "gender": "male"}
        ]

        # Each record is expected to keep its position and gain the fields of the matching update.
        for actual, expected in zip(data, expected_dicts):
            actual_dict = dict(actual.items())
            self.assertDictEqual(actual_dict, expected)
    def combine_raw_datasets(user, messages_datasets, surveys_datasets):
        """
        Flatten the message datasets into one list, then apply each survey dataset to it.

        :param user: Passed through as the first argument of TracedData.update_iterable.
        :param messages_datasets: Iterable of iterables; their items are concatenated in order.
        :param surveys_datasets: Iterable of datasets, each applied in turn with
                                 TracedData.update_iterable using the key "avf_phone_id"
                                 and the label "survey_responses".
        :return: New list holding every item of every messages dataset.
        """
        data = [
            message
            for messages_dataset in messages_datasets
            for message in messages_dataset
        ]

        for surveys_dataset in surveys_datasets:
            TracedData.update_iterable(
                user, "avf_phone_id", data, surveys_dataset, "survey_responses"
            )

        return data
    def combine_raw_datasets(user, shows_datasets, survey_datasets):
        """
        Concatenate all show datasets into a single list and apply every survey dataset to it.

        :param user: Passed through as the first argument of TracedData.update_iterable.
        :param shows_datasets: Iterable of iterables; their items are concatenated in order.
        :param survey_datasets: Iterable of datasets, each applied in turn with
                                TracedData.update_iterable using the key "avf_phone_id"
                                and the label "survey_responses".
        :return: New list holding every item of every show dataset.
        """
        data = []
        data.extend(item for show_dataset in shows_datasets for item in show_dataset)

        for survey in survey_datasets:
            TracedData.update_iterable(user, "avf_phone_id", data, survey, "survey_responses")

        return data
    # Load the demographic survey responses from the JSON file at demog_surveys_input_path
    with open(demog_surveys_input_path, "r") as f:
        surveys = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

    # Filter out people who haven't answered the fgd_cc consent question
    # (i.e. keep only the records that contain the consent key at all)
    fgd_cc_consent_key = "Response_1 (Category) - wt_fgd_cc"
    fgd_cc_data = [td for td in fgd_cc_data if fgd_cc_consent_key in td]

    # Filter out people that we have exported in the past.
    # The number returned by phone_uuids.get_phone is prefixed with "+" before the comparison --
    # presumably to match the format of "Phone Number" in prev_exports; TODO confirm.
    prev_contacts = {td["Phone Number"] for td in prev_exports}
    fgd_cc_data = [
        td for td in fgd_cc_data if "+{}".format(
            phone_uuids.get_phone(td["avf_phone_id"])) not in prev_contacts
    ]

    # Apply the demog surveys to the fgd_cc data, keyed on "avf_phone_id".
    # The return value is not used; fgd_cc_data is the object passed in to be updated.
    TracedData.update_iterable(user, "avf_phone_id", fgd_cc_data, surveys,
                               "surveys")

    # Annotate fgd_cc_data with whether or not the respondent is from Mogadishu
    mogadishu_districts = [
        "mogadishu",
        "mogadisho",  # TODO: Remove need for this by correcting Coda file
        "boondheere",
        "cabdiasis",
        "daynile",
        "dharkenley",
        "heliwa",
        "hodan",
        "hawl wadaag",
        "karaan",
        "shangaani",
        "shibis",
        1: "wt_s06e1_activation",
        2: "wt_s06e2_activation",
        3: "wt_s06e03_activation",
        4: "wt_s06e04_activation",
        5: "wt_s06e05_activation"
    }

    # Produce output columns for each input message
    all_messages = []
    # One key set per show number (1-5); the set for the show being processed is passed to
    # AnalysisKeys.set_matrix_analysis_keys in the loop below.
    all_show_keys = {1: set(), 2: set(), 3: set(), 4: set(), 5: set()}
    # Key sets passed to AnalysisKeys.set_matrix_keys for the "_coded" and "_outbreak_coded"
    # columns respectively.
    trustworthy_advisors_keys = set()
    outbreak_keys = set()
    # Raw column name; the "_coded" / "_outbreak_coded" suffixes are appended to it below.
    trustworthy_advisors_raw_key = "Trustworthy_Advisors (Text) - wt_practice"
    for show_number, show_name in shows.items():
        show_messages = load_show(show_name)
        TracedData.update_iterable(user, "avf_phone_id", show_messages,
                                   surveys, "surveys")

        for td in show_messages:
            AnalysisKeys.set_analysis_keys(user, show_number, td)
            AnalysisKeys.set_matrix_analysis_keys(user,
                                                  all_show_keys[show_number],
                                                  show_number, td)

            AnalysisKeys.set_matrix_keys(
                user, td, trustworthy_advisors_keys,
                "{}_coded".format(trustworthy_advisors_raw_key),
                "trustworthy_advisors_clean")

            AnalysisKeys.set_matrix_keys(
                user, td, outbreak_keys,
                "{}_outbreak_coded".format(trustworthy_advisors_raw_key),
    )
    # Positional argument: where the processed messages are written as JSON
    parser.add_argument(
        "json_output_path",
        metavar="json-output-path",
        help="Path to a JSON file to write processed messages to")

    # Parse the command line and pull the arguments used below into locals
    args = parser.parse_args()
    user = args.user
    json_input_path = args.json_input_path
    survey_input_path = args.survey_input_path
    json_output_path = args.json_output_path

    # Load messages
    with open(json_input_path, "r") as f:
        messages = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

    # Load surveys
    with open(survey_input_path, "r") as f:
        surveys = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

    # Add survey data to the messages, keyed on "avf_phone_id".
    # The return value is not used; `messages` itself is what gets exported below.
    TracedData.update_iterable(user, "avf_phone_id", messages, surveys,
                               "survey_responses")

    # Write json output (pretty-printed). ensure_dirs_exist_for_file is called first --
    # presumably it creates any missing parent directories of the output path; TODO confirm.
    IOUtils.ensure_dirs_exist_for_file(json_output_path)
    with open(json_output_path, "w") as f:
        TracedDataJsonIO.export_traced_data_iterable_to_json(messages,
                                                             f,
                                                             pretty_print=True)