コード例 #1
0
    def test_join_iterables(self):
        """Exercise TracedData.join_iterables through three scenarios.

        1. Both iterables hold an item with id "A" but disagree on "gender":
           the join is expected to raise AssertionError.
        2. Once the disagreement is removed, the joined items are compared,
           position by position, against the expected dictionaries.
        3. Two items in the first iterable share the same join key:
           the join is expected to raise AssertionError again.
        """
        user = "test_user"
        join_key = "id"

        # First iterable: items "A" and "B".
        left_a = TracedData(
            {"id": "A", "gender": "male", "age": 55},
            Metadata(user, Metadata.get_call_location(), time.time())
        )
        left_b = TracedData(
            {"id": "B", "age": 19},
            Metadata(user, Metadata.get_call_location(), time.time())
        )
        left = [left_a, left_b]

        # Second iterable: items "C" and "A"; "A" starts with a gender that
        # contradicts the one recorded in the first iterable.
        right_c = TracedData(
            {"id": "C", "country": "Somalia"},
            Metadata(user, Metadata.get_call_location(), time.time())
        )
        right_a = TracedData(
            {"id": "A", "country": "Kenya", "gender": "female"},
            Metadata(user, Metadata.get_call_location(), time.time())
        )
        right = [right_c, right_a]

        # Joining should fail because the item with id 'A' has conflicting genders.
        with self.assertRaises(AssertionError):
            TracedData.join_iterables(user, join_key, left, right, "data_2")

        # Resolve the gender conflict, then check that the join now succeeds.
        right_a.append_data(
            {"gender": "male"},
            Metadata(user, Metadata.get_call_location(), time.time())
        )
        joined = TracedData.join_iterables(user, join_key, left, right, "data_2")

        actual_dicts = [dict(td.items()) for td in joined]
        expected_dicts = [
            {"id": "B", "age": 19},
            {"id": "C", "country": "Somalia"},
            {"id": "A", "gender": "male", "age": 55, "country": "Kenya"}
        ]

        # zip() stops at the shorter sequence, so check the lengths first.
        self.assertEqual(len(actual_dicts), len(expected_dicts))
        for actual, expected in zip(actual_dicts, expected_dicts):
            self.assertDictEqual(actual, expected)

        # Give the first iterable two items with the same join key ("B") and
        # ensure that joining then fails.
        left_a.append_data(
            {"id": "B"},
            Metadata(user, Metadata.get_call_location(), time.time())
        )
        with self.assertRaises(AssertionError):
            TracedData.join_iterables(user, join_key, left, right, "data_2")
コード例 #2
0
        somali.DemographicCleaner.clean_yes_no,
        "Cholera_Vaccination (Text) - wt_practice":
        somali.DemographicCleaner.clean_yes_no,
        "Trustworthy_Advisors (Text) - wt_practice": None
    }

    # Load data from JSON file
    with open(demog_1_input_path, "r") as f:
        demog_1_data = TracedDataJsonIO.import_json_to_traced_data_iterable(f)
    with open(demog_2_input_path, "r") as f:
        demog_2_data = TracedDataJsonIO.import_json_to_traced_data_iterable(f)
    with open(practice_input_path, "r") as f:
        practice_data = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

    # Join the survey data on "avf_phone_id"
    demog_data = TracedData.join_iterables(user, "avf_phone_id", demog_1_data,
                                           demog_2_data, "wt_demog_2")
    all_survey_data = TracedData.join_iterables(user, "avf_phone_id",
                                                demog_data, practice_data,
                                                "wt_practice")

    # Clean the survey responses
    for td in all_survey_data:
        for key, cleaner in cleaning_plan.items():
            if cleaner is not None and key in td:
                td.append_data({"{}_clean".format(key): cleaner(td[key])},
                               Metadata(user, Metadata.get_call_location(),
                                        time.time()))

    # Mark missing entries in the raw data as true missing
    for td in all_survey_data:
        for key in cleaning_plan: