def test_join_iterables(self):
    """Exercise TracedData.join_iterables on two small lists joined on "id".

    Three scenarios are checked, in order:
      1. The join raises AssertionError while the two objects with id "A"
         hold different values for "gender".
      2. Once that conflict is resolved, the join succeeds and produces the
         expected merged dictionaries.
      3. The join raises AssertionError when data_1 contains two objects
         sharing the same join key.
    """
    data_1 = [
        TracedData(
            {"id": "A", "gender": "male", "age": 55},
            Metadata("test_user", Metadata.get_call_location(), time.time())
        ),
        TracedData(
            {"id": "B", "age": 19},
            Metadata("test_user", Metadata.get_call_location(), time.time())
        )
    ]

    data_2 = [
        TracedData(
            {"id": "C", "country": "Somalia"},
            Metadata("test_user", Metadata.get_call_location(), time.time())
        ),
        TracedData(
            {"id": "A", "country": "Kenya", "gender": "female"},
            Metadata("test_user", Metadata.get_call_location(), time.time())
        )
    ]

    # Joining should fail because item with id 'A' has conflicting genders
    with self.assertRaises(AssertionError):
        TracedData.join_iterables("test_user", "id", data_1, data_2, "data_2")

    # Fix the gender conflict problem, and test that the join now works as expected.
    data_2[1].append_data({"gender": "male"}, Metadata("test_user", Metadata.get_call_location(), time.time()))
    merged = TracedData.join_iterables("test_user", "id", data_1, data_2, "data_2")

    merged_dicts = [dict(td.items()) for td in merged]
    expected_dicts = [
        {"id": "B", "age": 19},
        {"id": "C", "country": "Somalia"},
        {"id": "A", "gender": "male", "age": 55, "country": "Kenya"}
    ]

    self.assertEqual(len(merged_dicts), len(expected_dicts))

    # Compared pairwise, so the order of the joined output is part of what is asserted.
    # Distinct loop names keep `merged` (the join result) from being rebound here.
    for merged_dict, expected_dict in zip(merged_dicts, expected_dicts):
        self.assertDictEqual(merged_dict, expected_dict)

    # Modify data_1 to include multiple TracedData objects with the same join key, and ensure joining then fails.
    data_1[0].append_data({"id": "B"}, Metadata("test_user", Metadata.get_call_location(), time.time()))
    with self.assertRaises(AssertionError):
        TracedData.join_iterables("test_user", "id", data_1, data_2, "data_2")
somali.DemographicCleaner.clean_yes_no, "Cholera_Vaccination (Text) - wt_practice": somali.DemographicCleaner.clean_yes_no, "Trustworthy_Advisors (Text) - wt_practice": None } # Load data from JSON file with open(demog_1_input_path, "r") as f: demog_1_data = TracedDataJsonIO.import_json_to_traced_data_iterable(f) with open(demog_2_input_path, "r") as f: demog_2_data = TracedDataJsonIO.import_json_to_traced_data_iterable(f) with open(practice_input_path, "r") as f: practice_data = TracedDataJsonIO.import_json_to_traced_data_iterable(f) # Join the survey data on "avf_phone_id" demog_data = TracedData.join_iterables(user, "avf_phone_id", demog_1_data, demog_2_data, "wt_demog_2") all_survey_data = TracedData.join_iterables(user, "avf_phone_id", demog_data, practice_data, "wt_practice") # Clean the survey responses for td in all_survey_data: for key, cleaner in cleaning_plan.items(): if cleaner is not None and key in td: td.append_data({"{}_clean".format(key): cleaner(td[key])}, Metadata(user, Metadata.get_call_location(), time.time())) # Mark missing entries in the raw data as true missing for td in all_survey_data: for key in cleaning_plan: