def test_csv_export_with_df_size_limit(self): """ To fix pandas limitation of 30k rows on csv export, we specify a max number of records in a dataframe on export - lets test it """ self._publish_single_level_repeat_form() # submit 7 instances for i in range(4): self._submit_fixture_instance("new_repeats", "01") self._submit_fixture_instance("new_repeats", "02") for i in range(2): self._submit_fixture_instance("new_repeats", "01") csv_df_builder = CSVDataFrameBuilder(self.user.username, self.xform.id_string) record_count = csv_df_builder._query_mongo(count=True) self.assertEqual(record_count, 7) temp_file = NamedTemporaryFile(suffix=".csv", delete=False) csv_df_builder.export_to(temp_file.name, data_frame_max_size=3) csv_file = open(temp_file.name) csv_reader = csv.reader(csv_file) header = csv_reader.next() self.assertEqual( len(header), 17 + len(AbstractDataFrameBuilder.ADDITIONAL_COLUMNS)) rows = [] for row in csv_reader: rows.append(row) self.assertEqual(len(rows), 7) self.assertEqual(rows[4][5], NA_REP) # close and delete file csv_file.close() os.unlink(temp_file.name)
def test_csv_export_with_df_size_limit(self): """ To fix pandas limitation of 30k rows on csv export, we specify a max number of records in a dataframe on export - lets test it """ self._publish_single_level_repeat_form() # submit 7 instances for i in range(4): self._submit_fixture_instance("new_repeats", "01") self._submit_fixture_instance("new_repeats", "02") for i in range(2): self._submit_fixture_instance("new_repeats", "01") csv_df_builder = CSVDataFrameBuilder(self.user.username, self.xform.id_string) record_count = csv_df_builder._query_mongo(count=True) self.assertEqual(record_count, 7) temp_file = NamedTemporaryFile(suffix=".csv", delete=False) csv_df_builder.export_to(temp_file.name, data_frame_max_size=3) csv_file = open(temp_file.name) csv_reader = csv.reader(csv_file) header = csv_reader.next() self.assertEqual( len(header), 17 + len(AbstractDataFrameBuilder.ADDITIONAL_COLUMNS)) rows = [] for row in csv_reader: rows.append(row) self.assertEqual(len(rows), 7) self.assertEqual(rows[4][5], NA_REP) # close and delete file csv_file.close() os.unlink(temp_file.name)
def test_split_select_multiples(self): self._publish_nested_repeats_form() dd = self.xform.data_dictionary() self._submit_fixture_instance("nested_repeats", "01") csv_df_builder = CSVDataFrameBuilder(self.user.username, self.xform.id_string) cursor = csv_df_builder._query_mongo() record = cursor[0] select_multiples = CSVDataFrameBuilder._collect_select_multiples(dd) result = CSVDataFrameBuilder._split_select_multiples(record, select_multiples) expected_result = { u'web_browsers/ie': True, u'web_browsers/safari': True, u'web_browsers/firefox': False, u'web_browsers/chrome': False } # build a new dictionary only composed of the keys we want to use in # the comparison result = dict([(key, result[key]) for key in result.keys() if key in expected_result.keys()]) self.assertEqual(expected_result, result) csv_df_builder = CSVDataFrameBuilder(self.user.username, self.xform.id_string, binary_select_multiples=True) result = csv_df_builder._split_select_multiples(record, select_multiples) expected_result = { u'web_browsers/ie': 1, u'web_browsers/safari': 1, u'web_browsers/firefox': 0, u'web_browsers/chrome': 0 } # build a new dictionary only composed of the keys we want to use in # the comparison result = dict([(key, result[key]) for key in result.keys() if key in expected_result.keys()]) self.assertEqual(expected_result, result)
def test_split_select_multiples(self): self._publish_nested_repeats_form() dd = self.xform.data_dictionary() self._submit_fixture_instance("nested_repeats", "01") csv_df_builder = CSVDataFrameBuilder(self.user.username, self.xform.id_string) cursor = csv_df_builder._query_mongo() record = cursor[0] select_multiples = CSVDataFrameBuilder._collect_select_multiples(dd) result = CSVDataFrameBuilder._split_select_multiples(record, select_multiples) expected_result = { u'web_browsers/ie': True, u'web_browsers/safari': True, u'web_browsers/firefox': False, u'web_browsers/chrome': False } # build a new dictionary only composed of the keys we want to use in # the comparison result = dict([(key, result[key]) for key in result.keys() if key in expected_result.keys()]) self.assertEqual(expected_result, result) csv_df_builder = CSVDataFrameBuilder(self.user.username, self.xform.id_string, binary_select_multiples=True) result = csv_df_builder._split_select_multiples(record, select_multiples) expected_result = { u'web_browsers/ie': 1, u'web_browsers/safari': 1, u'web_browsers/firefox': 0, u'web_browsers/chrome': 0 } # build a new dictionary only composed of the keys we want to use in # the comparison result = dict([(key, result[key]) for key in result.keys() if key in expected_result.keys()]) self.assertEqual(expected_result, result)
def _csv_data_for_dataframe(self): csv_df_builder = CSVDataFrameBuilder(self.user.username, self.xform.id_string) cursor = csv_df_builder._query_mongo() return csv_df_builder._format_for_dataframe(cursor)
def _csv_data_for_dataframe(self): csv_df_builder = CSVDataFrameBuilder(self.user.username, self.xform.id_string) cursor = csv_df_builder._query_mongo() return csv_df_builder._format_for_dataframe(cursor)