Example #1
 def test_csv_export_with_df_size_limit(self):
     """
     To work around pandas' limit of 30k rows on CSV export, we specify a
     maximum number of records per dataframe on export; test that the full
     dataset is still exported.
     """
     self._publish_single_level_repeat_form()
     # submit 7 instances
     for i in range(4):
         self._submit_fixture_instance("new_repeats", "01")
     self._submit_fixture_instance("new_repeats", "02")
     for i in range(2):
         self._submit_fixture_instance("new_repeats", "01")
     csv_df_builder = CSVDataFrameBuilder(self.user.username,
                                          self.xform.id_string)
     record_count = csv_df_builder._query_mongo(count=True)
     self.assertEqual(record_count, 7)
     temp_file = NamedTemporaryFile(suffix=".csv", delete=False)
     csv_df_builder.export_to(temp_file.name, data_frame_max_size=3)
     csv_file = open(temp_file.name)
     csv_reader = csv.reader(csv_file)
     header = csv_reader.next()
     self.assertEqual(
         len(header), 17 + len(AbstractDataFrameBuilder.ADDITIONAL_COLUMNS))
     rows = []
     for row in csv_reader:
         rows.append(row)
     self.assertEqual(len(rows), 7)
     self.assertEqual(rows[4][5], NA_REP)
     # close and delete file
     csv_file.close()
     os.unlink(temp_file.name)
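The test relies on the export happening in chunks: with data_frame_max_size=3 and 7 records, the builder has to flush several dataframes into the same file while still producing a single header and all 7 rows. A minimal sketch of that chunking idea (hypothetical helper, not onadata's actual implementation):

    import pandas as pd

    def export_in_chunks(records, path, max_size=30000):
        """Write a list of dicts to CSV in dataframe chunks of at most
        max_size rows (a sketch of the idea the test above exercises)."""
        for start in range(0, len(records), max_size):
            chunk = pd.DataFrame(records[start:start + max_size])
            # write the header only for the first chunk, then append;
            # na_rep mirrors the NA_REP placeholder asserted in the test
            chunk.to_csv(path, mode="w" if start == 0 else "a",
                         header=(start == 0), index=False, na_rep="n/a")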
Example #2
    def to_flat_csv_export(
            self, path, data, username, id_string, filter_query):
        # TODO resolve circular import
        from onadata.apps.viewer.pandas_mongo_bridge import\
            CSVDataFrameBuilder

        csv_builder = CSVDataFrameBuilder(
            username, id_string, filter_query, self.GROUP_DELIMITER,
            self.SPLIT_SELECT_MULTIPLES, self.BINARY_SELECT_MULTIPLES)
        csv_builder.export_to(path)
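The function-level import is the standard Python workaround for a circular dependency: pandas_mongo_bridge is only resolved when the export actually runs, by which point both modules have finished initializing. A generic illustration with hypothetical module names:

    # module_a.py
    def use_helper():
        # module_b imports module_a at its top level, so importing it
        # here (at call time) avoids an ImportError during startup
        from module_b import helper
        return helper()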
Example #3
 def test_csv_dataframe_export_to(self):
     self._publish_nested_repeats_form()
     self._submit_fixture_instance(
         "nested_repeats", "01", submission_time=self._submission_time)
     self._submit_fixture_instance(
         "nested_repeats", "02", submission_time=self._submission_time)
     csv_df_builder = CSVDataFrameBuilder(self.user.username,
                                          self.xform.id_string)
     temp_file = NamedTemporaryFile(suffix=".csv", delete=False)
     csv_df_builder.export_to(temp_file.name)
     csv_fixture_path = os.path.join(
         os.path.dirname(os.path.abspath(__file__)),
         "fixtures", "nested_repeats", "nested_repeats.csv"
     )
     temp_file.close()
     fixture, output = '', ''
     with open(csv_fixture_path) as f:
         fixture = f.read()
     with open(temp_file.name) as f:
         output = f.read()
     os.unlink(temp_file.name)
     self.assertEqual(fixture, output)
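This is a golden-file test: the freshly generated export must match a checked-in fixture CSV byte for byte. The same pattern distilled into a reusable helper (hypothetical name, assuming it is called from a unittest.TestCase):

    def assert_matches_fixture(test_case, generated_path, fixture_path):
        # compare a generated file byte-for-byte with a checked-in fixture
        with open(fixture_path) as f:
            expected = f.read()
        with open(generated_path) as f:
            actual = f.read()
        test_case.assertEqual(expected, actual)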
Example #4
def get_csv_data(xform, force_last=False):
    def getbuff():
        return StringIO.StringIO()

    def get_headers_from(csv_data):
        csv_data.seek(0)
        header_row = csv_data.readline()
        csv_data.read()
        return header_row.split(',')

    def get_csv_data_manual(xform,
                            only_last=False,
                            with_header=True,
                            headers_to_use=None):
        # TODO: find out a better way to handle this
        # when the form has only one submission, the CSVDataFrameBuilder
        # output is empty. we still want to create the BB dataset with
        # row 1, so we extract it manually and write it out as CSV.
        instances = Instance.objects.filter(
            xform=xform).order_by('-date_modified')

        if instances.count() == 0:
            raise NoRecordsFoundError
        else:
            # strictly this is only needed when count == 1, but it is
            # harmless for larger counts too.

            csv_buf = getbuff()

            if only_last:
                instances = instances[0:1]

            rows = [instance.get_full_dict() for instance in instances]

            if headers_to_use is None:
                headers_to_use = [
                    key for key in rows[0].keys() if not key.startswith('_')
                ]

            w = unicodecsv.DictWriter(csv_buf,
                                      fieldnames=headers_to_use,
                                      extrasaction='ignore',
                                      lineterminator='\n',
                                      encoding='utf-8')
            if with_header:
                w.writeheader()
            w.writerows(rows)
            csv_buf.flush()

            if not csv_buf.len:
                raise NoRecordsFoundError

            return csv_buf.getvalue()

    # setup an IO stream
    buff = getbuff()

    # prepare/generate a standard CSV export.
    # note that it omits the current submission (if called from rest)
    csv_dataframe_builder = CSVDataFrameBuilder(xform.user.username,
                                                xform.id_string)
    try:
        csv_dataframe_builder.export_to(buff)
        if force_last:
            # requested to add last submission to the buffer
            buff.write(
                get_csv_data_manual(xform,
                                    only_last=True,
                                    with_header=False,
                                    headers_to_use=get_headers_from(buff)))
    except NoRecordsFoundError:
        # check that there is at least one submission before giving up
        get_csv_data_manual(xform, with_header=True)

    if buff.len:
        # rewrite CSV header so that meta fields (starting with _ or meta)
        # are prefixed to ensure that the dataset will be joinable to
        # another formhub dataset

        prefix = (u'%(id_string)s_%(id)s' % {
            'id_string': xform.id_string,
            'id': xform.id
        })

        new_buff = getbuff()
        buff.seek(0)
        reader = unicodecsv.reader(buff, encoding='utf-8')
        writer = unicodecsv.writer(new_buff, encoding='utf-8')

        is_header = True

        for row in reader:
            if is_header:
                is_header = False
                for idx, col in enumerate(row):
                    if col.startswith('_') or col.startswith('meta_')\
                            or col.startswith('meta/'):
                        row[idx] = (u'%(prefix)s%(col)s' % {
                            'prefix': prefix,
                            'col': col
                        })
            writer.writerow(row)

        return new_buff.getvalue()
    else:
        raise NoRecordsFoundError
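The header rewrite leaves ordinary question columns untouched and prepends the prefix only to meta columns. A quick worked example, assuming a form whose id_string is 'survey' and whose primary key is 12 (hypothetical values):

    prefix = u'survey_12'  # u'%(id_string)s_%(id)s'
    header = ['name', 'age', '_id', 'meta/instanceID']
    rewritten = [prefix + col if col.startswith(('_', 'meta_', 'meta/'))
                 else col for col in header]
    # rewritten == ['name', 'age', 'survey_12_id', 'survey_12meta/instanceID']

Note that no separator is inserted between the prefix and the column name; '_id' already starts with an underscore, which is why 'survey_12_id' comes out readable.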