Example #1
    def download_data(self, format="", previous_export=None, filter=None):
        """
        If there is data, return an HTTPResponse with the appropriate data.
        If there is no data, return None.
        """
        from couchexport.shortcuts import export_response
        from couchexport.export import get_writer, get_schema_new, format_tables, create_intermediate_tables

        if not format:
            format = self.default_format or Format.XLS_2007

        from couchexport.export import ExportConfiguration

        database = get_db()
        config = ExportConfiguration(database, self.index, previous_export, util.intersect_filters(self.filter, filter))

        # get and checkpoint the latest schema
        updated_schema = get_schema_new(config)
        export_schema_checkpoint = ExportSchema(
            seq=config.current_seq, schema=updated_schema, index=config.schema_index
        )
        export_schema_checkpoint.save()
        # transform docs into the output format and save
        writer = get_writer(format)

        # open the doc and the headers
        formatted_headers = self.get_table_headers()
        tmp = StringIO()
        writer.open(formatted_headers, tmp)

        for doc in config.get_docs():
            writer.write(self.trim(format_tables(create_intermediate_tables(doc, updated_schema), separator=".")))
        writer.close()

        return export_response(tmp, format, self.name)
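
All the examples on this page share the same writer life cycle: get_writer(format) returns a writer, open() receives the header tables plus an output stream, write() receives (table, rows) pairs for each document, and close() finalizes the file. Below is a minimal standalone sketch of that cycle, assuming couchexport is installed; the "Cases" table name and the row values are illustrative, and the (table, rows) shapes are inferred from the calls above.

import os
import tempfile

from couchexport.export import get_writer
from couchexport.models import Format

writer = get_writer(Format.XLS_2007)

fd, path = tempfile.mkstemp()
with os.fdopen(fd, 'wb') as tmp:
    # open() takes the header tables and the output stream
    writer.open([("Cases", [["case_id", "name"]])], tmp)
    # write() takes data rows in the same (table, rows) shape
    writer.write([("Cases", [["abc123", "Alice"]])])
    writer.close()
# path now points at the finished export file
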
Example #2
    def get_export_files(self, format='', previous_export_id=None, filter=None,
                         use_cache=True, max_column_size=2000, separator='|', process=None, **kwargs):
        # the APIs of how these methods are broken down suck, but at least
        # it's DRY
        from couchexport.export import get_writer, get_export_components, get_headers, get_formatted_rows
        from django.core.cache import cache
        import hashlib

        export_tag = self.index

        CACHE_TIME = 1 * 60 * 60 # cache for 1 hour, in seconds

        def _build_cache_key(tag, prev_export_id, format, max_column_size):
            def _human_readable_key(tag, prev_export_id, format, max_column_size):
                return "couchexport_:%s:%s:%s:%s" % (tag, prev_export_id, format, max_column_size)
            return hashlib.md5(_human_readable_key(tag, prev_export_id,
                format, max_column_size).encode('utf-8')).hexdigest()

        # check cache; currently only supported for filterless queries
        cache_key = _build_cache_key(export_tag, previous_export_id, format, max_column_size)
        if use_cache and filter is None:
            cached_data = cache.get(cache_key)
            if cached_data:
                (tmp, checkpoint) = cached_data
                return ExportFiles(tmp, checkpoint)

        fd, path = tempfile.mkstemp()
        with os.fdopen(fd, 'wb') as tmp:
            schema_index = export_tag
            config, updated_schema, export_schema_checkpoint = get_export_components(schema_index,
                                                                                     previous_export_id, filter)
            if config:
                writer = get_writer(format)

                # get cleaned up headers
                formatted_headers = self.remap_tables(get_headers(updated_schema, separator=separator))
                writer.open(formatted_headers, tmp, max_column_size=max_column_size)

                total_docs = len(config.potentially_relevant_ids)
                if process:
                    DownloadBase.set_progress(process, 0, total_docs)
                for i, doc in config.enum_docs():
                    if self.transform:
                        doc = self.transform(doc)

                    writer.write(self.remap_tables(get_formatted_rows(
                        doc, updated_schema, include_headers=False,
                        separator=separator)))
                    if process:
                        DownloadBase.set_progress(process, i + 1, total_docs)
                writer.close()

            checkpoint = export_schema_checkpoint

        if checkpoint:
            if use_cache:
                cache.set(cache_key, (path, checkpoint), CACHE_TIME)
            return ExportFiles(path, checkpoint)

        return None
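
The cache key above hashes a human-readable key with MD5 so that arbitrary index and format values produce a fixed-length, cache-safe key. Here is a hedged standalone illustration of the same scheme, with made-up index and format values; note that hashlib.md5() needs bytes, hence the .encode('utf-8') that Python 3 requires.

import hashlib

# Illustrative values only; the real key combines the export's index,
# previous export id, format, and max column size.
human_readable = "couchexport_:%s:%s:%s:%s" % ("my-index", None, "xlsx", 2000)
cache_key = hashlib.md5(human_readable.encode('utf-8')).hexdigest()
print(cache_key)  # fixed-length hex digest
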
Example #3
    def generate_bulk_file(self):
        configs = list()
        schemas = list()
        checkpoints = list()

        for export_object in self.export_objects:
            config, schema, checkpoint = export_object.get_export_components(filter=self.export_filter)
            configs.append(config)
            schemas.append(schema)
            checkpoints.append(checkpoint)

        writer = get_writer(self.format)

        # generate the headers for the bulk excel file
        headers = self.generate_table_headers(schemas, checkpoints)

        fd, path = tempfile.mkstemp()
        with os.fdopen(fd, 'wb') as tmp:
            writer.open(headers, tmp)

            # now that the headers are set, let's build the rows
            for i, config in enumerate(configs):
                for doc in config.get_docs():
                    if self.export_objects[i].transform:
                        doc = self.export_objects[i].transform(doc)
                    table = format_tables(create_intermediate_tables(doc, schemas[i]),
                                          include_headers=isinstance(self, CustomBulkExport),
                                          separator=self.separator)
                    if isinstance(self, CustomBulkExport):
                        table = self.export_objects[i].trim(table, doc)
                    table = self.export_objects[i].parse_tables(table)
                    writer.write(table)

            writer.close()
        return path
Example #4
def _get_writer(export_instances):
    """
    Return a new _Writer
    """
    format = Format.XLS_2007
    if len(export_instances) == 1:
        format = export_instances[0].export_format

    legacy_writer = get_writer(format)
    writer = _Writer(legacy_writer)
    return writer
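
Note that the per-instance export_format is honored only when exactly one instance is exported; bulk exports always fall back to XLS_2007. A sketch of just that selection rule follows, using a hypothetical FakeInstance stand-in for a real export instance.

from collections import namedtuple

from couchexport.models import Format

# FakeInstance is a hypothetical stand-in; only export_format matters here.
FakeInstance = namedtuple('FakeInstance', ['export_format'])

def choose_format(export_instances):
    # Mirrors the guard in Examples #4, #6, #7 and #10: bulk exports
    # (more than one instance) always fall back to XLS_2007.
    if len(export_instances) == 1:
        return export_instances[0].export_format
    return Format.XLS_2007

assert choose_format([FakeInstance(Format.CSV)]) == Format.CSV
assert choose_format([FakeInstance(Format.CSV)] * 2) == Format.XLS_2007
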
Example #5
    def generate_bulk_file(self, update_progress=None):
        update_progress = update_progress or (lambda x: x)
        configs = list()
        schemas = list()
        checkpoints = list()

        for export_object in self.export_objects:
            config, schema, checkpoint = export_object.get_export_components(filter=self.export_filter)
            configs.append(config)
            schemas.append(schema)
            checkpoints.append(checkpoint)

        writer = get_writer(self.format)

        # generate the headers for the bulk excel file
        headers = self.generate_table_headers(schemas, checkpoints)

        fd, path = tempfile.mkstemp()
        with os.fdopen(fd, 'wb') as tmp:
            writer.open(headers, tmp)

            # now that the headers are set, let's build the rows
            for i, config in enumerate(configs):
                try:
                    for doc in config.get_docs():
                        if self.export_objects[i].transform:
                            doc = self.export_objects[i].transform(doc)
                        table = get_formatted_rows(
                            doc, schemas[i], separator=self.separator,
                            include_headers=isinstance(self, CustomBulkExport))
                        if isinstance(self, CustomBulkExport):
                            table = self.export_objects[i].trim(table, doc)
                        if table and table[0]:
                            # if an export only contains data from repeats and a form has no repeats
                            # then the table list will be empty
                            table = self.export_objects[i].parse_tables(table)
                            writer.write(table)
                except SchemaMismatchException:
                    # fire off a delayed force update to prevent this from happening again
                    rebuild_schemas.delay(self.export_objects[i].index)
                    writer.write(
                        [(self.export_objects[i].table_name, [
                            FormattedRow([
                                ugettext(
                                    'There was an error generating this export. '
                                    'If the problem persists please report an issue.'
                                )],
                                separator=self.separator)
                        ])]
                    )
                update_progress(i + 1)
            writer.close()
        return path
Example #6
def get_export_writer(export_instances, temp_path, allow_pagination=True):
    """
    Return a new _ExportWriter or _PaginatedExportWriter
    """
    format = Format.XLS_2007
    if len(export_instances) == 1:
        format = export_instances[0].export_format

    legacy_writer = get_writer(format)
    if allow_pagination and PAGINATED_EXPORTS.enabled(export_instances[0].domain):
        writer = _PaginatedExportWriter(legacy_writer, temp_path)
    else:
        writer = _ExportWriter(legacy_writer, temp_path)
    return writer
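
Examples #6, #7 and #10 differ only in how the legacy writer is wrapped: when the PAGINATED_EXPORTS feature flag is enabled for the first instance's domain, a paginating wrapper is chosen so that very large exports can be split across files. A condensed sketch of that dispatch follows, with stub classes standing in for the module-private wrappers.

# Stubs standing in for _ExportWriter and _PaginatedExportWriter, which
# are private to the surrounding module and not importable on their own.
class ExportWriterStub(object):
    def __init__(self, legacy_writer, temp_path):
        self.writer = legacy_writer
        self.path = temp_path

class PaginatedExportWriterStub(ExportWriterStub):
    pass  # the real class additionally splits output across page files

def wrap_writer(legacy_writer, temp_path, paginated):
    # `paginated` stands in for PAGINATED_EXPORTS.enabled(domain), which
    # needs a live feature-toggle backend.
    cls = PaginatedExportWriterStub if paginated else ExportWriterStub
    return cls(legacy_writer, temp_path)
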
Example #7
def get_export_writer(export_instances, allow_pagination=True):
    """
    Return a new _ExportWriter or _PaginatedExportWriter
    """
    format = Format.XLS_2007
    if len(export_instances) == 1:
        format = export_instances[0].export_format

    legacy_writer = get_writer(format)
    if allow_pagination and PAGINATED_EXPORTS.enabled(
            export_instances[0].domain):
        writer = _PaginatedExportWriter(legacy_writer)
    else:
        writer = _ExportWriter(legacy_writer)
    return writer
Example #8
    def get_export_files(self, format=None, previous_export=None, filter=None, process=None, max_column_size=None,
                         apply_transforms=True, limit=0, **kwargs):
        from couchexport.export import get_writer, get_formatted_rows
        if not format:
            format = self.default_format or Format.XLS_2007

        config, updated_schema, export_schema_checkpoint = self.get_export_components(previous_export, filter)

        # transform docs into the output format and save
        writer = get_writer(format)

        # open the doc and the headers
        formatted_headers = list(self.get_table_headers())
        fd, path = tempfile.mkstemp()
        with os.fdopen(fd, 'wb') as tmp:
            writer.open(
                formatted_headers,
                tmp,
                max_column_size=max_column_size,
                table_titles=dict([
                    (table.index, table.display)
                    for table in self.tables if table.display
                ])
            )

            total_docs = len(config.potentially_relevant_ids)
            if process:
                DownloadBase.set_progress(process, 0, total_docs)
            for i, doc in config.enum_docs():
                if limit and i > limit:
                    break
                if self.transform and apply_transforms:
                    doc = self.transform(doc)
                formatted_tables = self.trim(
                    get_formatted_rows(doc, updated_schema, separator="."),
                    doc,
                    apply_transforms=apply_transforms
                )
                writer.write(formatted_tables)
                if process:
                    DownloadBase.set_progress(process, i + 1, total_docs)

            writer.close()

        if format == Format.PYTHON_DICT:
            return writer.get_preview()

        return ExportFiles(path, export_schema_checkpoint, format)
Example #9
    def get_export_files(self, format=None, previous_export=None, filter=None, process=None, max_column_size=None,
                         apply_transforms=True, limit=0, **kwargs):
        from couchexport.export import get_writer, format_tables, create_intermediate_tables
        if not format:
            format = self.default_format or Format.XLS_2007

        config, updated_schema, export_schema_checkpoint = self.get_export_components(previous_export, filter)

        # transform docs into the output format and save
        writer = get_writer(format)

        # open the doc and the headers
        formatted_headers = list(self.get_table_headers())
        fd, path = tempfile.mkstemp()
        with os.fdopen(fd, 'wb') as tmp:
            writer.open(
                formatted_headers,
                tmp,
                max_column_size=max_column_size,
                table_titles=dict([
                    (table.index, table.display)
                    for table in self.tables if table.display
                ])
            )

            total_docs = len(config.potentially_relevant_ids)
            if process:
                DownloadBase.set_progress(process, 0, total_docs)
            for i, doc in config.enum_docs():
                if limit and i > limit:
                    break
                if self.transform and apply_transforms:
                    doc = self.transform(doc)
                formatted_tables = self.trim(
                    format_tables(
                        create_intermediate_tables(doc, updated_schema),
                        separator="."
                    ),
                    doc,
                    apply_transforms=apply_transforms
                )
                writer.write(formatted_tables)
                if process:
                    DownloadBase.set_progress(process, i + 1, total_docs)

            writer.close()

        return ExportFiles(path, export_schema_checkpoint, format)
Example #10
def get_export_writer(export_instances, temp_path, allow_pagination=True):
    """
    Return a new _ExportWriter or _PaginatedExportWriter
    """
    format = Format.XLS_2007
    format_data_in_excel = False

    if len(export_instances) == 1:
        format = export_instances[0].export_format
        format_data_in_excel = export_instances[0].format_data_in_excel

    legacy_writer = get_writer(format, use_formatted_cells=format_data_in_excel)

    if allow_pagination and PAGINATED_EXPORTS.enabled(export_instances[0].domain):
        writer = _PaginatedExportWriter(legacy_writer, temp_path)
    else:
        writer = _ExportWriter(legacy_writer, temp_path)

    return writer
Example #11
    def __init__(self, export_instance, total_docs, num_processes):
        self.export_instance = export_instance
        self.results = []
        self.progress_queue = multiprocessing.Queue()
        self.progress = multiprocessing.Process(target=_output_progress,
                                                args=(self.progress_queue,
                                                      total_docs))

        self.export_function = run_export_with_logging

        def _set_queue(queue):
            """Set the progress queue as an attribute on the function
            You can't pass this as an arg"""
            self.export_function.queue = queue

        self.pool = multiprocessing.Pool(processes=num_processes,
                                         initializer=_set_queue,
                                         initargs=[self.progress_queue])

        self.is_zip = isinstance(get_writer(export_instance.export_format),
                                 ZippedExportWriter)
        self.premature_exit = False
Example #12
    def generate_bulk_file(self):
        configs = list()
        schemas = list()
        checkpoints = list()

        for export_object in self.export_objects:
            config, schema, checkpoint = export_object.get_export_components(
                filter=self.export_filter)
            configs.append(config)
            schemas.append(schema)
            checkpoints.append(checkpoint)

        writer = get_writer(self.format)

        # generate the headers for the bulk excel file
        headers = self.generate_table_headers(schemas, checkpoints)

        fd, path = tempfile.mkstemp()
        with os.fdopen(fd, 'wb') as tmp:
            writer.open(headers, tmp)

            # now that the headers are set, let's build the rows
            for i, config in enumerate(configs):
                for doc in config.get_docs():
                    if self.export_objects[i].transform:
                        doc = self.export_objects[i].transform(doc)
                    table = format_tables(
                        create_intermediate_tables(doc, schemas[i]),
                        include_headers=isinstance(self, CustomBulkExport),
                        separator=self.separator)
                    if isinstance(self, CustomBulkExport):
                        table = self.export_objects[i].trim(table, doc)
                    table = self.export_objects[i].parse_tables(table)
                    writer.write(table)

            writer.close()
        return path
Example #13
    def __init__(self, file, format):
        self._headers = {}
        self.writer = get_writer(format.slug)
        self.file = file
        self.writer.open((), file)
Example #14
    def test_multiple_write_export_instance_calls(self):
        """
        Confirm that calling _write_export_instance() multiple times
        (as part of a bulk export) works as expected.
        """
        export_instances = [
            ExportInstance(
                # export_format=Format.JSON,
                tables=[
                    TableConfiguration(
                        label="My table",
                        path=[],
                        columns=[
                            ExportColumn(
                                label="Q3",
                                item=ScalarItem(
                                    path=['form', 'q3'],
                                ),
                                selected=True,
                            ),
                        ]
                    ),
                ]
            ),
            ExportInstance(
                # export_format=Format.JSON,
                tables=[
                    TableConfiguration(
                        label="My other table",
                        path=['form', 'q2'],
                        columns=[
                            ExportColumn(
                                label="Q4",
                                item=ScalarItem(
                                    path=['form', 'q2', 'q4'],
                                ),
                                selected=True,
                            ),
                        ]
                    )
                ]
            )

        ]

        writer = _Writer(get_writer(Format.JSON))
        with writer.open(_get_tables(export_instances)):
            _write_export_instance(writer, export_instances[0], self.docs)
            _write_export_instance(writer, export_instances[1], self.docs)

        with ExportFile(writer.path, writer.format) as export:
            self.assertEqual(
                json.loads(export.read()),
                {
                    u'My table': {
                        u'headers': [u'Q3'],
                        u'rows': [[u'baz'], [u'bop']],

                    },
                    u'My other table': {
                        u'headers': [u'Q4'],
                        u'rows': [[u'bar'], [u'boop']],
                    }
                }
            )
Example #15
    def test_multiple_write_export_instance_calls(self):
        """
        Confirm that calling _write_export_instance() multiple times
        (as part of a bulk export) works as expected.
        """
        export_instances = [
            FormExportInstance(
                # export_format=Format.JSON,
                tables=[
                    TableConfiguration(
                        label="My table",
                        selected=True,
                        path=[],
                        columns=[
                            ExportColumn(
                                label="Q3",
                                item=ScalarItem(
                                    path=[PathNode(name='form'), PathNode(name='q3')],
                                ),
                                selected=True,
                            ),
                        ]
                    ),
                ]
            ),
            FormExportInstance(
                # export_format=Format.JSON,
                tables=[
                    TableConfiguration(
                        label="My other table",
                        selected=True,
                        path=[PathNode(name="form", is_repeat=False), PathNode(name="q2", is_repeat=False)],
                        columns=[
                            ExportColumn(
                                label="Q4",
                                item=ScalarItem(
                                    path=[PathNode(name='form'), PathNode(name='q2'), PathNode(name='q4')],
                                ),
                                selected=True,
                            ),
                        ]
                    )
                ]
            )

        ]

        writer = _Writer(get_writer(Format.JSON))
        with writer.open(export_instances):
            _write_export_instance(writer, export_instances[0], self.docs)
            _write_export_instance(writer, export_instances[1], self.docs)

        with ExportFile(writer.path, writer.format) as export:
            self.assertEqual(
                json.loads(export.read()),
                {
                    u'Export1-My table': {
                        u'headers': [u'Q3'],
                        u'rows': [[u'baz'], [u'bop']],

                    },
                    u'Export2-My other table': {
                        u'headers': [u'Q4'],
                        u'rows': [[u'bar'], [u'boop']],
                    }
                }
            )
Example #16
    def test_multiple_write_export_instance_calls(self, export_save):
        """
        Confirm that calling write_export_instance() multiple times
        (as part of a bulk export) works as expected.
        """
        export_instances = [
            FormExportInstance(tables=[
                TableConfiguration(label="My table",
                                   selected=True,
                                   path=[],
                                   columns=[
                                       ExportColumn(
                                           label="Q3",
                                           item=ScalarItem(path=[
                                               PathNode(name='form'),
                                               PathNode(name='q3')
                                           ], ),
                                           selected=True,
                                       ),
                                   ]),
            ]),
            FormExportInstance(tables=[
                TableConfiguration(label="My other table",
                                   selected=True,
                                   path=[
                                       PathNode(name="form", is_repeat=False),
                                       PathNode(name="q2", is_repeat=False)
                                   ],
                                   columns=[
                                       ExportColumn(
                                           label="Q4",
                                           item=ScalarItem(path=[
                                               PathNode(name='form'),
                                               PathNode(name='q2'),
                                               PathNode(name='q4')
                                           ], ),
                                           selected=True,
                                       ),
                                   ])
            ]),
            FormExportInstance(tables=[
                TableConfiguration(label="My other table",
                                   selected=True,
                                   path=[
                                       PathNode(name="form", is_repeat=False),
                                       PathNode(name="q2", is_repeat=False)
                                   ],
                                   columns=[
                                       ExportColumn(
                                           label="Q4",
                                           item=ScalarItem(path=[
                                               PathNode(name='form'),
                                               PathNode(name='q2'),
                                               PathNode(name='q4')
                                           ], ),
                                           selected=True,
                                       ),
                                   ])
            ])
        ]

        with TransientTempfile() as temp_path:
            writer = _ExportWriter(get_writer(Format.JSON), temp_path)
            with writer.open(export_instances):
                write_export_instance(writer, export_instances[0], self.docs)
                write_export_instance(writer, export_instances[1], self.docs)
                write_export_instance(writer, export_instances[2], self.docs)

            with ExportFile(writer.path, writer.format) as export:
                self.assertEqual(
                    json.loads(export.read()), {
                        'My table': {
                            'headers': ['Q3'],
                            'rows': [['baz'], ['bop']],
                        },
                        'Export2-My other table': {
                            'headers': ['Q4'],
                            'rows': [['bar'], ['boop']],
                        },
                        'Export3-My other table': {
                            'headers': ['Q4'],
                            'rows': [['bar'], ['boop']],
                        },
                    })
        self.assertTrue(export_save.called)
Example #17
    def test_multiple_write_export_instance_calls(self, export_save):
        """
        Confirm that calling write_export_instance() multiple times
        (as part of a bulk export) works as expected.
        """
        export_instances = [
            FormExportInstance(
                tables=[
                    TableConfiguration(
                        label="My table",
                        selected=True,
                        path=[],
                        columns=[
                            ExportColumn(
                                label="Q3",
                                item=ScalarItem(
                                    path=[PathNode(name='form'), PathNode(name='q3')],
                                ),
                                selected=True,
                            ),
                        ]
                    ),
                ]
            ),
            FormExportInstance(
                tables=[
                    TableConfiguration(
                        label="My other table",
                        selected=True,
                        path=[PathNode(name="form", is_repeat=False), PathNode(name="q2", is_repeat=False)],
                        columns=[
                            ExportColumn(
                                label="Q4",
                                item=ScalarItem(
                                    path=[PathNode(name='form'), PathNode(name='q2'), PathNode(name='q4')],
                                ),
                                selected=True,
                            ),
                        ]
                    )
                ]
            ),
            FormExportInstance(
                tables=[
                    TableConfiguration(
                        label="My other table",
                        selected=True,
                        path=[PathNode(name="form", is_repeat=False), PathNode(name="q2", is_repeat=False)],
                        columns=[
                            ExportColumn(
                                label="Q4",
                                item=ScalarItem(
                                    path=[PathNode(name='form'), PathNode(name='q2'), PathNode(name='q4')],
                                ),
                                selected=True,
                            ),
                        ]
                    )
                ]
            )

        ]

        with TransientTempfile() as temp_path:
            writer = _ExportWriter(get_writer(Format.JSON), temp_path)
            with writer.open(export_instances):
                write_export_instance(writer, export_instances[0], self.docs)
                write_export_instance(writer, export_instances[1], self.docs)
                write_export_instance(writer, export_instances[2], self.docs)

            with ExportFile(writer.path, writer.format) as export:
                self.assertEqual(
                    json.loads(export.read()),
                    {
                        'My table': {
                            'headers': ['Q3'],
                            'rows': [['baz'], ['bop']],

                        },
                        'Export2-My other table': {
                            'headers': ['Q4'],
                            'rows': [['bar'], ['boop']],
                        },
                        'Export3-My other table': {
                            'headers': ['Q4'],
                            'rows': [['bar'], ['boop']],
                        },
                    }
                )
        self.assertTrue(export_save.called)
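
Examples #16 and #17 also show how bulk exports disambiguate sheet names: a table label shared by several export instances is prefixed with the 1-based position of its export ("Export2-", "Export3-"), while a unique label such as "My table" is kept as-is (the older code in Example #15 prefixed every sheet). A hedged reconstruction of that naming rule follows; the sheet_names helper is made up.

from collections import Counter

def sheet_names(labels):
    # Hypothetical reconstruction of the naming in Examples #16-#17:
    # duplicated labels get an "ExportN-" prefix, unique labels do not.
    counts = Counter(labels)
    return [
        label if counts[label] == 1 else "Export%d-%s" % (i, label)
        for i, label in enumerate(labels, start=1)
    ]

assert sheet_names(["My table", "My other table", "My other table"]) == \
    ["My table", "Export2-My other table", "Export3-My other table"]
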
Example #18
    def get_export_files(self,
                         format='',
                         previous_export_id=None,
                         filter=None,
                         use_cache=True,
                         max_column_size=2000,
                         separator='|',
                         process=None,
                         **kwargs):
        # the APIs of how these methods are broken down suck, but at least
        # it's DRY
        from couchexport.export import get_writer, get_export_components, get_headers, get_formatted_rows
        from django.core.cache import cache
        import hashlib

        export_tag = self.index

        CACHE_TIME = 1 * 60 * 60  # cache for 1 hour, in seconds

        def _build_cache_key(tag, prev_export_id, format, max_column_size):
            def _human_readable_key(tag, prev_export_id, format,
                                    max_column_size):
                return "couchexport_:%s:%s:%s:%s" % (tag, prev_export_id,
                                                     format, max_column_size)

            return hashlib.md5(
                _human_readable_key(
                    tag, prev_export_id, format,
                    max_column_size).encode('utf-8')).hexdigest()

        # check cache; currently only supported for filterless queries
        cache_key = _build_cache_key(export_tag, previous_export_id, format,
                                     max_column_size)
        if use_cache and filter is None:
            cached_data = cache.get(cache_key)
            if cached_data:
                (tmp, checkpoint) = cached_data
                return ExportFiles(tmp, checkpoint)

        fd, path = tempfile.mkstemp()
        with os.fdopen(fd, 'wb') as tmp:
            schema_index = export_tag
            config, updated_schema, export_schema_checkpoint = get_export_components(
                schema_index, previous_export_id, filter)
            if config:
                writer = get_writer(format)

                # get cleaned up headers
                formatted_headers = self.remap_tables(
                    get_headers(updated_schema, separator=separator))
                writer.open(formatted_headers,
                            tmp,
                            max_column_size=max_column_size)

                total_docs = len(config.potentially_relevant_ids)
                if process:
                    DownloadBase.set_progress(process, 0, total_docs)
                for i, doc in config.enum_docs():
                    if self.transform:
                        doc = self.transform(doc)

                    writer.write(
                        self.remap_tables(
                            get_formatted_rows(doc,
                                               updated_schema,
                                               include_headers=False,
                                               separator=separator)))
                    if process:
                        DownloadBase.set_progress(process, i + 1, total_docs)
                writer.close()

            checkpoint = export_schema_checkpoint

        if checkpoint:
            if use_cache:
                cache.set(cache_key, (path, checkpoint), CACHE_TIME)
            return ExportFiles(path, checkpoint)

        return None
Example #19
    def export_target(self):
        writer = get_writer(self.export_format)
        return writer.target_app