def get_export_files(self, format='', previous_export_id=None, filter=None,
                     use_cache=True, max_column_size=2000, separator='|',
                     process=None, **kwargs):
    """Write this export to a temp file and return it as ``ExportFiles``.

    :param format: writer format key passed to ``get_writer``
    :param previous_export_id: checkpoint id to export incrementally from
    :param filter: doc filter; caching is only used when this is ``None``
    :param use_cache: look up / store the (path, checkpoint) pair in the
        django cache for an hour
    :param max_column_size: passed through to the writer
    :param separator: column-path separator used when building headers/rows
    :param process: optional download task; progress is reported against it
    :returns: ``ExportFiles(path, checkpoint)``, or ``None`` when no
        checkpoint was produced (nothing to export)
    """
    # the APIs of how these methods are broken down suck, but at least
    # it's DRY
    from couchexport.export import get_writer, get_export_components, get_headers, get_formatted_rows
    from django.core.cache import cache
    import hashlib

    export_tag = self.index

    CACHE_TIME = 1 * 60 * 60  # cache for 1 hour, in seconds

    def _build_cache_key(tag, prev_export_id, format, max_column_size):
        def _human_readable_key(tag, prev_export_id, format, max_column_size):
            return "couchexport_:%s:%s:%s:%s" % (tag, prev_export_id, format, max_column_size)
        # md5 operates on bytes: encode the human-readable key so this
        # works on python 3 as well as python 2
        return hashlib.md5(_human_readable_key(
            tag, prev_export_id, format, max_column_size).encode('utf-8')).hexdigest()

    # check cache, only supported for filterless queries, currently
    cache_key = _build_cache_key(export_tag, previous_export_id, format, max_column_size)
    if use_cache and filter is None:
        cached_data = cache.get(cache_key)
        if cached_data:
            (tmp, checkpoint) = cached_data
            return ExportFiles(tmp, checkpoint)

    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as tmp:
        schema_index = export_tag
        config, updated_schema, export_schema_checkpoint = get_export_components(
            schema_index, previous_export_id, filter)
        if config:
            writer = get_writer(format)

            # get cleaned up headers
            formatted_headers = self.remap_tables(get_headers(updated_schema, separator=separator))
            writer.open(formatted_headers, tmp, max_column_size=max_column_size)

            total_docs = len(config.potentially_relevant_ids)
            if process:
                DownloadBase.set_progress(process, 0, total_docs)
            for i, doc in config.enum_docs():
                if self.transform:
                    doc = self.transform(doc)
                writer.write(self.remap_tables(get_formatted_rows(
                    doc, updated_schema, include_headers=False,
                    separator=separator)))
                if process:
                    DownloadBase.set_progress(process, i + 1, total_docs)
            writer.close()

    checkpoint = export_schema_checkpoint
    if checkpoint:
        if use_cache:
            # cache the temp-file path, not its contents; entries expire
            # after CACHE_TIME seconds
            cache.set(cache_key, (path, checkpoint), CACHE_TIME)
        return ExportFiles(path, checkpoint)
    return None
def generate_bulk_file(self):
    """Write every export object's data into one combined file.

    :returns: filesystem path of the generated temp file
    """
    configs = list()
    schemas = list()
    checkpoints = list()

    for export_object in self.export_objects:
        config, schema, checkpoint = export_object.get_export_components(filter=self.export_filter)
        configs.append(config)
        schemas.append(schema)
        checkpoints.append(checkpoint)

    writer = get_writer(self.format)

    # generate the headers for the bulk excel file
    headers = self.generate_table_headers(schemas, checkpoints)

    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as tmp:
        writer.open(headers, tmp)

        # now that the headers are set, lets build the rows
        for i, config in enumerate(configs):
            for doc in config.get_docs():
                if self.export_objects[i].transform:
                    doc = self.export_objects[i].transform(doc)
                table = get_formatted_rows(
                    doc, schemas[i], separator=self.separator,
                    include_headers=isinstance(self, CustomBulkExport))
                if isinstance(self, CustomBulkExport):
                    table = self.export_objects[i].trim(table, doc)
                if table and table[0]:
                    # if an export only contains data from repeats and a form
                    # has no repeats then the table list will be empty; skip
                    # writing in that case (matches the other bulk generators)
                    table = self.export_objects[i].parse_tables(table)
                    writer.write(table)

        writer.close()
    return path
def generate_bulk_file(self, update_progress=None):
    """Write every export object's data into one combined file.

    :param update_progress: optional callable invoked with the 1-based index
        of each finished export object; defaults to a no-op
    :returns: filesystem path of the generated temp file
    """
    update_progress = update_progress or (lambda x: x)

    # gather (config, schema, checkpoint) for each export object up front
    components = [
        export_object.get_export_components(filter=self.export_filter)
        for export_object in self.export_objects
    ]
    configs = [component[0] for component in components]
    schemas = [component[1] for component in components]
    checkpoints = [component[2] for component in components]

    writer = get_writer(self.format)

    # headers for the bulk excel file come from all schemas/checkpoints
    headers = self.generate_table_headers(schemas, checkpoints)

    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as tmp:
        writer.open(headers, tmp)

        is_custom = isinstance(self, CustomBulkExport)

        # headers are set -- stream the rows, one export object at a time
        for i, config in enumerate(configs):
            export_object = self.export_objects[i]
            try:
                for doc in config.get_docs():
                    if export_object.transform:
                        doc = export_object.transform(doc)
                    table = get_formatted_rows(
                        doc, schemas[i],
                        separator=self.separator,
                        include_headers=is_custom)
                    if is_custom:
                        table = export_object.trim(table, doc)
                    if table and table[0]:
                        # an export containing only repeat data produces an
                        # empty table list for forms without repeats
                        writer.write(export_object.parse_tables(table))
            except SchemaMismatchException:
                # fire off a delayed force update to prevent this from happening again
                rebuild_schemas.delay(export_object.index)
                writer.write([(export_object.table_name, [
                    FormattedRow([
                        ugettext(
                            'There was an error generating this export. '
                            'If the problem persists please report an issue.'
                        )
                    ], separator=self.separator)
                ])])
            update_progress(i + 1)
        writer.close()
    return path
def get_export_files(self, format=None, previous_export=None, filter=None, process=None,
                     max_column_size=None, apply_transforms=True, limit=0, **kwargs):
    """Render this export to a temp file and return an ``ExportFiles``.

    :param format: writer format key; falls back to ``self.default_format``
        then ``Format.XLS_2007``
    :param previous_export: checkpoint to export incrementally from
    :param filter: optional doc filter passed to ``get_export_components``
    :param process: optional download task used for progress reporting
    :param max_column_size: passed through to the writer
    :param apply_transforms: when False, skip ``self.transform`` and have
        ``self.trim`` skip transforms too
    :param limit: when nonzero, stop after roughly this many docs
    :returns: the writer's preview for ``Format.PYTHON_DICT``, otherwise
        ``ExportFiles(path, checkpoint, format)``
    """
    from couchexport.export import get_writer, get_formatted_rows
    if not format:
        format = self.default_format or Format.XLS_2007

    config, updated_schema, export_schema_checkpoint = self.get_export_components(
        previous_export, filter)

    # transform docs onto output and save
    writer = get_writer(format)

    # open the doc and the headers
    formatted_headers = list(self.get_table_headers())
    fd, path = tempfile.mkstemp()
    if six.PY2:
        # mkstemp gives a byte-string path on py2; normalize to unicode
        path = path.decode('utf-8')
    with os.fdopen(fd, 'wb') as tmp:
        writer.open(
            formatted_headers,
            tmp,
            max_column_size=max_column_size,
            # only tables with a display name get a title
            table_titles=dict([
                (table.index, table.display)
                for table in self.tables if table.display
            ])
        )

        total_docs = len(config.potentially_relevant_ids)
        if process:
            DownloadBase.set_progress(process, 0, total_docs)
        for i, doc in config.enum_docs():
            if limit and i > limit:
                # NOTE(review): breaks when i > limit, so up to limit + 1
                # docs may be written -- confirm whether this is intended
                break
            if self.transform and apply_transforms:
                doc = self.transform(doc)
            formatted_tables = self.trim(
                get_formatted_rows(doc, updated_schema, separator="."),
                doc,
                apply_transforms=apply_transforms
            )
            writer.write(formatted_tables)
            if process:
                DownloadBase.set_progress(process, i + 1, total_docs)
        writer.close()

    if format == Format.PYTHON_DICT:
        # dict output is returned in-memory rather than as a file
        return writer.get_preview()

    return ExportFiles(path, export_schema_checkpoint, format)
def test(self):
    """A scalar that later became a dict keeps its value under the plain
    key, while the nested path reports ``scalar_never_was``."""
    document = {'gender': 'boy'}
    document_schema = {'gender': {'': 'string', 'gender': 'string'}}

    tables = get_formatted_rows(document, document_schema, '.')
    header_row = tables[0][1][0].get_data()
    value_row = tables[0][1][1].get_data()

    expected = {
        'id': '0',
        'gender.gender': scalar_never_was,
        'gender': 'boy',
    }
    self.assertEqual(dict(zip(list(header_row), list(value_row))), expected)
def generate_bulk_file(self, update_progress=None):
    """Write every export object's data into one combined file.

    :param update_progress: optional callable invoked with the 1-based index
        of each finished export object; defaults to a no-op
    :returns: filesystem path of the generated temp file
    """
    # default to a no-op so callers may omit the progress callback
    update_progress = update_progress or (lambda x: x)
    configs = list()
    schemas = list()
    checkpoints = list()

    for export_object in self.export_objects:
        config, schema, checkpoint = export_object.get_export_components(filter=self.export_filter)
        configs.append(config)
        schemas.append(schema)
        checkpoints.append(checkpoint)

    writer = get_writer(self.format)

    # generate the headers for the bulk excel file
    headers = self.generate_table_headers(schemas, checkpoints)

    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as tmp:
        writer.open(headers, tmp)

        # now that the headers are set, lets build the rows
        for i, config in enumerate(configs):
            try:
                for doc in config.get_docs():
                    if self.export_objects[i].transform:
                        doc = self.export_objects[i].transform(doc)
                    table = get_formatted_rows(
                        doc, schemas[i], separator=self.separator,
                        include_headers=isinstance(self, CustomBulkExport))
                    if isinstance(self, CustomBulkExport):
                        table = self.export_objects[i].trim(table, doc)
                    if table and table[0]:
                        # if an export only contains data from repeats and a form has no repeats
                        # then the table list will be empty
                        table = self.export_objects[i].parse_tables(table)
                        writer.write(table)
            except SchemaMismatchException:
                # fire off a delayed force update to prevent this from happening again
                rebuild_schemas.delay(self.export_objects[i].index)
                # write a single placeholder row so the failure is visible
                # in the generated file instead of silently missing
                writer.write(
                    [(self.export_objects[i].table_name, [
                        FormattedRow([
                            ugettext(
                                'There was an error generating this export. '
                                'If the problem persists please report an issue.'
                            )], separator=self.separator)
                    ])]
                )
            update_progress(i+1)
        writer.close()
    return path
def get_export_files(self, format=None, previous_export=None, filter=None, process=None,
                     max_column_size=None, apply_transforms=True, limit=0, **kwargs):
    """Render this export to a temp file and return an ``ExportFiles``.

    :param format: writer format key; falls back to ``self.default_format``
        then ``Format.XLS_2007``
    :param previous_export: checkpoint to export incrementally from
    :param filter: optional doc filter passed to ``get_export_components``
    :param process: optional download task used for progress reporting
    :param max_column_size: passed through to the writer
    :param apply_transforms: when False, skip ``self.transform`` and have
        ``self.trim`` skip transforms too
    :param limit: when nonzero, stop once the doc index exceeds this value
    :returns: the writer's preview for ``Format.PYTHON_DICT``, otherwise
        ``ExportFiles(path, checkpoint, format)``
    """
    from couchexport.export import get_writer, get_formatted_rows

    format = format or self.default_format or Format.XLS_2007

    config, updated_schema, export_schema_checkpoint = self.get_export_components(
        previous_export, filter)

    writer = get_writer(format)
    formatted_headers = list(self.get_table_headers())

    # only tables with a display name contribute a title
    table_titles = {
        table.index: table.display
        for table in self.tables if table.display
    }

    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as tmp:
        writer.open(
            formatted_headers,
            tmp,
            max_column_size=max_column_size,
            table_titles=table_titles,
        )

        total_docs = len(config.potentially_relevant_ids)
        if process:
            DownloadBase.set_progress(process, 0, total_docs)

        for i, doc in config.enum_docs():
            if limit and i > limit:
                break
            if self.transform and apply_transforms:
                doc = self.transform(doc)
            rows = get_formatted_rows(doc, updated_schema, separator=".")
            writer.write(self.trim(rows, doc, apply_transforms=apply_transforms))
            if process:
                DownloadBase.set_progress(process, i + 1, total_docs)

        writer.close()

    # dict output is returned in-memory rather than as a file
    if format == Format.PYTHON_DICT:
        return writer.get_preview()
    return ExportFiles(path, export_schema_checkpoint, format)
def test(self):
    """A scalar that later became a dict keeps its value under the plain
    key, while the nested path reports ``scalar_never_was``."""
    document = {'gender': 'boy'}
    document_schema = {'gender': {'': 'string', 'gender': 'string'}}

    tables = get_formatted_rows(document, document_schema, '.')
    header_row = tables[0][1][0].get_data()
    value_row = tables[0][1][1].get_data()
    actual = dict(zip(list(header_row), list(value_row)))

    self.assertEqual(actual, {
        'id': '0',
        'gender.gender': scalar_never_was,
        'gender': 'boy',
    })