def get_export_files(self, format='', previous_export_id=None, filter=None,
                     use_cache=True, max_column_size=2000, separator='|',
                     process=None, **kwargs):
    """Run an export for this schema and return the result as temp-file data.

    Writes the export to a ``tempfile.mkstemp()`` file and returns an
    ``ExportFiles(path, checkpoint)`` wrapper, or ``None`` when no checkpoint
    was produced (i.e. there was nothing to export).

    :param format: writer format key passed to ``get_writer`` (e.g. an Excel
        or CSV format identifier).
    :param previous_export_id: id of a prior export checkpoint, used by
        ``get_export_components`` for incremental exports.
    :param filter: optional document filter; when set, caching is skipped.
    :param use_cache: cache the (path, checkpoint) pair for an hour and
        serve subsequent identical requests from the cache.
    :param max_column_size: passed through to the writer.
    :param separator: column-path separator used when building headers/rows.
    :param process: optional download handle for progress reporting via
        ``DownloadBase.set_progress``.
    """
    # the APIs of how these methods are broken down suck, but at least
    # it's DRY
    from couchexport.export import get_writer, get_export_components, get_headers, get_formatted_rows
    from django.core.cache import cache
    import hashlib

    export_tag = self.index

    CACHE_TIME = 1 * 60 * 60  # cache for 1 hour, in seconds

    def _build_cache_key(tag, prev_export_id, format, max_column_size):
        def _human_readable_key(tag, prev_export_id, format, max_column_size):
            return "couchexport_:%s:%s:%s:%s" % (tag, prev_export_id, format, max_column_size)
        # BUGFIX: hashlib.md5() requires bytes on Python 3; encode the key.
        # (Encoding is also valid on Python 2 for these ASCII-formatted keys.)
        return hashlib.md5(_human_readable_key(
            tag, prev_export_id, format, max_column_size).encode('utf-8')).hexdigest()

    # check cache, only supported for filterless queries, currently
    cache_key = _build_cache_key(export_tag, previous_export_id, format, max_column_size)
    if use_cache and filter is None:
        cached_data = cache.get(cache_key)
        if cached_data:
            (tmp, checkpoint) = cached_data
            # NOTE(review): the cached "tmp" is a filesystem path; this
            # assumes the temp file still exists for the cache lifetime.
            return ExportFiles(tmp, checkpoint)

    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as tmp:
        schema_index = export_tag
        config, updated_schema, export_schema_checkpoint = get_export_components(
            schema_index, previous_export_id, filter)
        if config:
            writer = get_writer(format)

            # get cleaned up headers
            formatted_headers = self.remap_tables(get_headers(updated_schema, separator=separator))
            writer.open(formatted_headers, tmp, max_column_size=max_column_size)

            total_docs = len(config.potentially_relevant_ids)
            if process:
                DownloadBase.set_progress(process, 0, total_docs)
            for i, doc in config.enum_docs():
                if self.transform:
                    doc = self.transform(doc)
                writer.write(self.remap_tables(get_formatted_rows(
                    doc, updated_schema, include_headers=False,
                    separator=separator)))
                if process:
                    DownloadBase.set_progress(process, i + 1, total_docs)
            writer.close()
        # export_schema_checkpoint is produced even when config is falsy;
        # a falsy checkpoint means there is nothing to return.
        checkpoint = export_schema_checkpoint

    if checkpoint:
        if use_cache:
            cache.set(cache_key, (path, checkpoint), CACHE_TIME)
        return ExportFiles(path, checkpoint)

    return None
def get_export_files(self, format=None, previous_export=None, filter=None,
                     process=None, max_column_size=None, apply_transforms=True,
                     limit=0, **kwargs):
    """Export this schema's documents to a temp file and return the result.

    Defaults the format to ``self.default_format`` (falling back to
    ``Format.XLS_2007``), streams the matching docs through the writer, and
    returns ``ExportFiles(path, checkpoint, format)`` — except for
    ``Format.PYTHON_DICT``, where the writer's in-memory preview is returned
    instead.

    :param previous_export: prior checkpoint for incremental export.
    :param filter: optional document filter.
    :param process: optional download handle for progress reporting.
    :param max_column_size: passed through to the writer.
    :param apply_transforms: when False, skip both ``self.transform`` and
        per-table transforms in ``self.trim``.
    :param limit: stop after roughly this many docs (0 means no limit).
        NOTE(review): the ``index > limit`` comparison may process one doc
        more than ``limit`` if enumeration is zero-based — confirm against
        ``enum_docs`` before relying on an exact count.
    """
    from couchexport.export import get_writer, get_formatted_rows

    if not format:
        format = self.default_format or Format.XLS_2007

    config, updated_schema, export_schema_checkpoint = self.get_export_components(
        previous_export, filter)

    # Prepare the output writer, cleaned-up headers, and table title map.
    writer = get_writer(format)
    headers = list(self.get_table_headers())
    titles = {table.index: table.display
              for table in self.tables if table.display}

    fd, path = tempfile.mkstemp()
    if six.PY2:
        path = path.decode('utf-8')

    with os.fdopen(fd, 'wb') as outfile:
        writer.open(headers, outfile, max_column_size=max_column_size,
                    table_titles=titles)

        doc_count = len(config.potentially_relevant_ids)
        if process:
            DownloadBase.set_progress(process, 0, doc_count)

        # Transform each doc onto the output and save.
        for index, doc in config.enum_docs():
            if limit and index > limit:
                break
            if apply_transforms and self.transform:
                doc = self.transform(doc)
            rows = get_formatted_rows(doc, updated_schema, separator=".")
            writer.write(self.trim(rows, doc, apply_transforms=apply_transforms))
            if process:
                DownloadBase.set_progress(process, index + 1, doc_count)

        writer.close()

    if format == Format.PYTHON_DICT:
        return writer.get_preview()
    return ExportFiles(path, export_schema_checkpoint, format)