def download_data(self, format="", previous_export=None, filter=None): """ If there is data, return an HTTPResponse with the appropriate data. If there is not data returns None. """ from couchexport.shortcuts import export_response from couchexport.export import get_writer, get_schema_new, format_tables, create_intermediate_tables if not format: format = self.default_format or Format.XLS_2007 from couchexport.export import ExportConfiguration database = get_db() config = ExportConfiguration(database, self.index, previous_export, util.intersect_filters(self.filter, filter)) # get and checkpoint the latest schema updated_schema = get_schema_new(config) export_schema_checkpoint = ExportSchema( seq=config.current_seq, schema=updated_schema, index=config.schema_index ) export_schema_checkpoint.save() # transform docs onto output and save writer = get_writer(format) # open the doc and the headers formatted_headers = self.get_table_headers() tmp = StringIO() writer.open(formatted_headers, tmp) for doc in config.get_docs(): writer.write(self.trim(format_tables(create_intermediate_tables(doc, updated_schema), separator="."))) writer.close() return export_response(tmp, format, self.name)
def generate_bulk_file(self):
    configs = list()
    schemas = list()
    checkpoints = list()

    for export_object in self.export_objects:
        config, schema, checkpoint = export_object.get_export_components(filter=self.export_filter)
        configs.append(config)
        schemas.append(schema)
        checkpoints.append(checkpoint)

    writer = get_writer(self.format)

    # generate the headers for the bulk excel file
    headers = self.generate_table_headers(schemas, checkpoints)

    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as tmp:
        writer.open(headers, tmp)

        # now that the headers are set, let's build the rows
        for i, config in enumerate(configs):
            for doc in config.get_docs():
                if self.export_objects[i].transform:
                    doc = self.export_objects[i].transform(doc)
                table = format_tables(create_intermediate_tables(doc, schemas[i]),
                                      include_headers=isinstance(self, CustomBulkExport),
                                      separator=self.separator)
                if isinstance(self, CustomBulkExport):
                    table = self.export_objects[i].trim(table, doc)
                table = self.export_objects[i].parse_tables(table)
                writer.write(table)
        writer.close()
    return path
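
# --- Usage sketch, not part of the original module -------------------------
# generate_bulk_file() returns the path of a tempfile.mkstemp() file, so the
# caller owns cleanup. `bulk_export` stands in for an instance of the
# enclosing bulk-export class (hypothetical variable name).
def example_read_bulk_file(bulk_export):
    path = bulk_export.generate_bulk_file()
    try:
        with open(path, 'rb') as f:
            return f.read()
    finally:
        os.remove(path)  # mkstemp files are never deleted automatically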
def get_export_files(self, format='', previous_export_id=None, filter=None,
                     use_cache=True, max_column_size=2000, separator='|',
                     process=None, **kwargs):
    # the APIs of how these methods are broken down suck, but at least
    # it's DRY
    from couchexport.export import get_writer, format_tables, create_intermediate_tables, get_export_components, get_headers
    from django.core.cache import cache
    import hashlib

    export_tag = self.index

    CACHE_TIME = 1 * 60 * 60  # cache for 1 hour, in seconds

    def _build_cache_key(tag, prev_export_id, format, max_column_size):
        def _human_readable_key(tag, prev_export_id, format, max_column_size):
            return "couchexport_:%s:%s:%s:%s" % (tag, prev_export_id, format, max_column_size)
        return hashlib.md5(
            _human_readable_key(tag, prev_export_id, format,
                                max_column_size).encode('utf-8')  # md5 needs bytes on Python 3
        ).hexdigest()

    # check cache, only supported for filterless queries, currently
    cache_key = _build_cache_key(export_tag, previous_export_id, format, max_column_size)
    if use_cache and filter is None:
        cached_data = cache.get(cache_key)
        if cached_data:
            (tmp, checkpoint) = cached_data
            return ExportFiles(tmp, checkpoint)

    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as tmp:
        schema_index = export_tag
        config, updated_schema, export_schema_checkpoint = get_export_components(
            schema_index, previous_export_id, filter)
        if config:
            writer = get_writer(format)

            # get cleaned up headers
            formatted_headers = self.remap_tables(
                get_headers(updated_schema, separator=separator))
            writer.open(formatted_headers, tmp, max_column_size=max_column_size)

            total_docs = len(config.potentially_relevant_ids)
            if process:
                DownloadBase.set_progress(process, 0, total_docs)
            for i, doc in config.enum_docs():
                if self.transform:
                    doc = self.transform(doc)
                writer.write(self.remap_tables(format_tables(
                    create_intermediate_tables(doc, updated_schema),
                    include_headers=False, separator=separator)))
                if process:
                    DownloadBase.set_progress(process, i + 1, total_docs)
            writer.close()

    checkpoint = export_schema_checkpoint

    if checkpoint:
        if use_cache:
            cache.set(cache_key, (path, checkpoint), CACHE_TIME)
        return ExportFiles(path, checkpoint)

    return None
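
# --- Standalone sketch of the cache-key pattern above ----------------------
# The method hashes a human-readable key so that arbitrary export tags,
# formats, and sizes always yield a short, cache-safe key. This helper simply
# mirrors the nested _build_cache_key for use outside the method
# (illustrative only; not part of the original API).
import hashlib

def build_export_cache_key(tag, prev_export_id, format, max_column_size):
    human_readable = "couchexport_:%s:%s:%s:%s" % (
        tag, prev_export_id, format, max_column_size)
    return hashlib.md5(human_readable.encode('utf-8')).hexdigest()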
def get_export_files(self, format=None, previous_export=None, filter=None,
                     process=None, max_column_size=None,
                     apply_transforms=True, limit=0, **kwargs):
    from couchexport.export import get_writer, format_tables, create_intermediate_tables

    if not format:
        format = self.default_format or Format.XLS_2007

    config, updated_schema, export_schema_checkpoint = self.get_export_components(
        previous_export, filter)

    # transform docs onto output and save
    writer = get_writer(format)

    # open the doc and the headers
    formatted_headers = list(self.get_table_headers())
    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as tmp:
        writer.open(
            formatted_headers,
            tmp,
            max_column_size=max_column_size,
            table_titles=dict([(table.index, table.display)
                               for table in self.tables if table.display])
        )

        total_docs = len(config.potentially_relevant_ids)
        if process:
            DownloadBase.set_progress(process, 0, total_docs)
        for i, doc in config.enum_docs():
            if limit and i >= limit:  # enum_docs is zero-based, so stop after `limit` docs
                break
            if self.transform and apply_transforms:
                doc = self.transform(doc)
            formatted_tables = self.trim(
                format_tables(
                    create_intermediate_tables(doc, updated_schema),
                    separator="."
                ),
                doc,
                apply_transforms=apply_transforms
            )
            writer.write(formatted_tables)
            if process:
                DownloadBase.set_progress(process, i + 1, total_docs)

        writer.close()
    return ExportFiles(path, export_schema_checkpoint, format)
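
# --- Usage sketch, not part of the original module -------------------------
# Driving get_export_files() with a progress handle and a row limit. `task`
# is assumed to be whatever DownloadBase.set_progress expects (e.g. an async
# task handle); ExportFiles attribute names are not dereferenced here to
# avoid guessing at its interface.
def example_limited_export(export, task):
    files = export.get_export_files(
        format=Format.XLS_2007,
        process=task,   # progress reported via DownloadBase.set_progress
        limit=100,      # stop after the first 100 matching docs
    )
    return files  # ExportFiles(path, checkpoint, format) per the return above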