def handle(self, export_id, **options):
    """Resume a partially-built form export by dumping only documents
    received after the last successfully exported form.

    Downloads the existing export archive, reads the last-run metadata
    from it, narrows the export filters to forms received after that
    point, and re-runs the multiprocess exporter starting at the next
    page number.

    :param export_id: id of the export instance to resume.
    :param options: management-command options; uses ``download_path``
        (optional path to an already-downloaded archive) and
        ``processes`` (worker count).
    :raises CommandError: if the export is not a form export, or if it
        already carries a date filter (in which case an incremental
        resume would double-count and a full rebuild is required).
    """
    export_instance = get_properly_wrapped_export_instance(export_id)
    if export_instance.type != FORM_EXPORT:
        raise CommandError("Unsupported export type: %s" % export_instance.type)

    filters = export_instance.get_filters()
    if any(isinstance(filter_, FormExportFilterBuilder.date_filter_class) for filter_ in filters):
        raise CommandError("Export already has a date filter and so must be fully rebuilt.")

    # Read the option once; the original code mixed options.get(...) and
    # options[...] for the same key.
    download_path = options.get('download_path')
    export_archive_path = download_export(export_instance, download_path=download_path)
    last_form_id, last_form_received_on, last_page_number = get_last_run_meta(
        export_instance, export_archive_path)

    print("Exporting data since '%s'" % last_form_received_on)
    # Only export forms strictly newer than the last run; also exclude the
    # last exported form by id in case its timestamp ties the boundary.
    filters.append(FormExportFilterBuilder.date_filter_class(gt=last_form_received_on))
    if last_form_id:
        filters.append(NOT(TermFilter('_id', last_form_id)))

    total_docs = get_export_size(export_instance, filters)
    exporter = MultiprocessExporter(
        export_instance, total_docs, options['processes'],
        existing_archive_path=download_path, keep_file=True
    )
    # Resume page numbering immediately after the last completed page.
    paginator = OutputPaginator(export_id, last_page_number + 1)

    logger.info('Starting data dump of {} docs'.format(total_docs))
    run_multiprocess_exporter(exporter, filters, paginator, 1000000)
def rebuild_export_mutiprocess(export_id, num_processes, page_size=100000):
    """Rebuild an export from scratch, fanning page processing out to
    ``num_processes`` workers.

    Streams every matching document through the paginator, handing each
    completed page of ``page_size`` docs to the exporter, then flushes
    any final partial page and waits for all workers to finish.
    """
    assert num_processes > 0

    export_instance = get_properly_wrapped_export_instance(export_id)
    filters = export_instance.get_filters()
    total_docs = get_export_size(export_instance, filters)
    logger.info('Starting data dump of {} docs'.format(total_docs))
    exporter = MultiprocessExporter(export_instance, total_docs, num_processes)
    paginator = OutputPaginator(export_id)

    def _flush_page():
        # Log the completed page, then ship it to a worker process.
        logger.info(' Dump page {} complete: {} docs'.format(
            paginator.page, paginator.page_size))
        exporter.process_page(paginator.get_result())

    with exporter, paginator:
        for doc in get_export_documents(export_instance, filters):
            paginator.write(doc)
            if paginator.page_size == page_size:
                _flush_page()
                paginator.next_page()
        # Final partial page, if any docs remain un-flushed.
        if paginator.page_size:
            _flush_page()
        exporter.wait_till_completion()
def send_preparation_analytics(self, export_instances, export_filters):
    """Record analytics events for an export download: a Hubspot form
    submission plus a Kissmetrics workflow noting whether any of the
    exports actually contained data under the given filters.
    """
    send_hubspot_form(HUBSPOT_DOWNLOADED_EXPORT_FORM_ID, self.request)

    model_name = self.model[0].upper() + self.model[1:]
    has_data = any(
        get_export_size(instance, export_filters) > 0
        for instance in export_instances
    )
    track_workflow(
        self.request.couch_user.username,
        'Downloaded {} Exports With {}Data'.format(
            model_name, '' if has_data else 'No ')
    )
def send_preparation_analytics(self, export_instances, export_filters):
    """Fire download analytics: submit the Hubspot form and track a
    workflow event that distinguishes empty from non-empty exports.
    """
    send_hubspot_form(HUBSPOT_DOWNLOADED_EXPORT_FORM_ID, self.request)

    # Qualify the event name with "No " when every export is empty.
    if any(get_export_size(instance, export_filters) > 0
           for instance in export_instances):
        data_qualifier = ''
    else:
        data_qualifier = 'No '
    event = 'Downloaded {} Exports With {}Data'.format(
        self.model[0].upper() + self.model[1:], data_qualifier)
    track_workflow(self.request.couch_user.username, event)
def _check_export_size(domain, export_instances, export_filters):
    """Reject oversized synchronous exports.

    Sums the row counts of all ``export_instances`` under
    ``export_filters`` and raises once the running total exceeds
    ``MAX_EXPORTABLE_ROWS`` — unless the domain has paginated exports
    enabled, in which case there is no cap.

    :raises ExportAsyncException: when the combined row count exceeds
        the cap for a non-paginated domain. The reported row count is
        the running total at the point the cap was crossed.
    """
    if PAGINATED_EXPORTS.enabled(domain):
        # Paginated exports are exempt from the cap. Checking the toggle
        # up front also skips the per-instance size queries entirely,
        # instead of re-evaluating the toggle on every over-limit
        # iteration as the previous version did.
        return
    count = 0
    for instance in export_instances:
        count += get_export_size(instance, export_filters)
        if count > MAX_EXPORTABLE_ROWS:
            raise ExportAsyncException(
                _("This export contains %(row_count)s rows. Please change the "
                  "filters to be less than %(max_rows)s rows.") % {
                    'row_count': count,
                    'max_rows': MAX_EXPORTABLE_ROWS
                })
def rebuild_export_mutiprocess(export_id, num_processes, page_size=100000):
    """Rebuild an export from scratch using a pool of worker processes.

    Looks up the export instance, measures the total document count for
    its filters, and delegates the actual page-by-page dump to
    ``run_multiprocess_exporter``.
    """
    assert num_processes > 0

    export_instance = get_properly_wrapped_export_instance(export_id)
    export_filters = export_instance.get_filters()
    doc_count = get_export_size(export_instance, export_filters)
    exporter = MultiprocessExporter(export_instance, doc_count, num_processes)
    output_paginator = OutputPaginator(export_id)

    logger.info('Starting data dump of {} docs'.format(doc_count))
    run_multiprocess_exporter(exporter, export_filters, output_paginator, page_size)
def _check_export_size(domain, export_instances, export_filters):
    """Guard against oversized exports: raise as soon as the combined
    row count of the given instances passes ``MAX_EXPORTABLE_ROWS``,
    unless the domain's paginated-exports toggle is on.
    """
    running_total = 0
    for export_instance in export_instances:
        running_total += get_export_size(export_instance, export_filters)
        # Same predicate as `total > MAX and not enabled`, De Morgan'd;
        # the toggle is still only consulted once the cap is crossed.
        if running_total <= MAX_EXPORTABLE_ROWS or PAGINATED_EXPORTS.enabled(domain):
            continue
        raise ExportAsyncException(
            _("This export contains %(row_count)s rows. Please change the "
              "filters to be less than %(max_rows)s rows.") % {
                'row_count': running_total,
                'max_rows': MAX_EXPORTABLE_ROWS
            }
        )