def handle(self, export_id, **options):
        export_instance = get_properly_wrapped_export_instance(export_id)

        if export_instance.type != FORM_EXPORT:
            raise CommandError("Unsupported export type: %s" % export_instance.type)

        filters = export_instance.get_filters()
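        # Incremental resume appends its own date filter below, so an export
        # that already filters by date cannot be resumed and must be rebuilt.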
        if any(isinstance(filter_, FormExportFilterBuilder.date_filter_class) for filter_ in filters):
            raise CommandError("Export already has a date filter and so must be fully rebuilt.")

        # Download the archive from the previous run and read back its checkpoint:
        # the last exported form id, its received_on timestamp, and the last page number.
        export_archive_path = download_export(export_instance, download_path=options.get('download_path'))
        last_run_meta = get_last_run_meta(export_instance, export_archive_path)
        last_form_id, last_form_received_on, last_page_number = last_run_meta

        print("Exporting data since '%s'" % last_form_received_on)
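        # Resume from the checkpoint: only forms received after the last run,
        # explicitly excluding the last exported form itself.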
        filters.append(FormExportFilterBuilder.date_filter_class(gt=last_form_received_on))
        if last_form_id:
            filters.append(NOT(TermFilter('_id', last_form_id)))
        total_docs = get_export_size(export_instance, filters)
        exporter = MultiprocessExporter(
            export_instance, total_docs, options['processes'],
            existing_archive_path=options['download_path'], keep_file=True
        )
        paginator = OutputPaginator(export_id, last_page_number + 1)

        logger.info('Starting data dump of {} docs'.format(total_docs))
        run_multiprocess_exporter(exporter, filters, paginator, 1000000)  # page size: one million docs
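
The command above resumes an interrupted export by filtering on a checkpoint from the previous run. A minimal, dependency-free sketch of that resume logic, using plain dicts in place of the real ES filter classes (docs_since_checkpoint and its field names are illustrative, not CommCare HQ APIs):

from datetime import datetime

def docs_since_checkpoint(docs, last_received_on, last_form_id=None):
    # Mirror the command above: take only docs received strictly after the
    # checkpoint, and defensively skip the last exported doc itself.
    for doc in docs:
        if doc['received_on'] <= last_received_on:
            continue
        if last_form_id and doc['_id'] == last_form_id:
            continue
        yield doc

docs = [
    {'_id': 'a', 'received_on': datetime(2020, 1, 1)},
    {'_id': 'b', 'received_on': datetime(2020, 1, 2)},
    {'_id': 'c', 'received_on': datetime(2020, 1, 3)},
]
# Resuming after form 'b' re-exports only 'c'.
print([d['_id'] for d in docs_since_checkpoint(docs, datetime(2020, 1, 2), 'b')])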
Example 2
def rebuild_export_mutiprocess(export_id, num_processes, page_size=100000):
    assert num_processes > 0

    def _log_page_dumped(paginator):
        logger.info('  Dump page {} complete: {} docs'.format(
            paginator.page, paginator.page_size))

    export_instance = get_properly_wrapped_export_instance(export_id)
    filters = export_instance.get_filters()
    total_docs = get_export_size(export_instance, filters)

    logger.info('Starting data dump of {} docs'.format(total_docs))
    exporter = MultiprocessExporter(export_instance, total_docs, num_processes)
    paginator = OutputPaginator(export_id)
    with exporter, paginator:
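        # Buffer documents into pages; hand each full page to a worker process.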
        for doc in get_export_documents(export_instance, filters):
            paginator.write(doc)
            if paginator.page_size == page_size:
                _log_page_dumped(paginator)
                exporter.process_page(paginator.get_result())
                paginator.next_page()
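        # Flush the final partial page, if any.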
        if paginator.page_size:
            _log_page_dumped(paginator)
            exporter.process_page(paginator.get_result())

    exporter.wait_till_completion()
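
The loop in rebuild_export_mutiprocess buffers documents into fixed-size pages, flushing each full page to the exporter and then any trailing partial page. A toy, self-contained sketch of that pattern (ToyPaginator is a stand-in for OutputPaginator, not the real class):

class ToyPaginator:
    def __init__(self):
        self.page = 0
        self.buffer = []

    def write(self, item):
        self.buffer.append(item)

    @property
    def page_size(self):
        return len(self.buffer)

    def get_result(self):
        return list(self.buffer)

    def next_page(self):
        self.page += 1
        self.buffer = []

def dump(items, page_size=3):
    paginator = ToyPaginator()
    for item in items:
        paginator.write(item)
        if paginator.page_size == page_size:
            print('page', paginator.page, paginator.get_result())
            paginator.next_page()
    if paginator.page_size:  # trailing partial page
        print('page', paginator.page, paginator.get_result())

dump(range(7))  # pages of 3 and 3 docs, then a final partial page of 1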
Example 3
    def send_preparation_analytics(self, export_instances, export_filters):
        send_hubspot_form(HUBSPOT_DOWNLOADED_EXPORT_FORM_ID, self.request)

        # Record whether any selected export actually contained data,
        # e.g. 'Downloaded Form Exports With Data' vs. '... With No Data'.
        track_workflow(self.request.couch_user.username, 'Downloaded {} Exports With {}Data'.format(
            self.model[0].upper() + self.model[1:],
            '' if any(get_export_size(instance, export_filters) > 0 for instance in export_instances) else 'No ',
        ))
Example 4
def _check_export_size(domain, export_instances, export_filters):
    count = 0
    for instance in export_instances:
        count += get_export_size(instance, export_filters)
    if count > MAX_EXPORTABLE_ROWS and not PAGINATED_EXPORTS.enabled(domain):
        raise ExportAsyncException(
            _("This export contains %(row_count)s rows. Please change the "
              "filters to be less than %(max_rows)s rows.") % {
                  'row_count': count,
                  'max_rows': MAX_EXPORTABLE_ROWS
              })
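
A self-contained sketch of the same guard: sum the per-instance sizes and raise unless a pagination toggle lifts the cap (check_size, MAX_ROWS, and paginated_enabled are hypothetical stand-ins for the real names):

MAX_ROWS = 100000  # stand-in for MAX_EXPORTABLE_ROWS

def check_size(row_counts, paginated_enabled):
    total = sum(row_counts)
    if total > MAX_ROWS and not paginated_enabled:
        raise ValueError(
            'This export contains %(row_count)s rows. Please change the '
            'filters to be less than %(max_rows)s rows.'
            % {'row_count': total, 'max_rows': MAX_ROWS})

check_size([40000, 70000], paginated_enabled=True)   # pagination toggle lifts the cap
# check_size([40000, 70000], paginated_enabled=False)  # would raise ValueError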
Example 5
def rebuild_export_mutiprocess(export_id, num_processes, page_size=100000):
    assert num_processes > 0

    export_instance = get_properly_wrapped_export_instance(export_id)
    filters = export_instance.get_filters()
    total_docs = get_export_size(export_instance, filters)
    exporter = MultiprocessExporter(export_instance, total_docs, num_processes)
    paginator = OutputPaginator(export_id)

    logger.info('Starting data dump of {} docs'.format(total_docs))
    run_multiprocess_exporter(exporter, filters, paginator, page_size)