def test_get_properly_wrapped_export_instance(self):
    """The helper should return each export wrapped as its original class."""
    for expected in (self.form_instance_daily_saved, self.case_instance):
        wrapped = get_properly_wrapped_export_instance(expected._id)
        self.assertEqual(type(wrapped), type(expected))
def handle(self, export_id, **options):
    """Extend a form export dump with only the data received since the
    last recorded run in the downloaded export archive.

    Raises CommandError for non-form exports, or for exports that already
    carry a date filter (those must be fully rebuilt instead).
    """
    export_instance = get_properly_wrapped_export_instance(export_id)
    if export_instance.type != FORM_EXPORT:
        raise CommandError("Unsupported export type: %s" % export_instance.type)

    filters = export_instance.get_filters()
    if any(isinstance(filter_, FormExportFilterBuilder.date_filter_class) for filter_ in filters):
        raise CommandError("Export already has a date filter and so must be fully rebuilt.")

    # Download (or reuse) the existing archive and find where the last run
    # stopped so only newer documents get exported.
    export_archive_path = download_export(
        export_instance, download_path=options.get('download_path'))
    last_form_id, last_form_received_on, last_page_number = get_last_run_meta(
        export_instance, export_archive_path)

    print("Exporting data since '%s'" % last_form_received_on)
    filters.append(FormExportFilterBuilder.date_filter_class(gt=last_form_received_on))
    if last_form_id:
        # Exclude the boundary form explicitly -- presumably because other
        # forms can share its received_on timestamp (TODO: confirm).
        filters.append(NOT(TermFilter('_id', last_form_id)))

    total_docs = get_export_size(export_instance, filters)
    exporter = MultiprocessExporter(
        export_instance, total_docs, options['processes'],
        # BUGFIX: previously passed options['download_path'] here, which is
        # None when no explicit path was given even though an archive was
        # just downloaded above; always use the actual archive path.
        existing_archive_path=export_archive_path,
        keep_file=True
    )
    # Resume page numbering after the last completed page.
    paginator = OutputPaginator(export_id, last_page_number + 1)

    logger.info('Starting data dump of {} docs'.format(total_docs))
    run_multiprocess_exporter(exporter, filters, paginator, 1000000)
def rebuild_export_mutiprocess(export_id, num_processes, page_size=100000):
    """Rebuild an export from scratch using ``num_processes`` workers.

    Documents are streamed and written out in pages of ``page_size`` docs;
    each completed page is handed to the exporter for parallel processing.
    (The "mutiprocess" typo is part of the public name; kept for callers.)
    """
    assert num_processes > 0

    def _log_page_dumped(paginator):
        # Progress log emitted once per flushed page.
        logger.info(' Dump page {} complete: {} docs'.format(
            paginator.page, paginator.page_size))

    export_instance = get_properly_wrapped_export_instance(export_id)
    filters = export_instance.get_filters()
    total_docs = get_export_size(export_instance, filters)

    logger.info('Starting data dump of {} docs'.format(total_docs))
    exporter = MultiprocessExporter(export_instance, total_docs, num_processes)
    paginator = OutputPaginator(export_id)
    with exporter, paginator:
        for doc in get_export_documents(export_instance, filters):
            paginator.write(doc)
            if paginator.page_size == page_size:
                # Page is full: flush it to the exporter, start a new one.
                _log_page_dumped(paginator)
                exporter.process_page(paginator.get_result())
                paginator.next_page()
        if paginator.page_size:
            # Flush the final, partially filled page.
            _log_page_dumped(paginator)
            exporter.process_page(paginator.get_result())
    exporter.wait_till_completion()
def get_exports_page(self, page, limit, my_exports=False):
    """Return one page of export-list data for the current user.

    Returns a tuple ``(docs, total)``: the formatted exports for the
    requested page, and the count of all exports visible to the user.
    """
    if not self._priv_check():
        raise Http404
    # Calls self.get_saved_exports and formats each item using self.fmt_export_data
    brief_exports = sorted(self.get_saved_exports(), key=lambda x: x['name'])

    if toggles.EXPORT_OWNERSHIP.enabled(self.domain):
        def _can_view(e, user_id):
            # NOTE(review): ``hasattr`` checks an *attribute* while the rest
            # of this function uses item access (``e['owner_id']``). If ``e``
            # is a plain dict this guard is always True and the sharing check
            # below never runs -- verify the element type of
            # ``get_saved_exports()``.
            if not hasattr(e, 'owner_id'):
                return True
            return e['sharing'] != SharingOption.PRIVATE or e[
                'owner_id'] == user_id

        # Keep exports the user may view, partitioned into "mine" vs
        # "shared with me" according to ``my_exports``.
        brief_exports = [
            export for export in brief_exports
            if _can_view(export, self.request.couch_user.user_id) and (
                'owner_id' in export and export['owner_id'] ==
                self.request.couch_user.user_id) == my_exports
        ]

    if self.is_deid:
        brief_exports = [x for x in brief_exports if x['is_deidentified']]

    # Wrap and format only the slice needed for this page.
    docs = [
        self.fmt_export_data(get_properly_wrapped_export_instance(
            e['_id'])) for e in brief_exports[limit * (page - 1):limit * page]
    ]
    return (docs, len(brief_exports))
def handle(self, export_id, **options):
    """Replace an export's stored payload with the file given by --path."""
    payload_path = options.pop('path')
    if not os.path.isfile(payload_path):
        raise CommandError("File not found: {}".format(payload_path))
    export_instance = get_properly_wrapped_export_instance(export_id)
    with open(payload_path, 'rb') as payload:
        save_export_payload(export_instance, payload)
def rebuild_export_task(export_instance_id, last_access_cutoff=None, filter=None):
    """Rebuild a saved export, guarded so only one rebuild runs at a time.

    Acquires a non-blocking CriticalSection keyed on the export id; if
    another rebuild already holds the lock, this task exits silently.
    """
    keys = ['rebuild_export_task_%s' % export_instance_id]
    timeout = 48 * 3600  # long enough to make sure this doesn't get called while another one is running
    with CriticalSection(keys, timeout=timeout, block=False) as locked_section:
        if locked_section.success():
            export_instance = get_properly_wrapped_export_instance(export_instance_id)
            # Skip the (expensive) rebuild when it is not actually needed.
            if should_rebuild_export(export_instance, last_access_cutoff):
                rebuild_export(export_instance, filter)
def handle(self, export_id, **options):
    """Overwrite an export's saved payload from a local file."""
    source = options.pop('path')
    if not os.path.isfile(source):
        raise CommandError("File not found: {}".format(source))
    instance = get_properly_wrapped_export_instance(export_id)
    with open(source, 'rb') as fileobj:
        save_export_payload(instance, fileobj)
def _record_export_duration(duration, export): export.last_build_duration = duration try: export.save() except ResourceConflict: export = get_properly_wrapped_export_instance(export.get_id) export.last_build_duration = duration export.save()
def _record_export_duration(duration, export): export.last_build_duration = duration try: export.save() except ResourceConflict: export = get_properly_wrapped_export_instance(export.get_id) export.last_build_duration = duration export.save()
def download_daily_saved_export(req, domain, export_instance_id):
    """Serve the pre-built payload of a daily saved export / Excel dashboard.

    Access is serialized per export via CriticalSection because the view
    also mutates ``last_accessed`` (and may trigger a rebuild).
    """
    with CriticalSection(
            ['export-last-accessed-{}'.format(export_instance_id)]):
        try:
            export_instance = get_properly_wrapped_export_instance(
                export_instance_id)
        except ResourceNotFound:
            raise Http404(_("Export not found"))

        assert domain == export_instance.domain

        # Privilege gates: HTML exports need the Excel-dashboard privilege,
        # other daily saved exports need the daily-saved privilege.
        if export_instance.export_format == "html":
            if not domain_has_privilege(domain, EXCEL_DASHBOARD):
                raise Http404
        elif export_instance.is_daily_saved_export:
            if not domain_has_privilege(domain, DAILY_SAVED_EXPORT):
                raise Http404
        if not export_instance.filters.is_location_safe_for_user(req):
            return location_restricted_response(req)
        if not can_download_daily_saved_export(export_instance, domain,
                                               req.couch_user):
            raise Http404

        # Analytics: record which flavor of download this was.
        if export_instance.export_format == "html":
            message = "Download Excel Dashboard"
        else:
            message = "Download Saved Export"
        track_workflow(req.couch_user.username, message, properties={
            'domain': domain,
            'is_dimagi': req.couch_user.is_dimagi
        })

        if should_update_export(export_instance.last_accessed):
            # Kick off a rebuild when the cached payload looks stale; a
            # failed rebuild must not block serving the existing payload.
            try:
                rebuild_saved_export(export_instance_id, manual=False)
            except Exception:
                notify_exception(
                    req,
                    'Failed to rebuild export during download',
                    {
                        'export_instance_id': export_instance_id,
                        'domain': domain,
                    },
                )

        export_instance.last_accessed = datetime.utcnow()
        export_instance.save()

        payload = export_instance.get_payload(stream=True)
        format = Format.from_format(export_instance.export_format)
        return get_download_response(payload, export_instance.file_size,
                                     format, export_instance.filename, req)
def rebuild_export_mutiprocess(export_id, num_processes, page_size=100000):
    """Rebuild an export using ``num_processes`` parallel workers.

    (The "mutiprocess" typo is part of the public name; kept for callers.)
    """
    assert num_processes > 0

    instance = get_properly_wrapped_export_instance(export_id)
    export_filters = instance.get_filters()
    doc_count = get_export_size(instance, export_filters)
    worker = MultiprocessExporter(instance, doc_count, num_processes)
    pager = OutputPaginator(export_id)

    logger.info('Starting data dump of {} docs'.format(doc_count))
    run_multiprocess_exporter(worker, export_filters, pager, page_size)
def toggle_saved_export_enabled(request, domain):
    """Flip the auto-rebuild flag on a saved export and return the new state."""
    permissions = ExportsPermissionsManager(request.GET.get('model_type'), domain, request.couch_user)
    permissions.access_list_exports_or_404(is_deid=json.loads(request.POST.get('is_deid')))
    export_instance = get_properly_wrapped_export_instance(request.POST.get('export_id'))
    # The client sends the *current* state; store its negation.
    currently_enabled = json.loads(request.POST.get('is_auto_rebuild_enabled'))
    export_instance.auto_rebuild_enabled = not currently_enabled
    export_instance.save()
    return json_response({
        'success': True,
        'isAutoRebuildEnabled': export_instance.auto_rebuild_enabled
    })
def toggle_saved_export_enabled(request, domain):
    """Toggle whether a saved export is automatically rebuilt."""
    manager = ExportsPermissionsManager(request.GET.get('model_type'), domain, request.couch_user)
    manager.access_list_exports_or_404(is_deid=json.loads(request.POST.get('is_deid')))
    instance_id = request.POST.get('export_id')
    instance = get_properly_wrapped_export_instance(instance_id)
    # The POSTed flag carries the current state, so negate it to toggle.
    instance.auto_rebuild_enabled = not json.loads(
        request.POST.get('is_auto_rebuild_enabled'))
    instance.save()
    return json_response({
        'success': True,
        'isAutoRebuildEnabled': instance.auto_rebuild_enabled
    })
def commit_filters(request, domain):
    """Validate and persist dashboard-feed filter settings for an export.

    Returns a JSON response: success plus location-restriction names, or a
    form-validation error.
    """
    permissions = ExportsPermissionsManager(request.POST.get('model_type'), domain, request.couch_user)
    if not permissions.has_edit_permissions:
        raise Http404
    export_id = request.POST.get('export_id')
    form_data = json.loads(request.POST.get('form_data'))
    export = get_properly_wrapped_export_instance(export_id)
    # Privilege gates for the export flavors that carry these filters.
    if export.is_daily_saved_export and not domain_has_privilege(
            domain, DAILY_SAVED_EXPORT):
        raise Http404
    if export.export_format == "html" and not domain_has_privilege(
            domain, EXCEL_DASHBOARD):
        raise Http404
    if export.is_odata_config and not domain_has_privilege(domain, ODATA_FEED):
        raise Http404
    if not export.filters.is_location_safe_for_user(request):
        return location_restricted_response(request)
    domain_object = Domain.get_by_name(domain)
    filter_form = DashboardFeedFilterForm(domain_object, form_data,
                                          couch_user=request.couch_user)
    if filter_form.is_valid():
        old_can_access_all_locations = export.filters.can_access_all_locations
        old_accessible_location_ids = export.filters.accessible_location_ids
        filters = filter_form.to_export_instance_filters(
            # using existing location restrictions prevents a less restricted user from modifying
            # restrictions on an export that a more restricted user created (which would mean the more
            # restricted user would lose access to the export)
            old_can_access_all_locations, old_accessible_location_ids,
            export.type)
        # Only save (and rebuild) when the filters actually changed.
        if export.filters != filters:
            export.filters = filters
            export.save()
            if export.is_daily_saved_export:
                rebuild_saved_export(export_id, manual=True)
        return json_response({
            'success': True,
            'locationRestrictions': ExportListHelper.get_location_restriction_names(
                export.filters.accessible_location_ids),
        })
    else:
        return json_response({
            'success': False,
            'error': _("Problem saving dashboard feed filters: Invalid form"),
        })
def download_daily_saved_export(req, domain, export_instance_id):
    """Serve the pre-built payload of a daily saved export / Excel dashboard.

    Wrapped in a per-export CriticalSection because the view also updates
    ``last_accessed`` and may kick off a rebuild.
    """
    with CriticalSection(['export-last-accessed-{}'.format(export_instance_id)]):
        try:
            export_instance = get_properly_wrapped_export_instance(export_instance_id)
        except ResourceNotFound:
            raise Http404(_("Export not found"))

        assert domain == export_instance.domain

        # Privilege gates: HTML exports require the Excel-dashboard
        # privilege, other daily saved exports the daily-saved privilege.
        if export_instance.export_format == "html":
            if not domain_has_privilege(domain, EXCEL_DASHBOARD):
                raise Http404
        elif export_instance.is_daily_saved_export:
            if not domain_has_privilege(domain, DAILY_SAVED_EXPORT):
                raise Http404
        if not export_instance.filters.is_location_safe_for_user(req):
            return location_restricted_response(req)
        if not can_download_daily_saved_export(export_instance, domain, req.couch_user):
            raise Http404

        # Analytics: record which flavor of download this was.
        if export_instance.export_format == "html":
            message = "Download Excel Dashboard"
        else:
            message = "Download Saved Export"
        track_workflow(req.couch_user.username, message, properties={
            'domain': domain,
            'is_dimagi': req.couch_user.is_dimagi
        })

        if should_update_export(export_instance.last_accessed):
            # Rebuild stale payloads best-effort; never block the download.
            try:
                rebuild_saved_export(export_instance_id, manual=False)
            except Exception:
                notify_exception(
                    req,
                    'Failed to rebuild export during download',
                    {
                        'export_instance_id': export_instance_id,
                        'domain': domain,
                    },
                )

        export_instance.last_accessed = datetime.utcnow()
        export_instance.save()

        payload = export_instance.get_payload(stream=True)
        format = Format.from_format(export_instance.export_format)
        return get_download_response(payload, export_instance.file_size, format,
                                     export_instance.filename, req)
def download_saved_export(export_id, dest_dir=None):
    """Download the latest saved export payload into a timestamped zip.

    ``dest_dir`` defaults to ``settings.SHARED_DRIVE_ROOT``.
    """
    # Downloads the latest saved export to shared-directory
    target_dir = (dest_dir or settings.SHARED_DRIVE_ROOT).rstrip()
    instance = get_properly_wrapped_export_instance(export_id)
    archive_name = safe_filename(
        instance.name.encode('ascii', 'replace') or 'Export')
    export_archive_path = '{}/{}_{}.zip'.format(
        target_dir, archive_name, datetime.utcnow().isoformat())
    stream = instance.get_payload(stream=True)
    print("Downloading Export to {}".format(export_archive_path))
    with open(export_archive_path, 'wb') as download:
        shutil.copyfileobj(stream, download)
    print("Download Finished!")
def download_saved_export(export_id, dest_dir=None):
    """Download the latest saved export payload to a timestamped zip file.

    ``dest_dir`` defaults to ``settings.SHARED_DRIVE_ROOT``.
    """
    # Downloads the latest saved export to shared-directory
    dest_dir = (dest_dir or settings.SHARED_DRIVE_ROOT).rstrip()
    export_instance = get_properly_wrapped_export_instance(export_id)
    export_archive_path = '{}/{}_{}.zip'.format(
        dest_dir,
        safe_filename(
            export_instance.name.encode('ascii', 'replace') or 'Export'),
        datetime.utcnow().isoformat())
    payload = export_instance.get_payload(stream=True)
    print("Downloading Export to {}".format(export_archive_path))
    # BUGFIX: the payload is a binary (zip) stream, so the destination file
    # must be opened in binary mode; text mode ('w') raises TypeError when
    # shutil.copyfileobj writes bytes to it.
    with open(export_archive_path, 'wb') as download:
        shutil.copyfileobj(payload, download)
    print("Download Finished!")
def handle(self, **options):
    """Reprocess the unprocessed pages of a partially-built export archive,
    then compile (and optionally upload) the final zip.
    """
    if __debug__:
        # NOTE(review): the rationale for requiring ``python -O`` is not
        # visible here -- presumably to strip asserts in the export
        # pipeline; confirm before relying on it.
        raise CommandError("You should run this with 'python -O'")
    export_id = options.pop('export_id')
    export_archive_path = options.pop('export_path')
    processes = options.pop('processes')
    force_upload = options.pop('force_upload')

    export_instance = get_properly_wrapped_export_instance(export_id)
    if not export_archive_path or not os.path.exists(export_archive_path):
        # No usable archive on disk: offer to download the latest one.
        confirm = input("""
        No export archive provided. Do you want to download the latest one? [y/N]
        """)
        if not confirm == "y":
            raise CommandError(
                "Export path missing: {}".format(export_archive_path))
        export_archive_path = self._download_export(export_instance)
    extract_to = tempfile.mkdtemp()
    total_docs, unprocessed_pages = self._get_unprocessed_pages(
        export_archive_path, extract_to)
    print('{} pages still to process'.format(len(unprocessed_pages)))
    exporter = MultiprocessExporter(export_instance, total_docs, processes)
    error_pages, successful_pages = self._process_pages(
        exporter, unprocessed_pages)
    final_path = self.compile_final_zip(error_pages, export_archive_path,
                                        export_instance, successful_pages)
    if force_upload or not error_pages:
        # Upload even with failed pages when --force-upload was given; keep
        # intermediate pages around if there were errors.
        print('Uploading final archive',
              '(forced)' if force_upload and error_pages else '')
        exporter.upload(final_path, clean=not error_pages)
    else:
        print(
            self.style.ERROR(
                'Not all pages processed successfully.\n'
                'You can re-run the command on the final archive to try again: {}\n'
                'NOTE: final archive not uploaded. '
                'Use --force-upload to upload even with errors'.format(
                    final_path)))
    shutil.rmtree(extract_to)
    self.stdout.write(
        self.style.SUCCESS('Rebuild Complete and payload uploaded'))
def handle(self, **options):
    """Reprocess unprocessed pages of a partially-built export archive and
    compile/upload the final zip.
    """
    if __debug__:
        # NOTE(review): rationale for requiring ``python -O`` is not visible
        # here -- presumably to strip asserts in the export path; confirm.
        raise CommandError("You should run this with 'python -O'")
    export_id = options.pop('export_id')
    export_archive_path = options.pop('export_path')
    processes = options.pop('processes')
    force_upload = options.pop('force_upload')

    export_instance = get_properly_wrapped_export_instance(export_id)
    if not export_archive_path or not os.path.exists(export_archive_path):
        # No usable archive on disk: offer to download the latest one.
        confirm = input(
            """
        No export archive provided. Do you want to download the latest one? [y/N]
        """
        )
        if not confirm == "y":
            raise CommandError("Export path missing: {}".format(export_archive_path))
        export_archive_path = self._download_export(export_instance)
    extract_to = tempfile.mkdtemp()
    total_docs, unprocessed_pages = self._get_unprocessed_pages(export_archive_path, extract_to)
    print('{} pages still to process'.format(len(unprocessed_pages)))
    exporter = MultiprocessExporter(export_instance, total_docs, processes)
    error_pages, successful_pages = self._process_pages(
        exporter, unprocessed_pages
    )
    final_path = self.compile_final_zip(
        error_pages, export_archive_path, export_instance, successful_pages
    )
    if force_upload or not error_pages:
        # Upload even when pages failed if --force-upload was passed; keep
        # intermediate pages around if there were errors.
        print('Uploading final archive', '(forced)' if force_upload and error_pages else '')
        exporter.upload(final_path, clean=not error_pages)
    else:
        print(self.style.ERROR(
            'Not all pages processed successfully.\n'
            'You can re-run the command on the final archive to try again: {}\n'
            'NOTE: final archive not uploaded. '
            'Use --force-upload to upload even with errors'.format(final_path))
        )
    shutil.rmtree(extract_to)
    self.stdout.write(self.style.SUCCESS('Rebuild Complete and payload uploaded'))
def get(self, request, domain, export_instance_id):
    """Generate and return a DET configuration workbook for an export."""
    instance = get_properly_wrapped_export_instance(export_instance_id)
    assert domain == instance.domain
    workbook = BytesIO()
    try:
        generate_from_export_instance(instance, workbook)
    except DETConfigError as err:
        return HttpResponse(
            _('Sorry, something went wrong creating that file: {error}')
            .format(error=err))
    workbook.seek(0)
    response = HttpResponse(
        workbook,
        content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    )
    response['Content-Disposition'] = (
        f'attachment; filename="{instance.name}-DET.xlsx"')
    return response
def get(self, request, domain, export_id, *args, **kwargs):
    """Copy an export and redirect back, claiming ownership when enabled."""
    try:
        source = get_properly_wrapped_export_instance(export_id)
    except ResourceNotFound:
        messages.error(request, _('You can only copy new exports.'))
    else:
        copied = source.copy_export()
        if toggles.EXPORT_OWNERSHIP.enabled(domain):
            # Copies start out private to the copying user.
            copied.owner_id = request.couch_user.user_id
            copied.sharing = SharingOption.PRIVATE
        copied.save()
        messages.success(
            request,
            format_html(_("Export <strong>{}</strong> created."), copied.name)
        )
    redirect = request.GET.get('next', reverse('data_interfaces_default', args=[domain]))
    return HttpResponseRedirect(redirect)
def get(self, request, domain, export_id, *args, **kwargs):
    """Copy an export, optionally claiming ownership, then redirect back.

    The success message interpolates the user-supplied export name, so the
    name must be HTML-escaped rather than blindly marked safe.
    """
    from django.utils.html import format_html  # escapes interpolated args

    try:
        export = get_properly_wrapped_export_instance(export_id)
    except ResourceNotFound:
        messages.error(request, _('You can only copy new exports.'))
    else:
        new_export = export.copy_export()
        if toggles.EXPORT_OWNERSHIP.enabled(domain):
            # Copies start out private to the copying user.
            new_export.owner_id = request.couch_user.user_id
            new_export.sharing = SharingOption.PRIVATE
        new_export.save()
        # SECURITY FIX: ``mark_safe(...format(name))`` injected the raw
        # export name into HTML; format_html escapes the name while keeping
        # the <strong> markup intact.
        messages.success(
            request,
            format_html(_("Export <strong>{}</strong> created."),
                        new_export.name)
        )
    redirect = request.GET.get('next', reverse('data_interfaces_default', args=[domain]))
    return HttpResponseRedirect(redirect)
def commit_filters(request, domain):
    """Validate and persist dashboard-feed filter settings for an export.

    Returns a JSON response indicating success or a form-validation error.
    """
    permissions = ExportsPermissionsManager(request.POST.get('model_type'), domain, request.couch_user)
    if not permissions.has_edit_permissions:
        raise Http404
    export_id = request.POST.get('export_id')
    form_data = json.loads(request.POST.get('form_data'))
    export = get_properly_wrapped_export_instance(export_id)
    # Privilege gates for the export flavors that carry these filters.
    if export.is_daily_saved_export and not domain_has_privilege(domain, DAILY_SAVED_EXPORT):
        raise Http404
    if export.export_format == "html" and not domain_has_privilege(domain, EXCEL_DASHBOARD):
        raise Http404
    if not export.filters.is_location_safe_for_user(request):
        return location_restricted_response(request)
    domain_object = Domain.get_by_name(domain)
    filter_form = DashboardFeedFilterForm(domain_object, form_data)
    if filter_form.is_valid():
        old_can_access_all_locations = export.filters.can_access_all_locations
        old_accessible_location_ids = export.filters.accessible_location_ids
        filters = filter_form.to_export_instance_filters(
            # using existing location restrictions prevents a less restricted user from modifying
            # restrictions on an export that a more restricted user created (which would mean the more
            # restricted user would lose access to the export)
            old_can_access_all_locations, old_accessible_location_ids, export.type
        )
        # Only save (and rebuild) when the filters actually changed.
        if export.filters != filters:
            export.filters = filters
            export.save()
            rebuild_saved_export(export_id, manual=True)
        return json_response({
            'success': True,
        })
    else:
        return json_response({
            'success': False,
            'error': _("Problem saving dashboard feed filters: Invalid form"),
        })
def get_exports_page(self, page, limit, my_exports=False):
    """Return one page of export-list data for the current user.

    Returns a tuple ``(docs, total)``: the formatted exports for the
    requested page and the count of all exports visible to the user.
    """
    if not self._priv_check():
        raise Http404
    # Calls self.get_saved_exports and formats each item using self.fmt_export_data
    brief_exports = sorted(self.get_saved_exports(), key=lambda x: x['name'])

    if toggles.EXPORT_OWNERSHIP.enabled(self.domain):
        def _can_view(e, user_id):
            # NOTE(review): ``hasattr`` checks an *attribute* while the rest
            # of this function uses item access (``e['owner_id']``). If ``e``
            # is a plain dict this guard is always True and the sharing
            # check never runs -- verify the element type of
            # ``get_saved_exports()``.
            if not hasattr(e, 'owner_id'):
                return True
            return e['sharing'] != SharingOption.PRIVATE or e['owner_id'] == user_id

        # Keep exports the user may view, partitioned into "mine" vs
        # "shared with me" according to ``my_exports``.
        brief_exports = [
            export for export in brief_exports
            if _can_view(export, self.request.couch_user.user_id)
            and ('owner_id' in export and export['owner_id'] == self.request.couch_user.user_id) == my_exports
        ]

    if self.is_deid:
        brief_exports = [x for x in brief_exports if x['is_deidentified']]

    # Wrap and format only the slice needed for this page.
    docs = [self.fmt_export_data(get_properly_wrapped_export_instance(e['_id']))
            for e in brief_exports[limit * (page - 1):limit * page]]
    return (docs, len(brief_exports))
def rebuild_export_task(export_instance_id, last_access_cutoff=None, filter=None):
    """Task wrapper: load the export document and rebuild its payload."""
    instance = get_properly_wrapped_export_instance(export_instance_id)
    rebuild_export(instance, last_access_cutoff, filter)
def test_get_properly_wrapped_export_instance(self):
    """Each export id should round-trip to an instance of the same class."""
    originals = [self.form_instance_daily_saved, self.case_instance]
    for original in originals:
        self.assertEqual(
            type(get_properly_wrapped_export_instance(original._id)),
            type(original),
        )
def export_instance(self):
    """Return the properly wrapped export instance for
    ``self.export_instance_id``.

    NOTE(review): presumably this fetches the document on every call;
    cache at the call site if accessed repeatedly -- confirm helper cost.
    """
    return get_properly_wrapped_export_instance(self.export_instance_id)