def on_dataset_delete(sender, **kwargs):
    """
    Signal receiver: when a Dataset is deleted, purge its data and
    metadata from Solr.

    The deleted dataset is read from ``kwargs["instance"]``; ``sender``
    is accepted but not used.
    """
    slug = kwargs["instance"].slug

    # Queue the purge task for everything stored under this slug ...
    PurgeDataTask.apply_async(args=[slug])

    # ... and drop the dataset's own entry from the datasets core.
    solr.delete(settings.SOLR_DATASETS_CORE, "slug:%s" % slug)
def delete(self, *args, **kwargs):
    """
    Delete this dataset.

    Before the row itself is removed, any task currently attached to the
    dataset is asked to abort and the dataset's entries are cleaned out
    of Solr. Positional and keyword arguments are forwarded unchanged to
    the parent class's ``delete``.
    """
    running_task = self.current_task

    # Stop an import that is still in flight
    if running_task:
        running_task.request_abort()

    # Solr cleanup: queue the purge task, then remove the dataset's entry
    # from the datasets core
    slug_filter = 'slug:%s' % self.slug
    PurgeDataTask.apply_async(args=[self.slug])
    solr.delete(settings.SOLR_DATASETS_CORE, slug_filter)

    super(Dataset, self).delete(*args, **kwargs)
def delete(self, *args, **kwargs):
    """
    Delete this dataset together with its uploads.

    Steps, in order: ask any task currently attached to the dataset to
    abort, delete every data upload and related upload through its own
    ``delete`` method, clean the dataset out of Solr, then delete the
    row itself. Positional and keyword arguments are forwarded unchanged
    to the parent class's ``delete``.
    """
    running_task = self.current_task

    # Stop an import that is still in flight
    if running_task:
        running_task.request_abort()

    # Uploads are deleted one at a time so that each upload's own delete()
    # runs. skip_purge=True is passed because the whole dataset is purged
    # just below.
    attached_uploads = chain(self.data_uploads.all(), self.related_uploads.all())

    for attached in attached_uploads:
        attached.delete(skip_purge=True)

    # Solr cleanup: queue the purge task, then remove the dataset's entry
    # from the datasets core
    slug_filter = 'slug:%s' % self.slug
    PurgeDataTask.apply_async(args=[self.slug])
    solr.delete(settings.SOLR_DATASETS_CORE, slug_filter)

    super(Dataset, self).delete(*args, **kwargs)
def delete(self, *args, **kwargs):
    """
    Delete this data upload.

    Datasets that reference this upload as their ``initial_upload`` are
    detached first, and the data imported from this upload is purged
    from Solr unless the caller opts out.

    Keyword arguments consumed here (not forwarded):
    skip_purge -- if true, do not queue a PurgeDataTask. Defaults to False.

    All remaining positional and keyword arguments are passed to the
    parent class's ``delete``.
    """
    skip_purge = kwargs.pop('skip_purge', False)

    # Update related datasets so deletes will not cascade. Iterating an
    # empty result set is a no-op, so no separate count() query is needed.
    for dataset in self.initial_upload_for.all():
        dataset.initial_upload = None
        dataset.save()

    # Cleanup data in Solr: only uploads that belong to a dataset and were
    # actually imported have anything to purge.
    if self.dataset and self.imported and not skip_purge:
        PurgeDataTask.apply_async(args=[self.dataset.slug, self.id])

    super(DataUpload, self).delete(*args, **kwargs)
def delete(self, *args, **kwargs):
    """
    Delete this dataset together with its uploads.

    Steps, in order: ask any task currently attached to the dataset to
    abort, delete every data upload and every related upload through its
    own ``delete`` method, clean the dataset out of Solr, then delete
    the row itself. Positional and keyword arguments are forwarded
    unchanged to the parent class's ``delete``.
    """
    running_task = self.current_task

    # Stop an import that is still in flight
    if running_task:
        running_task.request_abort()

    # Uploads are deleted one at a time so that each upload's own delete()
    # runs. Data uploads skip their individual purge (the whole dataset is
    # purged just below) and are passed force=True.
    data_uploads = self.data_uploads.all()
    for data_upload in data_uploads:
        data_upload.delete(skip_purge=True, force=True)

    related_uploads = self.related_uploads.all()
    for related_upload in related_uploads:
        related_upload.delete()

    # Solr cleanup: queue the purge task, then remove the dataset's entry
    # from the datasets core
    slug_filter = 'slug:%s' % self.slug
    PurgeDataTask.apply_async(args=[self.slug])
    solr.delete(settings.SOLR_DATASETS_CORE, slug_filter)

    super(Dataset, self).delete(*args, **kwargs)
def delete(self, *args, **kwargs):
    """
    Delete this data upload.

    Datasets that reference this upload as their ``initial_upload`` are
    detached first, and the data imported from this upload is purged
    from Solr unless the caller opts out.

    Keyword arguments consumed here (not forwarded):
    skip_purge -- if true, do not queue a PurgeDataTask. Defaults to False.
    force -- if true, delete even when ``self.deletable`` is false.
             Defaults to False.

    All remaining positional and keyword arguments are passed to the
    parent class's ``delete``.

    Raises DataUploadNotDeletable if the upload is not deletable and
    ``force`` was not given.
    """
    skip_purge = kwargs.pop('skip_purge', False)
    force = kwargs.pop('force', False)

    # Don't allow deletion of dated uploads unless forced
    if not self.deletable and not force:
        raise DataUploadNotDeletable('This data upload was created before deleting individual data uploads was supported. In order to delete it you must delete the entire dataset.')

    # Update related datasets so deletes will not cascade. Iterating an
    # empty result set is a no-op, so no separate count() query is needed.
    for dataset in self.initial_upload_for.all():
        dataset.initial_upload = None
        dataset.save()

    # Cleanup data in Solr: only uploads that belong to a dataset and were
    # actually imported have anything to purge.
    if self.dataset and self.imported and not skip_purge:
        PurgeDataTask.apply_async(args=[self.dataset.slug, self.id])

    super(DataUpload, self).delete(*args, **kwargs)
def delete(self, *args, **kwargs):
    """
    Delete this data upload.

    Datasets that reference this upload as their ``initial_upload`` are
    detached first, and the data imported from this upload is purged
    from Solr unless the caller opts out.

    Keyword arguments consumed here (not forwarded):
    skip_purge -- if true, do not queue a PurgeDataTask. Defaults to False.
    force -- if true, delete even when ``self.deletable`` is false.
             Defaults to False.

    All remaining positional and keyword arguments are passed to the
    parent class's ``delete``.

    Raises DataUploadNotDeletable if the upload is not deletable and
    ``force`` was not given.
    """
    skip_purge = kwargs.pop('skip_purge', False)
    force = kwargs.pop('force', False)

    # Don't allow deletion of dated uploads unless forced
    if not self.deletable and not force:
        raise DataUploadNotDeletable(
            'This data upload was created before deleting individual data uploads was supported. In order to delete it you must delete the entire dataset.'
        )

    # Update related datasets so deletes will not cascade. Iterating an
    # empty result set is a no-op, so no separate count() query is needed.
    for dataset in self.initial_upload_for.all():
        dataset.initial_upload = None
        dataset.save()

    # Cleanup data in Solr: only uploads that belong to a dataset and were
    # actually imported have anything to purge.
    if self.dataset and self.imported and not skip_purge:
        PurgeDataTask.apply_async(args=[self.dataset.slug, self.id])

    super(DataUpload, self).delete(*args, **kwargs)
def search_all_data(self, request, **kwargs):
    """
    List endpoint using Solr. Provides full-text search via the "q" parameter.

    GET parameters read from the request:
    q -- search text (default "").
    category -- a category slug, or "uncategorized" for datasets that
                have no category; restricts the search to those datasets.
    since -- timestamp; "Z" is appended and it becomes the lower bound
             of a ``last_modified`` range filter.
    limit, offset -- paging over dataset groups.
    group_limit, group_offset -- paging over rows inside each group.
    export -- any non-empty value queues an ExportSearchTask instead of
              returning results.

    Returns the response built by ``self.create_response``: either the
    string "Export queued." or a page whose "objects" are dataset
    bundles, each carrying its own page of matching rows.
    """
    self.method_check(request, allowed=["get"])
    self.is_authenticated(request)
    self.throttle_check(request)

    query = request.GET.get("q", "")
    category = request.GET.get("category", "")
    since = request.GET.get("since", None)
    limit = int(request.GET.get("limit", settings.PANDA_DEFAULT_SEARCH_GROUPS))
    offset = int(request.GET.get("offset", 0))
    group_limit = int(request.GET.get("group_limit", settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
    group_offset = int(request.GET.get("group_offset", 0))
    # NOTE(review): any non-empty string is truthy, so "export=false" also
    # triggers an export. Left unchanged to stay compatible with callers.
    export = bool(request.GET.get("export", False))

    if category:
        if category != "uncategorized":
            category = Category.objects.get(slug=category)
            dataset_slugs = category.datasets.values_list("slug", flat=True)
        else:
            dataset_slugs = Dataset.objects.filter(categories=None).values_list("slug", flat=True)

        query += " dataset_slug:(%s)" % " ".join(dataset_slugs)

    if since:
        since_clause = "last_modified:[" + since + "Z TO *]"

        # With no search text the original produced "... AND ()", an empty
        # group that is not a valid query. Use the range filter alone.
        if query.strip():
            query = since_clause + " AND (%s)" % query
        else:
            query = since_clause

    # Because users may have authenticated via headers the request.user may
    # not be a full User instance. To be sure, we fetch one.
    user = UserProxy.objects.get(id=request.user.id)

    if export:
        task_type = ExportSearchTask

        task = TaskStatus.objects.create(
            task_name=task_type.name, task_description='Export search results for "%s".' % query, creator=user
        )

        task_type.apply_async(args=[query, task.id], kwargs={}, task_id=task.id)
    else:
        response = solr.query_grouped(
            settings.SOLR_DATA_CORE,
            query,
            "dataset_slug",
            offset=offset,
            limit=limit,
            group_limit=group_limit,
            group_offset=group_offset,
        )
        groups = response["grouped"]["dataset_slug"]["groups"]

        page = PandaPaginator(
            request.GET,
            groups,
            resource_uri=request.path_info,
            count=response["grouped"]["dataset_slug"]["ngroups"],
        ).page()

        datasets = []

        for group in groups:
            dataset_slug = group["groupValue"]
            results = group["doclist"]

            try:
                dataset = Dataset.objects.get(slug=dataset_slug)
            # In the event that stale data exists in Solr, skip this dataset,
            # request the invalid data be purged and return the other results.
            # Pagination may be wrong, but this is the most functional solution. (#793)
            except Dataset.DoesNotExist:
                PurgeDataTask.apply_async(args=[dataset_slug])
                solr.delete(settings.SOLR_DATASETS_CORE, "slug:%s" % dataset_slug)

                page["meta"]["total_count"] -= 1

                continue

            dataset_resource = DatasetResource()
            dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
            dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
            dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)

            objects = [SolrObject(obj) for obj in results["docs"]]

            # URL of the per-dataset search endpoint, used as the resource
            # URI for this group's paging metadata
            dataset_search_url = reverse(
                "api_dataset_data_list",
                kwargs={
                    "api_name": self._meta.api_name,
                    "dataset_resource_name": "dataset",
                    "resource_name": "data",
                    "dataset_slug": dataset.slug,
                },
            )

            data_page = PandaPaginator(
                {"limit": str(group_limit), "offset": str(group_offset), "q": query},
                objects,
                resource_uri=dataset_search_url,
                count=results["numFound"],
            ).page()

            # Merge the row paging data into the dataset bundle, then replace
            # its "objects" with fully dehydrated row bundles
            dataset_bundle.data.update(data_page)
            dataset_bundle.data["objects"] = [
                self.full_dehydrate(self.build_bundle(obj=obj, request=request)) for obj in objects
            ]

            datasets.append(dataset_bundle.data)

        page["objects"] = datasets

    # Log query
    SearchLog.objects.create(user=user, dataset=None, query=query)

    self.log_throttled_access(request)

    if export:
        return self.create_response(request, "Export queued.")
    else:
        return self.create_response(request, page)
def search_all_data(self, request, **kwargs):
    """
    List endpoint using Solr. Provides full-text search via the "q" parameter.

    GET parameters read from the request:
    q -- search text (default '').
    category -- a category slug, or 'uncategorized' for datasets that
                have no category; restricts the search to those datasets.
    since -- timestamp; 'Z' is appended and it becomes the lower bound
             of a ``last_modified`` range filter.
    limit, offset -- paging over dataset groups.
    group_limit, group_offset -- paging over rows inside each group.
    export -- any non-empty value queues an ExportSearchTask instead of
              returning results.

    Returns the response built by ``self.create_response``: either the
    string 'Export queued.' or a page whose 'objects' are dataset
    bundles, each carrying its own page of matching rows.
    """
    self.method_check(request, allowed=['get'])
    self.is_authenticated(request)
    self.throttle_check(request)

    query = request.GET.get('q', '')
    category = request.GET.get('category', '')
    since = request.GET.get('since', None)
    limit = int(
        request.GET.get('limit', settings.PANDA_DEFAULT_SEARCH_GROUPS))
    offset = int(request.GET.get('offset', 0))
    group_limit = int(
        request.GET.get('group_limit',
                        settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
    group_offset = int(request.GET.get('group_offset', 0))
    # NOTE(review): any non-empty string is truthy, so 'export=false' also
    # triggers an export. Left unchanged to stay compatible with callers.
    export = bool(request.GET.get('export', False))

    if category:
        if category != 'uncategorized':
            category = Category.objects.get(slug=category)
            dataset_slugs = category.datasets.values_list('slug', flat=True)
        else:
            dataset_slugs = Dataset.objects.filter(
                categories=None).values_list('slug', flat=True)

        query += ' dataset_slug:(%s)' % ' '.join(dataset_slugs)

    if since:
        since_clause = 'last_modified:[' + since + 'Z TO *]'

        # With no search text the original produced '... AND ()', an empty
        # group that is not a valid query. Use the range filter alone.
        if query.strip():
            query = since_clause + ' AND (%s)' % query
        else:
            query = since_clause

    # Because users may have authenticated via headers the request.user may
    # not be a full User instance. To be sure, we fetch one.
    user = UserProxy.objects.get(id=request.user.id)

    if export:
        task_type = ExportSearchTask

        task = TaskStatus.objects.create(
            task_name=task_type.name,
            task_description='Export search results for "%s".' % query,
            creator=user)

        task_type.apply_async(args=[query, task.id],
                              kwargs={},
                              task_id=task.id)
    else:
        response = solr.query_grouped(settings.SOLR_DATA_CORE,
                                      query,
                                      'dataset_slug',
                                      offset=offset,
                                      limit=limit,
                                      group_limit=group_limit,
                                      group_offset=group_offset)
        groups = response['grouped']['dataset_slug']['groups']

        page = PandaPaginator(
            request.GET,
            groups,
            resource_uri=request.path_info,
            count=response['grouped']['dataset_slug']['ngroups']).page()

        datasets = []

        for group in groups:
            dataset_slug = group['groupValue']
            results = group['doclist']

            try:
                dataset = Dataset.objects.get(slug=dataset_slug)
            # In the event that stale data exists in Solr, skip this dataset,
            # request the invalid data be purged and return the other results.
            # Pagination may be wrong, but this is the most functional solution. (#793)
            except Dataset.DoesNotExist:
                PurgeDataTask.apply_async(args=[dataset_slug])
                solr.delete(settings.SOLR_DATASETS_CORE,
                            'slug:%s' % dataset_slug)

                page['meta']['total_count'] -= 1

                continue

            dataset_resource = DatasetResource()
            dataset_bundle = dataset_resource.build_bundle(obj=dataset,
                                                           request=request)
            dataset_bundle = dataset_resource.full_dehydrate(
                dataset_bundle)
            dataset_bundle = dataset_resource.simplify_bundle(
                dataset_bundle)

            objects = [SolrObject(obj) for obj in results['docs']]

            # URL of the per-dataset search endpoint, used as the resource
            # URI for this group's paging metadata
            dataset_search_url = reverse('api_dataset_data_list',
                                         kwargs={
                                             'api_name': self._meta.api_name,
                                             'dataset_resource_name': 'dataset',
                                             'resource_name': 'data',
                                             'dataset_slug': dataset.slug
                                         })

            data_page = PandaPaginator(
                {
                    'limit': str(group_limit),
                    'offset': str(group_offset),
                    'q': query
                },
                objects,
                resource_uri=dataset_search_url,
                count=results['numFound']).page()

            # Merge the row paging data into the dataset bundle, then replace
            # its 'objects' with fully dehydrated row bundles
            dataset_bundle.data.update(data_page)
            dataset_bundle.data['objects'] = [
                self.full_dehydrate(
                    self.build_bundle(obj=obj, request=request))
                for obj in objects
            ]

            datasets.append(dataset_bundle.data)

        page['objects'] = datasets

    # Log query
    SearchLog.objects.create(user=user, dataset=None, query=query)

    self.log_throttled_access(request)

    if export:
        return self.create_response(request, 'Export queued.')
    else:
        return self.create_response(request, page)
def search_all_data(self, request, **kwargs):
    """
    List endpoint using Solr. Provides full-text search via the "q" parameter.

    GET parameters read from the request:
    q -- search text; wrapped in parentheses before being combined with
         the filters below. A missing or blank value adds no clause.
    category -- a category slug, or 'uncategorized' for datasets that
                have no category; restricts the search to those datasets.
    since -- timestamp; 'Z' is appended and it becomes the lower bound
             of a ``last_modified`` range filter.
    limit, offset -- paging over dataset groups.
    group_limit, group_offset -- paging over rows inside each group.
    export -- any non-empty value queues an ExportSearchTask instead of
              returning results.

    The clauses are joined with ' AND ' for the Solr request. The export
    task, the per-dataset paging links and the search log receive only
    the parenthesised search text.

    Returns the response built by ``self.create_response``: either the
    translated 'Export queued.' message or a page whose 'objects' are
    dataset bundles, each carrying its own page of matching rows.
    """
    self.method_check(request, allowed=['get'])
    self.is_authenticated(request)
    self.throttle_check(request)

    # Parenthesise the user's text so the AND-ed filters apply to all of it.
    # Blank text becomes '' rather than '()', which is not a valid query.
    user_query = request.GET.get('q', '')
    query = '(%s)' % user_query if user_query.strip() else ''

    category = request.GET.get('category', '')
    since = request.GET.get('since', None)
    limit = int(request.GET.get('limit', settings.PANDA_DEFAULT_SEARCH_GROUPS))
    offset = int(request.GET.get('offset', 0))
    group_limit = int(request.GET.get('group_limit', settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
    group_offset = int(request.GET.get('group_offset', 0))
    # NOTE(review): any non-empty string is truthy, so 'export=false' also
    # triggers an export. Left unchanged to stay compatible with callers.
    export = bool(request.GET.get('export', False))

    # Only non-empty clauses are collected; an empty first element would
    # make the joined query start with a dangling ' AND '.
    solr_query_bits = [query] if query else []

    if category:
        if category != 'uncategorized':
            category = Category.objects.get(slug=category)
            dataset_slugs = category.datasets.values_list('slug', flat=True)
        else:
            dataset_slugs = Dataset.objects.filter(categories=None).values_list('slug', flat=True)

        solr_query_bits.append('dataset_slug:(%s)' % ' '.join(dataset_slugs))

    if since:
        solr_query_bits.append('last_modified:[' + since + 'Z TO *]')

    # Because users may have authenticated via headers the request.user may
    # not be a full User instance. To be sure, we fetch one.
    user = UserProxy.objects.get(id=request.user.id)

    if export:
        task_type = ExportSearchTask

        task = TaskStatus.objects.create(
            task_name=task_type.name,
            task_description=_('Export search results for "%s".') % query,
            creator=user
        )

        # NOTE(review): the task is given only the search text, so the
        # category/since filters are not part of the export. Confirm this
        # is intended.
        task_type.apply_async(
            args=[query, task.id],
            kwargs={},
            task_id=task.id
        )
    else:
        response = solr.query_grouped(
            settings.SOLR_DATA_CORE,
            ' AND '.join(solr_query_bits),
            'dataset_slug',
            offset=offset,
            limit=limit,
            group_limit=group_limit,
            group_offset=group_offset
        )
        groups = response['grouped']['dataset_slug']['groups']

        page = PandaPaginator(
            request.GET,
            groups,
            resource_uri=request.path_info,
            count=response['grouped']['dataset_slug']['ngroups']
        ).page()

        datasets = []

        for group in groups:
            dataset_slug = group['groupValue']
            results = group['doclist']

            try:
                dataset = Dataset.objects.get(slug=dataset_slug)
            # In the event that stale data exists in Solr, skip this dataset,
            # request the invalid data be purged and return the other results.
            # Pagination may be wrong, but this is the most functional solution. (#793)
            except Dataset.DoesNotExist:
                PurgeDataTask.apply_async(args=[dataset_slug])
                solr.delete(settings.SOLR_DATASETS_CORE, 'slug:%s' % dataset_slug)

                page['meta']['total_count'] -= 1

                continue

            dataset_resource = DatasetResource()
            dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
            dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
            dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)

            objects = [SolrObject(obj) for obj in results['docs']]

            # URL of the per-dataset search endpoint, used as the resource
            # URI for this group's paging metadata
            dataset_search_url = reverse('api_dataset_data_list', kwargs={
                'api_name': self._meta.api_name,
                'dataset_resource_name': 'dataset',
                'resource_name': 'data',
                'dataset_slug': dataset.slug
            })

            data_page = PandaPaginator(
                {
                    'limit': str(group_limit),
                    'offset': str(group_offset),
                    'q': query
                },
                objects,
                resource_uri=dataset_search_url,
                count=results['numFound']
            ).page()

            # Merge the row paging data into the dataset bundle, then replace
            # its 'objects' with fully dehydrated row bundles
            dataset_bundle.data.update(data_page)
            dataset_bundle.data['objects'] = [
                self.full_dehydrate(self.build_bundle(obj=obj, request=request))
                for obj in objects
            ]

            datasets.append(dataset_bundle.data)

        page['objects'] = datasets

    # Log query
    SearchLog.objects.create(user=user, dataset=None, query=query)

    self.log_throttled_access(request)

    if export:
        return self.create_response(request, _('Export queued.'))
    else:
        return self.create_response(request, page)