Beispiel #1
0
    def run(self, query, task_status_id, filename=None, *args, **kwargs):
        """
        Export every row matching a Solr query to a zip of per-dataset CSVs.

        The query is first grouped by ``dataset_slug`` to discover which
        datasets have matches. For each of those that still exists in the
        database, the matching rows are paged out of Solr into
        ``<dataset_slug>.csv``; the file is then added to the zip archive and
        removed from disk.

        :param query: Solr query string selecting the rows to export.
        :param task_status_id: Primary key of the ``TaskStatus`` used to
            report progress.
        :param filename: Base name (without extension) of the temporary
            directory and of the zip file, both created under
            ``settings.EXPORT_ROOT``. Defaults to
            ``search_export_<now().isoformat()>``.
        :returns: The zip file name, or ``None`` if the task was aborted
            (in which case the partial output is left on disk).
        :raises OSError: If the temporary directory cannot be created.
        """
        from panda.models import Dataset, TaskStatus

        log = logging.getLogger(self.name)
        log.info('Beginning export, query: %s' % query)

        task_status = TaskStatus.objects.get(id=task_status_id)
        task_status.begin('Preparing to export')

        if not filename:
            filename = 'search_export_%s' % (now().isoformat())

        zip_name = '%s.zip' % filename

        path = os.path.join(settings.EXPORT_ROOT, filename)
        zip_path = os.path.join(settings.EXPORT_ROOT, zip_name)

        try:
            os.makedirs(os.path.realpath(path))
        except OSError:
            # A directory that already exists is fine. Anything else would
            # only resurface later as a less obvious failure, so re-raise.
            if not os.path.isdir(path):
                raise

        archive = ZipFile(zip_path, 'w')

        try:
            response = solr.query_grouped(settings.SOLR_DATA_CORE,
                                          query,
                                          'dataset_slug',
                                          offset=0,
                                          limit=1000,
                                          group_limit=0,
                                          group_offset=0)
            groups = response['grouped']['dataset_slug']['groups']

            # dataset_slug -> number of matching rows (initial estimate,
            # refreshed per dataset below).
            datasets = {}

            for group in groups:
                datasets[group['groupValue']] = group['doclist']['numFound']

            total_n = 0
            throttle = config_value('PERF', 'TASK_THROTTLE')

            for dataset_slug in datasets:
                try:
                    dataset = Dataset.objects.get(slug=dataset_slug)
                except Dataset.DoesNotExist:
                    log.warning(
                        'Skipping part of export due to Dataset being deleted, dataset_slug: %s'
                        % dataset_slug)

                    # Skipped rows must not count towards the progress total.
                    datasets[dataset_slug] = 0

                    continue

                # Separate name so the ``filename`` argument is not clobbered.
                csv_name = '%s.csv' % dataset_slug
                file_path = os.path.join(path, csv_name)

                # The same query is used for counting and for paging so the
                # loop bound matches the rows that are actually fetched.
                dataset_query = 'dataset_slug: %s %s' % (dataset_slug, query)

                f = open(file_path, 'w')

                try:
                    writer = CSVKitWriter(f)

                    # Header
                    writer.writerow([c['name'] for c in dataset.column_schema])

                    response = solr.query(settings.SOLR_DATA_CORE,
                                          dataset_query,
                                          offset=0,
                                          limit=0)

                    # Update dataset and total counts for progress tracking
                    datasets[dataset_slug] = response['response']['numFound']
                    total_count = sum(datasets.values())

                    n = 0

                    while n < datasets[dataset_slug]:
                        response = solr.query(settings.SOLR_DATA_CORE,
                                              dataset_query,
                                              offset=n,
                                              limit=SOLR_PAGE_SIZE)

                        results = response['response']['docs']

                        for row in results:
                            writer.writerow(json.loads(row['data']))

                        n += SOLR_PAGE_SIZE
                        # Count the rows written, not the total match count.
                        total_n += len(results)

                        percent = floor(
                            float(total_n) / float(total_count) * 100)

                        task_status.update('%.0f%% complete' % percent)

                        if self.is_aborted():
                            task_status.abort(
                                'Aborted after exporting %.0f%%' % percent)

                            log.warning('Export aborted, query: %s' % query)

                            return

                        time.sleep(throttle)
                finally:
                    # Runs on normal completion, abort and error alike.
                    f.close()

                # Add to zip and nuke temp file
                archive.write(file_path, csv_name)
                os.remove(file_path)
        finally:
            # Finish zip file even when aborting or failing part-way.
            archive.close()

        # Nuke temp directory
        os.rmdir(path)

        task_status.update('100% complete')

        log.info('Finished export, query: %s' % query)

        return zip_name
Beispiel #2
0
    def search_all_data(self, request, **kwargs):
        """
        List endpoint using Solr. Provides full-text search via the "q" parameter.

        Results are grouped by dataset. Recognised GET parameters:

        * ``q`` -- Solr query string (default: empty).
        * ``category`` -- category slug, or ``uncategorized`` for datasets
          without a category; restricts the search to those datasets.
        * ``since`` -- lower bound applied to ``last_modified``; a ``Z``
          suffix is appended before it is sent to Solr.
        * ``limit`` / ``offset`` -- paging over dataset groups.
        * ``group_limit`` / ``group_offset`` -- paging over the rows inside
          each group.
        * ``export`` -- when set, queue an ``ExportSearchTask`` instead of
          returning results. Empty, ``0``, ``false``, ``no`` and ``off``
          count as not set.

        Returns the paginated grouped results, or an "Export queued."
        response when an export was requested.
        """
        self.method_check(request, allowed=["get"])
        self.is_authenticated(request)
        self.throttle_check(request)

        query = request.GET.get("q", "")
        category = request.GET.get("category", "")
        since = request.GET.get("since", None)
        limit = int(request.GET.get("limit", settings.PANDA_DEFAULT_SEARCH_GROUPS))
        offset = int(request.GET.get("offset", 0))
        group_limit = int(request.GET.get("group_limit", settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
        group_offset = int(request.GET.get("group_offset", 0))
        # bool() on the raw value would treat "?export=false" as True because
        # every non-empty string is truthy.
        export = request.GET.get("export", "").strip().lower() not in ("", "0", "false", "no", "off")

        if category:
            if category != "uncategorized":
                category = Category.objects.get(slug=category)
                dataset_slugs = category.datasets.values_list("slug", flat=True)
            else:
                dataset_slugs = Dataset.objects.filter(categories=None).values_list("slug", flat=True)

            query += " dataset_slug:(%s)" % " ".join(dataset_slugs)

        if since:
            since_clause = "last_modified:[" + since + "Z TO *]"

            # Without search terms there is nothing to AND with; an empty
            # "()" group would be sent to Solr otherwise.
            if query.strip():
                query = since_clause + " AND (%s)" % query
            else:
                query = since_clause

        # Because users may have authenticated via headers the request.user may
        # not be a full User instance. To be sure, we fetch one.
        user = UserProxy.objects.get(id=request.user.id)

        if export:
            task_type = ExportSearchTask

            task = TaskStatus.objects.create(
                task_name=task_type.name, task_description='Export search results for "%s".' % query, creator=user
            )

            task_type.apply_async(args=[query, task.id], kwargs={}, task_id=task.id)

            self.log_throttled_access(request)

            return self.create_response(request, "Export queued.")

        response = solr.query_grouped(
            settings.SOLR_DATA_CORE,
            query,
            "dataset_slug",
            offset=offset,
            limit=limit,
            group_limit=group_limit,
            group_offset=group_offset,
        )
        groups = response["grouped"]["dataset_slug"]["groups"]

        page = PandaPaginator(
            request.GET,
            groups,
            resource_uri=request.path_info,
            count=response["grouped"]["dataset_slug"]["ngroups"],
        ).page()

        datasets = []

        for group in groups:
            dataset_slug = group["groupValue"]
            results = group["doclist"]

            try:
                dataset = Dataset.objects.get(slug=dataset_slug)
            # In the event that stale data exists in Solr, skip this dataset,
            # request the invalid data be purged and return the other results.
            # Pagination may be wrong, but this is the most functional solution. (#793)
            except Dataset.DoesNotExist:
                PurgeDataTask.apply_async(args=[dataset_slug])
                solr.delete(settings.SOLR_DATASETS_CORE, "slug:%s" % dataset_slug)

                page["meta"]["total_count"] -= 1

                continue

            dataset_resource = DatasetResource()
            dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
            dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
            dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)

            objects = [SolrObject(obj) for obj in results["docs"]]

            # URL of the per-dataset search endpoint, used as the base for
            # this group's pagination links.
            dataset_search_url = reverse(
                "api_dataset_data_list",
                kwargs={
                    "api_name": self._meta.api_name,
                    "dataset_resource_name": "dataset",
                    "resource_name": "data",
                    "dataset_slug": dataset.slug,
                },
            )

            data_page = PandaPaginator(
                {"limit": str(group_limit), "offset": str(group_offset), "q": query},
                objects,
                resource_uri=dataset_search_url,
                count=results["numFound"],
            ).page()

            # Merge the row-level pagination into the dataset payload, then
            # replace its "objects" with the dehydrated row bundles.
            dataset_bundle.data.update(data_page)
            dataset_bundle.data["objects"] = []

            for obj in objects:
                data_bundle = self.build_bundle(obj=obj, request=request)
                data_bundle = self.full_dehydrate(data_bundle)
                dataset_bundle.data["objects"].append(data_bundle)

            datasets.append(dataset_bundle.data)

        page["objects"] = datasets

        # Log query
        SearchLog.objects.create(user=user, dataset=None, query=query)

        self.log_throttled_access(request)

        return self.create_response(request, page)
Beispiel #3
0
    def run(self, query, task_status_id, filename=None, *args, **kwargs):
        """
        Export every row matching a Solr query to a zip of per-dataset CSVs.

        The query is first grouped by ``dataset_slug`` to discover which
        datasets have matches. For each of those that still exists in the
        database, the matching rows are paged out of Solr into
        ``<dataset_slug>.csv``; the file is then added to the zip archive and
        removed from disk.

        :param query: Solr query string selecting the rows to export.
        :param task_status_id: Primary key of the ``TaskStatus`` used to
            report progress.
        :param filename: Base name (without extension) of the temporary
            directory and of the zip file, both created under
            ``settings.EXPORT_ROOT``. Defaults to
            ``search_export_<now().isoformat()>``.
        :returns: The zip file name, or ``None`` if the task was aborted
            (in which case the partial output is left on disk).
        :raises OSError: If the temporary directory cannot be created.
        """
        from panda.models import Dataset, TaskStatus

        log = logging.getLogger(self.name)
        log.info('Beginning export, query: %s' % query)

        task_status = TaskStatus.objects.get(id=task_status_id)
        task_status.begin('Preparing to export')

        if not filename:
            filename = 'search_export_%s' % (now().isoformat())

        zip_name = '%s.zip' % filename

        path = os.path.join(settings.EXPORT_ROOT, filename)
        zip_path = os.path.join(settings.EXPORT_ROOT, zip_name)

        try:
            os.makedirs(os.path.realpath(path))
        except OSError:
            # A directory that already exists is fine. Anything else would
            # only resurface later as a less obvious failure, so re-raise.
            if not os.path.isdir(path):
                raise

        archive = ZipFile(zip_path, 'w')

        try:
            response = solr.query_grouped(
                settings.SOLR_DATA_CORE,
                query,
                'dataset_slug',
                offset=0,
                limit=1000,
                group_limit=0,
                group_offset=0
            )
            groups = response['grouped']['dataset_slug']['groups']

            # dataset_slug -> number of matching rows (initial estimate,
            # refreshed per dataset below).
            datasets = {}

            for group in groups:
                datasets[group['groupValue']] = group['doclist']['numFound']

            total_n = 0
            throttle = config_value('PERF', 'TASK_THROTTLE')

            for dataset_slug in datasets:
                try:
                    dataset = Dataset.objects.get(slug=dataset_slug)
                except Dataset.DoesNotExist:
                    log.warning('Skipping part of export due to Dataset being deleted, dataset_slug: %s' % dataset_slug)

                    # Skipped rows must not count towards the progress total.
                    datasets[dataset_slug] = 0

                    continue

                # Separate name so the ``filename`` argument is not clobbered.
                csv_name = '%s.csv' % dataset_slug
                file_path = os.path.join(path, csv_name)

                # The same query is used for counting and for paging so the
                # loop bound matches the rows that are actually fetched.
                dataset_query = 'dataset_slug: %s %s' % (dataset_slug, query)

                f = open(file_path, 'w')

                try:
                    writer = CSVKitWriter(f)

                    # Header
                    writer.writerow([c['name'] for c in dataset.column_schema])

                    response = solr.query(
                        settings.SOLR_DATA_CORE,
                        dataset_query,
                        offset=0,
                        limit=0
                    )

                    # Update dataset and total counts for progress tracking
                    datasets[dataset_slug] = response['response']['numFound']
                    total_count = sum(datasets.values())

                    n = 0

                    while n < datasets[dataset_slug]:
                        response = solr.query(
                            settings.SOLR_DATA_CORE,
                            dataset_query,
                            offset=n,
                            limit=SOLR_PAGE_SIZE
                        )

                        results = response['response']['docs']

                        for row in results:
                            writer.writerow(json.loads(row['data']))

                        n += SOLR_PAGE_SIZE
                        # Count the rows written, not the total match count.
                        total_n += len(results)

                        percent = floor(float(total_n) / float(total_count) * 100)

                        task_status.update('%.0f%% complete' % percent)

                        if self.is_aborted():
                            task_status.abort('Aborted after exporting %.0f%%' % percent)

                            log.warning('Export aborted, query: %s' % query)

                            return

                        time.sleep(throttle)
                finally:
                    # Runs on normal completion, abort and error alike.
                    f.close()

                # Add to zip and nuke temp file
                archive.write(file_path, csv_name)
                os.remove(file_path)
        finally:
            # Finish zip file even when aborting or failing part-way.
            archive.close()

        # Nuke temp directory
        os.rmdir(path)

        task_status.update('100% complete')

        log.info('Finished export, query: %s' % query)

        return zip_name
Beispiel #4
0
Datei: data.py Projekt: eob/panda
    def search_all_data(self, request, **kwargs):
        """
        List endpoint using Solr. Provides full-text search via the "q" parameter.

        Results are grouped by dataset. Recognised GET parameters:

        * ``q`` -- Solr query string (default: empty).
        * ``limit`` / ``offset`` -- paging over dataset groups.
        * ``group_limit`` / ``group_offset`` -- paging over the rows inside
          each group.

        Groups whose dataset no longer exists in the database are left out
        of the response.
        """
        self.method_check(request, allowed=['get'])
        self.is_authenticated(request)
        self.throttle_check(request)

        query = request.GET.get('q', '')
        limit = int(request.GET.get('limit', settings.PANDA_DEFAULT_SEARCH_GROUPS))
        offset = int(request.GET.get('offset', 0))
        group_limit = int(request.GET.get('group_limit', settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
        group_offset = int(request.GET.get('group_offset', 0))

        response = solr.query_grouped(
            settings.SOLR_DATA_CORE,
            query,
            'dataset_slug',
            offset=offset,
            limit=limit,
            group_limit=group_limit,
            group_offset=group_offset
        )
        groups = response['grouped']['dataset_slug']['groups']

        page = PandaPaginator(
            request.GET,
            groups,
            resource_uri=request.path_info,
            count=response['grouped']['dataset_slug']['ngroups']
        ).page()

        datasets = []

        for group in groups:
            dataset_slug = group['groupValue']
            results = group['doclist']

            try:
                dataset = Dataset.objects.get(slug=dataset_slug)
            except Dataset.DoesNotExist:
                # Solr may still hold rows for a dataset that has been
                # deleted. Skip the stale group rather than failing the
                # whole request; pagination may be slightly off as a result.
                page['meta']['total_count'] -= 1

                continue

            dataset_resource = DatasetResource()
            dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
            dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
            dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)

            objects = [SolrObject(obj) for obj in results['docs']]

            # URL of the per-dataset search endpoint, used as the base for
            # this group's pagination links.
            dataset_search_url = reverse(
                'api_dataset_data_list',
                kwargs={
                    'api_name': self._meta.api_name,
                    'dataset_resource_name': 'dataset',
                    'resource_name': 'data',
                    'dataset_slug': dataset.slug
                }
            )

            data_page = PandaPaginator(
                { 'limit': str(group_limit), 'offset': str(group_offset), 'q': query },
                objects,
                resource_uri=dataset_search_url,
                count=results['numFound']
            ).page()

            # Merge the row-level pagination into the dataset payload, then
            # replace its "objects" with the dehydrated row bundles.
            dataset_bundle.data.update(data_page)
            dataset_bundle.data['objects'] = []

            for obj in objects:
                data_bundle = self.build_bundle(obj=obj, request=request)
                data_bundle = self.full_dehydrate(data_bundle)
                dataset_bundle.data['objects'].append(data_bundle)

            datasets.append(dataset_bundle.data)

        page['objects'] = datasets

        self.log_throttled_access(request)

        return self.create_response(request, page)
Beispiel #5
0
    def search_all_data(self, request, **kwargs):
        """
        List endpoint using Solr. Provides full-text search via the "q" parameter.

        Results are grouped by dataset. Recognised GET parameters:

        * ``q`` -- Solr query string (default: empty).
        * ``category`` -- category slug, or ``uncategorized`` for datasets
          without a category; restricts the search to those datasets.
        * ``since`` -- lower bound applied to ``last_modified``; a ``Z``
          suffix is appended before it is sent to Solr.
        * ``limit`` / ``offset`` -- paging over dataset groups.
        * ``group_limit`` / ``group_offset`` -- paging over the rows inside
          each group.
        * ``export`` -- when set, queue an ``ExportSearchTask`` instead of
          returning results. Empty, ``0``, ``false``, ``no`` and ``off``
          count as not set.

        Returns the paginated grouped results, or an "Export queued."
        response when an export was requested.
        """
        self.method_check(request, allowed=['get'])
        self.is_authenticated(request)
        self.throttle_check(request)

        query = request.GET.get('q', '')
        category = request.GET.get('category', '')
        since = request.GET.get('since', None)
        limit = int(
            request.GET.get('limit', settings.PANDA_DEFAULT_SEARCH_GROUPS))
        offset = int(request.GET.get('offset', 0))
        group_limit = int(
            request.GET.get('group_limit',
                            settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
        group_offset = int(request.GET.get('group_offset', 0))
        # bool() on the raw value would treat "?export=false" as True because
        # every non-empty string is truthy.
        export_flag = request.GET.get('export', '').strip().lower()
        export = export_flag not in ('', '0', 'false', 'no', 'off')

        if category:
            if category != 'uncategorized':
                category = Category.objects.get(slug=category)
                dataset_slugs = category.datasets.values_list('slug',
                                                              flat=True)
            else:
                dataset_slugs = Dataset.objects.filter(
                    categories=None).values_list('slug', flat=True)

            query += ' dataset_slug:(%s)' % ' '.join(dataset_slugs)

        if since:
            since_clause = 'last_modified:[' + since + 'Z TO *]'

            # Without search terms there is nothing to AND with; an empty
            # "()" group would be sent to Solr otherwise.
            if query.strip():
                query = since_clause + ' AND (%s)' % query
            else:
                query = since_clause

        # Because users may have authenticated via headers the request.user may
        # not be a full User instance. To be sure, we fetch one.
        user = UserProxy.objects.get(id=request.user.id)

        if export:
            task_type = ExportSearchTask

            task = TaskStatus.objects.create(
                task_name=task_type.name,
                task_description='Export search results for "%s".' % query,
                creator=user)

            task_type.apply_async(args=[query, task.id],
                                  kwargs={},
                                  task_id=task.id)

            self.log_throttled_access(request)

            return self.create_response(request, 'Export queued.')

        response = solr.query_grouped(settings.SOLR_DATA_CORE,
                                      query,
                                      'dataset_slug',
                                      offset=offset,
                                      limit=limit,
                                      group_limit=group_limit,
                                      group_offset=group_offset)
        groups = response['grouped']['dataset_slug']['groups']

        page = PandaPaginator(
            request.GET,
            groups,
            resource_uri=request.path_info,
            count=response['grouped']['dataset_slug']['ngroups']).page()

        datasets = []

        for group in groups:
            dataset_slug = group['groupValue']
            results = group['doclist']

            try:
                dataset = Dataset.objects.get(slug=dataset_slug)
            # In the event that stale data exists in Solr, skip this dataset,
            # request the invalid data be purged and return the other results.
            # Pagination may be wrong, but this is the most functional solution. (#793)
            except Dataset.DoesNotExist:
                PurgeDataTask.apply_async(args=[dataset_slug])
                solr.delete(settings.SOLR_DATASETS_CORE,
                            'slug:%s' % dataset_slug)

                page['meta']['total_count'] -= 1

                continue

            dataset_resource = DatasetResource()
            dataset_bundle = dataset_resource.build_bundle(obj=dataset,
                                                           request=request)
            dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
            dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)

            objects = [SolrObject(obj) for obj in results['docs']]

            # URL of the per-dataset search endpoint, used as the base for
            # this group's pagination links.
            dataset_search_url = reverse('api_dataset_data_list',
                                         kwargs={
                                             'api_name': self._meta.api_name,
                                             'dataset_resource_name':
                                             'dataset',
                                             'resource_name': 'data',
                                             'dataset_slug': dataset.slug
                                         })

            data_page = PandaPaginator(
                {
                    'limit': str(group_limit),
                    'offset': str(group_offset),
                    'q': query
                },
                objects,
                resource_uri=dataset_search_url,
                count=results['numFound']).page()

            # Merge the row-level pagination into the dataset payload, then
            # replace its "objects" with the dehydrated row bundles.
            dataset_bundle.data.update(data_page)
            dataset_bundle.data['objects'] = []

            for obj in objects:
                data_bundle = self.build_bundle(obj=obj, request=request)
                data_bundle = self.full_dehydrate(data_bundle)
                dataset_bundle.data['objects'].append(data_bundle)

            datasets.append(dataset_bundle.data)

        page['objects'] = datasets

        # Log query
        SearchLog.objects.create(user=user, dataset=None, query=query)

        self.log_throttled_access(request)

        return self.create_response(request, page)
Beispiel #6
0
    def search_all_data(self, request, **kwargs):
        """
        List endpoint using Solr. Provides full-text search via the "q" parameter.

        Results are grouped by dataset. Recognised GET parameters:

        * ``q`` -- Solr query string (default: empty).
        * ``limit`` / ``offset`` -- paging over dataset groups.
        * ``group_limit`` / ``group_offset`` -- paging over the rows inside
          each group.

        Groups whose dataset no longer exists in the database are left out
        of the response.
        """
        self.method_check(request, allowed=["get"])
        self.is_authenticated(request)
        self.throttle_check(request)

        query = request.GET.get("q", "")
        limit = int(request.GET.get("limit", settings.PANDA_DEFAULT_SEARCH_GROUPS))
        offset = int(request.GET.get("offset", 0))
        group_limit = int(request.GET.get("group_limit", settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
        group_offset = int(request.GET.get("group_offset", 0))

        response = solr.query_grouped(
            settings.SOLR_DATA_CORE,
            query,
            "dataset_slug",
            offset=offset,
            limit=limit,
            group_limit=group_limit,
            group_offset=group_offset,
        )
        groups = response["grouped"]["dataset_slug"]["groups"]

        page = PandaPaginator(
            request.GET, groups, resource_uri=request.path_info, count=response["grouped"]["dataset_slug"]["ngroups"]
        ).page()

        datasets = []

        for group in groups:
            dataset_slug = group["groupValue"]
            results = group["doclist"]

            try:
                dataset = Dataset.objects.get(slug=dataset_slug)
            except Dataset.DoesNotExist:
                # Solr may still hold rows for a dataset that has been
                # deleted. Skip the stale group rather than failing the
                # whole request; pagination may be slightly off as a result.
                page["meta"]["total_count"] -= 1

                continue

            dataset_resource = DatasetResource()
            dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
            dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
            dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)

            objects = [SolrObject(obj) for obj in results["docs"]]

            # URL of the per-dataset search endpoint, used as the base for
            # this group's pagination links.
            dataset_search_url = reverse(
                "api_dataset_data_list",
                kwargs={
                    "api_name": self._meta.api_name,
                    "dataset_resource_name": "dataset",
                    "resource_name": "data",
                    "dataset_slug": dataset.slug,
                },
            )

            data_page = PandaPaginator(
                {"limit": str(group_limit), "offset": str(group_offset), "q": query},
                objects,
                resource_uri=dataset_search_url,
                count=results["numFound"],
            ).page()

            # Merge the row-level pagination into the dataset payload, then
            # replace its "objects" with the dehydrated row bundles.
            dataset_bundle.data.update(data_page)
            dataset_bundle.data["objects"] = []

            for obj in objects:
                data_bundle = self.build_bundle(obj=obj, request=request)
                data_bundle = self.full_dehydrate(data_bundle)
                dataset_bundle.data["objects"].append(data_bundle)

            datasets.append(dataset_bundle.data)

        page["objects"] = datasets

        self.log_throttled_access(request)

        return self.create_response(request, page)
Beispiel #7
0
    def search_all_data(self, request, **kwargs):
        """
        List endpoint using Solr. Provides full-text search via the "q" parameter.

        Results are grouped by dataset. Recognised GET parameters:

        * ``q`` -- Solr query string (default: empty).
        * ``category`` -- category slug, or ``uncategorized`` for datasets
          without a category; restricts the search to those datasets.
        * ``since`` -- lower bound applied to ``last_modified``; a ``Z``
          suffix is appended before it is sent to Solr.
        * ``limit`` / ``offset`` -- paging over dataset groups.
        * ``group_limit`` / ``group_offset`` -- paging over the rows inside
          each group.

        Groups whose dataset no longer exists in the database are left out
        of the response. Every search is recorded in ``SearchLog``.
        """
        self.method_check(request, allowed=['get'])
        self.is_authenticated(request)
        self.throttle_check(request)

        query = request.GET.get('q', '')
        category = request.GET.get('category', '')
        since = request.GET.get('since', None)
        limit = int(request.GET.get('limit', settings.PANDA_DEFAULT_SEARCH_GROUPS))
        offset = int(request.GET.get('offset', 0))
        group_limit = int(request.GET.get('group_limit', settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
        group_offset = int(request.GET.get('group_offset', 0))

        if category:
            if category != 'uncategorized':
                category = Category.objects.get(slug=category)
                dataset_slugs = category.datasets.values_list('slug', flat=True)
            else:
                dataset_slugs = Dataset.objects.filter(categories=None).values_list('slug', flat=True)

            query += ' dataset_slug:(%s)' % ' '.join(dataset_slugs)

        if since:
            since_clause = 'last_modified:[' + since + 'Z TO *]'

            # Without search terms there is nothing to AND with; an empty
            # "()" group would be sent to Solr otherwise.
            if query.strip():
                query = since_clause + ' AND (%s)' % query
            else:
                query = since_clause

        response = solr.query_grouped(
            settings.SOLR_DATA_CORE,
            query,
            'dataset_slug',
            offset=offset,
            limit=limit,
            group_limit=group_limit,
            group_offset=group_offset
        )
        groups = response['grouped']['dataset_slug']['groups']

        page = PandaPaginator(
            request.GET,
            groups,
            resource_uri=request.path_info,
            count=response['grouped']['dataset_slug']['ngroups']
        ).page()

        datasets = []

        for group in groups:
            dataset_slug = group['groupValue']
            results = group['doclist']

            try:
                dataset = Dataset.objects.get(slug=dataset_slug)
            except Dataset.DoesNotExist:
                # Solr may still hold rows for a dataset that has been
                # deleted. Skip the stale group rather than failing the
                # whole request; pagination may be slightly off as a result.
                page['meta']['total_count'] -= 1

                continue

            dataset_resource = DatasetResource()
            dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
            dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
            dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)

            objects = [SolrObject(obj) for obj in results['docs']]

            # URL of the per-dataset search endpoint, used as the base for
            # this group's pagination links.
            dataset_search_url = reverse(
                'api_dataset_data_list',
                kwargs={
                    'api_name': self._meta.api_name,
                    'dataset_resource_name': 'dataset',
                    'resource_name': 'data',
                    'dataset_slug': dataset.slug
                }
            )

            data_page = PandaPaginator(
                { 'limit': str(group_limit), 'offset': str(group_offset), 'q': query },
                objects,
                resource_uri=dataset_search_url,
                count=results['numFound']
            ).page()

            # Merge the row-level pagination into the dataset payload, then
            # replace its "objects" with the dehydrated row bundles.
            dataset_bundle.data.update(data_page)
            dataset_bundle.data['objects'] = []

            for obj in objects:
                data_bundle = self.build_bundle(obj=obj, request=request)
                data_bundle = self.full_dehydrate(data_bundle)
                dataset_bundle.data['objects'].append(data_bundle)

            datasets.append(dataset_bundle.data)

        page['objects'] = datasets

        # Log query
        # NOTE(review): request.user may not be a full User instance if the
        # caller authenticated via headers -- confirm SearchLog accepts it.
        SearchLog.objects.create(user=request.user, dataset=None, query=query)

        self.log_throttled_access(request)

        return self.create_response(request, page)
Beispiel #8
0
    def search_all_data(self, request, **kwargs):
        """
        List endpoint using Solr. Provides full-text search via the "q" parameter.

        Results are grouped by dataset. Recognised GET parameters:

        * ``q`` -- search terms; wrapped in parentheses before being
          combined with the other filters.
        * ``category`` -- category slug, or ``uncategorized`` for datasets
          without a category; restricts the search to those datasets.
        * ``since`` -- lower bound applied to ``last_modified``; a ``Z``
          suffix is appended before it is sent to Solr.
        * ``limit`` / ``offset`` -- paging over dataset groups.
        * ``group_limit`` / ``group_offset`` -- paging over the rows inside
          each group.
        * ``export`` -- when set, queue an ``ExportSearchTask`` instead of
          returning results. Empty, ``0``, ``false``, ``no`` and ``off``
          count as not set.

        Returns the paginated grouped results, or an "Export queued."
        response when an export was requested.
        """
        self.method_check(request, allowed=['get'])
        self.is_authenticated(request)
        self.throttle_check(request)

        # Only wrap real search terms: a missing or empty "q" must not
        # contribute an empty "()" clause to the Solr query.
        search_terms = request.GET.get('q', '')
        query = '(%s)' % search_terms if search_terms else ''

        category = request.GET.get('category', '')
        since = request.GET.get('since', None)
        limit = int(request.GET.get('limit', settings.PANDA_DEFAULT_SEARCH_GROUPS))
        offset = int(request.GET.get('offset', 0))
        group_limit = int(request.GET.get('group_limit', settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
        group_offset = int(request.GET.get('group_offset', 0))
        # bool() on the raw value would treat "?export=false" as True because
        # every non-empty string is truthy.
        export = request.GET.get('export', '').strip().lower() not in ('', '0', 'false', 'no', 'off')

        # Clauses that are ANDed together to form the final Solr query.
        solr_query_bits = [query] if query else []

        if category:
            if category != 'uncategorized':
                category = Category.objects.get(slug=category)
                dataset_slugs = category.datasets.values_list('slug', flat=True)
            else:
                dataset_slugs = Dataset.objects.filter(categories=None).values_list('slug', flat=True)

            solr_query_bits.append('dataset_slug:(%s)' % ' '.join(dataset_slugs))

        if since:
            solr_query_bits.append('last_modified:[' + since + 'Z TO *]')

        solr_query = ' AND '.join(solr_query_bits)

        # Because users may have authenticated via headers the request.user may
        # not be a full User instance. To be sure, we fetch one.
        user = UserProxy.objects.get(id=request.user.id)

        if export:
            task_type = ExportSearchTask

            task = TaskStatus.objects.create(
                task_name=task_type.name,
                task_description=_('Export search results for "%s".') % query,
                creator=user
            )

            # Export the same filtered query the interactive search would
            # run, so the category and "since" restrictions are honoured.
            task_type.apply_async(
                args=[solr_query, task.id],
                kwargs={},
                task_id=task.id
            )

            self.log_throttled_access(request)

            return self.create_response(request, _('Export queued.'))

        response = solr.query_grouped(
            settings.SOLR_DATA_CORE,
            solr_query,
            'dataset_slug',
            offset=offset,
            limit=limit,
            group_limit=group_limit,
            group_offset=group_offset
        )
        groups = response['grouped']['dataset_slug']['groups']

        page = PandaPaginator(
            request.GET,
            groups,
            resource_uri=request.path_info,
            count=response['grouped']['dataset_slug']['ngroups']
        ).page()

        datasets = []

        for group in groups:
            dataset_slug = group['groupValue']
            results = group['doclist']

            try:
                dataset = Dataset.objects.get(slug=dataset_slug)
            # In the event that stale data exists in Solr, skip this dataset,
            # request the invalid data be purged and return the other results.
            # Pagination may be wrong, but this is the most functional solution. (#793)
            except Dataset.DoesNotExist:
                PurgeDataTask.apply_async(args=[dataset_slug])
                solr.delete(settings.SOLR_DATASETS_CORE, 'slug:%s' % dataset_slug)

                page['meta']['total_count'] -= 1

                continue

            dataset_resource = DatasetResource()
            dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
            dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
            dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)

            objects = [SolrObject(obj) for obj in results['docs']]

            # URL of the per-dataset search endpoint, used as the base for
            # this group's pagination links.
            dataset_search_url = reverse(
                'api_dataset_data_list',
                kwargs={
                    'api_name': self._meta.api_name,
                    'dataset_resource_name': 'dataset',
                    'resource_name': 'data',
                    'dataset_slug': dataset.slug
                }
            )

            data_page = PandaPaginator(
                { 'limit': str(group_limit), 'offset': str(group_offset), 'q': query },
                objects,
                resource_uri=dataset_search_url,
                count=results['numFound']
            ).page()

            # Merge the row-level pagination into the dataset payload, then
            # replace its "objects" with the dehydrated row bundles.
            dataset_bundle.data.update(data_page)
            dataset_bundle.data['objects'] = []

            for obj in objects:
                data_bundle = self.build_bundle(obj=obj, request=request)
                data_bundle = self.full_dehydrate(data_bundle)
                dataset_bundle.data['objects'].append(data_bundle)

            datasets.append(dataset_bundle.data)

        page['objects'] = datasets

        # Log query
        SearchLog.objects.create(user=user, dataset=None, query=query)

        self.log_throttled_access(request)

        return self.create_response(request, page)