Example #1
    def get_resource_uri(self, bundle_or_obj):
        """
        Build a canonical uri for a datum.

        If the resource doesn't have an external_id it is
        considered "unaddressable" and this will return None.
        """
        dr = DatasetResource()

        kwargs = {
            'api_name': self._meta.api_name,
            'dataset_resource_name': dr._meta.resource_name,
            'resource_name': self._meta.resource_name,
        }

        if isinstance(bundle_or_obj, Bundle):
            kwargs['dataset_slug'] = bundle_or_obj.obj.dataset_slug
            kwargs['external_id'] = bundle_or_obj.obj.external_id
        else:
            kwargs['dataset_slug'] = bundle_or_obj.dataset_slug
            kwargs['external_id'] = bundle_or_obj.external_id

        if not kwargs['external_id']:
            return None

        return dr._build_reverse_url('api_dataset_data_detail', kwargs=kwargs)
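
A minimal, runnable sketch of the dispatch above, using stand-in classes rather than the real tastypie ``Bundle`` (the class names and URL pattern here are illustrative assumptions, not PANDA's actual types or routing):

    class Bundle(object):
        def __init__(self, obj):
            self.obj = obj

    class Datum(object):
        def __init__(self, dataset_slug, external_id):
            self.dataset_slug = dataset_slug
            self.external_id = external_id

    def resolve_uri(bundle_or_obj):
        # Unwrap a Bundle to the underlying object, as get_resource_uri does.
        obj = bundle_or_obj.obj if isinstance(bundle_or_obj, Bundle) else bundle_or_obj
        if not obj.external_id:
            return None  # "unaddressable" datum
        # Hypothetical URL pattern standing in for _build_reverse_url().
        return '/api/1.0/dataset/%s/data/%s/' % (obj.dataset_slug, obj.external_id)

    print(resolve_uri(Datum('crime', 'row-7')))       # /api/1.0/dataset/crime/data/row-7/
    print(resolve_uri(Bundle(Datum('crime', None))))  # None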
Example #2
    def get_resource_uri(self, bundle_or_obj):
        """
        Build a canonical uri for a datum.

        If the resource doesn't have an external_id it is
        considered "unaddressable" and this will return None.
        """
        dr = DatasetResource()

        kwargs = {
            "api_name": self._meta.api_name,
            "dataset_resource_name": dr._meta.resource_name,
            "resource_name": self._meta.resource_name,
        }

        if isinstance(bundle_or_obj, Bundle):
            kwargs["dataset_slug"] = bundle_or_obj.obj.dataset_slug
            kwargs["external_id"] = bundle_or_obj.obj.external_id
        else:
            kwargs["dataset_slug"] = bundle_or_obj.dataset_slug
            kwargs["external_id"] = bundle_or_obj.external_id

        if not kwargs["external_id"]:
            return None

        return dr._build_reverse_url("api_dataset_data_detail", kwargs=kwargs)
Example #3
    def search_dataset_data(self, request, **kwargs):
        """
        Perform a full-text search on only one dataset.

        See ``get_list``.
        """
        dataset = Dataset.objects.get(slug=kwargs['dataset_slug'])

        query = request.GET.get('q', '')
        since = request.GET.get('since', None)
        limit = int(
            request.GET.get('limit', settings.PANDA_DEFAULT_SEARCH_ROWS))
        offset = int(request.GET.get('offset', 0))
        sort = request.GET.get('sort', '_docid_ asc')

        if query:
            solr_query = 'dataset_slug:%s AND (%s)' % (dataset.slug, query)
        else:
            solr_query = 'dataset_slug:%s' % dataset.slug

        if since:
            solr_query += ' AND last_modified:[' + since + 'Z TO *]'

        response = solr.query(settings.SOLR_DATA_CORE,
                              solr_query,
                              offset=offset,
                              sort=sort,
                              limit=limit)

        dataset_resource = DatasetResource()
        dataset_bundle = dataset_resource.build_bundle(obj=dataset,
                                                       request=request)
        dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
        dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)

        results = [SolrObject(d) for d in response['response']['docs']]

        page = PandaPaginator(request.GET,
                              results,
                              resource_uri=request.path_info,
                              count=response['response']['numFound']).page()

        dataset_bundle.data.update(page)
        dataset_bundle.data['objects'] = []

        for obj in results:
            bundle = self.build_bundle(obj=obj, request=request)
            bundle = self.full_dehydrate(bundle)
            dataset_bundle.data['objects'].append(bundle.data)

        # Because users may have authenticated via headers, the request.user may
        # not be a full User instance. To be sure, we fetch one.
        user = UserProxy.objects.get(id=request.user.id)

        SearchLog.objects.create(user=user, dataset=dataset, query=query)

        return dataset_bundle
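
The Solr query string this method assembles can be sketched in isolation; the slug, search term, and timestamp below are made up:

    def build_solr_query(dataset_slug, query='', since=None):
        # Scope the search to a single dataset; parenthesize the user's terms
        # so boolean operators inside them cannot escape the clause.
        if query:
            solr_query = 'dataset_slug:%s AND (%s)' % (dataset_slug, query)
        else:
            solr_query = 'dataset_slug:%s' % dataset_slug

        # Optional "modified since" filter, expressed as a Solr date range.
        if since:
            solr_query += ' AND last_modified:[' + since + 'Z TO *]'

        return solr_query

    print(build_solr_query('crime', 'robbery', '2012-01-01T00:00:00'))
    # dataset_slug:crime AND (robbery) AND last_modified:[2012-01-01T00:00:00Z TO *]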
Example #4
    def search_dataset_data(self, request, **kwargs):
        """
        Perform a full-text search on only one dataset.

        See ``get_list``.
        """
        dataset = Dataset.objects.get(slug=kwargs['dataset_slug'])

        query = request.GET.get('q', '')
        since = request.GET.get('since', None)
        limit = int(request.GET.get('limit', settings.PANDA_DEFAULT_SEARCH_ROWS))
        offset = int(request.GET.get('offset', 0))

        if query:
            solr_query = 'dataset_slug:%s AND (%s)' % (dataset.slug, query)
        else:
            solr_query = 'dataset_slug:%s' % dataset.slug

        if since:
            solr_query += ' AND last_modified:[' + since + 'Z TO *]'

        response = solr.query(
            settings.SOLR_DATA_CORE,
            solr_query,
            offset=offset,
            limit=limit
        )

        dataset_resource = DatasetResource()
        dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
        dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
        dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)
       
        results = [SolrObject(d) for d in response['response']['docs']]

        page = PandaPaginator(
            request.GET,
            results,
            resource_uri=request.path_info,
            count=response['response']['numFound']
        ).page() 
        
        dataset_bundle.data.update(page)
        dataset_bundle.data['objects'] = []

        for obj in results:
            bundle = self.build_bundle(obj=obj, request=request)
            bundle = self.full_dehydrate(bundle)
            dataset_bundle.data['objects'].append(bundle.data)
        
        SearchLog.objects.create(user=request.user, dataset=dataset, query=query)

        return dataset_bundle
Example #5
    def search_dataset_data(self, request, **kwargs):
        """
        Perform a full-text search on only one dataset.

        See ``get_list``.
        """
        dataset = Dataset.objects.get(slug=kwargs["dataset_slug"])

        query = request.GET.get("q", "")
        since = request.GET.get("since", None)
        limit = int(request.GET.get("limit", settings.PANDA_DEFAULT_SEARCH_ROWS))
        offset = int(request.GET.get("offset", 0))
        sort = request.GET.get("sort", "_docid_ asc")

        if query:
            solr_query = "dataset_slug:%s AND (%s)" % (dataset.slug, query)
        else:
            solr_query = "dataset_slug:%s" % dataset.slug

        if since:
            solr_query += " AND last_modified:[" + since + "Z TO *]"

        response = solr.query(settings.SOLR_DATA_CORE, solr_query, offset=offset, sort=sort, limit=limit)

        dataset_resource = DatasetResource()
        dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
        dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
        dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)

        results = [SolrObject(d) for d in response["response"]["docs"]]

        page = PandaPaginator(
            request.GET, results, resource_uri=request.path_info, count=response["response"]["numFound"]
        ).page()

        dataset_bundle.data.update(page)
        dataset_bundle.data["objects"] = []

        for obj in results:
            bundle = self.build_bundle(obj=obj, request=request)
            bundle = self.full_dehydrate(bundle)
            dataset_bundle.data["objects"].append(bundle.data)

        # Because users may have authenticated via headers, the request.user may
        # not be a full User instance. To be sure, we fetch one.
        user = UserProxy.objects.get(id=request.user.id)

        SearchLog.objects.create(user=user, dataset=dataset, query=query)

        return dataset_bundle
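
Note the shape of the return value: the paginator's output is merged into the dataset's own representation before the matching rows are attached under 'objects'. A rough illustration (the field values are hypothetical; only the 'meta', 'total_count', and 'objects' keys come from the code in these examples):

    dataset_repr = {'name': 'Crime reports', 'slug': 'crime'}
    page = {'meta': {'total_count': 2}}  # as produced by PandaPaginator.page()

    dataset_repr.update(page)            # dataset fields + pagination metadata
    dataset_repr['objects'] = [{'external_id': 'row-1'}, {'external_id': 'row-2'}]

    print(sorted(dataset_repr))  # ['meta', 'name', 'objects', 'slug']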
Example #6
    def dehydrate(self, bundle):
        """
        Trim the dataset_slug field and add a proper relationship.
        """
        dataset = Dataset.objects.get(slug=bundle.data['dataset_slug'])

        del bundle.data['dataset_slug']
        bundle.data['dataset'] = DatasetResource().get_resource_uri(dataset)

        return bundle
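
A toy version of this reshaping, with a hypothetical URI builder standing in for DatasetResource().get_resource_uri():

    def swap_slug_for_uri(data, uri_for):
        slug = data.pop('dataset_slug')  # trim the raw slug field...
        data['dataset'] = uri_for(slug)  # ...and add a proper relationship URI
        return data

    row = {'external_id': 'row-7', 'dataset_slug': 'crime'}
    print(swap_slug_for_uri(row, lambda slug: '/api/1.0/dataset/%s/' % slug))
    # {'external_id': 'row-7', 'dataset': '/api/1.0/dataset/crime/'}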
Example #7
File: data.py Project: eads/panda
    def search_dataset_data(self, request, **kwargs):
        """
        Perform a full-text search on only one dataset.

        See ``get_list``.
        """
        dataset = Dataset.objects.get(slug=kwargs["dataset_slug"])

        query = request.GET.get("q", None)
        limit = int(request.GET.get("limit", settings.PANDA_DEFAULT_SEARCH_ROWS))
        offset = int(request.GET.get("offset", 0))

        if query:
            solr_query = "dataset_slug:%s AND %s" % (dataset.slug, query)
        else:
            solr_query = "dataset_slug:%s" % dataset.slug

        response = solr.query(settings.SOLR_DATA_CORE, solr_query, offset=offset, limit=limit)

        dataset_resource = DatasetResource()
        dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
        dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
        dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)

        results = [SolrObject(d) for d in response["response"]["docs"]]

        page = PandaPaginator(
            request.GET, results, resource_uri=request.path_info, count=response["response"]["numFound"]
        ).page()

        dataset_bundle.data.update(page)
        dataset_bundle.data["objects"] = []

        for obj in results:
            bundle = self.build_bundle(obj=obj, request=request)
            bundle = self.full_dehydrate(bundle)
            dataset_bundle.data["objects"].append(bundle.data)

        return dataset_bundle
Example #8
    def search_all_data(self, request, **kwargs):
        """
        List endpoint using Solr. Provides full-text search via the "q" parameter.
        """
        self.method_check(request, allowed=["get"])
        self.is_authenticated(request)
        self.throttle_check(request)

        query = request.GET.get("q", "")
        category = request.GET.get("category", "")
        since = request.GET.get("since", None)
        limit = int(request.GET.get("limit", settings.PANDA_DEFAULT_SEARCH_GROUPS))
        offset = int(request.GET.get("offset", 0))
        group_limit = int(request.GET.get("group_limit", settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
        group_offset = int(request.GET.get("group_offset", 0))
        export = bool(request.GET.get("export", False))

        if category:
            if category != "uncategorized":
                category = Category.objects.get(slug=category)
                dataset_slugs = category.datasets.values_list("slug", flat=True)
            else:
                dataset_slugs = Dataset.objects.filter(categories=None).values_list("slug", flat=True)

            query += " dataset_slug:(%s)" % " ".join(dataset_slugs)

        if since:
            query = "last_modified:[" + since + "Z TO *] AND (%s)" % query

        # Because users may have authenticated via headers, the request.user may
        # not be a full User instance. To be sure, we fetch one.
        user = UserProxy.objects.get(id=request.user.id)

        if export:
            task_type = ExportSearchTask

            task = TaskStatus.objects.create(
                task_name=task_type.name, task_description='Export search results for "%s".' % query, creator=user
            )

            task_type.apply_async(args=[query, task.id], kwargs={}, task_id=task.id)
        else:
            response = solr.query_grouped(
                settings.SOLR_DATA_CORE,
                query,
                "dataset_slug",
                offset=offset,
                limit=limit,
                group_limit=group_limit,
                group_offset=group_offset,
            )
            groups = response["grouped"]["dataset_slug"]["groups"]

            page = PandaPaginator(
                request.GET,
                groups,
                resource_uri=request.path_info,
                count=response["grouped"]["dataset_slug"]["ngroups"],
            ).page()

            datasets = []

            for group in groups:
                dataset_slug = group["groupValue"]
                results = group["doclist"]

                try:
                    dataset = Dataset.objects.get(slug=dataset_slug)
                # In the event that stale data exists in Solr, skip this dataset,
                # request the invalid data be purged and return the other results.
                # Pagination may be wrong, but this is the most functional solution. (#793)
                except Dataset.DoesNotExist:
                    PurgeDataTask.apply_async(args=[dataset_slug])
                    solr.delete(settings.SOLR_DATASETS_CORE, "slug:%s" % dataset_slug)

                    page["meta"]["total_count"] -= 1

                    continue

                dataset_resource = DatasetResource()
                dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
                dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
                dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)

                objects = [SolrObject(obj) for obj in results["docs"]]

                dataset_search_url = reverse(
                    "api_dataset_data_list",
                    kwargs={
                        "api_name": self._meta.api_name,
                        "dataset_resource_name": "dataset",
                        "resource_name": "data",
                        "dataset_slug": dataset.slug,
                    },
                )

                data_page = PandaPaginator(
                    {"limit": str(group_limit), "offset": str(group_offset), "q": query},
                    objects,
                    resource_uri=dataset_search_url,
                    count=results["numFound"],
                ).page()

                dataset_bundle.data.update(data_page)
                dataset_bundle.data["objects"] = []

                for obj in objects:
                    data_bundle = self.build_bundle(obj=obj, request=request)
                    data_bundle = self.full_dehydrate(data_bundle)
                    dataset_bundle.data["objects"].append(data_bundle)

                datasets.append(dataset_bundle.data)

            page["objects"] = datasets

            # Log query
            SearchLog.objects.create(user=user, dataset=None, query=query)

        self.log_throttled_access(request)

        if export:
            return self.create_response(request, "Export queued.")
        else:
            return self.create_response(request, page)
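
The grouped-response traversal is the heart of this endpoint. The nesting below follows the access paths in the code above ('grouped' -> 'dataset_slug' -> 'groups', each group carrying a 'groupValue' and a 'doclist'); the documents themselves are hypothetical:

    response = {
        'grouped': {
            'dataset_slug': {
                'ngroups': 1,
                'groups': [{
                    'groupValue': 'crime',
                    'doclist': {'numFound': 2,
                                'docs': [{'id': 'row-1'}, {'id': 'row-2'}]},
                }],
            }
        }
    }

    for group in response['grouped']['dataset_slug']['groups']:
        print(group['groupValue'], group['doclist']['numFound'])
    # crime 2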
Example #9
File: data.py Project: eob/panda
    def search_all_data(self, request, **kwargs):
        """
        List endpoint using Solr. Provides full-text search via the "q" parameter.
        """
        self.method_check(request, allowed=['get'])
        self.is_authenticated(request)
        self.throttle_check(request)

        query = request.GET.get('q', '')
        limit = int(request.GET.get('limit', settings.PANDA_DEFAULT_SEARCH_GROUPS))
        offset = int(request.GET.get('offset', 0))
        group_limit = int(request.GET.get('group_limit', settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
        group_offset = int(request.GET.get('group_offset', 0))

        response = solr.query_grouped(
            settings.SOLR_DATA_CORE,
            query,
            'dataset_slug',
            offset=offset,
            limit=limit,
            group_limit=group_limit,
            group_offset=group_offset
        )
        groups = response['grouped']['dataset_slug']['groups']

        page = PandaPaginator(
            request.GET,
            groups,
            resource_uri=request.path_info,
            count=response['grouped']['dataset_slug']['ngroups']
        ).page()

        datasets = []

        for group in groups:
            dataset_slug = group['groupValue']
            results = group['doclist']

            dataset_resource = DatasetResource()
            dataset = Dataset.objects.get(slug=dataset_slug)
            dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
            dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
            dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)

            objects = [SolrObject(obj) for obj in results['docs']]
            
            dataset_search_url = reverse('api_dataset_data_list', kwargs={
                'api_name': self._meta.api_name,
                'dataset_resource_name': 'dataset',
                'resource_name': 'data',
                'dataset_slug': dataset.slug
            })

            data_page = PandaPaginator(
                { 'limit': str(group_limit), 'offset': str(group_offset), 'q': query },
                objects,
                resource_uri=dataset_search_url,
                count=results['numFound']
            ).page()

            dataset_bundle.data.update(data_page)
            dataset_bundle.data['objects'] = []

            for obj in objects:
                data_bundle = self.build_bundle(obj=obj, request=request)
                data_bundle = self.full_dehydrate(data_bundle)
                dataset_bundle.data['objects'].append(data_bundle)

            datasets.append(dataset_bundle.data)

        page['objects'] = datasets

        self.log_throttled_access(request)

        return self.create_response(request, page)
Example #10
    def search_all_data(self, request, **kwargs):
        """
        List endpoint using Solr. Provides full-text search via the "q" parameter.
        """
        self.method_check(request, allowed=['get'])
        self.is_authenticated(request)
        self.throttle_check(request)

        query = request.GET.get('q', '')
        category = request.GET.get('category', '')
        since = request.GET.get('since', None)
        limit = int(
            request.GET.get('limit', settings.PANDA_DEFAULT_SEARCH_GROUPS))
        offset = int(request.GET.get('offset', 0))
        group_limit = int(
            request.GET.get('group_limit',
                            settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
        group_offset = int(request.GET.get('group_offset', 0))
        export = bool(request.GET.get('export', False))

        if category:
            if category != 'uncategorized':
                category = Category.objects.get(slug=category)
                dataset_slugs = category.datasets.values_list('slug',
                                                              flat=True)
            else:
                dataset_slugs = Dataset.objects.filter(
                    categories=None).values_list('slug', flat=True)

            query += ' dataset_slug:(%s)' % ' '.join(dataset_slugs)

        if since:
            query = 'last_modified:[' + since + 'Z TO *] AND (%s)' % query

        # Because users may have authenticated via headers, the request.user may
        # not be a full User instance. To be sure, we fetch one.
        user = UserProxy.objects.get(id=request.user.id)

        if export:
            task_type = ExportSearchTask

            task = TaskStatus.objects.create(
                task_name=task_type.name,
                task_description='Export search results for "%s".' % query,
                creator=user)

            task_type.apply_async(args=[query, task.id],
                                  kwargs={},
                                  task_id=task.id)
        else:
            response = solr.query_grouped(settings.SOLR_DATA_CORE,
                                          query,
                                          'dataset_slug',
                                          offset=offset,
                                          limit=limit,
                                          group_limit=group_limit,
                                          group_offset=group_offset)
            groups = response['grouped']['dataset_slug']['groups']

            page = PandaPaginator(
                request.GET,
                groups,
                resource_uri=request.path_info,
                count=response['grouped']['dataset_slug']['ngroups']).page()

            datasets = []

            for group in groups:
                dataset_slug = group['groupValue']
                results = group['doclist']

                try:
                    dataset = Dataset.objects.get(slug=dataset_slug)
                # In the event that stale data exists in Solr, skip this dataset,
                # request the invalid data be purged and return the other results.
                # Pagination may be wrong, but this is the most functional solution. (#793)
                except Dataset.DoesNotExist:
                    PurgeDataTask.apply_async(args=[dataset_slug])
                    solr.delete(settings.SOLR_DATASETS_CORE,
                                'slug:%s' % dataset_slug)

                    page['meta']['total_count'] -= 1

                    continue

                dataset_resource = DatasetResource()
                dataset_bundle = dataset_resource.build_bundle(obj=dataset,
                                                               request=request)
                dataset_bundle = dataset_resource.full_dehydrate(
                    dataset_bundle)
                dataset_bundle = dataset_resource.simplify_bundle(
                    dataset_bundle)

                objects = [SolrObject(obj) for obj in results['docs']]

                dataset_search_url = reverse('api_dataset_data_list',
                                             kwargs={
                                                 'api_name': self._meta.api_name,
                                                 'dataset_resource_name': 'dataset',
                                                 'resource_name': 'data',
                                                 'dataset_slug': dataset.slug
                                             })

                data_page = PandaPaginator(
                    {
                        'limit': str(group_limit),
                        'offset': str(group_offset),
                        'q': query
                    },
                    objects,
                    resource_uri=dataset_search_url,
                    count=results['numFound']).page()

                dataset_bundle.data.update(data_page)
                dataset_bundle.data['objects'] = []

                for obj in objects:
                    data_bundle = self.build_bundle(obj=obj, request=request)
                    data_bundle = self.full_dehydrate(data_bundle)
                    dataset_bundle.data['objects'].append(data_bundle)

                datasets.append(dataset_bundle.data)

            page['objects'] = datasets

            # Log query
            SearchLog.objects.create(user=user, dataset=None, query=query)

        self.log_throttled_access(request)

        if export:
            return self.create_response(request, 'Export queued.')
        else:
            return self.create_response(request, page)
Example #11
File: data.py Project: eads/panda
    def search_all_data(self, request, **kwargs):
        """
        List endpoint using Solr. Provides full-text search via the "q" parameter.
        """
        self.method_check(request, allowed=["get"])
        self.is_authenticated(request)
        self.throttle_check(request)

        query = request.GET.get("q", "")
        limit = int(request.GET.get("limit", settings.PANDA_DEFAULT_SEARCH_GROUPS))
        offset = int(request.GET.get("offset", 0))
        group_limit = int(request.GET.get("group_limit", settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
        group_offset = int(request.GET.get("group_offset", 0))

        response = solr.query_grouped(
            settings.SOLR_DATA_CORE,
            query,
            "dataset_slug",
            offset=offset,
            limit=limit,
            group_limit=group_limit,
            group_offset=group_offset,
        )
        groups = response["grouped"]["dataset_slug"]["groups"]

        page = PandaPaginator(
            request.GET, groups, resource_uri=request.path_info, count=response["grouped"]["dataset_slug"]["ngroups"]
        ).page()

        datasets = []

        for group in groups:
            dataset_slug = group["groupValue"]
            results = group["doclist"]

            dataset_resource = DatasetResource()
            dataset = Dataset.objects.get(slug=dataset_slug)
            dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
            dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
            dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)

            objects = [SolrObject(obj) for obj in results["docs"]]

            dataset_search_url = reverse(
                "api_dataset_data_list",
                kwargs={
                    "api_name": self._meta.api_name,
                    "dataset_resource_name": "dataset",
                    "resource_name": "data",
                    "dataset_slug": dataset.slug,
                },
            )

            data_page = PandaPaginator(
                {"limit": str(group_limit), "offset": str(group_offset), "q": query},
                objects,
                resource_uri=dataset_search_url,
                count=results["numFound"],
            ).page()

            dataset_bundle.data.update(data_page)
            dataset_bundle.data["objects"] = []

            for obj in objects:
                data_bundle = self.build_bundle(obj=obj, request=request)
                data_bundle = self.full_dehydrate(data_bundle)
                dataset_bundle.data["objects"].append(data_bundle)

            datasets.append(dataset_bundle.data)

        page["objects"] = datasets

        self.log_throttled_access(request)

        return self.create_response(request, page)
Example #12
    def search_all_data(self, request, **kwargs):
        """
        List endpoint using Solr. Provides full-text search via the "q" parameter.
        """
        self.method_check(request, allowed=['get'])
        self.is_authenticated(request)
        self.throttle_check(request)

        query = request.GET.get('q', '')
        category = request.GET.get('category', '')
        since = request.GET.get('since', None)
        limit = int(request.GET.get('limit', settings.PANDA_DEFAULT_SEARCH_GROUPS))
        offset = int(request.GET.get('offset', 0))
        group_limit = int(request.GET.get('group_limit', settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
        group_offset = int(request.GET.get('group_offset', 0))

        if category:
            if category != 'uncategorized':
                category = Category.objects.get(slug=category)
                dataset_slugs = category.datasets.values_list('slug', flat=True)
            else:
                dataset_slugs = Dataset.objects.filter(categories=None).values_list('slug', flat=True) 

            query += ' dataset_slug:(%s)' % ' '.join(dataset_slugs)

        if since:
            query = 'last_modified:[' + since + 'Z TO *] AND (%s)' % query

        response = solr.query_grouped(
            settings.SOLR_DATA_CORE,
            query,
            'dataset_slug',
            offset=offset,
            limit=limit,
            group_limit=group_limit,
            group_offset=group_offset
        )
        groups = response['grouped']['dataset_slug']['groups']

        page = PandaPaginator(
            request.GET,
            groups,
            resource_uri=request.path_info,
            count=response['grouped']['dataset_slug']['ngroups']
        ).page()

        datasets = []

        for group in groups:
            dataset_slug = group['groupValue']
            results = group['doclist']

            dataset_resource = DatasetResource()
            dataset = Dataset.objects.get(slug=dataset_slug)
            dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
            dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
            dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)

            objects = [SolrObject(obj) for obj in results['docs']]
            
            dataset_search_url = reverse('api_dataset_data_list', kwargs={
                'api_name': self._meta.api_name,
                'dataset_resource_name': 'dataset',
                'resource_name': 'data',
                'dataset_slug': dataset.slug
            })

            data_page = PandaPaginator(
                { 'limit': str(group_limit), 'offset': str(group_offset), 'q': query },
                objects,
                resource_uri=dataset_search_url,
                count=results['numFound']
            ).page()

            dataset_bundle.data.update(data_page)
            dataset_bundle.data['objects'] = []

            for obj in objects:
                data_bundle = self.build_bundle(obj=obj, request=request)
                data_bundle = self.full_dehydrate(data_bundle)
                dataset_bundle.data['objects'].append(data_bundle)

            datasets.append(dataset_bundle.data)

        page['objects'] = datasets
        
        # Log query
        SearchLog.objects.create(user=request.user, dataset=None, query=query)

        self.log_throttled_access(request)

        return self.create_response(request, page)
Example #13
File: data.py Project: Rawadx/panda
    def search_dataset_data(self, request, **kwargs):
        """
        Perform a full-text search on only one dataset.

        See ``get_list``.
        """
        dataset = Dataset.objects.get(slug=kwargs['dataset_slug'])

        try:
            query = '(%s)' % request.GET['q']
        except KeyError:
            query = ''

        since = request.GET.get('since', None)
        limit = int(request.GET.get('limit', settings.PANDA_DEFAULT_SEARCH_ROWS))
        offset = int(request.GET.get('offset', 0))
        sort = request.GET.get('sort', '_docid_ asc')

        # Only seed the clause list with the user's query when one was given,
        # so the AND-join below never produces a leading ' AND '.
        solr_query_bits = [query] if query else []
        solr_query_bits.append('dataset_slug:%s' % dataset.slug)

        if since:
            solr_query_bits.append('last_modified:[' + since + 'Z TO *]')

        response = solr.query(
            settings.SOLR_DATA_CORE,
            ' AND '.join(solr_query_bits),
            offset=offset,
            sort=sort,
            limit=limit
        )

        dataset_resource = DatasetResource()
        dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
        dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
        dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)
       
        results = [SolrObject(d) for d in response['response']['docs']]

        page = PandaPaginator(
            request.GET,
            results,
            resource_uri=request.path_info,
            count=response['response']['numFound']
        ).page() 
        
        dataset_bundle.data.update(page)
        dataset_bundle.data['objects'] = []

        for obj in results:
            bundle = self.build_bundle(obj=obj, request=request)
            bundle = self.full_dehydrate(bundle)
            dataset_bundle.data['objects'].append(bundle.data)

        # Because users may have authenticated via headers, the request.user may
        # not be a full User instance. To be sure, we fetch one.
        user = UserProxy.objects.get(id=request.user.id)
        
        SearchLog.objects.create(user=user, dataset=dataset, query=query)

        return dataset_bundle
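
This variant collects clauses in a list and AND-joins them, which makes the optional filters easier to compose. A standalone sketch with made-up clause values:

    def join_clauses(*bits):
        # Drop empty clauses so the result never starts with ' AND '.
        return ' AND '.join(bit for bit in bits if bit)

    print(join_clauses('(robbery)', 'dataset_slug:crime',
                       'last_modified:[2012-01-01T00:00:00Z TO *]'))
    # (robbery) AND dataset_slug:crime AND last_modified:[2012-01-01T00:00:00Z TO *]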
Example #14
File: data.py Project: Rawadx/panda
    def search_all_data(self, request, **kwargs):
        """
        List endpoint using Solr. Provides full-text search via the "q" parameter.
        """
        self.method_check(request, allowed=['get'])
        self.is_authenticated(request)
        self.throttle_check(request)

        try:
            query = '(%s)' % request.GET['q']
        except KeyError:
            query = ''

        category = request.GET.get('category', '')
        since = request.GET.get('since', None)
        limit = int(request.GET.get('limit', settings.PANDA_DEFAULT_SEARCH_GROUPS))
        offset = int(request.GET.get('offset', 0))
        group_limit = int(request.GET.get('group_limit', settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
        group_offset = int(request.GET.get('group_offset', 0))
        export = bool(request.GET.get('export', False))

        # Only seed the clause list with the user's query when one was given,
        # so the AND-join below never produces a leading ' AND '.
        solr_query_bits = [query] if query else []

        if category:
            if category != 'uncategorized':
                category = Category.objects.get(slug=category)
                dataset_slugs = category.datasets.values_list('slug', flat=True)
            else:
                dataset_slugs = Dataset.objects.filter(categories=None).values_list('slug', flat=True) 

            solr_query_bits.append('dataset_slug:(%s)' % ' '.join(dataset_slugs))

        if since:
            solr_query_bits.append('last_modified:[' + since + 'Z TO *]')

        # Because users may have authenticated via headers, the request.user may
        # not be a full User instance. To be sure, we fetch one.
        user = UserProxy.objects.get(id=request.user.id)

        if export:
            task_type = ExportSearchTask

            task = TaskStatus.objects.create(
                task_name=task_type.name,
                task_description=_('Export search results for "%s".') % query,
                creator=user
            )

            task_type.apply_async(
                args=[query, task.id],
                kwargs={},
                task_id=task.id
            )
        else:
            response = solr.query_grouped(
                settings.SOLR_DATA_CORE,
                ' AND '.join(solr_query_bits),
                'dataset_slug',
                offset=offset,
                limit=limit,
                group_limit=group_limit,
                group_offset=group_offset
            )
            groups = response['grouped']['dataset_slug']['groups']

            page = PandaPaginator(
                request.GET,
                groups,
                resource_uri=request.path_info,
                count=response['grouped']['dataset_slug']['ngroups']
            ).page()

            datasets = []

            for group in groups:
                dataset_slug = group['groupValue']
                results = group['doclist']
                
                try:
                    dataset = Dataset.objects.get(slug=dataset_slug)
                # In the event that stale data exists in Solr, skip this dataset,
                # request the invalid data be purged and return the other results.
                # Pagination may be wrong, but this is the most functional solution. (#793)
                except Dataset.DoesNotExist:
                    PurgeDataTask.apply_async(args=[dataset_slug])
                    solr.delete(settings.SOLR_DATASETS_CORE, 'slug:%s' % dataset_slug)

                    page['meta']['total_count'] -= 1

                    continue
                
                dataset_resource = DatasetResource()
                dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
                dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
                dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)

                objects = [SolrObject(obj) for obj in results['docs']]
                
                dataset_search_url = reverse('api_dataset_data_list', kwargs={
                    'api_name': self._meta.api_name,
                    'dataset_resource_name': 'dataset',
                    'resource_name': 'data',
                    'dataset_slug': dataset.slug
                })

                data_page = PandaPaginator(
                    { 'limit': str(group_limit), 'offset': str(group_offset), 'q': query },
                    objects,
                    resource_uri=dataset_search_url,
                    count=results['numFound']
                ).page()

                dataset_bundle.data.update(data_page)
                dataset_bundle.data['objects'] = []

                for obj in objects:
                    data_bundle = self.build_bundle(obj=obj, request=request)
                    data_bundle = self.full_dehydrate(data_bundle)
                    dataset_bundle.data['objects'].append(data_bundle)

                datasets.append(dataset_bundle.data)

            page['objects'] = datasets
            
            # Log query
            SearchLog.objects.create(user=user, dataset=None, query=query)

        self.log_throttled_access(request)

        if export:
            return self.create_response(request, _('Export queued.'))
        else:
            return self.create_response(request, page)
Example #15
    def dehydrate(self, bundle):
        """
        Always remove the password from the serialized bundle.
        """
        del bundle.data['password']

        user = bundle.obj

        if bundle.request.GET.get('notifications', '').lower() == 'true':
            from panda.api.notifications import NotificationResource

            resource = NotificationResource()

            notifications = user.notifications.all()[:settings.PANDA_NOTIFICATIONS_TO_SHOW]

            bundles = [resource.build_bundle(obj=n) for n in notifications]
            notifications = [resource.full_dehydrate(b) for b in bundles]

            bundle.data['notifications'] = notifications

        if bundle.request.GET.get('exports', '').lower() == 'true':
            from panda.api.exports import ExportResource

            resource = ExportResource()

            exports = Export.objects.filter(creator=user)

            bundles = [resource.build_bundle(obj=e) for e in exports]
            exports = [resource.full_dehydrate(b) for b in bundles]

            bundle.data['exports'] = exports

        if bundle.request.GET.get('datasets', '').lower() == 'true':
            from panda.api.datasets import DatasetResource

            resource = DatasetResource()

            datasets = user.datasets.all()

            bundles = [resource.build_bundle(obj=d) for d in datasets]
            datasets = [
                resource.simplify_bundle(resource.full_dehydrate(b))
                for b in bundles
            ]

            bundle.data['datasets'] = datasets

        if bundle.request.GET.get('search_subscriptions', '').lower() == 'true':
            from panda.api.search_subscriptions import SearchSubscriptionResource

            resource = SearchSubscriptionResource()

            subscriptions = user.search_subscriptions.all()

            bundles = [resource.build_bundle(obj=s) for s in subscriptions]
            subscriptions = [resource.full_dehydrate(b) for b in bundles]

            bundle.data['subscriptions'] = subscriptions

        return bundle
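
Each of the repeated "?flag=true" checks above reduces to the same small predicate; a sketch, with a plain dict standing in for bundle.request.GET:

    def wants(params, flag):
        # Mirrors the checks above: the flag is on only when its value is
        # the string 'true' (case-insensitive).
        return params.get(flag, '').lower() == 'true'

    params = {'notifications': 'True', 'exports': 'false'}
    print(wants(params, 'notifications'))  # True
    print(wants(params, 'exports'))        # False
    print(wants(params, 'datasets'))       # False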