def mock_find(resource, req, lookup, p):
    """Mocked search provider find: always returns one fixed picture item.

    The single hard-coded doc is wrapped in an ElasticCursor so callers can
    treat the result like a real search response.
    """
    fixture = {
        'fetch_endpoint': 'search_providers_proxy',
        'pubstatus': 'usable',
        'slugline': 'Fish on a bike',
        'byline': 'Fred Smith/AAP PHOTOS',
        '_id': '20200108001362610429',
        '_type': 'externalsource',
        'original_source': 'AAP Image/AAP',
        'description_text': 'Sydney to the Gong some years ago',
        'guid': '20200108001362610429',
        'type': 'picture',
        'firstcreated': utcnow(),
        'ednote': 'Not for publication',
        'source': 'AAP Image',
        'headline': 'Fish on a bike',
        'versioncreated': utcnow(),
        'archive_description': 'Sydney to the Gong some years ago',
    }
    response = {'docs': [fixture], 'total': 1}
    return ElasticCursor(
        docs=response['docs'],
        hits={'hits': response, 'aggregations': None},
    )
 def mock_find_progress(resource, req, lookup, p):
     """Mocked search provider find for the in-progress case.

     Returns an ElasticCursor containing a single empty doc and a total of
     zero, mimicking a search that has produced no usable results yet.
     """
     placeholder = {}
     response = {'docs': [placeholder], 'total': 0}
     return ElasticCursor(
         docs=response['docs'],
         hits={'hits': response, 'aggregations': None},
     )
# Example 3 (score: 0)
    def _parse_hits(self, hits):
        """Turn a raw Elasticsearch response into an ElasticCursor of formatted docs.

        Each hit is formatted with the schema matching its ``_type``
        ('planning' or 'events') and decorated with a self-link HATEOAS entry.
        """
        resource_schemas = {
            name: self._get_resource_schema(name)
            for name in ('planning', 'events')
        }

        formatted_docs = []
        for hit in hits.get('hits', {}).get('hits', []):
            schema = resource_schemas.get(hit.get('_type'))
            doc = format_doc(hit, schema, get_dates(schema))
            # Attach a self link built from the doc's own type.
            hateoas = {
                'self': {
                    'title': doc['_type'],
                    'href': '/{}/{{_id}}'.format(doc['_type']),
                }
            }
            build_custom_hateoas(hateoas, doc)
            formatted_docs.append(doc)

        return ElasticCursor(hits, formatted_docs)
# Example 4 (score: 0)
 def find(self, resource, req, lookup):
     """
     Execute a search against the AAP Multimedia API and wrap the response
     in an ElasticCursor.

     :param resource: resource name being searched (unused here)
     :param req: superdesk/elastic style search request to translate
     :param lookup: unused here
     :return: ElasticCursor wrapping the parsed API response
     """
     url = self._app.config['AAP_MM_SEARCH_URL'] + '/Assets/search'
     query_keywords = '*:*'
     if 'query' in req['query']['filtered']:
         query_keywords = req['query']['filtered']['query']['query_string'][
             'query']

     # Guard against a non-positive page size, which would otherwise raise
     # ZeroDivisionError in the page-number computation below (same guard
     # as the POST-based variant of this search uses).
     size = int(req.get('size', '25'))
     if size <= 0:
         size = 25
     fields = {
         'query': query_keywords,
         'pageSize': str(size),
         # API pages are 1-based; req 'from' is a 0-based offset.
         'pageNumber': str(int(req.get('from', '0')) // size + 1)
     }
     r = self._http.request('GET',
                            url,
                            fields=fields,
                            headers=self._headers)
     hits = self._parse_hits(json.loads(r.data.decode('UTF-8')))
     return ElasticCursor(docs=hits['docs'], hits={'hits': hits})
# Example 5 (score: 0)
    def get(self, req, lookup):
        """
        Return a list of items related to the given item. The given item id is
        retrieved from the lookup dictionary as 'item_id'.

        :param req: request object (ignored for the lookup; rebuilt for the search)
        :param lookup: dict that must contain an 'item_id' key
        :return: search cursor of related items (empty ElasticCursor when no
            keywords can be extracted)
        :raises SuperdeskApiError: badRequestError when 'item_id' is missing,
            notFoundError when the item does not exist
        """
        if "item_id" not in lookup:
            raise SuperdeskApiError.badRequestError(
                _("The item identifier is required"))
        # Prefer the autosaved copy of the item; fall back to the archive.
        item = get_resource_service("archive_autosave").find_one(
            req=None, _id=lookup["item_id"])
        if not item:
            item = get_resource_service("archive").find_one(
                req=None, _id=lookup["item_id"])
            if not item:
                # Fixed typo in user-facing message ("identifer" -> "identifier").
                raise SuperdeskApiError.notFoundError(
                    _("Invalid item identifier"))

        keywords = self.provider.get_keywords(self._transform(item))
        if not keywords:
            # Nothing to search for.
            return ElasticCursor([])

        # Full-text search across all extracted keyword texts, space separated.
        query = {
            "query": {
                "filtered": {
                    "query": {
                        "query_string": {
                            "query": " ".join(kwd["text"] for kwd in keywords)
                        }
                    }
                }
            }
        }

        req = ParsedRequest()
        req.args = {
            "source": json.dumps(query),
            "repo": "archive,published,archived"
        }

        return get_resource_service("search").get(req=req, lookup=None)
# Example 6 (score: 0)
    def get(self, req, lookup):
        """
        Return a list of items related to the given item. The given item id is
        retrieved from the lookup dictionary as 'item_id'.

        :param req: request object (ignored for the lookup; rebuilt for the search)
        :param lookup: dict that must contain an 'item_id' key
        :return: search cursor of related items (empty ElasticCursor when no
            keywords can be extracted)
        :raises SuperdeskApiError: badRequestError when 'item_id' is missing,
            notFoundError when the item does not exist
        """
        if 'item_id' not in lookup:
            raise SuperdeskApiError.badRequestError(
                _('The item identifier is required'))
        # Prefer the autosaved copy of the item; fall back to the archive.
        item = get_resource_service('archive_autosave').find_one(
            req=None, _id=lookup['item_id'])
        if not item:
            item = get_resource_service('archive').find_one(
                req=None, _id=lookup['item_id'])
            if not item:
                # Fixed typo in user-facing message ("identifer" -> "identifier").
                raise SuperdeskApiError.notFoundError(
                    _('Invalid item identifier'))

        keywords = self.provider.get_keywords(self._transform(item))
        if not keywords:
            # Nothing to search for.
            return ElasticCursor([])

        # Full-text search across all extracted keyword texts, space separated.
        query = {
            'query': {
                'filtered': {
                    'query': {
                        'query_string': {
                            'query': ' '.join(kwd['text'] for kwd in keywords)
                        }
                    }
                }
            }
        }

        req = ParsedRequest()
        req.args = {
            'source': json.dumps(query),
            'repo': 'archive,published,archived'
        }

        return get_resource_service('search').get(req=req, lookup=None)
# Example 7 (score: 0)
    def find(self, resource, req, lookup):
        """
        Called to execute a search against the Scanpix API. It attempts to translate the search request
        passed in req to a suitable form for a search request against the API. It parses the response into a
        suitable ElasticCursor.
        :param resource: resource name being searched; used to detect the 'ntbtema' provider
        :param req: superdesk/elastic style search request to translate
        :param lookup: unused here
        :return: ElasticCursor wrapping the parsed API response
        """
        url = self._app.config['SCANPIX_SEARCH_URL'] + '/search'
        data = {'mainGroup': 'any'}

        if 'query' in req['query']['filtered']:
            # Map superdesk field names onto their Scanpix equivalents.
            query = req['query']['filtered']['query']['query_string']['query'] \
                .replace('slugline:', 'keywords:') \
                .replace('description:', 'caption:')

            # Black & White
            # bw:1 in the query maps to a max-saturation filter.
            try:
                bw = bool(int(extract_params(query, 'bw')['bw']))
            except KeyError:
                pass
            else:
                if bw:
                    data['saturation'] = {'max': 1}

            # Clear Edge
            try:
                clear_edge = bool(
                    int(extract_params(query, 'clear_edge')['clear_edge']))
            except KeyError:
                pass
            else:
                if clear_edge:
                    data['clearEdge'] = True

            # subscription
            try:
                data['subscription'] = extract_params(
                    query, 'subscription')['subscription']
            except KeyError:
                data[
                    'subscription'] = 'subscription'  # this is requested as a default value

            if 'ntbtema' in resource and data['subscription'] == 'subscription':
                # small hack for SDNTB-250
                data['subscription'] = 'punchcard'

            # 'all' means no subscription filter at all.
            if data['subscription'] == 'all':
                del data['subscription']

            text_params = extract_params(
                query, ('headline', 'keywords', 'caption', 'text'))
            # combine all possible text params to use the q field.
            data['searchString'] = ' '.join(text_params.values())

            # Explicit image references (whitespace-separated list of ids).
            try:
                ids = extract_params(query, 'id')['id'].split()
            except KeyError:
                pass
            else:
                data['refPtrs'] = ids

        # Translate post filters (date ranges, content type) into API fields.
        for criterion in req.get('post_filter', {}).get('and', {}):
            if 'range' in criterion:
                start = None
                end = None
                filter_data = criterion.get('range', {})

                if 'firstcreated' in filter_data:
                    created = criterion['range']['firstcreated']
                    # Only the date portion (YYYY-MM-DD) of the bound is used.
                    if 'gte' in created:
                        start = created['gte'][0:10]
                    if 'lte' in created:
                        end = created['lte'][0:10]

                # if there is a special start and no end it's one of the date buttons
                if start and not end:
                    if start == 'now-24H':
                        data['timeLimit'] = 'last24'
                    if start == 'now-1w':
                        data['timeLimit'] = 'lastweek'
                    if start == 'now-1M':
                        data['timeLimit'] = 'lastmonth'
                elif start or end:
                    # Explicit date range; missing bounds stay as empty strings.
                    data['archived'] = {'min': '', 'max': ''}
                    if start:
                        data['archived']['min'] = start
                    if end:
                        data['archived']['max'] = end

            if 'terms' in criterion:
                if 'type' in criterion.get('terms', {}):
                    type_ = criterion['terms']['type']
                    if type_ == CONTENT_TYPE.VIDEO:
                        data['mainGroup'] = 'video'

        # Paging: clamps the page size to a minimum of 10 — presumably an API
        # requirement; TODO confirm against the Scanpix documentation.
        offset, limit = int(req.get('from',
                                    '0')), max(10, int(req.get('size', '25')))
        data['offset'] = offset
        data['showNumResults'] = limit
        r = self._request(url, data)
        hits = self._parse_hits(r.json())
        return ElasticCursor(docs=hits['docs'], hits={'hits': hits})
# Example 8 (score: 0)
    def find(self, resource, req, lookup):
        """
        Called to execute a search against the AAP Mulitmedia API. It attempts to translate the search request
        passed in req to a suitable form for a search request against the API. It parses the response into a
        suitable ElasticCursor, the front end will never know.
        :param resource: resource name being searched (unused here)
        :param req: superdesk/elastic style search request to translate
        :param lookup: unused here
        :return: ElasticCursor wrapping the parsed API response
        """
        # Lazily authenticate on the first search; presumably __set_auth_cookie
        # populates self._headers — confirm against the rest of this class.
        if self._headers is None:
            self.__set_auth_cookie(self._app)

        url = self._app.config['AAP_MM_SEARCH_URL'] + '/Assets/search'
        query_keywords = '*:*'
        if 'query' in req['query']['filtered']:
            query_keywords = req['query']['filtered']['query']['query_string'][
                'query']
            # Map superdesk field names onto their AAP Multimedia equivalents.
            query_keywords = query_keywords.replace('slugline:', 'objectname:')
            query_keywords = query_keywords.replace('description:',
                                                    'captionabstract:')

        # Filters accumulated here are POSTed as the JSON request body.
        fields = {}
        for criterion in req.get('post_filter', {}).get('and', {}):
            # parse out the date range if possible
            if 'range' in criterion:
                start = None
                end = None
                daterange = None
                if 'firstcreated' in criterion.get('range', {}):
                    # Only the date portion (YYYY-MM-DD) of the bound is used.
                    if 'gte' in criterion['range']['firstcreated']:
                        start = criterion['range']['firstcreated']['gte'][0:10]
                    if 'lte' in criterion['range']['firstcreated']:
                        end = criterion['range']['firstcreated']['lte'][0:10]
                # if there is a special start and no end it's one of the date buttons
                if start and not end:
                    if start == 'now-24H':
                        daterange = {
                            'Dates': ['[NOW/HOUR-24HOURS TO NOW/HOUR]']
                        }
                    if start == 'now-1w':
                        daterange = {'Dates': ['[NOW/DAY-7DAYS TO NOW/DAY]']}
                    if start == 'now-1M':
                        daterange = {'Dates': ['[NOW/DAY-1MONTH TO NOW/DAY]']}
                # we've got something but no daterange set above
                if (start or end) and not daterange:
                    daterange = {
                        'DateRange': [{
                            'Start': start,
                            'End': end
                        }],
                        'DateCreatedFilter': 'true'
                    }
                if daterange:
                    fields.update(daterange)

            if 'terms' in criterion:
                if 'type' in criterion.get('terms', {}):
                    fields.update({'MediaTypes': criterion['terms']['type']})
                if 'credit' in criterion.get('terms', {}):
                    fields.update({'Credits': criterion['terms']['credit']})
                if 'anpa_category.name' in criterion.get('terms', {}):
                    # Map category names back to their qcodes where known;
                    # names with no matching qcode are passed through as-is.
                    cat_list = []
                    for cat in criterion['terms']['anpa_category.name']:
                        qcode = [
                            key for key, value in subject_codes.items()
                            if value == cat
                        ]
                        if qcode:
                            for code in qcode:
                                cat_list.append(code)
                        else:
                            cat_list.append(cat)
                    fields.update({'Categories': cat_list})

        # Guard against a non-positive page size; default to 25.
        size = int(req.get('size',
                           '25')) if int(req.get('size', '25')) > 0 else 25
        query = {
            'Query': query_keywords,
            'pageSize': str(size),
            # API pages are 1-based; req 'from' is a 0-based offset.
            'pageNumber': str(int(req.get('from', '0')) // size + 1)
        }

        # Paging goes in the query string; the filters travel as a JSON body.
        r = self._http.urlopen('POST',
                               url + '?' + urllib.parse.urlencode(query),
                               body=json.dumps(fields),
                               headers=self._headers)
        hits = self._parse_hits(json.loads(r.data.decode('UTF-8')))
        return ElasticCursor(docs=hits['docs'],
                             hits={
                                 'hits': hits,
                                 'aggregations': self._parse_aggregations(hits)
                             })