def mock_find(resource, req, lookup, p):
    """Stubbed search provider ``find`` that always yields one canned picture item.

    Ignores all arguments and returns an ElasticCursor containing a single
    external-source AAP Image doc with a total of 1.
    """
    canned_doc = {
        'fetch_endpoint': 'search_providers_proxy',
        'pubstatus': 'usable',
        'slugline': 'Fish on a bike',
        'byline': 'Fred Smith/AAP PHOTOS',
        '_id': '20200108001362610429',
        '_type': 'externalsource',
        'original_source': 'AAP Image/AAP',
        'description_text': 'Sydney to the Gong some years ago',
        'guid': '20200108001362610429',
        'type': 'picture',
        'firstcreated': utcnow(),
        'ednote': 'Not for publication',
        'source': 'AAP Image',
        'headline': 'Fish on a bike',
        'versioncreated': utcnow(),
        'archive_description': 'Sydney to the Gong some years ago',
    }
    result = {'docs': [canned_doc], 'total': 1}
    return ElasticCursor(
        docs=result['docs'],
        hits={'hits': result, 'aggregations': None},
    )
def mock_find_progress(resource, req, lookup, p):
    """Stubbed search provider ``find`` that simulates an in-progress search.

    Ignores all arguments and returns an ElasticCursor wrapping a single
    empty doc with a total of 0.
    """
    empty_result = {'docs': [{}], 'total': 0}
    return ElasticCursor(
        docs=empty_result['docs'],
        hits={'hits': empty_result, 'aggregations': None},
    )
def _parse_hits(self, hits):
    """Format raw elastic hits into docs and wrap them in an ElasticCursor.

    Resolves the resource schema per hit from its ``_type`` (planning or
    events), formats each hit with its schema and date fields, and attaches
    HATEOAS self links pointing at the item's own resource endpoint.
    """
    # Fetch both schemas up front so each hit only needs a dict lookup.
    type_schemas = {
        'planning': self._get_resource_schema('planning'),
        'events': self._get_resource_schema('events'),
    }
    formatted = []
    for hit in hits.get('hits', {}).get('hits', []):
        schema = type_schemas.get(hit.get('_type'))
        doc = format_doc(hit, schema, get_dates(schema))
        hateoas = {
            'self': {
                'title': doc['_type'],
                'href': '/{}/{{_id}}'.format(doc['_type']),
            }
        }
        build_custom_hateoas(hateoas, doc)
        formatted.append(doc)
    return ElasticCursor(hits, formatted)
def find(self, resource, req, lookup):
    """Execute a GET search against the AAP Multimedia API.

    Translates the elastic-style request in ``req`` into the API's query
    parameters and wraps the parsed response in an ElasticCursor.

    :param resource: resource name (unused here)
    :param req: search request; carries the elastic ``query`` plus optional
        ``from``/``size`` paging values
    :param lookup: lookup dict (unused here)
    :return: ElasticCursor over the parsed hits
    """
    url = self._app.config['AAP_MM_SEARCH_URL'] + '/Assets/search'
    query_keywords = '*:*'
    if 'query' in req['query']['filtered']:
        query_keywords = req['query']['filtered']['query']['query_string'][
            'query']
    # Parse the page size once and guard against zero/negative values,
    # which would otherwise raise ZeroDivisionError when deriving the page
    # number (same guard as the POST-based find in this file).
    size = int(req.get('size', '25'))
    if size <= 0:
        size = 25
    fields = {
        'query': query_keywords,
        'pageSize': str(size),
        # The API pages from 1; elastic 'from' is a 0-based offset.
        'pageNumber': str(int(req.get('from', '0')) // size + 1)
    }
    r = self._http.request('GET', url, fields=fields, headers=self._headers)
    hits = self._parse_hits(json.loads(r.data.decode('UTF-8')))
    return ElasticCursor(docs=hits['docs'], hits={'hits': hits})
def get(self, req, lookup):
    """Return a list of items related to the item given by lookup['item_id'].

    The item is fetched from archive_autosave first, falling back to
    archive; its extracted keywords drive a query-string search across the
    archive, published and archived repos. An empty cursor is returned when
    no keywords are found.
    """
    if "item_id" not in lookup:
        raise SuperdeskApiError.badRequestError(
            _("The item identifier is required"))
    item_id = lookup["item_id"]
    # Prefer the autosaved revision; fall back to the stored archive item.
    item = (get_resource_service("archive_autosave").find_one(req=None, _id=item_id)
            or get_resource_service("archive").find_one(req=None, _id=item_id))
    if not item:
        raise SuperdeskApiError.notFoundError(
            _("Invalid item identifer"))
    keywords = self.provider.get_keywords(self._transform(item))
    if not keywords:
        return ElasticCursor([])
    keyword_query = {
        "query": {
            "filtered": {
                "query": {
                    "query_string": {
                        "query": " ".join(kwd["text"] for kwd in keywords)
                    }
                }
            }
        }
    }
    search_req = ParsedRequest()
    search_req.args = {
        "source": json.dumps(keyword_query),
        "repo": "archive,published,archived",
    }
    return get_resource_service("search").get(req=search_req, lookup=None)
def get(self, req, lookup):
    """Find items related to the item identified by lookup['item_id'].

    Fetches the item (autosave first, then archive), asks the semantics
    provider for keywords, and searches archive/published/archived with a
    query-string built from those keywords. Empty cursor when there are no
    keywords; 400/404 errors for a missing id or unknown item.
    """
    if 'item_id' not in lookup:
        raise SuperdeskApiError.badRequestError(
            _('The item identifier is required'))
    target_id = lookup['item_id']
    found = get_resource_service('archive_autosave').find_one(
        req=None, _id=target_id)
    if not found:
        # Not autosaved — try the persisted archive copy.
        found = get_resource_service('archive').find_one(
            req=None, _id=target_id)
    if not found:
        raise SuperdeskApiError.notFoundError(
            _('Invalid item identifer'))
    keywords = self.provider.get_keywords(self._transform(found))
    if not keywords:
        return ElasticCursor([])
    terms = ' '.join(kwd['text'] for kwd in keywords)
    source = {
        'query': {
            'filtered': {
                'query': {
                    'query_string': {
                        'query': terms
                    }
                }
            }
        }
    }
    related_req = ParsedRequest()
    related_req.args = {
        'source': json.dumps(source),
        'repo': 'archive,published,archived',
    }
    return get_resource_service('search').get(req=related_req, lookup=None)
def find(self, resource, req, lookup):
    """ Called to execute a search against the Scanpix API. It attempts to translate the search
    request passed in req to a suitable form for a search request against the API. It parses the response into a
    suitable ElasticCursor.
    :param resource:
    :param req:
    :param lookup:
    :return:
    """
    url = self._app.config['SCANPIX_SEARCH_URL'] + '/search'
    # Request payload for the Scanpix API; 'any' searches all media groups.
    data = {'mainGroup': 'any'}
    if 'query' in req['query']['filtered']:
        # Map superdesk field names onto the Scanpix equivalents before
        # extracting per-field parameters from the query string.
        query = req['query']['filtered']['query']['query_string']['query'] \
            .replace('slugline:', 'keywords:') \
            .replace('description:', 'caption:')
        # Black & White
        # extract_params raises KeyError when the param is absent; absence
        # simply means the filter is not applied.
        try:
            bw = bool(int(extract_params(query, 'bw')['bw']))
        except KeyError:
            pass
        else:
            if bw:
                data['saturation'] = {'max': 1}
        # Clear Edge
        try:
            clear_edge = bool(
                int(extract_params(query, 'clear_edge')['clear_edge']))
        except KeyError:
            pass
        else:
            if clear_edge:
                data['clearEdge'] = True
        # subscription
        try:
            data['subscription'] = extract_params(
                query, 'subscription')['subscription']
        except KeyError:
            data[
                'subscription'] = 'subscription'  # this is requested as a default value
        if 'ntbtema' in resource and data['subscription'] == 'subscription':
            # small hack for SDNTB-250
            data['subscription'] = 'punchcard'
        if data['subscription'] == 'all':
            # 'all' means no subscription filter at all — drop the key.
            del data['subscription']
        text_params = extract_params(
            query, ('headline', 'keywords', 'caption', 'text'))
        # combine all possible text params to use the q field.
        data['searchString'] = ' '.join(text_params.values())
        try:
            ids = extract_params(query, 'id')['id'].split()
        except KeyError:
            pass
        else:
            data['refPtrs'] = ids
    # Translate the elastic post_filter (date range / content type) into
    # the Scanpix request fields.
    for criterion in req.get('post_filter', {}).get('and', {}):
        if 'range' in criterion:
            start = None
            end = None
            filter_data = criterion.get('range', {})
            if 'firstcreated' in filter_data:
                created = criterion['range']['firstcreated']
                if 'gte' in created:
                    # Only the date part (YYYY-MM-DD) is sent.
                    start = created['gte'][0:10]
                if 'lte' in created:
                    end = created['lte'][0:10]
            # if there is a special start and no end it's one of the date buttons
            if start and not end:
                if start == 'now-24H':
                    data['timeLimit'] = 'last24'
                if start == 'now-1w':
                    data['timeLimit'] = 'lastweek'
                if start == 'now-1M':
                    data['timeLimit'] = 'lastmonth'
            elif start or end:
                # Explicit date range: either bound may be empty.
                data['archived'] = {'min': '', 'max': ''}
                if start:
                    data['archived']['min'] = start
                if end:
                    data['archived']['max'] = end
        if 'terms' in criterion:
            if 'type' in criterion.get('terms', {}):
                type_ = criterion['terms']['type']
                if type_ == CONTENT_TYPE.VIDEO:
                    data['mainGroup'] = 'video'
    # Paging: the API takes an offset plus a result count (minimum 10).
    offset, limit = int(req.get('from', '0')), max(10, int(req.get('size', '25')))
    data['offset'] = offset
    data['showNumResults'] = limit
    r = self._request(url, data)
    hits = self._parse_hits(r.json())
    return ElasticCursor(docs=hits['docs'], hits={'hits': hits})
def find(self, resource, req, lookup):
    """ Called to execute a search against the AAP Mulitmedia API. It attempts to translate the search
    request passed in req to a suitable form for a search request against the API. It parses the response into a
    suitable ElasticCursor, the front end will never know.
    :param resource:
    :param req:
    :param lookup:
    :return:
    """
    # Authenticate lazily on the first search.
    if self._headers is None:
        self.__set_auth_cookie(self._app)
    url = self._app.config['AAP_MM_SEARCH_URL'] + '/Assets/search'
    query_keywords = '*:*'
    if 'query' in req['query']['filtered']:
        query_keywords = req['query']['filtered']['query']['query_string'][
            'query']
        # Map superdesk field names onto the AAP Multimedia equivalents.
        query_keywords = query_keywords.replace('slugline:', 'objectname:')
        query_keywords = query_keywords.replace('description:',
                                                'captionabstract:')
    # POST body filters built from the elastic post_filter criteria.
    fields = {}
    for criterion in req.get('post_filter', {}).get('and', {}):
        # parse out the date range if possible
        if 'range' in criterion:
            start = None
            end = None
            daterange = None
            if 'firstcreated' in criterion.get('range', {}):
                if 'gte' in criterion['range']['firstcreated']:
                    # Only the date part (YYYY-MM-DD) is used.
                    start = criterion['range']['firstcreated']['gte'][0:10]
                if 'lte' in criterion['range']['firstcreated']:
                    end = criterion['range']['firstcreated']['lte'][0:10]
            # if there is a special start and no end it's one of the date buttons
            if start and not end:
                if start == 'now-24H':
                    daterange = {
                        'Dates': ['[NOW/HOUR-24HOURS TO NOW/HOUR]']
                    }
                if start == 'now-1w':
                    daterange = {'Dates': ['[NOW/DAY-7DAYS TO NOW/DAY]']}
                if start == 'now-1M':
                    daterange = {'Dates': ['[NOW/DAY-1MONTH TO NOW/DAY]']}
            # we've got something but no daterange set above
            if (start or end) and not daterange:
                daterange = {
                    'DateRange': [{
                        'Start': start,
                        'End': end
                    }],
                    'DateCreatedFilter': 'true'
                }
            if daterange:
                fields.update(daterange)
        if 'terms' in criterion:
            if 'type' in criterion.get('terms', {}):
                fields.update({'MediaTypes': criterion['terms']['type']})
            if 'credit' in criterion.get('terms', {}):
                fields.update({'Credits': criterion['terms']['credit']})
            if 'anpa_category.name' in criterion.get('terms', {}):
                # Translate category names back to their qcodes where a
                # matching subject code exists; otherwise pass the name.
                cat_list = []
                for cat in criterion['terms']['anpa_category.name']:
                    qcode = [
                        key for key, value in subject_codes.items()
                        if value == cat
                    ]
                    if qcode:
                        for code in qcode:
                            cat_list.append(code)
                    else:
                        cat_list.append(cat)
                fields.update({'Categories': cat_list})
    # Parse the page size once (the original parsed it twice) and guard
    # against zero/negative values to avoid a ZeroDivisionError below.
    requested_size = int(req.get('size', '25'))
    size = requested_size if requested_size > 0 else 25
    query = {
        'Query': query_keywords,
        'pageSize': str(size),
        # The API pages from 1; elastic 'from' is a 0-based offset.
        'pageNumber': str(int(req.get('from', '0')) // size + 1)
    }
    r = self._http.urlopen('POST',
                           url + '?' + urllib.parse.urlencode(query),
                           body=json.dumps(fields),
                           headers=self._headers)
    hits = self._parse_hits(json.loads(r.data.decode('UTF-8')))
    return ElasticCursor(docs=hits['docs'],
                         hits={
                             'hits': hits,
                             'aggregations': self._parse_aggregations(hits)
                         })