def _post_process_results(s, start, end, page_size, search_results, request,
                          filter_dead) -> List[Hit]:
    """
    After fetching the search results from the back end, iterate through the
    results, perform image validation, and route thumbnails through our
    dynamically resizing caching proxy.

    :param s: The Elasticsearch Search object.
    :param start: The start of the result slice.
    :param end: The end of the result slice.
    :param page_size: The number of results to return in one page.
    :param search_results: The Elasticsearch response object containing search
    results.
    :param request: The Django request object, used to build a "reversed" URL
    to detail pages.
    :param filter_dead: Whether images should be validated.
    :return: List of results.
    """
    results = []
    to_validate = []
    for res in search_results:
        if hasattr(res.meta, 'highlight'):
            res.fields_matched = dir(res.meta.highlight)
        to_validate.append(res.url)
        if PROXY_THUMBS:
            # Route all images through a dynamically resizing caching proxy.
            # The proxy route is keyed on "<identifier>.<extension>"; the
            # extension is taken from the original full-size image URL.
            ext = res["url"].split(".")[-1]
            proxied = "http://{}{}".format(
                request.get_host(),
                reverse('thumbs', kwargs={
                    'identifier': "{}.{}".format(res["identifier"], ext)
                })
            )
            res[THUMBNAIL] = proxied
        results.append(res)
    if filter_dead:
        query_hash = get_query_hash(s)
        validate_images(query_hash, start, results, to_validate)
        if len(results) < page_size:
            # Dead links were filtered out and left the page short; widen
            # the search window by 50% and retry, unless that would exceed
            # Elasticsearch's maximum result window.
            end += int(end / 2)
            if start + end > ELASTICSEARCH_MAX_RESULT_WINDOW:
                return results
            s = s[start:end]
            search_response = s.execute()
            return _post_process_results(
                s, start, end, page_size, search_response, request,
                filter_dead
            )
    return results[:page_size]
def _post_process_results(s, start, end, page_size, search_results, request,
                          filter_dead) -> List[Hit]:
    """
    Post-process raw hits returned by the search back end.

    Attaches an absolute detail-view URL to every hit, records which fields
    matched the query, optionally validates that each image is still live,
    and rewrites insecure or provider-blacklisted thumbnails so they are
    served through our thumbnail proxy. If dead-link filtering leaves the
    page short, the search window is widened and the query retried.

    :param s: The Elasticsearch Search object.
    :param start: The start of the result slice.
    :param end: The end of the result slice.
    :param page_size: The number of results expected on a full page.
    :param search_results: The Elasticsearch response object containing
    search results.
    :param request: The Django request object, used to build a "reversed"
    URL to detail pages.
    :param filter_dead: Whether images should be validated.
    :return: List of results.
    """
    hits = []
    image_urls = []
    for hit in search_results:
        hit.detail = request.build_absolute_uri(
            reverse('image-detail', [hit.identifier]))
        if hasattr(hit.meta, 'highlight'):
            hit.fields_matched = dir(hit.meta.highlight)
        image_urls.append(hit.url)
        if PROXY_THUMBS:
            # Providers in PROXY_ALL have poor-quality or missing
            # thumbnails, so we always produce our own for them; for other
            # providers we only proxy thumbnails served over plain HTTP.
            provider = hit[PROVIDER]
            use_thumbnail = THUMBNAIL in hit and provider not in PROXY_ALL
            field = THUMBNAIL if use_thumbnail else URL
            if provider in PROXY_ALL or 'http://' in hit[field]:
                hit[THUMBNAIL] = '{proxy_url}/{width}/{original}'.format(
                    proxy_url=THUMBNAIL_PROXY_URL,
                    width=THUMBNAIL_WIDTH_PX,
                    original=hit[field])
        hits.append(hit)
    if filter_dead:
        validate_images(get_query_hash(s), start, hits, image_urls)
        if len(hits) < page_size:
            # Dead links shrank the page; widen the window by 50% and
            # retry, unless that would exceed Elasticsearch's result
            # window.
            end += int(end / 2)
            if start + end > ELASTICSEARCH_MAX_RESULT_WINDOW:
                return hits
            s = s[start:end]
            return _post_process_results(
                s, start, end, page_size, s.execute(), request, filter_dead)
    return hits[:page_size]
def _post_process_results(s, start, end, page_size, search_results, request,
                          filter_dead) -> List[Hit]:
    """
    Post-process raw hits returned by the search back end.

    Records which fields matched the query, optionally validates that each
    image is still live, and points every thumbnail at our dynamically
    resizing caching proxy. If dead-link filtering leaves the page short,
    the search window is widened and the query retried.

    :param s: The Elasticsearch Search object.
    :param start: The start of the result slice.
    :param end: The end of the result slice.
    :param page_size: The number of results expected on a full page.
    :param search_results: The Elasticsearch response object containing
    search results.
    :param request: The Django request object, used to build a "reversed"
    URL to detail pages.
    :param filter_dead: Whether images should be validated.
    :return: List of results.
    """
    processed = []
    urls_to_check = []
    for hit in search_results:
        if hasattr(hit.meta, 'highlight'):
            hit.fields_matched = dir(hit.meta.highlight)
        urls_to_check.append(hit.url)
        if PROXY_THUMBS:
            # Every thumbnail is served through the resizing cache proxy,
            # addressed by the work's identifier.
            thumb_path = reverse(
                'thumbs', kwargs={'identifier': hit["identifier"]})
            hit[THUMBNAIL] = "https://{}{}".format(
                request.get_host(), thumb_path)
        processed.append(hit)
    if filter_dead:
        validate_images(get_query_hash(s), start, processed, urls_to_check)
        if len(processed) < page_size:
            # Dead links shrank the page; widen the window by 50% and
            # retry, unless that would exceed Elasticsearch's result
            # window.
            end += int(end / 2)
            if start + end > ELASTICSEARCH_MAX_RESULT_WINDOW:
                return processed
            s = s[start:end]
            return _post_process_results(
                s, start, end, page_size, s.execute(), request, filter_dead)
    return processed[:page_size]
def _post_process_results(search_results, request, filter_dead):
    """
    Post-process raw hits returned by the search back end.

    Attaches an absolute detail-view URL to every hit, records which fields
    matched the query, optionally validates that each image is still live,
    and rewrites insecure or provider-blacklisted thumbnails so they are
    served through our thumbnail proxy.

    :param search_results: The Elasticsearch response object containing
    search results.
    :param request: The Django request object, used to build a "reversed"
    URL to detail pages.
    :param filter_dead: Whether images should be validated.
    """
    processed = []
    live_check_urls = []
    for hit in search_results:
        detail_url = request.build_absolute_uri(
            reverse('image-detail', [hit.identifier]))
        hit.detail = detail_url
        if hasattr(hit.meta, 'highlight'):
            hit.fields_matched = dir(hit.meta.highlight)
        live_check_urls.append(hit.url)
        if PROXY_THUMBS:
            # Providers in PROXY_ALL have poor-quality or missing
            # thumbnails, so we always produce our own for them; for other
            # providers we only proxy thumbnails served over plain HTTP.
            provider = hit[PROVIDER]
            if THUMBNAIL in hit and provider not in PROXY_ALL:
                field = THUMBNAIL
            else:
                field = URL
            original = hit[field]
            if provider in PROXY_ALL or 'http://' in original:
                hit[THUMBNAIL] = '{proxy_url}/{width}/{original}'.format(
                    proxy_url=THUMBNAIL_PROXY_URL,
                    width=THUMBNAIL_WIDTH_PX,
                    original=original)
        processed.append(hit)
    if filter_dead:
        validate_images(processed, live_check_urls)
    return processed
def get(self, request, format=None):
    """
    Search for images and return a paginated, serialized result set.

    Validates the query string, runs the search through
    ``search_controller``, resolves detail-view links, optionally filters
    dead image links, computes a deep-pagination-capped page count, and
    rewrites insecure thumbnails and protocol-less URLs before responding.

    :param request: The Django request object.
    :param format: DRF content-negotiation format suffix (unused here).
    :return: A DRF ``Response`` — 400 on validation/pagination errors,
    otherwise 200 with result counts and serialized results.
    """
    # Parse and validate query parameters.
    params = ImageSearchQueryStringSerializer(data=request.query_params)
    if not params.is_valid():
        return Response(
            status=400,
            data={
                "validation_error": params.errors
            }
        )
    # Only an anonymized (hashed) client IP is passed downstream.
    hashed_ip = hash(_get_user_ip(request))
    page_param = params.data[PAGE]
    page_size = params.data[PAGESIZE]
    try:
        search_results = search_controller.search(params,
                                                  index='image',
                                                  page_size=page_size,
                                                  ip=hashed_ip,
                                                  page=page_param)
    except ValueError:
        # NOTE(review): search_controller.search appears to raise
        # ValueError for slices past the allowed window — confirm.
        return Response(
            status=400,
            data={
                VALIDATION_ERROR: 'Deep pagination is not allowed.'
            }
        )
    # Fetch each result from Elasticsearch. Resolve links to detail views.
    results = []
    to_validate = []
    for result in search_results:
        url = request.build_absolute_uri(
            reverse('image-detail', [result.identifier])
        )
        result.detail = url
        to_validate.append(result.url)
        results.append(result)
    if params.data[FILTER_DEAD]:
        # Drop results whose image URLs no longer resolve.
        validate_images(results, to_validate)
    serialized_results =\
        ImageSerializer(results, many=True).data
    # Elasticsearch does not allow deep pagination of ranked queries.
    # Adjust returned page count to reflect this.
    # NOTE(review): 5000 looks like the deep-pagination result cap —
    # confirm it matches the configured Elasticsearch max result window.
    natural_page_count = int(search_results.hits.total / page_size)
    last_allowed_page = int((5000 + page_size / 2) / page_size)
    page_count = min(natural_page_count, last_allowed_page)
    result_count = search_results.hits.total
    if len(results) < page_size and page_count == 0:
        # Fewer hits than one page: report the post-filter count rather
        # than the raw Elasticsearch total.
        result_count = len(results)
    response_data = {
        'result_count': result_count,
        'page_count': page_count,
        RESULTS: serialized_results
    }
    # Post-process the search results to fix malformed URLs and insecure
    # HTTP thumbnails. Mutates response_data in place via the shared
    # serialized_results list entries.
    for idx, res in enumerate(serialized_results):
        if PROXY_THUMBS:
            provider = res[PROVIDER]
            # Proxy either the thumbnail or URL, depending on whether
            # a thumbnail was provided. Providers in PROXY_ALL always get
            # a generated thumbnail; others only when served over HTTP.
            if THUMBNAIL in res and provider not in PROXY_ALL:
                to_proxy = THUMBNAIL
            else:
                to_proxy = URL
            if 'http://' in res[to_proxy] or provider in PROXY_ALL:
                original = res[to_proxy]
                secure = '{proxy_url}/{width}/{original}'.format(
                    proxy_url=THUMBNAIL_PROXY_URL,
                    width=THUMBNAIL_WIDTH_PX,
                    original=original
                )
                response_data[RESULTS][idx][THUMBNAIL] = secure
        # Some providers omit the URL scheme; normalize to an absolute URL.
        if FOREIGN_LANDING_URL in res:
            foreign = _add_protocol(res[FOREIGN_LANDING_URL])
            response_data[RESULTS][idx][FOREIGN_LANDING_URL] = foreign
        if CREATOR_URL in res:
            creator_url = _add_protocol(res[CREATOR_URL])
            response_data[RESULTS][idx][CREATOR_URL] = creator_url
    # NOTE(review): the serializer is constructed but never validated;
    # initial_data is just response_data echoed back — confirm intended.
    serialized_response = ImageSearchResultsSerializer(data=response_data)
    return Response(status=200, data=serialized_response.initial_data)