def _paginate_with_dead_link_mask(s: Search, page_size: int, page: int) -> Tuple[int, int]:
    """
    Compute the [start, end) slice of backend results needed to fill a page,
    compensating for results previously marked as dead links.

    :param s: The elasticsearch Search object.
    :param page_size: How big the page should be.
    :param page: The page number.
    :return: Tuple of start and end.
    """
    mask = get_query_mask(get_query_hash(s))
    # Over-fetch by the expected dead-link proportion so that, after
    # filtering, enough live results remain to fill the requested page.
    padded_end = ceil(page_size * page / (1 - DEAD_LINK_RATIO))

    # No validation data recorded for this query yet: fetch from the top.
    if not mask:
        return 0, padded_end

    live_total = sum(mask)
    # The mask doesn't even cover the previous pages: start past it.
    if page_size * (page - 1) > live_total:
        return len(mask), padded_end

    running_totals = list(accumulate(mask))
    start = 0
    if page > 1:
        previous_live = page_size * (page - 1)
        try:
            # First position where one more than the previous pages'
            # worth of live results has accumulated.
            start = running_totals.index(previous_live + 1)
        except ValueError:
            # That exact count never appears; begin just after the
            # position where the previous pages were satisfied.
            start = running_totals.index(previous_live) + 1

    if page_size * page > live_total:
        # Mask runs out before this page is filled; pad the end.
        end = padded_end
    else:
        end = running_totals.index(page_size * page) + 1
    return start, end
def _post_process_results(s, start, end, page_size, search_results, request, filter_dead) -> List[Hit]:
    """
    After fetching the search results from the back end, iterate through the
    results, perform image validation, and route certain thumbnails through our
    proxy.

    :param s: The Elasticsearch Search object.
    :param start: The start of the result slice.
    :param end: The end of the result slice.
    :param page_size: The number of results to return per page.
    :param search_results: The Elasticsearch response object containing search
    results.
    :param request: The Django request object, used to build a "reversed" URL
    to detail pages.
    :param filter_dead: Whether images should be validated.
    :return: List of results.
    """
    # NOTE(review): this function appears to be defined more than once in this
    # file; only the last definition takes effect at import time — confirm
    # which variant is intended and delete the others.
    results = []
    to_validate = []
    for res in search_results:
        if hasattr(res.meta, 'highlight'):
            res.fields_matched = dir(res.meta.highlight)
        to_validate.append(res.url)
        if PROXY_THUMBS:
            # Route all images through a dynamically resizing caching proxy.
            # The proxy URL is derived solely from the result's identifier and
            # the original file extension. (Removed the unused `to_proxy` /
            # `original` locals: the chosen field was never referenced.)
            ext = res["url"].split(".")[-1]
            proxied = "http://{}{}".format(
                request.get_host(),
                reverse('thumbs', kwargs={
                    'identifier': "{}.{}".format(res["identifier"], ext)
                }))
            res[THUMBNAIL] = proxied
        results.append(res)
    if filter_dead:
        query_hash = get_query_hash(s)
        validate_images(query_hash, start, results, to_validate)
        if len(results) < page_size:
            # Dead links were filtered out and the page is short: widen the
            # slice by half and retry, unless that would exceed the backend's
            # maximum result window.
            end += int(end / 2)
            if start + end > ELASTICSEARCH_MAX_RESULT_WINDOW:
                return results
            s = s[start:end]
            search_response = s.execute()
            return _post_process_results(
                s, start, end, page_size, search_response, request, filter_dead
            )
    return results[:page_size]
def _post_process_results(s, start, end, page_size, search_results, request, filter_dead) -> List[Hit]:
    """
    After fetching the search results from the back end, iterate through the
    results, add links to detail views, perform image validation, and route
    certain thumbnails through our proxy.

    :param s: The Elasticsearch Search object.
    :param start: The start of the result slice.
    :param end: The end of the result slice.
    :param page_size: The number of results to return per page.
    :param search_results: The Elasticsearch response object containing search
    results.
    :param request: The Django request object, used to build a "reversed" URL
    to detail pages.
    :param filter_dead: Whether images should be validated.
    :return: List of results.
    """
    # NOTE(review): this function appears to be defined more than once in this
    # file; only the last definition takes effect at import time — confirm
    # which variant is intended and delete the others.
    results = []
    to_validate = []
    for res in search_results:
        # Fix: `reverse`'s second positional parameter is `urlconf`, not the
        # URL arguments — the args list must be passed by keyword.
        url = request.build_absolute_uri(
            reverse('image-detail', args=[res.identifier]))
        res.detail = url
        if hasattr(res.meta, 'highlight'):
            res.fields_matched = dir(res.meta.highlight)
        to_validate.append(res.url)
        if PROXY_THUMBS:
            # Proxy thumbnails from providers who don't provide SSL. We also
            # have a list of providers that have poor quality or no thumbnails,
            # so we produce our own on-the-fly.
            provider = res[PROVIDER]
            if THUMBNAIL in res and provider not in PROXY_ALL:
                to_proxy = THUMBNAIL
            else:
                to_proxy = URL
            if 'http://' in res[to_proxy] or provider in PROXY_ALL:
                original = res[to_proxy]
                secure = '{proxy_url}/{width}/{original}'.format(
                    proxy_url=THUMBNAIL_PROXY_URL,
                    width=THUMBNAIL_WIDTH_PX,
                    original=original)
                res[THUMBNAIL] = secure
        results.append(res)
    if filter_dead:
        query_hash = get_query_hash(s)
        validate_images(query_hash, start, results, to_validate)
        if len(results) < page_size:
            # Dead links were filtered out and the page is short: widen the
            # slice by half and retry, unless that would exceed the backend's
            # maximum result window.
            end += int(end / 2)
            if start + end > ELASTICSEARCH_MAX_RESULT_WINDOW:
                return results
            s = s[start:end]
            search_response = s.execute()
            return _post_process_results(
                s, start, end, page_size, search_response, request, filter_dead
            )
    return results[:page_size]
def _post_process_results(s, start, end, page_size, search_results, request, filter_dead) -> List[Hit]:
    """
    After fetching the search results from the back end, iterate through the
    results, perform image validation, and route certain thumbnails through our
    proxy.

    :param s: The Elasticsearch Search object.
    :param start: The start of the result slice.
    :param end: The end of the result slice.
    :param page_size: The number of results to return per page.
    :param search_results: The Elasticsearch response object containing search
    results.
    :param request: The Django request object, used to build a "reversed" URL
    to detail pages.
    :param filter_dead: Whether images should be validated.
    :return: List of results.
    """
    hits = []
    urls_to_check = []
    for hit in search_results:
        if hasattr(hit.meta, 'highlight'):
            hit.fields_matched = dir(hit.meta.highlight)
        urls_to_check.append(hit.url)
        if PROXY_THUMBS:
            # Route all images through a dynamically resizing caching proxy.
            thumb_path = reverse(
                'thumbs', kwargs={'identifier': hit["identifier"]})
            hit[THUMBNAIL] = "https://{}{}".format(
                request.get_host(), thumb_path)
        hits.append(hit)

    if not filter_dead:
        return hits[:page_size]

    validate_images(get_query_hash(s), start, hits, urls_to_check)
    if len(hits) >= page_size:
        return hits[:page_size]

    # Dead links were filtered out and the page is short: widen the slice by
    # half and retry, unless that would exceed the backend's maximum window.
    end += int(end / 2)
    if start + end > ELASTICSEARCH_MAX_RESULT_WINDOW:
        return hits
    s = s[start:end]
    return _post_process_results(
        s, start, end, page_size, s.execute(), request, filter_dead
    )