def tryGetOrCreateSearchResultDbEntry(searchResultId, indexerId, result):
    """Return the SearchResult row with the given id, creating it if it is missing.

    :param searchResultId: id used for the lookup and, if needed, for the new row
    :param indexerId: stored as ``indexer_id`` when a new row is created
    :param result: object providing ``indexerguid``, ``title``, ``link`` and
        ``details_link``; only read when a new row has to be created
    :return: the existing or the newly created SearchResult entry
    """
    try:
        entry = SearchResult().get(SearchResult.id == searchResultId)
    except SearchResult.DoesNotExist:
        # Nothing stored under that id yet, so build the row from the result's data
        create_entry = SearchResult().create
        new_row = dict(
            id=searchResultId,
            indexer_id=indexerId,
            guid=result.indexerguid,
            title=result.title,
            link=result.link,
            details=result.details_link,
            firstFound=datetime.datetime.utcnow(),
        )
        entry = create_entry(**new_row)
    return entry
# Pattern used to find and strip NOT terms ("--term" or "!term") in a query.
# Inside the character class "\b" is a backspace and "|" a literal pipe, so a NOT term is only
# recognised when it is preceded by whitespace (i.e. not at the very start of the query).
# The pattern is deliberately left unchanged to keep query parsing stable.
_NOT_TERMS_REGEX = r"[\s|\b](\-\-|!)(?P<term>\w+)"


def _split_restriction_words(raw_words):
    """Split a comma separated word list into lowercased, stripped words, dropping empty segments."""
    return [word.lower().strip() for word in raw_words.split(",") if word]


def _restrictions_apply(setting, internal):
    """Tell if an applyRestrictions setting ("both", "internal" or "external") covers this kind of search."""
    return setting == "both" or (setting == "internal" and internal) or (setting == "external" and not internal)


def _evict_stale_cache_entries():
    """Remove entries from pseudo_cache whose last access, moved by ``replace(minutes=+5)``, lies in the past."""
    # NOTE(review): this relies on arrow treating the plural "minutes" keyword of replace() as a
    # relative shift; newer arrow releases use shift() for that - confirm against the pinned version.
    for key in list(pseudo_cache.keys()):
        if pseudo_cache[key]["last_access"].replace(minutes=+5) < arrow.utcnow():
            pseudo_cache.pop(key)


def _delete_outdated_search_results():
    """Delete stored search results that were first found before the configured retention period."""
    keepFor = config.settings.main.keepSearchResultsForDays
    # Computed once so that the count and the delete below always use the same cutoff
    cutoff = datetime.date.today() - datetime.timedelta(days=keepFor)
    oldSearchResultsCount = SearchResult.select().where(SearchResult.firstFound < cutoff).count()
    if oldSearchResultsCount > 0:
        logger.info("Deleting %d search results from database that are older than %d days" % (oldSearchResultsCount, keepFor))
        SearchResult.delete().where(SearchResult.firstFound < cutoff).execute()
    elif logger.getEffectiveLevel() == logging.DEBUG:
        # Only run the extra count query when its output would actually be logged
        logger.debug("%d search results stored in database" % SearchResult.select().count())


def _apply_word_restrictions(search_request, category):
    """Fill search_request.forbiddenWords / requiredWords and strip NOT terms from the query.

    Words come from the global settings, from the given category and from NOT terms
    ("--term" / "!term") contained in the query itself. The query is rewritten without those terms.
    """
    search_request.forbiddenWords = []
    search_request.requiredWords = []
    searching = config.settings.searching
    applyRestrictionsGlobal = _restrictions_apply(searching.applyRestrictions, search_request.internal)
    applyRestrictionsCategory = _restrictions_apply(category.applyRestrictions, search_request.internal)

    if searching.forbiddenWords and applyRestrictionsGlobal:
        logger.debug("Using configured global forbidden words: %s" % searching.forbiddenWords)
        search_request.forbiddenWords.extend(_split_restriction_words(searching.forbiddenWords))
    if searching.requiredWords and applyRestrictionsGlobal:
        logger.debug("Using configured global required words: %s" % searching.requiredWords)
        search_request.requiredWords.extend(_split_restriction_words(searching.requiredWords))

    if category.forbiddenWords and applyRestrictionsCategory:
        logger.debug("Using configured forbidden words for category %s: %s" % (category.pretty, category.forbiddenWords))
        search_request.forbiddenWords.extend(_split_restriction_words(category.forbiddenWords))
    if category.requiredWords and applyRestrictionsCategory:
        logger.debug("Using configured required words for category %s: %s" % (category.pretty, category.requiredWords))
        search_request.requiredWords.extend(_split_restriction_words(category.requiredWords))

    if search_request.query:
        # findall() returns (prefix, term) tuples because the pattern has two groups
        notTerms = [str(match[1]) for match in re.findall(_NOT_TERMS_REGEX, search_request.query)]
        if notTerms:
            logger.debug("Query before removing NOT terms: %s" % search_request.query)
            search_request.query = re.sub(_NOT_TERMS_REGEX, "", search_request.query)
            logger.debug("Query after removing NOT terms: %s" % search_request.query)
            logger.debug("Found NOT terms: %s" % ",".join(notTerms))
            search_request.forbiddenWords.extend(notTerms)


def _merge_duplicates(search_results, internal):
    """Group duplicates; internally tag every result with its group hash, externally keep one result per group."""
    merged = []
    for group in find_duplicates(search_results):
        if internal:
            for item in group:
                # We give each group of results a unique value by which they can be identified later
                item.hash = hash(group[0].details_link)
                merged.append(item)
        else:
            # We sort by age first and then by indexerscore so the newest result with the highest indexer score is chosen
            group = sorted(group, key=lambda x: x.epoch, reverse=True)
            group = sorted(group, key=lambda x: x.indexerscore, reverse=True)
            merged.append(group[0])
    return merged


def search(search_request):
    """Run (or continue) a search and return the requested window of results.

    Results are collected per ``search_request.search_hash`` in ``pseudo_cache``. A request whose
    hash is unknown or whose offset is 0 starts a new search; any other request continues the cached
    one. Indexers are queried repeatedly until enough results for ``offset + limit`` are cached or no
    indexer reports more results. As a side effect stale cache entries and outdated stored search
    results are cleaned up on every call.

    The request is modified in place: ``maxage`` (if unset and configured), ``category``,
    ``forbiddenWords``, ``requiredWords``, ``query`` (NOT terms removed) and ``offset``.

    :param search_request: request object; read attributes include ``maxage``, ``limit``, ``offset``,
        ``search_hash``, ``category``, ``internal``, ``query``, ``identifier_key``,
        ``identifier_value``, ``season``, ``episode``, ``type`` and ``username``
    :return: dict with the keys ``results`` (deep copies of the cached results: everything from the
        offset on for internal searches, otherwise at most ``limit`` entries), ``indexer_infos``,
        ``dbsearchid``, ``total`` and ``offset``
    """
    if search_request.maxage is None and config.settings.searching.maxAge:
        search_request.maxage = config.settings.searching.maxAge
        logger.info("Will ignore results older than %d days" % search_request.maxage)

    # Clean up cache
    _evict_stale_cache_entries()

    # Clean up old search results. We do this here because we don't have any background jobs and this is the function most regularly called
    _delete_outdated_search_results()

    limit = search_request.limit
    external_offset = int(search_request.offset)
    search_hash = search_request.search_hash
    categoryResult = categories.getCategoryByAnyInput(search_request.category)
    search_request.category = categoryResult

    # If it's a new search (which starts with offset 0) do it again instead of using the cached results.
    # The int-converted offset is compared so that an offset given as "0" is recognised as well.
    if search_hash not in pseudo_cache or external_offset == 0:
        logger.debug("Didn't find this query in cache or want to do a new search")
        cache_entry = {"results": [], "indexer_infos": {}, "total": 0, "last_access": arrow.utcnow(), "offset": 0}
        indexers_to_call = pick_indexers(search_request)
        for picked in indexers_to_call:
            cache_entry["indexer_infos"][picked] = {"has_more": True, "search_request": search_request, "total_included": False}
        # The Search entry is not saved here; it is handed to search_and_handle_db below
        dbsearch = Search(internal=search_request.internal,
                          query=search_request.query,
                          category=categoryResult.category.pretty,
                          identifier_key=search_request.identifier_key,
                          identifier_value=search_request.identifier_value,
                          season=search_request.season,
                          episode=search_request.episode,
                          type=search_request.type,
                          username=search_request.username)
        cache_entry["dbsearch"] = dbsearch

        # Find ignored words and parse query for ignored words
        _apply_word_restrictions(search_request, categoryResult.category)

        pseudo_cache[search_hash] = cache_entry
    else:
        cache_entry = pseudo_cache[search_hash]
        indexers_to_call = [indexer for indexer, info in cache_entry["indexer_infos"].items() if info["has_more"]]
        dbsearch = cache_entry["dbsearch"]
        logger.debug("Found search in cache")

    logger.debug("Will search at indexers as long as we don't have enough results for the current offset+limit and any indexer has more results.")
    while len(cache_entry["results"]) < external_offset + limit and len(indexers_to_call) > 0:
        logger.debug("We want %d results but have only %d so far" % ((external_offset + limit), len(cache_entry["results"])))
        logger.debug("%d indexers still have results" % len(indexers_to_call))
        search_request.offset = cache_entry["offset"]

        logger.debug("Searching indexers with offset %d" % search_request.offset)
        round_outcome = search_and_handle_db(dbsearch, {x: search_request for x in indexers_to_call})
        logger.debug("All search calls to indexers completed")
        search_results = []
        # Rebuilt from scratch: only indexers reporting more results take part in the next round
        indexers_to_call = []

        for indexer, queries_execution_result in round_outcome["results"].items():
            # Drastically improves db access time but means that if one database write fails all fail. That's a risk we need to take
            with db.atomic():
                logger.debug("%s returned %d results. Writing them to database..." % (indexer, len(queries_execution_result.results)))
                for found in queries_execution_result.results:
                    if found.title is None or found.link is None or found.indexerguid is None:
                        logger.info("Skipping result with missing data: %s" % found)
                        continue
                    searchResult, _ = SearchResult().create_or_get(indexer=indexer.indexer, guid=found.indexerguid, title=found.title, link=found.link, details=found.details_link)
                    found.searchResultId = searchResult.id
                    search_results.append(found)
                logger.debug("Written results results to database")

            indexer_info = cache_entry["indexer_infos"][indexer]
            indexer_info.update(
                {"did_search": queries_execution_result.didsearch,
                 "indexer": indexer.name,
                 "search_request": search_request,
                 "has_more": queries_execution_result.has_more,
                 "total": queries_execution_result.total,
                 "total_known": queries_execution_result.total_known,
                 "indexer_search": queries_execution_result.indexerSearchEntry})
            if queries_execution_result.has_more:
                indexers_to_call.append(indexer)
                logger.debug("%s still has more results so we could use it the next round" % indexer)

            if queries_execution_result.total_known:
                # A known total is added once per indexer, not once per round
                if not indexer_info["total_included"]:
                    cache_entry["total"] += queries_execution_result.total
                    logger.debug("%s reports %d total results. We'll include in the total this time only" % (indexer, queries_execution_result.total))
                    indexer_info["total_included"] = True
            elif queries_execution_result.has_more:
                logger.debug("%s doesn't report an exact number of results so let's just add another 100 to the total" % indexer)
                cache_entry["total"] += 100

        if search_request.internal or config.settings.searching.removeDuplicatesExternal:
            logger.debug("Searching for duplicates")
            countBefore = len(search_results)
            search_results = _merge_duplicates(search_results, search_request.internal)
            if not search_request.internal:
                countAfter = len(search_results)
                countRemoved = countBefore - countAfter
                logger.info("Removed %d duplicates from %d results" % (countRemoved, countBefore))
        search_results = sorted(search_results, key=lambda x: x.epoch, reverse=True)

        cache_entry["results"].extend(search_results)
        cache_entry["offset"] += limit

    # Deep copies are returned so the cached result objects stay untouched by the caller
    if search_request.internal:
        logger.debug("We have %d cached results and return them all because we search internally" % len(cache_entry["results"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:])
    else:
        logger.debug("We have %d cached results and return %d-%d of %d total available accounting for the limit set for the API search" % (len(cache_entry["results"]), external_offset, external_offset + limit, cache_entry["total"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:(external_offset + limit)])
    cache_entry["last_access"] = arrow.utcnow()
    logger.info("Returning %d results" % len(nzb_search_results))
    return {"results": nzb_search_results,
            "indexer_infos": cache_entry["indexer_infos"],
            "dbsearchid": cache_entry["dbsearch"].id,
            "total": cache_entry["total"],
            "offset": external_offset}