def get_search_requests(page=0, limit=100, type=None):
    query = Search().select(Search.time, Search.internal, Search.query, Search.identifier_key, Search.identifier_value, Search.category, Search.season, Search.episode, Search.type)
    if type is not None and type != "All":
        query = query.where(Search.internal) if type == "Internal" else query.where(~Search.internal)
    total_requests = query.count()
    requests = list(query.order_by(Search.time.desc()).paginate(page, limit).dicts())
    search_requests = {"totalRequests": total_requests, "searchRequests": requests}
    return search_requests
def get_search_requests(page=0, limit=100, type=None):
    query = Search().select(Search.time, Search.internal, Search.query, Search.identifier_key, Search.identifier_value, Search.category, Search.season, Search.episode, Search.type, Search.username,
                            TvIdCache.title.alias("tvtitle"), MovieIdCache.title.alias("movietitle")) \
        .join(TvIdCache, JOIN.LEFT_OUTER, on=(((Search.identifier_value == TvIdCache.tvdb) & (Search.identifier_key == "tvdbid")) | ((Search.identifier_value == TvIdCache.tvrage) & (Search.identifier_key == "rid")))) \
        .join(MovieIdCache, JOIN.LEFT_OUTER, on=(((Search.identifier_value == MovieIdCache.imdb) & (Search.identifier_key == "imdbid")) | ((Search.identifier_value == MovieIdCache.tmdb) & (Search.identifier_key == "tmdbid"))))
    if type is not None and type != "All":
        query = query.where(Search.internal) if type == "Internal" else query.where(~Search.internal)
    total_requests = query.count()
    requests = list(query.order_by(Search.time.desc()).paginate(page, limit).dicts())
    search_requests = {"totalRequests": total_requests, "searchRequests": requests}
    return search_requests
def getTimeBasedSearchStats(after, before):
    searches = Search().select(Search.time).where((Search.time > after) & (Search.time < before))
    searchTimes = [arrow.get(x.time).to(tz.tzlocal()) for x in searches]
    perDayOfWeek, perHourOfDay = calculcateTimeBasedStats(searchTimes)
    return {"perDayOfWeek": perDayOfWeek, "perHourOfDay": perHourOfDay}
def getTimeBasedSearchStats():
    searches = Search().select(Search.time)
    searchTimes = [arrow.get(x.time) for x in searches]
    perDayOfWeek, perHourOfDay = calculcateTimeBasedStats(searchTimes)
    return {"perDayOfWeek": perDayOfWeek, "perHourOfDay": perHourOfDay}
def get_search_requests(page=0, limit=100, sortModel=None, filterModel=None, distinct=False, onlyUser=None):
    columnNameToEntityMap = {
        "time": Search.time,
        "query": Search.query,
        "category": Search.category,
        "access": Search.internal,
        "username": Search.username
    }
    columns = [Search.time, Search.internal, Search.query, Search.identifier_key, Search.identifier_value, Search.category, Search.season, Search.episode, Search.type, Search.username, Search.title, Search.author,
               TvIdCache.title.alias("tvtitle"), MovieIdCache.title.alias("movietitle")]
    query = Search().select(*columns)
    query = query \
        .join(TvIdCache, JOIN.LEFT_OUTER, on=(((Search.identifier_value == TvIdCache.tvdb) & (Search.identifier_key == "tvdbid")) | ((Search.identifier_value == TvIdCache.tvrage) & (Search.identifier_key == "rid")))) \
        .join(MovieIdCache, JOIN.LEFT_OUTER, on=(((Search.identifier_value == MovieIdCache.imdb) & (Search.identifier_key == "imdbid")) | ((Search.identifier_value == MovieIdCache.tmdb) & (Search.identifier_key == "tmdbid"))))
    query = extendQueryWithFilter(columnNameToEntityMap, filterModel, query)
    query = extendQueryWithSorting(columnNameToEntityMap, query, sortModel, Search.time.desc())
    if onlyUser is not None and onlyUser:
        query = query.where(Search.username == onlyUser)
    if distinct:
        query = query.group_by(Search.internal, Search.query, Search.identifier_key, Search.identifier_value, Search.category, Search.season, Search.episode, Search.type, Search.username, Search.title, Search.author)
    total_requests = query.count()
    requests = list(query.paginate(page, limit).dicts())
    search_requests = {"totalRequests": total_requests, "searchRequests": requests}
    return search_requests
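# The helpers extendQueryWithFilter and extendQueryWithSorting are referenced above but not shown in this
# section. Purely as a hypothetical sketch (not the project's actual implementation), a sorting helper
# might look like the following; the sortModel keys "column" and "sortMode" are assumptions for illustration.
def extendQueryWithSortingSketch(columnNameToEntityMap, query, sortModel, defaultSorting):
    # Fall back to the default ordering (e.g. Search.time.desc()) if no usable sort model was supplied
    if not sortModel or sortModel.get("column") not in columnNameToEntityMap:
        return query.order_by(defaultSorting)
    column = columnNameToEntityMap[sortModel["column"]]
    # Assume sortMode 2 means descending, anything else ascending
    return query.order_by(column.desc() if sortModel.get("sortMode") == 2 else column.asc())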
def get_search_requests(page=0, limit=100, type=None):
    query = Search().select(Search.time, Search.internal, Search.query, Search.identifier_key, Search.identifier_value, Search.category, Search.season, Search.episode, Search.type, Search.username, Search.title, Search.author,
                            TvIdCache.title.alias("tvtitle"), MovieIdCache.title.alias("movietitle")) \
        .join(TvIdCache, JOIN.LEFT_OUTER, on=(((Search.identifier_value == TvIdCache.tvdb) & (Search.identifier_key == "tvdbid")) | ((Search.identifier_value == TvIdCache.tvrage) & (Search.identifier_key == "rid")))) \
        .join(MovieIdCache, JOIN.LEFT_OUTER, on=(((Search.identifier_value == MovieIdCache.imdb) & (Search.identifier_key == "imdbid")) | ((Search.identifier_value == MovieIdCache.tmdb) & (Search.identifier_key == "tmdbid"))))
    if type is not None and type != "All":
        query = query.where(Search.internal) if type == "Internal" else query.where(~Search.internal)
    total_requests = query.count()
    requests = list(query.order_by(Search.time.desc()).paginate(page, limit).dicts())
    search_requests = {"totalRequests": total_requests, "searchRequests": requests}
    return search_requests
def testIndexersApiLimits(self):
    config.settings.searching.generate_queries = []
    self.newznab1.hitLimit = 3
    self.newznab1.hitLimitResetTime = None
    config.settings.indexers = [self.newznab1]
    read_indexers_from_config()
    search_request = SearchRequest()
    indexers = search.pick_indexers(search_request)
    self.assertEqual(1, len(indexers))
    dbsearch = Search(internal=True, time=arrow.utcnow().datetime)
    dbsearch.save()
    indexer = Indexer().get(name="newznab1")

    # Two accesses one and 12 hours ago
    IndexerApiAccess(indexer=indexer, search=dbsearch, time=arrow.utcnow().replace(hours=-1).datetime, type="search", url="", response_successful=True).save()
    IndexerApiAccess(indexer=indexer, search=dbsearch, time=arrow.utcnow().replace(hours=-12).datetime, type="search", url="", response_successful=True).save()
    self.assertEqual(1, len(search.pick_indexers(search_request)))

    # Another one 20 hours ago, so limit should be reached
    IndexerApiAccess(indexer=indexer, search=dbsearch, time=arrow.utcnow().replace(hours=-20).datetime, type="search", url="", response_successful=True).save()
    self.assertEqual(0, len(search.pick_indexers(search_request)))
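# Hypothetical sketch (not the project's actual implementation) of the rolling 24-hour hit-limit check
# exercised by the test above: with hitLimitResetTime unset, an indexer is skipped once it has accumulated
# hitLimit API accesses within the last 24 hours. IndexerApiAccess is assumed to be the peewee model used
# in the test; the arrow-style time shift mirrors the calls used there.
def hit_limit_reached_sketch(indexer, hit_limit):
    since = arrow.utcnow().replace(hours=-24).datetime
    recent_accesses = IndexerApiAccess.select().where((IndexerApiAccess.indexer == indexer) & (IndexerApiAccess.time > since)).count()
    return recent_accesses >= hit_limit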
def get_search_requests(page=0, limit=100, type=None):
    query = Search().select(Search.time, Search.internal, Search.query, Search.identifier_key, Search.identifier_value, Search.category, Search.season, Search.episode, Search.type, Search.username)
    if type is not None and type != "All":
        query = query.where(Search.internal) if type == "Internal" else query.where(~Search.internal)
    total_requests = query.count()
    requests = list(query.order_by(Search.time.desc()).paginate(page, limit).dicts())
    search_requests = {"totalRequests": total_requests, "searchRequests": requests}
    return search_requests
def search(internal, search_request):
    for k in list(pseudo_cache.keys()):
        if pseudo_cache[k]["last_access"].replace(minutes=+5) < arrow.utcnow():
            pseudo_cache.pop(k)
    limit = search_request.limit  # todo use actual configured limit
    external_offset = int(search_request.offset)
    search_hash = search_request.search_hash
    if search_hash not in pseudo_cache.keys() or search_request.offset == 0:
        # If it's a new search (which starts with offset 0) do it again instead of using the cached results
        logger.debug("Didn't find this query in cache or want to do a new search")
        cache_entry = {"results": [], "indexer_infos": {}, "total": 0, "last_access": arrow.utcnow(), "offset": 0}
        indexers_to_call, with_query_generation = pick_indexers(query_supplied=True if search_request.query is not None and search_request.query != "" else False, identifier_key=search_request.identifier_key, internal=internal, selected_indexers=search_request.indexers)
        for p in indexers_to_call:
            cache_entry["indexer_infos"][p] = {"has_more": True, "search_request": search_request, "total_included": False}
        dbsearch = Search(internal=internal, query=search_request.query, category=search_request.category, identifier_key=search_request.identifier_key, identifier_value=search_request.identifier_value, season=search_request.season, episode=search_request.episode, type=search_request.type)
        dbsearch.save()
        cache_entry["dbsearch"] = dbsearch
        if with_query_generation and search_request.identifier_key and search_request.title is None:
            search_request.title = infos.title_from_id(search_request.identifier_key, search_request.identifier_value)
        pseudo_cache[search_hash] = cache_entry
    else:
        cache_entry = pseudo_cache[search_hash]
        indexers_to_call = [indexer for indexer, info in cache_entry["indexer_infos"].items() if info["has_more"]]
        dbsearch = cache_entry["dbsearch"]
        logger.debug("Found search in cache")

    logger.debug("Will search at indexers as long as we don't have enough results for the current offset+limit and any indexer has more results.")
    while len(cache_entry["results"]) < external_offset + limit and len(indexers_to_call) > 0:
        logger.debug("We want %d results but have only %d so far" % ((external_offset + limit), len(cache_entry["results"])))
        logger.debug("%d indexers still have results" % len(indexers_to_call))
        search_request.offset = cache_entry["offset"]
        logger.debug("Searching indexers with offset %d" % search_request.offset)
        result = search_and_handle_db(dbsearch, {x: search_request for x in indexers_to_call})
        search_results = []
        indexers_to_call = []
        for indexer, queries_execution_result in result["results"].items():
            search_results.extend(queries_execution_result.results)
            logger.debug("%s returned %d results" % (indexer, len(queries_execution_result.results)))
            cache_entry["indexer_infos"][indexer].update({"search_request": search_request, "has_more": queries_execution_result.has_more, "total": queries_execution_result.total, "total_known": queries_execution_result.total_known, "indexer_search": queries_execution_result.dbentry})
            if queries_execution_result.has_more:
                indexers_to_call.append(indexer)
                logger.debug("%s still has more results so we could use it the next round" % indexer)
            if queries_execution_result.total_known:
                if not cache_entry["indexer_infos"][indexer]["total_included"]:
                    cache_entry["total"] += queries_execution_result.total
                    logger.debug("%s reports %d total results. We'll include in the total this time only" % (indexer, queries_execution_result.total))
                    cache_entry["indexer_infos"][indexer]["total_included"] = True
            elif queries_execution_result.has_more:
                logger.debug("%s doesn't report an exact number of results so let's just add another 100 to the total" % indexer)
                cache_entry["total"] += 100
        search_results = sorted(search_results, key=lambda x: x.epoch, reverse=True)
        cache_entry["results"].extend(search_results)
        cache_entry["offset"] += limit

    # todo: perhaps move duplicate handling here. Would allow to recognize duplicates that were added, for example 100 were already loaded and then we get 101-200 and 100 and 101 are duplicates
    # todo: then make configurable if we want to delete duplicates for api, internal, both, none. Would also mean that we return 100 actually different results, otherwise in the worst case we could for example return 50 originals and 50 duplicates
    if internal:
        logger.debug("We have %d cached results and return them all because we search internally" % len(cache_entry["results"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:])
    else:
        logger.debug("We have %d cached results and return %d-%d of %d total available accounting for the limit set for the API search" % (len(cache_entry["results"]), external_offset, external_offset + limit, cache_entry["total"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:(external_offset + limit)])
    cache_entry["last_access"] = arrow.utcnow()
    return {"results": nzb_search_results, "indexer_infos": cache_entry["indexer_infos"], "dbsearch": cache_entry["dbsearch"].id, "total": cache_entry["total"], "offset": external_offset}
def search(search_request):
    logger.info("Starting new search: %s" % search_request)
    if search_request.maxage is None and config.settings.searching.maxAge:
        search_request.maxage = config.settings.searching.maxAge
        logger.info("Will ignore results older than %d days" % search_request.maxage)

    # Clean up cache
    for k in list(pseudo_cache.keys()):
        if pseudo_cache[k]["last_access"].replace(minutes=+5) < arrow.utcnow():
            pseudo_cache.pop(k)

    # Clean up old search results. We do this here because we don't have any background jobs and this is the function most regularly called
    keepFor = config.settings.main.keepSearchResultsForDays
    oldSearchResultsCount = countOldSearchResults(keepFor)
    if oldSearchResultsCount > 0:
        logger.info("Deleting %d search results from database that are older than %d days" % (oldSearchResultsCount, keepFor))
        SearchResult.delete().where(SearchResult.firstFound < (datetime.date.today() - datetime.timedelta(days=keepFor))).execute()
    else:
        if logger.getEffectiveLevel() == logging.DEBUG:
            logger.debug("%d search results stored in database" % SearchResult.select().count())

    limit = search_request.limit
    external_offset = int(search_request.offset)
    search_hash = search_request.search_hash
    categoryResult = categories.getCategoryByAnyInput(search_request.category)
    search_request.category = categoryResult
    if search_hash not in pseudo_cache.keys() or search_request.offset == 0:
        # If it's a new search (which starts with offset 0) do it again instead of using the cached results
        logger.debug("Didn't find this query in cache or want to do a new search")
        cache_entry = {"results": [], "indexer_infos": {}, "total": 0, "last_access": arrow.utcnow(), "offset": 0, "rejected": SearchModule.getRejectedCountDict()}
        category = categoryResult.category
        indexers_to_call = pick_indexers(search_request)
        for p in indexers_to_call:
            cache_entry["indexer_infos"][p] = {"has_more": True, "search_request": search_request, "total_included": False}
        dbsearch = Search(internal=search_request.internal, query=search_request.query, category=categoryResult.category.pretty, identifier_key=search_request.identifier_key, identifier_value=search_request.identifier_value, season=search_request.season, episode=search_request.episode, type=search_request.type, title=search_request.title, author=search_request.author, username=search_request.username)
        saveSearch(dbsearch)  # dbsearch.save()
        cache_entry["dbsearch"] = dbsearch

        # Find ignored words and parse query for ignored words
        search_request.forbiddenWords = []
        search_request.requiredWords = []
        applyRestrictionsGlobal = config.settings.searching.applyRestrictions == "both" or (config.settings.searching.applyRestrictions == "internal" and search_request.internal) or (config.settings.searching.applyRestrictions == "external" and not search_request.internal)
        applyRestrictionsCategory = category.applyRestrictions == "both" or (category.applyRestrictions == "internal" and search_request.internal) or (search_request.category.category.applyRestrictions == "external" and not search_request.internal)
        if config.settings.searching.forbiddenWords and applyRestrictionsGlobal:
            logger.debug("Using configured global forbidden words: %s" % config.settings.searching.forbiddenWords)
            search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.forbiddenWords.split(",")))])
        if config.settings.searching.requiredWords and applyRestrictionsGlobal:
            logger.debug("Using configured global required words: %s" % config.settings.searching.requiredWords)
            search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.requiredWords.split(",")))])
        if category.forbiddenWords and applyRestrictionsCategory:
            logger.debug("Using configured forbidden words for category %s: %s" % (category.pretty, category.forbiddenWords))
            search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, category.forbiddenWords.split(",")))])
        if category.requiredWords and applyRestrictionsCategory:
            logger.debug("Using configured required words for category %s: %s" % (category.pretty, category.requiredWords))
            search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, category.requiredWords.split(",")))])
        if search_request.query:
            forbiddenWords = [str(x[1]) for x in re.findall(r"[\s|\b](\-\-|!)(?P<term>\w+)", search_request.query)]
            if len(forbiddenWords) > 0:
                logger.debug("Query before removing NOT terms: %s" % search_request.query)
                search_request.query = re.sub(r"[\s|\b](\-\-|!)(?P<term>\w+)", "", search_request.query)
                logger.debug("Query after removing NOT terms: %s" % search_request.query)
                logger.debug("Found NOT terms: %s" % ",".join(forbiddenWords))
                search_request.forbiddenWords.extend(forbiddenWords)
        cache_entry["forbiddenWords"] = search_request.forbiddenWords
        cache_entry["requiredWords"] = search_request.requiredWords
        cache_entry["query"] = search_request.query
        pseudo_cache[search_hash] = cache_entry
    else:
        cache_entry = pseudo_cache[search_hash]
        indexers_to_call = [indexer for indexer, info in cache_entry["indexer_infos"].items() if info["has_more"]]
        dbsearch = cache_entry["dbsearch"]
        search_request.forbiddenWords = cache_entry["forbiddenWords"]
        search_request.requiredWords = cache_entry["requiredWords"]
        search_request.query = cache_entry["query"]
        logger.debug("Found search in cache")

    logger.debug("Will search at indexers as long as we don't have enough results for the current offset+limit and any indexer has more results.")
    if search_request.loadAll:
        logger.debug("Requested to load all results. Will continue to search until all indexers are exhausted")
    while (len(cache_entry["results"]) < external_offset + limit or search_request.loadAll) and len(indexers_to_call) > 0:
        if len(cache_entry["results"]) < external_offset + limit:
            logger.debug("We want %d results but have only %d so far" % ((external_offset + limit), len(cache_entry["results"])))
        elif search_request.loadAll:
            logger.debug("All results requested. Continuing to search.")
        logger.debug("%d indexers still have results" % len(indexers_to_call))
        search_request.offset = cache_entry["offset"]
        logger.debug("Searching indexers with offset %d" % search_request.offset)
        result = search_and_handle_db(dbsearch, {x: search_request for x in indexers_to_call})
        logger.debug("All search calls to indexers completed")
        search_results = []
        indexers_to_call = []
        waslocked = False
        before = arrow.now()
        if databaseLock.locked():
            logger.debug("Database accesses locked by other search. Will wait for our turn.")
            waslocked = True
        databaseLock.acquire()
        if waslocked:
            after = arrow.now()
            took = (after - before).seconds * 1000 + (after - before).microseconds / 1000
            logger.debug("Waited %dms for database lock" % took)
        for indexer, queries_execution_result in result["results"].items():
            with db.atomic():
                logger.info("%s returned %d results" % (indexer, len(queries_execution_result.results)))
                for result in queries_execution_result.results:
                    if result.title is None or result.link is None or result.indexerguid is None:
                        logger.info("Skipping result with missing data: %s" % result)
                        continue
                    try:
                        searchResultId = hashlib.sha1(str(indexer.indexer.id) + result.indexerguid).hexdigest()
                        tryGetOrCreateSearchResultDbEntry(searchResultId, indexer.indexer.id, result)
                        result.searchResultId = searchResultId
                        search_results.append(result)
                    except (IntegrityError, OperationalError) as e:
                        logger.error("Error while trying to save search result to database. Skipping it. Error: %s" % e)
            cache_entry["indexer_infos"][indexer].update({"did_search": queries_execution_result.didsearch, "indexer": indexer.name, "search_request": search_request, "has_more": queries_execution_result.has_more, "total": queries_execution_result.total, "total_known": queries_execution_result.total_known, "indexer_search": queries_execution_result.indexerSearchEntry, "rejected": queries_execution_result.rejected, "processed_results": queries_execution_result.loaded_results})
            if queries_execution_result.has_more:
                indexers_to_call.append(indexer)
                logger.debug("%s still has more results so we could use it the next round" % indexer)
            if queries_execution_result.total_known:
                if not cache_entry["indexer_infos"][indexer]["total_included"]:
                    cache_entry["total"] += queries_execution_result.total
                    logger.debug("%s reports %d total results. We'll include in the total this time only" % (indexer, queries_execution_result.total))
                    cache_entry["indexer_infos"][indexer]["total_included"] = True
            elif queries_execution_result.has_more:
                logger.debug("%s doesn't report an exact number of results so let's just add another 100 to the total" % indexer)
                cache_entry["total"] += 100
            for rejectKey in cache_entry["rejected"].keys():
                if rejectKey in cache_entry["indexer_infos"][indexer]["rejected"].keys():
                    cache_entry["rejected"][rejectKey] += cache_entry["indexer_infos"][indexer]["rejected"][rejectKey]
        databaseLock.release()

        logger.debug("Searching for duplicates")
        numberResultsBeforeDuplicateRemoval = len(search_results)
        grouped_by_sameness, uniqueResultsPerIndexer = find_duplicates(search_results)
        allresults = []
        for group in grouped_by_sameness:
            if search_request.internal:
                for i in group:
                    # We give each group of results a unique value by which they can be identified later
                    i.hash = hash(group[0].details_link)
                    allresults.append(i)
            else:
                # We sort by age first and then by indexerscore so the newest result with the highest indexer score is chosen
                group = sorted(group, key=lambda x: x.epoch, reverse=True)
                group = sorted(group, key=lambda x: x.indexerscore, reverse=True)
                allresults.append(group[0])
        search_results = allresults
        with databaseLock:
            for indexer, infos in cache_entry["indexer_infos"].iteritems():
                if indexer.name in uniqueResultsPerIndexer.keys():  # If the search failed it isn't contained in the duplicates list
                    uniqueResultsCount = uniqueResultsPerIndexer[infos["indexer"]]
                    processedResults = infos["processed_results"]
                    logger.debug("Indexer %s had a unique results share of %d%% (%d of %d total results were only provided by this indexer)" % (indexer.name, 100 / (numberResultsBeforeDuplicateRemoval / uniqueResultsCount), uniqueResultsCount, numberResultsBeforeDuplicateRemoval))
                    infos["indexer_search"].uniqueResults = uniqueResultsCount
                    infos["indexer_search"].processedResults = processedResults
                    infos["indexer_search"].save()
        if not search_request.internal:
            countAfter = len(search_results)
            countRemoved = numberResultsBeforeDuplicateRemoval - countAfter
            logger.info("Removed %d duplicates from %d results" % (countRemoved, numberResultsBeforeDuplicateRemoval))
        search_results = sorted(search_results, key=lambda x: x.epoch, reverse=True)
        cache_entry["results"].extend(search_results)
        cache_entry["offset"] += limit
        if len(indexers_to_call) == 0:
            logger.info("All indexers exhausted")
        elif len(cache_entry["results"]) >= external_offset + limit:
            logger.debug("Loaded a total of %d results which is enough for the %d requested. Stopping search." % (len(cache_entry["results"]), (external_offset + limit)))

    if search_request.internal:
        logger.debug("We have %d cached results and return them all because we search internally" % len(cache_entry["results"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:])
    else:
        logger.debug("We have %d cached results and return %d-%d of %d total available accounting for the limit set for the API search" % (len(cache_entry["results"]), external_offset, external_offset + limit, cache_entry["total"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:(external_offset + limit)])
    cache_entry["last_access"] = arrow.utcnow()
    for k, v in cache_entry["rejected"].items():
        if v > 0:
            logger.info("Rejected %d because: %s" % (v, k))
    logger.info("Returning %d results" % len(nzb_search_results))
    return {"results": nzb_search_results, "indexer_infos": cache_entry["indexer_infos"], "dbsearchid": cache_entry["dbsearch"].id, "total": cache_entry["total"], "offset": external_offset, "rejected": cache_entry["rejected"].items()}
def search(internal, search_request):
    for k in list(pseudo_cache.keys()):
        if pseudo_cache[k]["last_access"].replace(minutes=+5) < arrow.utcnow():
            pseudo_cache.pop(k)
    limit = search_request.limit
    external_offset = int(search_request.offset)
    search_hash = search_request.search_hash
    if search_hash not in pseudo_cache.keys() or search_request.offset == 0:
        # If it's a new search (which starts with offset 0) do it again instead of using the cached results
        logger.debug("Didn't find this query in cache or want to do a new search")
        cache_entry = {"results": [], "indexer_infos": {}, "total": 0, "last_access": arrow.utcnow(), "offset": 0}
        indexers_to_call, with_query_generation = pick_indexers(query_supplied=True if search_request.query is not None and search_request.query != "" else False, identifier_key=search_request.identifier_key, internal=internal, selected_indexers=search_request.indexers)
        for p in indexers_to_call:
            cache_entry["indexer_infos"][p] = {"has_more": True, "search_request": search_request, "total_included": False}
        dbsearch = Search(internal=internal, query=search_request.query, category=search_request.category, identifier_key=search_request.identifier_key, identifier_value=search_request.identifier_value, season=search_request.season, episode=search_request.episode, type=search_request.type, username=request.authorization.username if request.authorization is not None else None)
        # dbsearch.save()
        cache_entry["dbsearch"] = dbsearch
        if with_query_generation and search_request.identifier_key and search_request.title is None:
            try:
                search_request.title = infos.title_from_id(search_request.identifier_key, search_request.identifier_value)
            except:
                pass
        pseudo_cache[search_hash] = cache_entry
    else:
        cache_entry = pseudo_cache[search_hash]
        indexers_to_call = [indexer for indexer, info in cache_entry["indexer_infos"].items() if info["has_more"]]
        dbsearch = cache_entry["dbsearch"]
        logger.debug("Found search in cache")

    logger.debug("Will search at indexers as long as we don't have enough results for the current offset+limit and any indexer has more results.")
    while len(cache_entry["results"]) < external_offset + limit and len(indexers_to_call) > 0:
        logger.debug("We want %d results but have only %d so far" % ((external_offset + limit), len(cache_entry["results"])))
        logger.debug("%d indexers still have results" % len(indexers_to_call))
        search_request.offset = cache_entry["offset"]
        logger.debug("Searching indexers with offset %d" % search_request.offset)
        result = search_and_handle_db(dbsearch, {x: search_request for x in indexers_to_call})
        search_results = []
        indexers_to_call = []
        for indexer, queries_execution_result in result["results"].items():
            search_results.extend(queries_execution_result.results)
            logger.debug("%s returned %d results" % (indexer, len(queries_execution_result.results)))
            cache_entry["indexer_infos"][indexer].update({"did_search": queries_execution_result.didsearch, "indexer": indexer.name, "search_request": search_request, "has_more": queries_execution_result.has_more, "total": queries_execution_result.total, "total_known": queries_execution_result.total_known, "indexer_search": queries_execution_result.indexerSearchEntry})
            if queries_execution_result.has_more:
                indexers_to_call.append(indexer)
                logger.debug("%s still has more results so we could use it the next round" % indexer)
            if queries_execution_result.total_known:
                if not cache_entry["indexer_infos"][indexer]["total_included"]:
                    cache_entry["total"] += queries_execution_result.total
                    logger.debug("%s reports %d total results. We'll include in the total this time only" % (indexer, queries_execution_result.total))
                    cache_entry["indexer_infos"][indexer]["total_included"] = True
            elif queries_execution_result.has_more:
                logger.debug("%s doesn't report an exact number of results so let's just add another 100 to the total" % indexer)
                cache_entry["total"] += 100
        if internal or config.settings.searching.removeDuplicatesExternal:
            countBefore = len(search_results)
            grouped_by_sameness = find_duplicates(search_results)
            allresults = []
            for group in grouped_by_sameness:
                if internal:
                    for i in group:
                        # We give each group of results a unique value by which they can be identified later
                        i.hash = hash(group[0].guid)
                        allresults.append(i)
                else:
                    # We sort by age first and then by indexerscore so the newest result with the highest indexer score is chosen
                    group = sorted(group, key=lambda x: x.epoch, reverse=True)
                    group = sorted(group, key=lambda x: x.indexerscore, reverse=True)
                    allresults.append(group[0])
            search_results = allresults
            if not internal:
                countAfter = len(search_results)
                countRemoved = countBefore - countAfter
                logger.info("Removed %d duplicates from %d results" % (countRemoved, countBefore))
        search_results = sorted(search_results, key=lambda x: x.epoch, reverse=True)
        cache_entry["results"].extend(search_results)
        cache_entry["offset"] += limit

    if internal:
        logger.debug("We have %d cached results and return them all because we search internally" % len(cache_entry["results"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:])
    else:
        logger.debug("We have %d cached results and return %d-%d of %d total available accounting for the limit set for the API search" % (len(cache_entry["results"]), external_offset, external_offset + limit, cache_entry["total"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:(external_offset + limit)])
    cache_entry["last_access"] = arrow.utcnow()
    return {"results": nzb_search_results, "indexer_infos": cache_entry["indexer_infos"], "dbsearchid": cache_entry["dbsearch"].id, "total": cache_entry["total"], "offset": external_offset}
def search(search_request):
    if search_request.maxage is None and config.settings.searching.maxAge:
        search_request.maxage = config.settings.searching.maxAge
        logger.info("Will ignore results older than %d days" % search_request.maxage)

    # Clean up cache
    for k in list(pseudo_cache.keys()):
        if pseudo_cache[k]["last_access"].replace(minutes=+5) < arrow.utcnow():
            pseudo_cache.pop(k)

    # Clean up old search results. We do this here because we don't have any background jobs and this is the function most regularly called
    keepFor = config.settings.main.keepSearchResultsForDays
    oldSearchResultsCount = SearchResult.select().where(SearchResult.firstFound < (datetime.date.today() - datetime.timedelta(days=keepFor))).count()
    if oldSearchResultsCount > 0:
        logger.info("Deleting %d search results from database that are older than %d days" % (oldSearchResultsCount, keepFor))
        SearchResult.delete().where(SearchResult.firstFound < (datetime.date.today() - datetime.timedelta(days=keepFor))).execute()
    else:
        if logger.getEffectiveLevel() == logging.DEBUG:
            logger.debug("%d search results stored in database" % SearchResult.select().count())

    limit = search_request.limit
    external_offset = int(search_request.offset)
    search_hash = search_request.search_hash
    categoryResult = categories.getCategoryByAnyInput(search_request.category)
    search_request.category = categoryResult
    if search_hash not in pseudo_cache.keys() or search_request.offset == 0:
        # If it's a new search (which starts with offset 0) do it again instead of using the cached results
        logger.debug("Didn't find this query in cache or want to do a new search")
        cache_entry = {"results": [], "indexer_infos": {}, "total": 0, "last_access": arrow.utcnow(), "offset": 0}
        category = categoryResult.category
        indexers_to_call = pick_indexers(search_request)
        for p in indexers_to_call:
            cache_entry["indexer_infos"][p] = {"has_more": True, "search_request": search_request, "total_included": False}
        dbsearch = Search(internal=search_request.internal, query=search_request.query, category=categoryResult.category.pretty, identifier_key=search_request.identifier_key, identifier_value=search_request.identifier_value, season=search_request.season, episode=search_request.episode, type=search_request.type, username=search_request.username)
        # dbsearch.save()
        cache_entry["dbsearch"] = dbsearch

        # Find ignored words and parse query for ignored words
        search_request.forbiddenWords = []
        search_request.requiredWords = []
        applyRestrictionsGlobal = config.settings.searching.applyRestrictions == "both" or (config.settings.searching.applyRestrictions == "internal" and search_request.internal) or (config.settings.searching.applyRestrictions == "external" and not search_request.internal)
        applyRestrictionsCategory = category.applyRestrictions == "both" or (category.applyRestrictions == "internal" and search_request.internal) or (search_request.category.category.applyRestrictions == "external" and not search_request.internal)
        if config.settings.searching.forbiddenWords and applyRestrictionsGlobal:
            logger.debug("Using configured global forbidden words: %s" % config.settings.searching.forbiddenWords)
            search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.forbiddenWords.split(",")))])
        if config.settings.searching.requiredWords and applyRestrictionsGlobal:
            logger.debug("Using configured global required words: %s" % config.settings.searching.requiredWords)
            search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.requiredWords.split(",")))])
        if category.forbiddenWords and applyRestrictionsCategory:
            logger.debug("Using configured forbidden words for category %s: %s" % (category.pretty, category.forbiddenWords))
            search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, category.forbiddenWords.split(",")))])
        if category.requiredWords and applyRestrictionsCategory:
            logger.debug("Using configured required words for category %s: %s" % (category.pretty, category.requiredWords))
            search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, category.requiredWords.split(",")))])
        if search_request.query:
            forbiddenWords = [str(x[1]) for x in re.findall(r"[\s|\b](\-\-|!)(?P<term>\w+)", search_request.query)]
            if len(forbiddenWords) > 0:
                logger.debug("Query before removing NOT terms: %s" % search_request.query)
                search_request.query = re.sub(r"[\s|\b](\-\-|!)(?P<term>\w+)", "", search_request.query)
                logger.debug("Query after removing NOT terms: %s" % search_request.query)
                logger.debug("Found NOT terms: %s" % ",".join(forbiddenWords))
                search_request.forbiddenWords.extend(forbiddenWords)
        pseudo_cache[search_hash] = cache_entry
    else:
        cache_entry = pseudo_cache[search_hash]
        indexers_to_call = [indexer for indexer, info in cache_entry["indexer_infos"].items() if info["has_more"]]
        dbsearch = cache_entry["dbsearch"]
        logger.debug("Found search in cache")

    logger.debug("Will search at indexers as long as we don't have enough results for the current offset+limit and any indexer has more results.")
    while len(cache_entry["results"]) < external_offset + limit and len(indexers_to_call) > 0:
        logger.debug("We want %d results but have only %d so far" % ((external_offset + limit), len(cache_entry["results"])))
        logger.debug("%d indexers still have results" % len(indexers_to_call))
        search_request.offset = cache_entry["offset"]
        logger.debug("Searching indexers with offset %d" % search_request.offset)
        result = search_and_handle_db(dbsearch, {x: search_request for x in indexers_to_call})
        logger.debug("All search calls to indexers completed")
        search_results = []
        indexers_to_call = []
        for indexer, queries_execution_result in result["results"].items():
            # Drastically improves db access time but means that if one database write fails all fail. That's a risk we need to take
            with db.atomic():
                logger.debug("%s returned %d results. Writing them to database..." % (indexer, len(queries_execution_result.results)))
                for result in queries_execution_result.results:
                    if result.title is None or result.link is None or result.indexerguid is None:
                        logger.info("Skipping result with missing data: %s" % result)
                        continue
                    searchResult, _ = SearchResult().create_or_get(indexer=indexer.indexer, guid=result.indexerguid, title=result.title, link=result.link, details=result.details_link)
                    result.searchResultId = searchResult.id
                    search_results.append(result)
                logger.debug("Written results to database")
            cache_entry["indexer_infos"][indexer].update({"did_search": queries_execution_result.didsearch, "indexer": indexer.name, "search_request": search_request, "has_more": queries_execution_result.has_more, "total": queries_execution_result.total, "total_known": queries_execution_result.total_known, "indexer_search": queries_execution_result.indexerSearchEntry})
            if queries_execution_result.has_more:
                indexers_to_call.append(indexer)
                logger.debug("%s still has more results so we could use it the next round" % indexer)
            if queries_execution_result.total_known:
                if not cache_entry["indexer_infos"][indexer]["total_included"]:
                    cache_entry["total"] += queries_execution_result.total
                    logger.debug("%s reports %d total results. We'll include in the total this time only" % (indexer, queries_execution_result.total))
                    cache_entry["indexer_infos"][indexer]["total_included"] = True
            elif queries_execution_result.has_more:
                logger.debug("%s doesn't report an exact number of results so let's just add another 100 to the total" % indexer)
                cache_entry["total"] += 100
        if search_request.internal or config.settings.searching.removeDuplicatesExternal:
            logger.debug("Searching for duplicates")
            countBefore = len(search_results)
            grouped_by_sameness = find_duplicates(search_results)
            allresults = []
            for group in grouped_by_sameness:
                if search_request.internal:
                    for i in group:
                        # We give each group of results a unique value by which they can be identified later
                        i.hash = hash(group[0].details_link)
                        allresults.append(i)
                else:
                    # We sort by age first and then by indexerscore so the newest result with the highest indexer score is chosen
                    group = sorted(group, key=lambda x: x.epoch, reverse=True)
                    group = sorted(group, key=lambda x: x.indexerscore, reverse=True)
                    allresults.append(group[0])
            search_results = allresults
            if not search_request.internal:
                countAfter = len(search_results)
                countRemoved = countBefore - countAfter
                logger.info("Removed %d duplicates from %d results" % (countRemoved, countBefore))
        search_results = sorted(search_results, key=lambda x: x.epoch, reverse=True)
        cache_entry["results"].extend(search_results)
        cache_entry["offset"] += limit

    if search_request.internal:
        logger.debug("We have %d cached results and return them all because we search internally" % len(cache_entry["results"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:])
    else:
        logger.debug("We have %d cached results and return %d-%d of %d total available accounting for the limit set for the API search" % (len(cache_entry["results"]), external_offset, external_offset + limit, cache_entry["total"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:(external_offset + limit)])
    cache_entry["last_access"] = arrow.utcnow()
    logger.info("Returning %d results" % len(nzb_search_results))
    return {"results": nzb_search_results, "indexer_infos": cache_entry["indexer_infos"], "dbsearchid": cache_entry["dbsearch"].id, "total": cache_entry["total"], "offset": external_offset}
def testThatDatabaseValuesAreStored(self):
    with self.app.test_request_context('/'):
        with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps:
            newznabItems = [
                [mockbuilder.buildNewznabItem(title="title1", pubdate=arrow.get(1000).format("ddd, DD MMM YYYY HH:mm:ss Z"), size=1000, indexer_name="newznab1")],
                [mockbuilder.buildNewznabItem(title="title2", pubdate=arrow.get(1000).format("ddd, DD MMM YYYY HH:mm:ss Z"), size=1000, indexer_name="newznab2")]
            ]

            self.prepareSearchMocks(rsps, indexerCount=len(newznabItems), newznabItems=newznabItems)
            # Make the second access unsuccessful
            rsps._urls.pop(1)
            rsps.add(responses.GET, r".*", body="an error message", status=500, content_type='application/x-html')

            searchRequest = SearchRequest(type="search", query="aquery", category="acategory", identifier_key="imdbid", identifier_value="animdbid", season=1, episode=2, indexers="newznab1|newznab2")
            result = search.search(searchRequest)
            results = result["results"]
            self.assertEqual(1, len(results))

            dbSearch = Search().get()
            self.assertEqual(True, dbSearch.internal)
            self.assertEqual("aquery", dbSearch.query)
            self.assertEqual("All", dbSearch.category)
            self.assertEqual("imdbid", dbSearch.identifier_key)
            self.assertEqual("animdbid", dbSearch.identifier_value)
            self.assertEqual("1", dbSearch.season)
            self.assertEqual("2", dbSearch.episode)
            self.assertEqual("search", dbSearch.type)
            self.assertEqual(18, dbSearch.time.hour)

            indexerSearch1 = IndexerSearch.get(IndexerSearch.indexer == Indexer.get(Indexer.name == "newznab1"))
            self.assertEqual(indexerSearch1.search, dbSearch)
            self.assertEqual(18, indexerSearch1.time.hour)

            indexerSearch2 = IndexerSearch.get(IndexerSearch.indexer == Indexer.get(Indexer.name == "newznab2"))
            self.assertEqual(indexerSearch2.search, dbSearch)
            self.assertEqual(18, indexerSearch2.time.hour)

            calledUrls = sorted([x.request.url for x in rsps.calls])

            indexerApiAccess1 = IndexerApiAccess.get(IndexerApiAccess.indexer == Indexer.get(Indexer.name == "newznab1"))
            self.assertEqual(indexerSearch1, indexerApiAccess1.indexer_search)
            self.assertEqual(18, indexerApiAccess1.time.hour)
            self.assertEqual("search", indexerApiAccess1.type)
            self.assertEqual(calledUrls[0], indexerApiAccess1.url)
            self.assertTrue(indexerApiAccess1.response_successful)
            self.assertEqual(0, indexerApiAccess1.response_time)
            self.assertIsNone(indexerApiAccess1.error)

            indexerApiAccess2 = IndexerApiAccess.get(IndexerApiAccess.indexer == Indexer.get(Indexer.name == "newznab2"))
            self.assertEqual(indexerSearch2, indexerApiAccess2.indexer_search)
            self.assertEqual(18, indexerApiAccess2.time.hour)
            self.assertEqual("search", indexerApiAccess2.type)
            self.assertEqual(calledUrls[1], indexerApiAccess2.url)
            self.assertFalse(indexerApiAccess2.response_successful)
            self.assertIsNone(indexerApiAccess2.response_time)
            self.assertTrue("Connection refused" in indexerApiAccess2.error)

            indexerStatus2 = IndexerStatus.get(IndexerStatus.indexer == Indexer.get(Indexer.name == "newznab2"))
            self.assertEqual(1, indexerStatus2.level)
            self.assertTrue("Connection refused" in indexerStatus2.reason)