Code Example #1
File: stats.py Project: gh0std4ncer/nzbhydra
def get_search_requests(page=0, limit=100, type=None):
    query = Search().select(Search.time, Search.internal, Search.query, Search.identifier_key, Search.identifier_value, Search.category, Search.season, Search.episode, Search.type)
    if type is not None and type != "All":
        query = query.where(Search.internal) if type == "Internal" else query.where(~Search.internal)
    total_requests = query.count()
    requests = list(query.order_by(Search.time.desc()).paginate(page, limit).dicts())

    search_requests = {"totalRequests": total_requests, "searchRequests": requests}
    return search_requests
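
Example #1 selects a fixed set of columns from a peewee Search model that the snippet itself never defines (later variants also select Search.username, Search.title, and Search.author). A minimal sketch of what the model could look like, inferred purely from the columns selected on this page rather than taken from the actual nzbhydra source, is:

# Hypothetical reconstruction of the Search model; the field names come from
# the select() calls on this page, the field types are assumptions.
from peewee import Model, DateTimeField, BooleanField, CharField

class Search(Model):
    time = DateTimeField()                  # when the search was run
    internal = BooleanField()               # True for GUI searches, False for API searches
    query = CharField(null=True)            # free-text query, if any
    identifier_key = CharField(null=True)   # e.g. "imdbid", "tvdbid", "rid", "tmdbid"
    identifier_value = CharField(null=True)
    category = CharField(null=True)
    season = CharField(null=True)
    episode = CharField(null=True)
    type = CharField(null=True)             # e.g. "search", "tvsearch", "movie"
    username = CharField(null=True)         # only selected in some variants
    title = CharField(null=True)
    author = CharField(null=True)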
Code Example #2
File: stats.py Project: jedvalley123/nzbhydra
def get_search_requests(page=0, limit=100, type=None):
    query = Search().select(Search.time, Search.internal, Search.query, Search.identifier_key, Search.identifier_value, Search.category, Search.season, Search.episode, Search.type, Search.username, TvIdCache.title.alias("tvtitle"), MovieIdCache.title.alias("movietitle")).join(TvIdCache, JOIN.LEFT_OUTER, on=(
        ((Search.identifier_value == TvIdCache.tvdb) & (Search.identifier_key == "tvdbid")) | 
        ((Search.identifier_value == TvIdCache.tvrage) & (Search.identifier_key == "rid"))
    )).join(MovieIdCache, JOIN.LEFT_OUTER, on=(
        ((Search.identifier_value == MovieIdCache.imdb) & (Search.identifier_key == "imdbid")) |
        ((Search.identifier_value == MovieIdCache.tmdb) & (Search.identifier_key == "tmdbid"))))

    if type is not None and type != "All":
        query = query.where(Search.internal) if type == "Internal" else query.where(~Search.internal)
    total_requests = query.count()
    requests = list(query.order_by(Search.time.desc()).paginate(page, limit).dicts())

    search_requests = {"totalRequests": total_requests, "searchRequests": requests}
    return search_requests
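
Example #2 left-joins two lookup tables so that an id-based search (for example by tvdbid or imdbid) can be shown with a human-readable title. Reconstructed only from the join conditions above, and not from the real nzbhydra source, the cache models presumably look roughly like this:

# Hypothetical reconstruction of the id-cache models; only the fields used in
# the join conditions are shown, and the column types are assumptions.
from peewee import Model, CharField

class TvIdCache(Model):
    tvdb = CharField(null=True)    # compared with Search.identifier_value when identifier_key == "tvdbid"
    tvrage = CharField(null=True)  # compared when identifier_key == "rid"
    title = CharField()

class MovieIdCache(Model):
    imdb = CharField(null=True)    # compared when identifier_key == "imdbid"
    tmdb = CharField(null=True)    # compared when identifier_key == "tmdbid"
    title = CharField()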
Code Example #3
File: stats.py Project: theotherp/nzbhydra
def getTimeBasedSearchStats(after, before):
    searches = Search().select(Search.time).where((Search.time > after) & (Search.time < before))
    searchTimes = [arrow.get(x.time).to(tz.tzlocal()) for x in searches]

    perDayOfWeek, perHourOfDay = calculcateTimeBasedStats(searchTimes)

    return {"perDayOfWeek": perDayOfWeek, "perHourOfDay": perHourOfDay}
Code Example #4
File: stats.py Project: petrichor8/nzbhydra
def getTimeBasedSearchStats():
    searches = Search().select(Search.time)
    searchTimes = [arrow.get(x.time) for x in searches]

    perDayOfWeek, perHourOfDay = calculcateTimeBasedStats(searchTimes)

    return {"perDayOfWeek": perDayOfWeek, "perHourOfDay": perHourOfDay}
Code Example #5
File: stats.py Project: theotherp/nzbhydra
def get_search_requests(page=0, limit=100, sortModel=None, filterModel=None, distinct=False, onlyUser=None):
    columnNameToEntityMap = {
        "time": Search.time,
        "query": Search.query,
        "category": Search.category,
        "access": Search.internal,
        "username": Search.username
    }
    columns = [Search.time, Search.internal, Search.query, Search.identifier_key, Search.identifier_value, Search.category, Search.season, Search.episode, Search.type, Search.username, Search.title, Search.author, TvIdCache.title.alias("tvtitle"), MovieIdCache.title.alias("movietitle")]

    query = Search().select(*columns)
    query = query.join(TvIdCache, JOIN.LEFT_OUTER, on=(
        ((Search.identifier_value == TvIdCache.tvdb) & (Search.identifier_key == "tvdbid")) |
        ((Search.identifier_value == TvIdCache.tvrage) & (Search.identifier_key == "rid"))
    )).join(MovieIdCache, JOIN.LEFT_OUTER, on=(
        ((Search.identifier_value == MovieIdCache.imdb) & (Search.identifier_key == "imdbid")) |
        ((Search.identifier_value == MovieIdCache.tmdb) & (Search.identifier_key == "tmdbid"))))

    query = extendQueryWithFilter(columnNameToEntityMap, filterModel, query)
    query = extendQueryWithSorting(columnNameToEntityMap, query, sortModel, Search.time.desc())

    if onlyUser is not None and onlyUser:
        query = query.where(Search.username == onlyUser)
    if distinct:
        query = query.group_by(Search.internal, Search.query, Search.identifier_key, Search.identifier_value, Search.category, Search.season, Search.episode, Search.type, Search.username, Search.title, Search.author)
    total_requests = query.count()
    requests = list(query.paginate(page, limit).dicts())

    search_requests = {"totalRequests": total_requests, "searchRequests": requests}
    return search_requests
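
Example #5 relies on two helpers, extendQueryWithFilter and extendQueryWithSorting, plus sortModel/filterModel arguments whose shape the snippet does not reveal. As an illustration only, assuming sortModel is a dict with a "column" name and a numeric "sortMode" flag, the sorting helper might look roughly like this:

# Hypothetical sketch of extendQueryWithSorting; the shape of sortModel and the
# meaning of its values are assumptions, not taken from nzbhydra.
def extendQueryWithSorting(columnNameToEntityMap, query, sortModel, defaultSort):
    if not sortModel or sortModel.get("column") not in columnNameToEntityMap:
        return query.order_by(defaultSort)
    field = columnNameToEntityMap[sortModel["column"]]
    if sortModel.get("sortMode") == 2:  # assumed: 1 = ascending, 2 = descending
        return query.order_by(field.desc())
    return query.order_by(field.asc())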
Code Example #6
def get_search_requests(page=0, limit=100, type=None):
    query = Search().select(
        Search.time, Search.internal, Search.query, Search.identifier_key,
        Search.identifier_value, Search.category, Search.season,
        Search.episode, Search.type, Search.username, Search.title,
        Search.author, TvIdCache.title.alias("tvtitle"),
        MovieIdCache.title.alias("movietitle")).join(
            TvIdCache,
            JOIN.LEFT_OUTER,
            on=(((Search.identifier_value == TvIdCache.tvdb) &
                 (Search.identifier_key == "tvdbid")) |
                ((Search.identifier_value == TvIdCache.tvrage) &
                 (Search.identifier_key == "rid")))).join(
                     MovieIdCache,
                     JOIN.LEFT_OUTER,
                     on=(((Search.identifier_value == MovieIdCache.imdb) &
                          (Search.identifier_key == "imdbid")) |
                         ((Search.identifier_value == MovieIdCache.tmdb) &
                          (Search.identifier_key == "tmdbid"))))

    if type is not None and type != "All":
        query = query.where(
            Search.internal) if type == "Internal" else query.where(
                ~Search.internal)
    total_requests = query.count()
    requests = list(
        query.order_by(Search.time.desc()).paginate(page, limit).dicts())

    search_requests = {
        "totalRequests": total_requests,
        "searchRequests": requests
    }
    return search_requests
Code Example #7
File: test_Search.py Project: petrichor8/nzbhydra
    def testIndexersApiLimits(self):

        config.settings.searching.generate_queries = []
        self.newznab1.hitLimit = 3
        self.newznab1.hitLimitResetTime = None
        config.settings.indexers = [self.newznab1]
        read_indexers_from_config()
        search_request = SearchRequest()
        indexers = search.pick_indexers(search_request)
        self.assertEqual(1, len(indexers))
        dbsearch = Search(internal=True, time=arrow.utcnow().datetime)
        dbsearch.save()
        indexer = Indexer().get(name="newznab1")
        
        #Two accesses one and 12 hours ago
        IndexerApiAccess(indexer=indexer, search=dbsearch, time=arrow.utcnow().replace(hours=-1).datetime, type="search", url="", response_successful=True).save()
        IndexerApiAccess(indexer=indexer, search=dbsearch, time=arrow.utcnow().replace(hours=-12).datetime, type="search", url="", response_successful=True).save()
        self.assertEqual(1, len(search.pick_indexers(search_request)))

        #Another one 20 hours ago, so limit should be reached
        IndexerApiAccess(indexer=indexer, search=dbsearch, time=arrow.utcnow().replace(hours=-20).datetime, type="search", url="", response_successful=True).save()
        self.assertEqual(0, len(search.pick_indexers(search_request)))
Code Example #8
File: test_Search.py Project: gspu/nzbhydra
    def testIndexersApiLimits(self):

        config.settings.searching.generate_queries = []
        self.newznab1.hitLimit = 3
        self.newznab1.hitLimitResetTime = None
        config.settings.indexers = [self.newznab1]
        read_indexers_from_config()
        search_request = SearchRequest()
        indexers = search.pick_indexers(search_request)
        self.assertEqual(1, len(indexers))
        dbsearch = Search(internal=True, time=arrow.utcnow().datetime)
        dbsearch.save()
        indexer = Indexer().get(name="newznab1")
        
        #Two accesses one and 12 hours ago
        IndexerApiAccess(indexer=indexer, search=dbsearch, time=arrow.utcnow().replace(hours=-1).datetime, type="search", url="", response_successful=True).save()
        IndexerApiAccess(indexer=indexer, search=dbsearch, time=arrow.utcnow().replace(hours=-12).datetime, type="search", url="", response_successful=True).save()
        self.assertEqual(1, len(search.pick_indexers(search_request)))

        #Another one 20 hours ago, so limit should be reached
        IndexerApiAccess(indexer=indexer, search=dbsearch, time=arrow.utcnow().replace(hours=-20).datetime, type="search", url="", response_successful=True).save()
        self.assertEqual(0, len(search.pick_indexers(search_request)))
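
Examples #7 and #8 are identical apart from the fork they come from. Both assert that an indexer with hitLimit = 3 is no longer picked once three IndexerApiAccess rows fall inside the last 24 hours. A sketch of the kind of check pick_indexers presumably performs, written only from what the tests assert and not from the actual implementation, could be:

# Hypothetical hit-limit check implied by the tests above. A rolling 24-hour
# window is assumed because hitLimitResetTime is set to None.
def indexerHasHitsLeft(indexerEntry, hitLimit):
    since = arrow.utcnow().replace(hours=-24).datetime
    hitCount = IndexerApiAccess.select().where(
        (IndexerApiAccess.indexer == indexerEntry) &
        (IndexerApiAccess.time > since)).count()
    return hitCount < hitLimit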
Code Example #9
File: stats.py Project: nzbis/nzbhydra
def get_search_requests(page=0, limit=100, type=None):
    query = Search().select(Search.time, Search.internal, Search.query,
                            Search.identifier_key, Search.identifier_value,
                            Search.category, Search.season, Search.episode,
                            Search.type, Search.username)
    if type is not None and type != "All":
        query = query.where(
            Search.internal) if type == "Internal" else query.where(
                ~Search.internal)
    total_requests = query.count()
    requests = list(
        query.order_by(Search.time.desc()).paginate(page, limit).dicts())

    search_requests = {
        "totalRequests": total_requests,
        "searchRequests": requests
    }
    return search_requests
Code Example #10
File: search.py Project: gh0std4ncer/nzbhydra
def search(internal, search_request):
    for k in list(pseudo_cache.keys()):
        if pseudo_cache[k]["last_access"].replace(minutes=+5) < arrow.utcnow():
            pseudo_cache.pop(k)
    limit = search_request.limit  # todo use actual configured limit
    external_offset = int(search_request.offset)
    search_hash = search_request.search_hash
    if search_hash not in pseudo_cache.keys() or search_request.offset == 0: #If it's a new search (which starts with offset 0) do it again instead of using the cached results
        logger.debug("Didn't find this query in cache or want to do a new search")
        cache_entry = {"results": [], "indexer_infos": {}, "total": 0, "last_access": arrow.utcnow(), "offset": 0}
        indexers_to_call, with_query_generation = pick_indexers(query_supplied=True if search_request.query is not None and search_request.query != "" else False, identifier_key=search_request.identifier_key, internal=internal, selected_indexers=search_request.indexers)
        for p in indexers_to_call:
            cache_entry["indexer_infos"][p] = {"has_more": True, "search_request": search_request, "total_included": False}
        dbsearch = Search(internal=internal, query=search_request.query, category=search_request.category, identifier_key=search_request.identifier_key, identifier_value=search_request.identifier_value, season=search_request.season, episode=search_request.episode, type=search_request.type)
        dbsearch.save()
        cache_entry["dbsearch"] = dbsearch

        if with_query_generation and search_request.identifier_key and search_request.title is None:
            search_request.title = infos.title_from_id(search_request.identifier_key, search_request.identifier_value)
        pseudo_cache[search_hash] = cache_entry
    else:
        cache_entry = pseudo_cache[search_hash]
        indexers_to_call = [indexer for indexer, info in cache_entry["indexer_infos"].items() if info["has_more"]]
        dbsearch = cache_entry["dbsearch"]
        logger.debug("Found search in cache")

        logger.debug("Will search at indexers as long as we don't have enough results for the current offset+limit and any indexer has more results.")
    while len(cache_entry["results"]) < external_offset + limit and len(indexers_to_call) > 0:
        logger.debug("We want %d results but have only %d so far" % ((external_offset + limit), len(cache_entry["results"])))
        logger.debug("%d indexers still have results" % len(indexers_to_call))
        search_request.offset = cache_entry["offset"]
        logger.debug("Searching indexers with offset %d" % search_request.offset)
        result = search_and_handle_db(dbsearch, {x: search_request for x in indexers_to_call})
        search_results = []
        indexers_to_call = []
        for indexer, queries_execution_result in result["results"].items():
            search_results.extend(queries_execution_result.results)
            logger.debug("%s returned %d results" % (indexer, len(queries_execution_result.results)))
            cache_entry["indexer_infos"][indexer].update({"search_request": search_request, "has_more": queries_execution_result.has_more, "total": queries_execution_result.total, "total_known": queries_execution_result.total_known, "indexer_search": queries_execution_result.dbentry})
            if queries_execution_result.has_more:
                indexers_to_call.append(indexer)
                logger.debug("%s still has more results so we could use it the next round" % indexer)

            if queries_execution_result.total_known:
                if not cache_entry["indexer_infos"][indexer]["total_included"]:
                    cache_entry["total"] += queries_execution_result.total
                    logger.debug("%s reports %d total results. We'll include in the total this time only" % (indexer, queries_execution_result.total))
                    cache_entry["indexer_infos"][indexer]["total_included"] = True
            elif queries_execution_result.has_more:
                logger.debug("%s doesn't report an exact number of results so let's just add another 100 to the total" % indexer)
                cache_entry["total"] += 100

        search_results = sorted(search_results, key=lambda x: x.epoch, reverse=True)
        cache_entry["results"].extend(search_results)
        cache_entry["offset"] += limit
        #todo: perhaps move duplicate handling here. Would allow us to recognize duplicates that were added later, for example when 100 results were already loaded, we then get 101-200, and 100 and 101 are duplicates
        #todo: then make configurable if we want to delete duplicates for api, internal, both, none. Would also mean that we return 100 actually different results, otherwise in the worst case we could for example return 50 originals and 50 duplicates
    
    
    if internal:
        logger.debug("We have %d cached results and them all because we search internally" % len(cache_entry["results"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:])
    else:
        logger.debug("We have %d cached results and return %d-%d of %d total available accounting for the limit set for the API search" % (len(cache_entry["results"]), external_offset, external_offset + limit, cache_entry["total"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:(external_offset + limit)])
    cache_entry["last_access"] = arrow.utcnow()
    
    
    return {"results": nzb_search_results, "indexer_infos": cache_entry["indexer_infos"], "dbsearch": cache_entry["dbsearch"].id, "total": cache_entry["total"], "offset": external_offset}
Code Example #11
File: search.py Project: putneyj/nzbhydra
def search(search_request):
    logger.info("Starting new search: %s" % search_request)
    if search_request.maxage is None and config.settings.searching.maxAge:
        search_request.maxage = config.settings.searching.maxAge
        logger.info("Will ignore results older than %d days" % search_request.maxage)

    # Clean up cache
    for k in list(pseudo_cache.keys()):
        if pseudo_cache[k]["last_access"].replace(minutes=+5) < arrow.utcnow():
            pseudo_cache.pop(k)

    # Clean up old search results. We do this here because we don't have any background jobs and this is the function most regularly called
    keepFor = config.settings.main.keepSearchResultsForDays
    oldSearchResultsCount = countOldSearchResults(keepFor)
    if oldSearchResultsCount > 0:
        logger.info("Deleting %d search results from database that are older than %d days" % (oldSearchResultsCount, keepFor))
        SearchResult.delete().where(SearchResult.firstFound < (datetime.date.today() - datetime.timedelta(days=keepFor))).execute()
    else:
        if logger.getEffectiveLevel() == logging.DEBUG:
            logger.debug("%d search results stored in database" % SearchResult.select().count())

    limit = search_request.limit
    external_offset = int(search_request.offset)
    search_hash = search_request.search_hash
    categoryResult = categories.getCategoryByAnyInput(search_request.category)
    search_request.category = categoryResult
    if search_hash not in pseudo_cache.keys() or search_request.offset == 0:  # If it's a new search (which starts with offset 0) do it again instead of using the cached results
        logger.debug("Didn't find this query in cache or want to do a new search")
        cache_entry = {"results": [], "indexer_infos": {}, "total": 0, "last_access": arrow.utcnow(), "offset": 0, "rejected": SearchModule.getRejectedCountDict()}
        category = categoryResult.category
        indexers_to_call = pick_indexers(search_request)
        for p in indexers_to_call:
            cache_entry["indexer_infos"][p] = {"has_more": True, "search_request": search_request, "total_included": False}

        dbsearch = Search(internal=search_request.internal, query=search_request.query, category=categoryResult.category.pretty, identifier_key=search_request.identifier_key, identifier_value=search_request.identifier_value, season=search_request.season, episode=search_request.episode,
                          type=search_request.type, title=search_request.title, author=search_request.author, username=search_request.username)
        saveSearch(dbsearch)
        # dbsearch.save()
        cache_entry["dbsearch"] = dbsearch

        # Find ignored words and parse query for ignored words
        search_request.forbiddenWords = []
        search_request.requiredWords = []
        applyRestrictionsGlobal = config.settings.searching.applyRestrictions == "both" or (config.settings.searching.applyRestrictions == "internal" and search_request.internal) or (config.settings.searching.applyRestrictions == "external" and not search_request.internal)
        applyRestrictionsCategory = category.applyRestrictions == "both" or (category.applyRestrictions == "internal" and search_request.internal) or (search_request.category.category.applyRestrictions == "external" and not search_request.internal)
        if config.settings.searching.forbiddenWords and applyRestrictionsGlobal:
            logger.debug("Using configured global forbidden words: %s" % config.settings.searching.forbiddenWords)
            search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.forbiddenWords.split(",")))])
        if config.settings.searching.requiredWords and applyRestrictionsGlobal:
            logger.debug("Using configured global required words: %s" % config.settings.searching.requiredWords)
            search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.requiredWords.split(",")))])

        if category.forbiddenWords and applyRestrictionsCategory:
            logger.debug("Using configured forbidden words for category %s: %s" % (category.pretty, category.forbiddenWords))
            search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, category.forbiddenWords.split(",")))])
        if category.requiredWords and applyRestrictionsCategory:
            logger.debug("Using configured required words for category %s: %s" % (category.pretty, category.requiredWords))
            search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, category.requiredWords.split(",")))])

        if search_request.query:
            forbiddenWords = [str(x[1]) for x in re.findall(r"[\s|\b](\-\-|!)(?P<term>\w+)", search_request.query)]
            if len(forbiddenWords) > 0:
                logger.debug("Query before removing NOT terms: %s" % search_request.query)
                search_request.query = re.sub(r"[\s|\b](\-\-|!)(?P<term>\w+)", "", search_request.query)
                logger.debug("Query after removing NOT terms: %s" % search_request.query)
                logger.debug("Found NOT terms: %s" % ",".join(forbiddenWords))

                search_request.forbiddenWords.extend(forbiddenWords)
        cache_entry["forbiddenWords"] = search_request.forbiddenWords
        cache_entry["requiredWords"] = search_request.requiredWords
        cache_entry["query"] = search_request.query

        pseudo_cache[search_hash] = cache_entry
    else:
        cache_entry = pseudo_cache[search_hash]
        indexers_to_call = [indexer for indexer, info in cache_entry["indexer_infos"].items() if info["has_more"]]
        dbsearch = cache_entry["dbsearch"]
        search_request.forbiddenWords = cache_entry["forbiddenWords"]
        search_request.requiredWords = cache_entry["requiredWords"]
        search_request.query = cache_entry["query"]
        logger.debug("Found search in cache")

        logger.debug("Will search at indexers as long as we don't have enough results for the current offset+limit and any indexer has more results.")
    if search_request.loadAll:
        logger.debug("Requested to load all results. Will continue to search until all indexers are exhausted")
    while (len(cache_entry["results"]) < external_offset + limit or search_request.loadAll) and len(indexers_to_call) > 0:
        if len(cache_entry["results"]) < external_offset + limit:
            logger.debug("We want %d results but have only %d so far" % ((external_offset + limit), len(cache_entry["results"])))
        elif search_request.loadAll:
            logger.debug("All results requested. Continuing to search.")
        logger.debug("%d indexers still have results" % len(indexers_to_call))
        search_request.offset = cache_entry["offset"]

        logger.debug("Searching indexers with offset %d" % search_request.offset)
        result = search_and_handle_db(dbsearch, {x: search_request for x in indexers_to_call})
        logger.debug("All search calls to indexers completed")
        search_results = []
        indexers_to_call = []

        waslocked = False
        before = arrow.now()
        if databaseLock.locked():
            logger.debug("Database accesses locked by other search. Will wait for our turn.")
            waslocked = True
        databaseLock.acquire()
        if waslocked:
            after = arrow.now()
            took = (after - before).seconds * 1000 + (after - before).microseconds / 1000
            logger.debug("Waited %dms for database lock" % took)
        for indexer, queries_execution_result in result["results"].items():
            with db.atomic():
                logger.info("%s returned %d results" % (indexer, len(queries_execution_result.results)))
                for result in queries_execution_result.results:
                    if result.title is None or result.link is None or result.indexerguid is None:
                        logger.info("Skipping result with missing data: %s" % result)
                        continue
                    try:
                        searchResultId = hashlib.sha1(str(indexer.indexer.id) + result.indexerguid).hexdigest()
                        tryGetOrCreateSearchResultDbEntry(searchResultId, indexer.indexer.id, result)
                        result.searchResultId = searchResultId
                        search_results.append(result)
                    except (IntegrityError, OperationalError) as e:
                        logger.error("Error while trying to save search result to database. Skipping it. Error: %s" % e)

            cache_entry["indexer_infos"][indexer].update(
                {"did_search": queries_execution_result.didsearch, "indexer": indexer.name, "search_request": search_request, "has_more": queries_execution_result.has_more, "total": queries_execution_result.total, "total_known": queries_execution_result.total_known,
                 "indexer_search": queries_execution_result.indexerSearchEntry, "rejected": queries_execution_result.rejected, "processed_results": queries_execution_result.loaded_results})
            if queries_execution_result.has_more:
                indexers_to_call.append(indexer)
                logger.debug("%s still has more results so we could use it the next round" % indexer)

            if queries_execution_result.total_known:
                if not cache_entry["indexer_infos"][indexer]["total_included"]:
                    cache_entry["total"] += queries_execution_result.total
                    logger.debug("%s reports %d total results. We'll include in the total this time only" % (indexer, queries_execution_result.total))
                    cache_entry["indexer_infos"][indexer]["total_included"] = True
            elif queries_execution_result.has_more:
                logger.debug("%s doesn't report an exact number of results so let's just add another 100 to the total" % indexer)
                cache_entry["total"] += 100
            for rejectKey in cache_entry["rejected"].keys():
                if rejectKey in cache_entry["indexer_infos"][indexer]["rejected"].keys():
                    cache_entry["rejected"][rejectKey] += cache_entry["indexer_infos"][indexer]["rejected"][rejectKey]

        databaseLock.release()

        logger.debug("Searching for duplicates")
        numberResultsBeforeDuplicateRemoval = len(search_results)
        grouped_by_sameness, uniqueResultsPerIndexer = find_duplicates(search_results)
        allresults = []
        for group in grouped_by_sameness:
            if search_request.internal:
                for i in group:
                    # We give each group of results a unique value by which they can be identified later
                    i.hash = hash(group[0].details_link)
                    allresults.append(i)

            else:
                # We sort by age first and then by indexerscore so the newest result with the highest indexer score is chosen
                group = sorted(group, key=lambda x: x.epoch, reverse=True)
                group = sorted(group, key=lambda x: x.indexerscore, reverse=True)
                allresults.append(group[0])
        search_results = allresults

        with databaseLock:
            for indexer, infos in cache_entry["indexer_infos"].iteritems():
                if indexer.name in uniqueResultsPerIndexer.keys():  # If the search failed it isn't contained in the duplicates list
                    uniqueResultsCount = uniqueResultsPerIndexer[infos["indexer"]]
                    processedResults = infos["processed_results"]
                    logger.debug("Indexer %s had a unique results share of %d%% (%d of %d total results were only provided by this indexer)" % (indexer.name, 100 / (numberResultsBeforeDuplicateRemoval / uniqueResultsCount), uniqueResultsCount, numberResultsBeforeDuplicateRemoval))
                    infos["indexer_search"].uniqueResults = uniqueResultsCount
                    infos["indexer_search"].processedResults = processedResults
                    infos["indexer_search"].save()

        if not search_request.internal:
            countAfter = len(search_results)
            countRemoved = numberResultsBeforeDuplicateRemoval - countAfter
            logger.info("Removed %d duplicates from %d results" % (countRemoved, numberResultsBeforeDuplicateRemoval))

        search_results = sorted(search_results, key=lambda x: x.epoch, reverse=True)

        cache_entry["results"].extend(search_results)
        cache_entry["offset"] += limit

    if len(indexers_to_call) == 0:
        logger.info("All indexers exhausted")
    elif len(cache_entry["results"]) >= external_offset + limit:
        logger.debug("Loaded a total of %d results which is enough for the %d requested. Stopping search." % (len(cache_entry["results"]), (external_offset + limit)))

    if search_request.internal:
        logger.debug("We have %d cached results and return them all because we search internally" % len(cache_entry["results"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:])
    else:
        logger.debug("We have %d cached results and return %d-%d of %d total available accounting for the limit set for the API search" % (len(cache_entry["results"]), external_offset, external_offset + limit, cache_entry["total"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:(external_offset + limit)])
    cache_entry["last_access"] = arrow.utcnow()
    for k, v in cache_entry["rejected"].items():
        if v > 0:
            logger.info("Rejected %d because: %s" % (v, k))
    logger.info("Returning %d results" % len(nzb_search_results))
    return {"results": nzb_search_results, "indexer_infos": cache_entry["indexer_infos"], "dbsearchid": cache_entry["dbsearch"].id, "total": cache_entry["total"], "offset": external_offset, "rejected": cache_entry["rejected"].items()}
Code Example #12
def search(internal, search_request):
    for k in list(pseudo_cache.keys()):
        if pseudo_cache[k]["last_access"].replace(minutes=+5) < arrow.utcnow():
            pseudo_cache.pop(k)
    limit = search_request.limit
    external_offset = int(search_request.offset)
    search_hash = search_request.search_hash
    if search_hash not in pseudo_cache.keys(
    ) or search_request.offset == 0:  # If it's a new search (which starts with offset 0) do it again instead of using the cached results
        logger.debug(
            "Didn't find this query in cache or want to do a new search")
        cache_entry = {
            "results": [],
            "indexer_infos": {},
            "total": 0,
            "last_access": arrow.utcnow(),
            "offset": 0
        }
        indexers_to_call, with_query_generation = pick_indexers(
            query_supplied=True if search_request.query is not None
            and search_request.query != "" else False,
            identifier_key=search_request.identifier_key,
            internal=internal,
            selected_indexers=search_request.indexers)
        for p in indexers_to_call:
            cache_entry["indexer_infos"][p] = {
                "has_more": True,
                "search_request": search_request,
                "total_included": False
            }
        dbsearch = Search(internal=internal,
                          query=search_request.query,
                          category=search_request.category,
                          identifier_key=search_request.identifier_key,
                          identifier_value=search_request.identifier_value,
                          season=search_request.season,
                          episode=search_request.episode,
                          type=search_request.type,
                          username=request.authorization.username
                          if request.authorization is not None else None)
        #dbsearch.save()
        cache_entry["dbsearch"] = dbsearch

        if with_query_generation and search_request.identifier_key and search_request.title is None:
            try:
                search_request.title = infos.title_from_id(
                    search_request.identifier_key,
                    search_request.identifier_value)
            except:
                pass
        pseudo_cache[search_hash] = cache_entry
    else:
        cache_entry = pseudo_cache[search_hash]
        indexers_to_call = [
            indexer for indexer, info in cache_entry["indexer_infos"].items()
            if info["has_more"]
        ]
        dbsearch = cache_entry["dbsearch"]
        logger.debug("Found search in cache")

        logger.debug(
            "Will search at indexers as long as we don't have enough results for the current offset+limit and any indexer has more results."
        )
    while len(cache_entry["results"]) < external_offset + limit and len(
            indexers_to_call) > 0:
        logger.debug("We want %d results but have only %d so far" %
                     ((external_offset + limit), len(cache_entry["results"])))
        logger.debug("%d indexers still have results" % len(indexers_to_call))
        search_request.offset = cache_entry["offset"]
        logger.debug("Searching indexers with offset %d" %
                     search_request.offset)
        result = search_and_handle_db(
            dbsearch, {x: search_request
                       for x in indexers_to_call})
        search_results = []
        indexers_to_call = []

        for indexer, queries_execution_result in result["results"].items():
            search_results.extend(queries_execution_result.results)
            logger.debug("%s returned %d results" %
                         (indexer, len(queries_execution_result.results)))
            cache_entry["indexer_infos"][indexer].update({
                "did_search":
                queries_execution_result.didsearch,
                "indexer":
                indexer.name,
                "search_request":
                search_request,
                "has_more":
                queries_execution_result.has_more,
                "total":
                queries_execution_result.total,
                "total_known":
                queries_execution_result.total_known,
                "indexer_search":
                queries_execution_result.indexerSearchEntry
            })
            if queries_execution_result.has_more:
                indexers_to_call.append(indexer)
                logger.debug(
                    "%s still has more results so we could use it the next round"
                    % indexer)

            if queries_execution_result.total_known:
                if not cache_entry["indexer_infos"][indexer]["total_included"]:
                    cache_entry["total"] += queries_execution_result.total
                    logger.debug(
                        "%s reports %d total results. We'll include in the total this time only"
                        % (indexer, queries_execution_result.total))
                    cache_entry["indexer_infos"][indexer][
                        "total_included"] = True
            elif queries_execution_result.has_more:
                logger.debug(
                    "%s doesn't report an exact number of results so let's just add another 100 to the total"
                    % indexer)
                cache_entry["total"] += 100

        if internal or config.settings.searching.removeDuplicatesExternal:
            countBefore = len(search_results)
            grouped_by_sameness = find_duplicates(search_results)
            allresults = []
            for group in grouped_by_sameness:
                if internal:
                    for i in group:
                        # We give each group of results a unique value by which they can be identified later
                        i.hash = hash(group[0].guid)
                        allresults.append(i)

                else:
                    # We sort by age first and then by indexerscore so the newest result with the highest indexer score is chosen
                    group = sorted(group, key=lambda x: x.epoch, reverse=True)
                    group = sorted(group,
                                   key=lambda x: x.indexerscore,
                                   reverse=True)
                    allresults.append(group[0])
            search_results = allresults
            if not internal:
                countAfter = len(search_results)
                countRemoved = countBefore - countAfter
                logger.info("Removed %d duplicates from %d results" %
                            (countRemoved, countBefore))
        search_results = sorted(search_results,
                                key=lambda x: x.epoch,
                                reverse=True)

        cache_entry["results"].extend(search_results)
        cache_entry["offset"] += limit

    if internal:
        logger.debug(
            "We have %d cached results and return them all because we search internally"
            % len(cache_entry["results"]))
        nzb_search_results = copy.deepcopy(
            cache_entry["results"][external_offset:])
    else:
        logger.debug(
            "We have %d cached results and return %d-%d of %d total available accounting for the limit set for the API search"
            % (len(cache_entry["results"]), external_offset,
               external_offset + limit, cache_entry["total"]))
        nzb_search_results = copy.deepcopy(
            cache_entry["results"][external_offset:(external_offset + limit)])
    cache_entry["last_access"] = arrow.utcnow()

    return {
        "results": nzb_search_results,
        "indexer_infos": cache_entry["indexer_infos"],
        "dbsearchid": cache_entry["dbsearch"].id,
        "total": cache_entry["total"],
        "offset": external_offset
    }
Code Example #13
def search(search_request):
    if search_request.maxage is None and config.settings.searching.maxAge:
        search_request.maxage = config.settings.searching.maxAge
        logger.info("Will ignore results older than %d days" % search_request.maxage)

    # Clean up cache
    for k in list(pseudo_cache.keys()):
        if pseudo_cache[k]["last_access"].replace(minutes=+5) < arrow.utcnow():
            pseudo_cache.pop(k)

    # Clean up old search results. We do this here because we don't have any background jobs and this is the function most regularly called
    keepFor = config.settings.main.keepSearchResultsForDays
    oldSearchResultsCount = SearchResult.select().where(SearchResult.firstFound < (datetime.date.today() - datetime.timedelta(days=keepFor))).count()
    if oldSearchResultsCount > 0:
        logger.info("Deleting %d search results from database that are older than %d days" % (oldSearchResultsCount, keepFor))
        SearchResult.delete().where(SearchResult.firstFound < (datetime.date.today() - datetime.timedelta(days=keepFor))).execute()
    else:
        if logger.getEffectiveLevel() == logging.DEBUG:
            logger.debug("%d search results stored in database" % SearchResult.select().count())

    limit = search_request.limit
    external_offset = int(search_request.offset)
    search_hash = search_request.search_hash
    categoryResult = categories.getCategoryByAnyInput(search_request.category)
    search_request.category = categoryResult
    if search_hash not in pseudo_cache.keys() or search_request.offset == 0:  # If it's a new search (which starts with offset 0) do it again instead of using the cached results
        logger.debug("Didn't find this query in cache or want to do a new search")
        cache_entry = {"results": [], "indexer_infos": {}, "total": 0, "last_access": arrow.utcnow(), "offset": 0}
        category = categoryResult.category
        indexers_to_call = pick_indexers(search_request)
        for p in indexers_to_call:
            cache_entry["indexer_infos"][p] = {"has_more": True, "search_request": search_request, "total_included": False}
        
        dbsearch = Search(internal=search_request.internal, query=search_request.query, category=categoryResult.category.pretty, identifier_key=search_request.identifier_key, identifier_value=search_request.identifier_value, season=search_request.season, episode=search_request.episode, type=search_request.type,
                          username=search_request.username)
        # dbsearch.save()
        cache_entry["dbsearch"] = dbsearch

        # Find ignored words and parse query for ignored words
        search_request.forbiddenWords = []
        search_request.requiredWords = []
        applyRestrictionsGlobal = config.settings.searching.applyRestrictions == "both" or (config.settings.searching.applyRestrictions == "internal" and search_request.internal) or (config.settings.searching.applyRestrictions == "external" and not search_request.internal)
        applyRestrictionsCategory = category.applyRestrictions == "both" or (category.applyRestrictions == "internal" and search_request.internal) or (search_request.category.category.applyRestrictions == "external" and not search_request.internal)
        if config.settings.searching.forbiddenWords and applyRestrictionsGlobal:
            logger.debug("Using configured global forbidden words: %s" % config.settings.searching.forbiddenWords)
            search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.forbiddenWords.split(",")))])
        if config.settings.searching.requiredWords and applyRestrictionsGlobal:
            logger.debug("Using configured global required words: %s" % config.settings.searching.requiredWords)
            search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.requiredWords.split(",")))])
        
        if category.forbiddenWords and applyRestrictionsCategory:
            logger.debug("Using configured forbidden words for category %s: %s" % (category.pretty, category.forbiddenWords))
            search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, category.forbiddenWords.split(",")))])
        if category.requiredWords and applyRestrictionsCategory:
            logger.debug("Using configured required words for category %s: %s" % (category.pretty, category.requiredWords))
            search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, category.requiredWords.split(",")))])
        
        
        if search_request.query:
            forbiddenWords = [str(x[1]) for x in re.findall(r"[\s|\b](\-\-|!)(?P<term>\w+)", search_request.query)]
            if len(forbiddenWords) > 0:
                logger.debug("Query before removing NOT terms: %s" % search_request.query)
                search_request.query = re.sub(r"[\s|\b](\-\-|!)(?P<term>\w+)", "", search_request.query)
                logger.debug("Query after removing NOT terms: %s" % search_request.query)
                logger.debug("Found NOT terms: %s" % ",".join(forbiddenWords))

                search_request.forbiddenWords.extend(forbiddenWords)

        pseudo_cache[search_hash] = cache_entry
    else:
        cache_entry = pseudo_cache[search_hash]
        indexers_to_call = [indexer for indexer, info in cache_entry["indexer_infos"].items() if info["has_more"]]
        dbsearch = cache_entry["dbsearch"]
        logger.debug("Found search in cache")

        logger.debug("Will search at indexers as long as we don't have enough results for the current offset+limit and any indexer has more results.")
    while len(cache_entry["results"]) < external_offset + limit and len(indexers_to_call) > 0:
        logger.debug("We want %d results but have only %d so far" % ((external_offset + limit), len(cache_entry["results"])))
        logger.debug("%d indexers still have results" % len(indexers_to_call))
        search_request.offset = cache_entry["offset"]
        
        logger.debug("Searching indexers with offset %d" % search_request.offset)
        result = search_and_handle_db(dbsearch, {x: search_request for x in indexers_to_call})
        logger.debug("All search calls to indexers completed")
        search_results = []
        indexers_to_call = []

        for indexer, queries_execution_result in result["results"].items():
            #Drastically improves db access time but means that if one database write fails all fail. That's a risk we need to take 
            with db.atomic():
                logger.debug("%s returned %d results. Writing them to database..." % (indexer, len(queries_execution_result.results)))
                for result in queries_execution_result.results:
                    if result.title is None or result.link is None or result.indexerguid is None:
                        logger.info("Skipping result with missing data: %s" % result)
                        continue
                    searchResult, _ = SearchResult().create_or_get(indexer=indexer.indexer, guid=result.indexerguid, title= result.title, link= result.link, details= result.details_link)
                    result.searchResultId = searchResult.id
                    search_results.append(result)
                logger.debug("Written results results to database")

                cache_entry["indexer_infos"][indexer].update(
                    {"did_search": queries_execution_result.didsearch, "indexer": indexer.name, "search_request": search_request, "has_more": queries_execution_result.has_more, "total": queries_execution_result.total, "total_known": queries_execution_result.total_known,
                     "indexer_search": queries_execution_result.indexerSearchEntry})
                if queries_execution_result.has_more:
                    indexers_to_call.append(indexer)
                    logger.debug("%s still has more results so we could use it the next round" % indexer)
        
                if queries_execution_result.total_known:
                    if not cache_entry["indexer_infos"][indexer]["total_included"]:
                        cache_entry["total"] += queries_execution_result.total
                        logger.debug("%s reports %d total results. We'll include in the total this time only" % (indexer, queries_execution_result.total))
                        cache_entry["indexer_infos"][indexer]["total_included"] = True
                elif queries_execution_result.has_more:
                    logger.debug("%s doesn't report an exact number of results so let's just add another 100 to the total" % indexer)
                    cache_entry["total"] += 100

        if search_request.internal or config.settings.searching.removeDuplicatesExternal:
            logger.debug("Searching for duplicates")
            countBefore = len(search_results)
            grouped_by_sameness = find_duplicates(search_results)
            allresults = []
            for group in grouped_by_sameness:
                if search_request.internal:
                    for i in group:
                        # We give each group of results a unique value by which they can be identified later
                        i.hash = hash(group[0].details_link)
                        allresults.append(i)

                else:
                    # We sort by age first and then by indexerscore so the newest result with the highest indexer score is chosen
                    group = sorted(group, key=lambda x: x.epoch, reverse=True)
                    group = sorted(group, key=lambda x: x.indexerscore, reverse=True)
                    allresults.append(group[0])
            search_results = allresults
            if not search_request.internal:
                countAfter = len(search_results)
                countRemoved = countBefore - countAfter
                logger.info("Removed %d duplicates from %d results" % (countRemoved, countBefore))
        search_results = sorted(search_results, key=lambda x: x.epoch, reverse=True)

        cache_entry["results"].extend(search_results)
        cache_entry["offset"] += limit

    if search_request.internal:
        logger.debug("We have %d cached results and return them all because we search internally" % len(cache_entry["results"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:])
    else:
        logger.debug("We have %d cached results and return %d-%d of %d total available accounting for the limit set for the API search" % (len(cache_entry["results"]), external_offset, external_offset + limit, cache_entry["total"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:(external_offset + limit)])
    cache_entry["last_access"] = arrow.utcnow()
    logger.info("Returning %d results" % len(nzb_search_results))
    return {"results": nzb_search_results, "indexer_infos": cache_entry["indexer_infos"], "dbsearchid": cache_entry["dbsearch"].id, "total": cache_entry["total"], "offset": external_offset}
Code Example #14
File: test_Search.py Project: petrichor8/nzbhydra
    def testThatDatabaseValuesAreStored(self):
        with self.app.test_request_context('/'):
            with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps:
                newznabItems = [
                    [mockbuilder.buildNewznabItem(title="title1", pubdate=arrow.get(1000).format("ddd, DD MMM YYYY HH:mm:ss Z"), size=1000, indexer_name="newznab1")],
                    [mockbuilder.buildNewznabItem(title="title2", pubdate=arrow.get(1000).format("ddd, DD MMM YYYY HH:mm:ss Z"), size=1000, indexer_name="newznab2")]
                ]

                self.prepareSearchMocks(rsps, indexerCount=len(newznabItems), newznabItems=newznabItems)
                # Make the second access unsuccessful
                rsps._urls.pop(1)
                rsps.add(responses.GET, r".*",
                         body="an error message", status=500,
                         content_type='application/x-html')

                searchRequest = SearchRequest(type="search", query="aquery", category="acategory", identifier_key="imdbid", identifier_value="animdbid", season=1, episode=2, indexers="newznab1|newznab2")
                result = search.search(searchRequest)
                results = result["results"]
                self.assertEqual(1, len(results))

                dbSearch = Search().get()
                self.assertEqual(True, dbSearch.internal)
                self.assertEqual("aquery", dbSearch.query)
                self.assertEqual("All", dbSearch.category)
                self.assertEqual("imdbid", dbSearch.identifier_key)
                self.assertEqual("animdbid", dbSearch.identifier_value)
                self.assertEqual("1", dbSearch.season)
                self.assertEqual("2", dbSearch.episode)
                self.assertEqual("search", dbSearch.type)
                self.assertEqual(18, dbSearch.time.hour)

                indexerSearch1 = IndexerSearch.get(IndexerSearch.indexer == Indexer.get(Indexer.name == "newznab1"))
                self.assertEqual(indexerSearch1.search, dbSearch)
                self.assertEqual(18, indexerSearch1.time.hour)

                indexerSearch2 = IndexerSearch.get(IndexerSearch.indexer == Indexer.get(Indexer.name == "newznab2"))
                self.assertEqual(indexerSearch2.search, dbSearch)
                self.assertEqual(18, indexerSearch2.time.hour)

                calledUrls = sorted([x.request.url for x in rsps.calls])

                indexerApiAccess1 = IndexerApiAccess.get(IndexerApiAccess.indexer == Indexer.get(Indexer.name == "newznab1"))
                self.assertEqual(indexerSearch1, indexerApiAccess1.indexer_search)
                self.assertEqual(18, indexerApiAccess1.time.hour)
                self.assertEqual("search", indexerApiAccess1.type)
                self.assertEqual(calledUrls[0], indexerApiAccess1.url)
                self.assertTrue(indexerApiAccess1.response_successful)
                self.assertEqual(0, indexerApiAccess1.response_time)
                self.assertIsNone(indexerApiAccess1.error)

                indexerApiAccess2 = IndexerApiAccess.get(IndexerApiAccess.indexer == Indexer.get(Indexer.name == "newznab2"))
                self.assertEqual(indexerSearch2, indexerApiAccess2.indexer_search)
                self.assertEqual(18, indexerApiAccess2.time.hour)
                self.assertEqual("search", indexerApiAccess2.type)
                self.assertEqual(calledUrls[1], indexerApiAccess2.url)
                self.assertFalse(indexerApiAccess2.response_successful)
                self.assertIsNone(indexerApiAccess2.response_time)
                self.assertTrue("Connection refused" in indexerApiAccess2.error)

                indexerStatus2 = IndexerStatus.get(IndexerStatus.indexer == Indexer.get(Indexer.name == "newznab2"))
                self.assertEqual(1, indexerStatus2.level)
                self.assertTrue("Connection refused" in indexerStatus2.reason)