Example #1
def internalapi_addnzb(args):
    logger.debug("Add NZB request with args %s" % args)
    searchResultIds = json.loads(args["searchresultids"])
    try:
        downloader = getDownloaderInstanceByName(args["downloader"])
    except DownloaderNotFoundException as e:
        logger.error(e.message)
        return jsonify({"success": False})
    added = 0
    for searchResultId in searchResultIds:
        try:
            searchResult = SearchResult.get(SearchResult.id == searchResultId)
        except SearchResult.DoesNotExist:
            logger.error("Unable to find search result with ID %d in database" % searchResultId)
            continue
        link = get_nzb_link_and_guid(searchResultId, True, downloader=downloader.setting.name)

        if downloader.setting.nzbAddingType == config.NzbAddingTypeSelection.link:  # We send a link to the downloader. The link either points back to us (where it gets answered or redirected so that getnzb is called later) or directly to the indexer
            add_success = downloader.add_link(link, searchResult.title, args["category"])
        else:  # We download the NZB ourselves and send it to the downloader
            nzbdownloadresult = download_nzb_and_log(searchResultId)
            if nzbdownloadresult is not None:
                add_success = downloader.add_nzb(nzbdownloadresult.content, searchResult.title, args["category"])
            else:
                add_success = False
        if add_success:
            added += 1

    if added:
        return jsonify({"success": True, "added": added, "of": len(searchResultIds)})
    else:
        return jsonify({"success": False})
Example #2
def tryGetOrCreateSearchResultDbEntry(searchResultId, indexerId, result):
    try:
        return SearchResult().get(SearchResult.id == searchResultId)
    except SearchResult.DoesNotExist:
        return SearchResult().create(id=searchResultId,
                                     indexer_id=indexerId,
                                     guid=result.indexerguid,
                                     title=result.title,
                                     link=result.link,
                                     details=result.details_link,
                                     firstFound=datetime.datetime.utcnow())
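
All of these snippets revolve around the peewee SearchResult model. Below is a hypothetical reconstruction of that model, inferred only from the fields accessed in these examples; the real nzbhydra schema may differ in field types and constraints.

import datetime

import peewee

db = peewee.SqliteDatabase(":memory:")  # stand-in; the real project configures its own database


class Indexer(peewee.Model):
    name = peewee.CharField()

    class Meta:
        database = db


class SearchResult(peewee.Model):
    # Assumption: IDs are stored as text because later snippets use sha1 hex digests as IDs.
    id = peewee.CharField(primary_key=True)
    indexer = peewee.ForeignKeyField(Indexer)
    guid = peewee.CharField()
    title = peewee.CharField(null=True)
    link = peewee.CharField(null=True)
    details = peewee.CharField(null=True)
    firstFound = peewee.DateTimeField(default=datetime.datetime.utcnow)

    class Meta:
        database = db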
Example #3
def api(args):
    logger.debug(request.url)
    logger.debug("API request: %s" % args)
    # Map newznab api parameters to internal
    args["category"] = args["cat"]
    args["episode"] = args["ep"]

    if args["q"] is not None and args["q"] != "":
        args["query"] = args["q"]  # Because internally we work with "query" instead of "q"
    if config.settings.main.apikey and ("apikey" not in args or args["apikey"] != config.settings.main.apikey):
        logger.error("Tried API access with invalid or missing API key")
        raise Unauthorized("API key not provided or invalid")
    elif args["t"] in ("search", "tvsearch", "movie", "book"):
        return api_search(args)
    elif args["t"] == "get":
        searchResultId = int(args["id"][len("nzbhydrasearchresult"):])
        searchResult = SearchResult.get(SearchResult.id == searchResultId)
        if config.settings.main.logging.logIpAddresses:
            logger.info("API request from %s to download %s from %s" % (getIp(), searchResult.title, searchResult.indexer.name))
        else:
            logger.info("API request to download %s from %s" % (searchResult.title, searchResult.indexer.name))
        return extract_nzb_infos_and_return_response(searchResultId)
    elif args["t"] == "caps":
        xml = render_template("caps.html")
        return Response(xml, mimetype="text/xml")
    elif args["t"] == "details":
        searchResultId = int(args["id"][len("nzbhydrasearchresult"):])
        searchResult = SearchResult.get(SearchResult.id == searchResultId)
        logger.info("API request from to get detils for %s from %s" % (searchResult.title, searchResult.indexer.name))
        item = get_entry_by_id(searchResult.indexer.name, searchResult.guid, searchResult.title)
        if item is None:
            logger.error("Unable to find or parse details for %s" % searchResult.title)
            return "Unable to get details", 500
        item.link = get_nzb_link_and_guid(searchResultId, False)[0]  # We need to make sure the link in the details refers to us
        return render_search_results_for_api([item], None, None, output=args["o"])
    elif args["t"] == "getnfo":
        searchResultId = int(args["id"][len("nzbhydrasearchresult"):])
        result = get_nfo(searchResultId)
        if result["has_nfo"]:
            if args["raw"] == 1:
                return result["nfo"]
            else:
                # TODO Return as json if requested
                return render_template("nfo.html", nfo=result["nfo"])
        else:
            return Response('<error code="300" description="No such item"/>', mimetype="text/xml")

    else:
        logger.error("Unknown API request. Supported functions: search, tvsearch, movie, get, caps, details, getnfo")
        return "Unknown API request. Supported functions: search, tvsearch, movie, get, caps, details, getnfo", 500
Example #4
def get_nzb_response(searchResultId):
    try:
        searchResult = SearchResult.get(SearchResult.id == searchResultId)
    except SearchResult.DoesNotExist:
        logger.error("Unable to find search result with ID %s" %
                     searchResultId)
        return "Unable to find search result with ID %s" % searchResultId, 500
    nzbdownloadresult = download_nzb_and_log(searchResultId)
    if nzbdownloadresult is not None:
        bio = BytesIO(nzbdownloadresult.content)
        filename = searchResult.title + ".nzb" if searchResult.title is not None else "nzbhydra.nzb"
        response = send_file(bio,
                             mimetype='application/x-nzb',
                             as_attachment=True,
                             attachment_filename=filename,
                             add_etags=False)
        response.headers["content-length"] = len(nzbdownloadresult.content)

        for header in nzbdownloadresult.headers.keys():
            if header.lower().startswith("x-dnzb") or header.lower() in (
                    "content-disposition", "content-type"):
                response.headers[header] = nzbdownloadresult.headers[header]
        logger.info("Returning downloaded NZB %s from %s" %
                    (searchResult.title, searchResult.indexer.name))
        return response
    else:
        logger.error("Error while trying to download NZB %s from %s" %
                     (searchResult.title, searchResult.indexer.name))
        return "Unable to download NZB", 500
Example #5
def get_details(guid):
    searchResultId = int(guid[len("nzbhydrasearchresult"):])
    searchResult = SearchResult.get(SearchResult.id == searchResultId)
    details_link = get_details_link(searchResult.indexer.name, searchResult.guid)
    if details_link:
        return redirect(details_link)
    return "Unable to find details", 500
Example #6
def download_nzb_and_log(searchResultId):
    link, papiaccess, _ = get_indexer_nzb_link(searchResultId, "serve", True)
    indexerName = None
    try:
        indexerName = SearchResult.get(SearchResult.id == searchResultId).indexer.name
        indexer = indexers.getIndexerByName(indexerName)
        r = indexer.get(link, timeout=10)
        r.raise_for_status()

        papiaccess.response_successful = True
        papiaccess.response_time = int(r.elapsed.total_seconds() * 1000)

        return IndexerNzbDownloadResult(content=r.content, headers=r.headers)
    except IndexerNotFoundException:
        if indexerName:
            logger.error("Unable to find indexer with name %s" % indexerName)
        else:
            logger.error("Unable to find indexer for search result id %s" % searchResultId)
        return None
    except SearchResult.DoesNotExist:
        logger.error("Unable to find search result with ID %s" % searchResultId)
        return None
    except RequestException as e:
        logger.error("Error while connecting to URL %s: %s" % (link, str(e)))
        papiaccess.error = str(e)
        return None
    finally:
        papiaccess.save()
Example #7
def get_indexer_nzb_link(searchResultId, mode, log_api_access, internal=False):
    """
    Build a link that leads to the actual NZB of the indexer using the given informations. We log this as indexer API access and NZB download because this is only called
    when the NZB will be actually downloaded later (by us or a downloader) 
    :return: str
    """
    searchResult = SearchResult.get(SearchResult.id == searchResultId)
    indexerName = searchResult.indexer.name
    indexer = indexers.getIndexerByName(indexerName)
    link = searchResult.link

    # Log to database
    papiaccess = IndexerApiAccess(
        indexer=indexer.indexer,
        type="nzb",
        url=link,
        response_successful=None) if log_api_access else None
    if papiaccess is not None:  # Only touch the API access entry if we actually created one
        try:
            papiaccess.username = request.authorization.username if request.authorization is not None else None
        except RuntimeError:
            pass
        papiaccess.save()
    pnzbdl = IndexerNzbDownload(searchResult=searchResult,
                                apiAccess=papiaccess,
                                mode=mode,
                                title=searchResult.title,
                                internal=internal)
    pnzbdl.save()

    return link, papiaccess, pnzbdl
Example #8
def download_nzb_and_log(searchResultId):
    link, papiaccess, _ = get_indexer_nzb_link(searchResultId, "serve", True)
    indexerName = None
    try:
        indexerName = SearchResult.get(
            SearchResult.id == searchResultId).indexer.name
        indexer = indexers.getIndexerByName(indexerName)
        r = indexer.get(link, timeout=10)
        r.raise_for_status()

        papiaccess.response_successful = True
        papiaccess.response_time = int(r.elapsed.total_seconds() * 1000)

        return IndexerNzbDownloadResult(content=r.content, headers=r.headers)
    except IndexerNotFoundException:
        if indexerName:
            logger.error("Unable to find indexer with name %s" % indexerName)
        else:
            logger.error("Unable to find indexer for search result id %s" %
                         searchResultId)
        return None
    except SearchResult.DoesNotExist:
        logger.error("Unable to find search result with ID %s" %
                     searchResultId)
        return None
    except RequestException as e:
        logger.error("Error while connecting to URL %s: %s" % (link, str(e)))
        papiaccess.error = str(e)
        return None
    finally:
        papiaccess.save()
Example #9
def getnzb(args):
    logger.debug("Get NZB request with args %s" % args)
    searchResult = SearchResult.get(SearchResult.id == args["searchresultid"])
    if config.settings.main.logging.logIpAddresses:
        logger.info("API request from %s to download %s from %s" % (getIp(), searchResult.title, searchResult.indexer.name))
    else:
        logger.info("API request to download %s from %s" % (searchResult.title, searchResult.indexer.name))
    return extract_nzb_infos_and_return_response(args["searchresultid"], args["downloader"])
Example #10
def get_nfo(searchresultid):
    try:
        searchResult = SearchResult.get(SearchResult.id == searchresultid)
        indexer = indexers.getIndexerByName(searchResult.indexer.name)
        has_nfo, nfo, message = indexer.get_nfo(searchResult.guid)
        return {"has_nfo": has_nfo, "nfo": nfo, "message": message}
    except IndexerNotFoundException as e:
        logger.error(e.message)
        return {"has_nfo": False, "error": "Unable to find indexer"}
Example #11
File: api.py Project: gspu/nzbhydra
def getNzbById(searchResultId):
    # type: (int) -> (IndexerNzbDownloadResult, SearchResult)
    """
    :rtype: (IndexerNzbDownloadResult, SearchResult)
    """
    try:
        searchResult = SearchResult.get(SearchResult.id == searchResultId)
    except SearchResult.DoesNotExist:
        logger.error("Unable to find search result with ID %s" % searchResultId)
        raise NzbDownloadException("Unable to find search result with ID %s" % searchResultId)
    nzbdownloadresult = download_nzb_and_log(searchResultId)
    if nzbdownloadresult is None:
        logger.error("Error while trying to download NZB %s from %s" % (searchResult.title, searchResult.indexer.name))
        raise NzbDownloadException("Unable to download NZB")
    return nzbdownloadresult, searchResult
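
An illustrative caller for getNzbById; how the real project consumes it is not shown in this listing, and the ID and file handling below are made up:

try:
    nzb, searchResult = getNzbById(1234)  # hypothetical search result ID
    with open(searchResult.title + ".nzb", "wb") as f:
        f.write(nzb.content)
except NzbDownloadException as e:
    print("Download failed: %s" % e)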
Example #12
def get_nzb_response(searchResultId):
    searchResult = SearchResult.get(SearchResult.id == searchResultId)
    nzbdownloadresult = download_nzb_and_log(searchResultId)
    if nzbdownloadresult is not None:
        bio = BytesIO(nzbdownloadresult.content)
        filename = searchResult.title + ".nzb" if searchResult.title is not None else "nzbhydra.nzb"
        response = send_file(bio, mimetype='application/x-nzb', as_attachment=True, attachment_filename=filename, add_etags=False)
        response.headers["content-length"] = len(nzbdownloadresult.content)

        for header in nzbdownloadresult.headers.keys():
            if header.lower().startswith("x-dnzb") or header.lower() in ("content-disposition", "content-type"):
                response.headers[header] = nzbdownloadresult.headers[header]
        logger.info("Returning downloaded NZB %s from %s" % (searchResult.title, searchResult.indexer.name))
        return response
    else:
        logger.error("Error while trying to download NZB %s from %s" % (searchResult.title, searchResult.indexer.name))
        return "Unable to download NZB", 500
Example #13
def getNzbById(searchResultId):
    # type: (int) -> (IndexerNzbDownloadResult, SearchResult)
    """
    :rtype: (IndexerNzbDownloadResult, SearchResult)
    """
    try:
        searchResult = SearchResult.get(SearchResult.id == searchResultId)
    except SearchResult.DoesNotExist:
        logger.error("Unable to find search result with ID %s" %
                     searchResultId)
        raise NzbDownloadException("Unable to find search result with ID %s" %
                                   searchResultId)
    nzbdownloadresult = download_nzb_and_log(searchResultId)
    if nzbdownloadresult is None:
        logger.error("Error while trying to download NZB %s from %s" %
                     (searchResult.title, searchResult.indexer.name))
        raise NzbDownloadException("Unable to download NZB")
    return nzbdownloadresult, searchResult
Example #14
def get_indexer_nzb_link(searchResultId, mode, log_api_access, internal=False):
    """
    Build a link that leads to the actual NZB of the indexer using the given informations. We log this as indexer API access and NZB download because this is only called
    when the NZB will be actually downloaded later (by us or a downloader) 
    :return: str
    """
    searchResult = SearchResult.get(SearchResult.id == searchResultId)
    indexerName = searchResult.indexer.name
    indexer = indexers.getIndexerByName(indexerName)
    link = searchResult.link

    # Log to database
    papiaccess = IndexerApiAccess(indexer=indexer.indexer, type="nzb", url=link, response_successful=None) if log_api_access else None
    if papiaccess is not None:  # Only touch the API access entry if we actually created one
        try:
            papiaccess.username = request.authorization.username if request.authorization is not None else None
        except RuntimeError:
            pass
        papiaccess.save()
    pnzbdl = IndexerNzbDownload(searchResult=searchResult, apiAccess=papiaccess, mode=mode, title=searchResult.title, internal=internal)
    pnzbdl.save()

    return link, papiaccess, pnzbdl
Example #15
def countOldSearchResults(keepFor):
    return SearchResult.select().where(SearchResult.firstFound < (datetime.date.today() - datetime.timedelta(days=keepFor))).count()
Example #16
def search(search_request):
    logger.info("Starting new search: %s" % search_request)
    if search_request.maxage is None and config.settings.searching.maxAge:
        search_request.maxage = config.settings.searching.maxAge
        logger.info("Will ignore results older than %d days" % search_request.maxage)

    # Clean up cache
    for k in list(pseudo_cache.keys()):
        if pseudo_cache[k]["last_access"].replace(minutes=+5) < arrow.utcnow():
            pseudo_cache.pop(k)

    # Clean up old search results. We do this here because we don't have any background jobs and this is the function most regularly called
    keepFor = config.settings.main.keepSearchResultsForDays
    oldSearchResultsCount = countOldSearchResults(keepFor)
    if oldSearchResultsCount > 0:
        logger.info("Deleting %d search results from database that are older than %d days" % (oldSearchResultsCount, keepFor))
        SearchResult.delete().where(SearchResult.firstFound < (datetime.date.today() - datetime.timedelta(days=keepFor))).execute()
    else:
        if logger.getEffectiveLevel() == logging.DEBUG:
            logger.debug("%d search results stored in database" % SearchResult.select().count())

    limit = search_request.limit
    external_offset = int(search_request.offset)
    search_hash = search_request.search_hash
    categoryResult = categories.getCategoryByAnyInput(search_request.category)
    search_request.category = categoryResult
    if search_hash not in pseudo_cache.keys() or search_request.offset == 0:  # If it's a new search (which starts with offset 0) do it again instead of using the cached results
        logger.debug("Didn't find this query in cache or want to do a new search")
        cache_entry = {"results": [], "indexer_infos": {}, "total": 0, "last_access": arrow.utcnow(), "offset": 0, "rejected": SearchModule.getRejectedCountDict()}
        category = categoryResult.category
        indexers_to_call = pick_indexers(search_request)
        for p in indexers_to_call:
            cache_entry["indexer_infos"][p] = {"has_more": True, "search_request": search_request, "total_included": False}

        dbsearch = Search(internal=search_request.internal, query=search_request.query, category=categoryResult.category.pretty, identifier_key=search_request.identifier_key, identifier_value=search_request.identifier_value, season=search_request.season, episode=search_request.episode,
                          type=search_request.type, title=search_request.title, author=search_request.author, username=search_request.username)
        saveSearch(dbsearch)
        # dbsearch.save()
        cache_entry["dbsearch"] = dbsearch

        # Find ignored words and parse query for ignored words
        search_request.forbiddenWords = []
        search_request.requiredWords = []
        applyRestrictionsGlobal = config.settings.searching.applyRestrictions == "both" or (config.settings.searching.applyRestrictions == "internal" and search_request.internal) or (config.settings.searching.applyRestrictions == "external" and not search_request.internal)
        applyRestrictionsCategory = category.applyRestrictions == "both" or (category.applyRestrictions == "internal" and search_request.internal) or (search_request.category.category.applyRestrictions == "external" and not search_request.internal)
        if config.settings.searching.forbiddenWords and applyRestrictionsGlobal:
            logger.debug("Using configured global forbidden words: %s" % config.settings.searching.forbiddenWords)
            search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.forbiddenWords.split(",")))])
        if config.settings.searching.requiredWords and applyRestrictionsGlobal:
            logger.debug("Using configured global required words: %s" % config.settings.searching.requiredWords)
            search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.requiredWords.split(",")))])

        if category.forbiddenWords and applyRestrictionsCategory:
            logger.debug("Using configured forbidden words for category %s: %s" % (category.pretty, category.forbiddenWords))
            search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, category.forbiddenWords.split(",")))])
        if category.requiredWords and applyRestrictionsCategory:
            logger.debug("Using configured required words for category %s: %s" % (category.pretty, category.requiredWords))
            search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, category.requiredWords.split(",")))])

        if search_request.query:
            forbiddenWords = [str(x[1]) for x in re.findall(r"[\s|\b](\-\-|!)(?P<term>\w+)", search_request.query)]
            if len(forbiddenWords) > 0:
                logger.debug("Query before removing NOT terms: %s" % search_request.query)
                search_request.query = re.sub(r"[\s|\b](\-\-|!)(?P<term>\w+)", "", search_request.query)
                logger.debug("Query after removing NOT terms: %s" % search_request.query)
                logger.debug("Found NOT terms: %s" % ",".join(forbiddenWords))

                search_request.forbiddenWords.extend(forbiddenWords)
        cache_entry["forbiddenWords"] = search_request.forbiddenWords
        cache_entry["requiredWords"] = search_request.requiredWords
        cache_entry["query"] = search_request.query

        pseudo_cache[search_hash] = cache_entry
    else:
        cache_entry = pseudo_cache[search_hash]
        indexers_to_call = [indexer for indexer, info in cache_entry["indexer_infos"].items() if info["has_more"]]
        dbsearch = cache_entry["dbsearch"]
        search_request.forbiddenWords = cache_entry["forbiddenWords"]
        search_request.requiredWords = cache_entry["requiredWords"]
        search_request.query = cache_entry["query"]
        logger.debug("Found search in cache")

        logger.debug("Will search at indexers as long as we don't have enough results for the current offset+limit and any indexer has more results.")
    if search_request.loadAll:
        logger.debug("Requested to load all results. Will continue to search until all indexers are exhausted")
    while (len(cache_entry["results"]) < external_offset + limit or search_request.loadAll) and len(indexers_to_call) > 0:
        if len(cache_entry["results"]) < external_offset + limit:
            logger.debug("We want %d results but have only %d so far" % ((external_offset + limit), len(cache_entry["results"])))
        elif search_request.loadAll:
            logger.debug("All results requested. Continuing to search.")
        logger.debug("%d indexers still have results" % len(indexers_to_call))
        search_request.offset = cache_entry["offset"]

        logger.debug("Searching indexers with offset %d" % search_request.offset)
        result = search_and_handle_db(dbsearch, {x: search_request for x in indexers_to_call})
        logger.debug("All search calls to indexers completed")
        search_results = []
        indexers_to_call = []

        waslocked = False
        before = arrow.now()
        if databaseLock.locked():
            logger.debug("Database accesses locked by other search. Will wait for our turn.")
            waslocked = True
        databaseLock.acquire()
        if waslocked:
            after = arrow.now()
            took = (after - before).seconds * 1000 + (after - before).microseconds / 1000
            logger.debug("Waited %dms for database lock" % took)
        for indexer, queries_execution_result in result["results"].items():
            with db.atomic():
                logger.info("%s returned %d results" % (indexer, len(queries_execution_result.results)))
                for result in queries_execution_result.results:
                    if result.title is None or result.link is None or result.indexerguid is None:
                        logger.info("Skipping result with missing data: %s" % result)
                        continue
                    try:
                        searchResultId = hashlib.sha1(str(indexer.indexer.id) + result.indexerguid).hexdigest()
                        tryGetOrCreateSearchResultDbEntry(searchResultId, indexer.indexer.id, result)
                        result.searchResultId = searchResultId
                        search_results.append(result)
                    except (IntegrityError, OperationalError) as e:
                        logger.error("Error while trying to save search result to database. Skipping it. Error: %s" % e)

            cache_entry["indexer_infos"][indexer].update(
                {"did_search": queries_execution_result.didsearch, "indexer": indexer.name, "search_request": search_request, "has_more": queries_execution_result.has_more, "total": queries_execution_result.total, "total_known": queries_execution_result.total_known,
                 "indexer_search": queries_execution_result.indexerSearchEntry, "rejected": queries_execution_result.rejected, "processed_results": queries_execution_result.loaded_results})
            if queries_execution_result.has_more:
                indexers_to_call.append(indexer)
                logger.debug("%s still has more results so we could use it the next round" % indexer)

            if queries_execution_result.total_known:
                if not cache_entry["indexer_infos"][indexer]["total_included"]:
                    cache_entry["total"] += queries_execution_result.total
                    logger.debug("%s reports %d total results. We'll include in the total this time only" % (indexer, queries_execution_result.total))
                    cache_entry["indexer_infos"][indexer]["total_included"] = True
            elif queries_execution_result.has_more:
                logger.debug("%s doesn't report an exact number of results so let's just add another 100 to the total" % indexer)
                cache_entry["total"] += 100
            for rejectKey in cache_entry["rejected"].keys():
                if rejectKey in cache_entry["indexer_infos"][indexer]["rejected"].keys():
                    cache_entry["rejected"][rejectKey] += cache_entry["indexer_infos"][indexer]["rejected"][rejectKey]

        databaseLock.release()

        logger.debug("Searching for duplicates")
        numberResultsBeforeDuplicateRemoval = len(search_results)
        grouped_by_sameness, uniqueResultsPerIndexer = find_duplicates(search_results)
        allresults = []
        for group in grouped_by_sameness:
            if search_request.internal:
                for i in group:
                    # We give each group of results a unique value by which they can be identified later
                    i.hash = hash(group[0].details_link)
                    allresults.append(i)

            else:
                # We sort by age first and then by indexerscore so the newest result with the highest indexer score is chosen
                group = sorted(group, key=lambda x: x.epoch, reverse=True)
                group = sorted(group, key=lambda x: x.indexerscore, reverse=True)
                allresults.append(group[0])
        search_results = allresults

        with databaseLock:
            for indexer, infos in cache_entry["indexer_infos"].items():
                if indexer.name in uniqueResultsPerIndexer.keys():  # If the search failed it isn't contained in the duplicates list
                    uniqueResultsCount = uniqueResultsPerIndexer[infos["indexer"]]
                    processedResults = infos["processed_results"]
                    logger.debug("Indexer %s had a unique results share of %d%% (%d of %d total results were only provided by this indexer)" % (indexer.name, 100 / (numberResultsBeforeDuplicateRemoval / uniqueResultsCount), uniqueResultsCount, numberResultsBeforeDuplicateRemoval))
                    infos["indexer_search"].uniqueResults = uniqueResultsCount
                    infos["indexer_search"].processedResults = processedResults
                    infos["indexer_search"].save()

        if not search_request.internal:
            countAfter = len(search_results)
            countRemoved = numberResultsBeforeDuplicateRemoval - countAfter
            logger.info("Removed %d duplicates from %d results" % (countRemoved, numberResultsBeforeDuplicateRemoval))

        search_results = sorted(search_results, key=lambda x: x.epoch, reverse=True)

        cache_entry["results"].extend(search_results)
        cache_entry["offset"] += limit

    if len(indexers_to_call) == 0:
        logger.info("All indexers exhausted")
    elif len(cache_entry["results"]) >= external_offset + limit:
        logger.debug("Loaded a total of %d results which is enough for the %d requested. Stopping search." % (len(cache_entry["results"]), (external_offset + limit)))

    if search_request.internal:
        logger.debug("We have %d cached results and return them all because we search internally" % len(cache_entry["results"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:])
    else:
        logger.debug("We have %d cached results and return %d-%d of %d total available accounting for the limit set for the API search" % (len(cache_entry["results"]), external_offset, external_offset + limit, cache_entry["total"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:(external_offset + limit)])
    cache_entry["last_access"] = arrow.utcnow()
    for k, v in cache_entry["rejected"].items():
        if v > 0:
            logger.info("Rejected %d because: %s" % (v, k))
    logger.info("Returning %d results" % len(nzb_search_results))
    return {"results": nzb_search_results, "indexer_infos": cache_entry["indexer_infos"], "dbsearchid": cache_entry["dbsearch"].id, "total": cache_entry["total"], "offset": external_offset, "rejected": cache_entry["rejected"].items()}
Example #17
def search(search_request):
    if search_request.maxage is None and config.settings.searching.maxAge:
        search_request.maxage = config.settings.searching.maxAge
        logger.info("Will ignore results older than %d days" % search_request.maxage)

    # Clean up cache
    for k in list(pseudo_cache.keys()):
        if pseudo_cache[k]["last_access"].replace(minutes=+5) < arrow.utcnow():
            pseudo_cache.pop(k)

    # Clean up old search results. We do this here because we don't have any background jobs and this is the function most regularly called
    keepFor = config.settings.main.keepSearchResultsForDays
    oldSearchResultsCount = SearchResult.select().where(SearchResult.firstFound < (datetime.date.today() - datetime.timedelta(days=keepFor))).count()
    if oldSearchResultsCount > 0:
        logger.info("Deleting %d search results from database that are older than %d days" % (oldSearchResultsCount, keepFor))
        SearchResult.delete().where(SearchResult.firstFound < (datetime.date.today() - datetime.timedelta(days=keepFor))).execute()
    else:
        if logger.getEffectiveLevel() == logging.DEBUG:
            logger.debug("%d search results stored in database" % SearchResult.select().count())

    limit = search_request.limit
    external_offset = int(search_request.offset)
    search_hash = search_request.search_hash
    categoryResult = categories.getCategoryByAnyInput(search_request.category)
    search_request.category = categoryResult
    if search_hash not in pseudo_cache.keys() or search_request.offset == 0:  # If it's a new search (which starts with offset 0) do it again instead of using the cached results
        logger.debug("Didn't find this query in cache or want to do a new search")
        cache_entry = {"results": [], "indexer_infos": {}, "total": 0, "last_access": arrow.utcnow(), "offset": 0}
        category = categoryResult.category
        indexers_to_call = pick_indexers(search_request)
        for p in indexers_to_call:
            cache_entry["indexer_infos"][p] = {"has_more": True, "search_request": search_request, "total_included": False}
        
        dbsearch = Search(internal=search_request.internal, query=search_request.query, category=categoryResult.category.pretty, identifier_key=search_request.identifier_key, identifier_value=search_request.identifier_value, season=search_request.season, episode=search_request.episode, type=search_request.type,
                          username=search_request.username)
        # dbsearch.save()
        cache_entry["dbsearch"] = dbsearch

        # Find ignored words and parse query for ignored words
        search_request.forbiddenWords = []
        search_request.requiredWords = []
        applyRestrictionsGlobal = config.settings.searching.applyRestrictions == "both" or (config.settings.searching.applyRestrictions == "internal" and search_request.internal) or (config.settings.searching.applyRestrictions == "external" and not search_request.internal)
        applyRestrictionsCategory = category.applyRestrictions == "both" or (category.applyRestrictions == "internal" and search_request.internal) or (search_request.category.category.applyRestrictions == "external" and not search_request.internal)
        if config.settings.searching.forbiddenWords and applyRestrictionsGlobal:
            logger.debug("Using configured global forbidden words: %s" % config.settings.searching.forbiddenWords)
            search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.forbiddenWords.split(",")))])
        if config.settings.searching.requiredWords and applyRestrictionsGlobal:
            logger.debug("Using configured global required words: %s" % config.settings.searching.requiredWords)
            search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.requiredWords.split(",")))])
        
        if category.forbiddenWords and applyRestrictionsCategory:
            logger.debug("Using configured forbidden words for category %s: %s" % (category.pretty, category.forbiddenWords))
            search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, category.forbiddenWords.split(",")))])
        if category.requiredWords and applyRestrictionsCategory:
            logger.debug("Using configured required words for category %s: %s" % (category.pretty, category.requiredWords))
            search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, category.requiredWords.split(",")))])
        
        
        if search_request.query:
            forbiddenWords = [str(x[1]) for x in re.findall(r"[\s|\b](\-\-|!)(?P<term>\w+)", search_request.query)]
            if len(forbiddenWords) > 0:
                logger.debug("Query before removing NOT terms: %s" % search_request.query)
                search_request.query = re.sub(r"[\s|\b](\-\-|!)(?P<term>\w+)", "", search_request.query)
                logger.debug("Query after removing NOT terms: %s" % search_request.query)
                logger.debug("Found NOT terms: %s" % ",".join(forbiddenWords))

                search_request.forbiddenWords.extend(forbiddenWords)

        pseudo_cache[search_hash] = cache_entry
    else:
        cache_entry = pseudo_cache[search_hash]
        indexers_to_call = [indexer for indexer, info in cache_entry["indexer_infos"].items() if info["has_more"]]
        dbsearch = cache_entry["dbsearch"]
        logger.debug("Found search in cache")

        logger.debug("Will search at indexers as long as we don't have enough results for the current offset+limit and any indexer has more results.")
    while len(cache_entry["results"]) < external_offset + limit and len(indexers_to_call) > 0:
        logger.debug("We want %d results but have only %d so far" % ((external_offset + limit), len(cache_entry["results"])))
        logger.debug("%d indexers still have results" % len(indexers_to_call))
        search_request.offset = cache_entry["offset"]
        
        logger.debug("Searching indexers with offset %d" % search_request.offset)
        result = search_and_handle_db(dbsearch, {x: search_request for x in indexers_to_call})
        logger.debug("All search calls to indexers completed")
        search_results = []
        indexers_to_call = []

        for indexer, queries_execution_result in result["results"].items():
            #Drastically improves db access time but means that if one database write fails all fail. That's a risk we need to take 
            with db.atomic():
                logger.debug("%s returned %d results. Writing them to database..." % (indexer, len(queries_execution_result.results)))
                for result in queries_execution_result.results:
                    if result.title is None or result.link is None or result.indexerguid is None:
                        logger.info("Skipping result with missing data: %s" % result)
                        continue
                    searchResult, _ = SearchResult.create_or_get(indexer=indexer.indexer, guid=result.indexerguid, title=result.title, link=result.link, details=result.details_link)
                    result.searchResultId = searchResult.id
                    search_results.append(result)
                logger.debug("Written results results to database")

                cache_entry["indexer_infos"][indexer].update(
                    {"did_search": queries_execution_result.didsearch, "indexer": indexer.name, "search_request": search_request, "has_more": queries_execution_result.has_more, "total": queries_execution_result.total, "total_known": queries_execution_result.total_known,
                     "indexer_search": queries_execution_result.indexerSearchEntry})
                if queries_execution_result.has_more:
                    indexers_to_call.append(indexer)
                    logger.debug("%s still has more results so we could use it the next round" % indexer)
        
                if queries_execution_result.total_known:
                    if not cache_entry["indexer_infos"][indexer]["total_included"]:
                        cache_entry["total"] += queries_execution_result.total
                        logger.debug("%s reports %d total results. We'll include in the total this time only" % (indexer, queries_execution_result.total))
                        cache_entry["indexer_infos"][indexer]["total_included"] = True
                elif queries_execution_result.has_more:
                    logger.debug("%s doesn't report an exact number of results so let's just add another 100 to the total" % indexer)
                    cache_entry["total"] += 100

        if search_request.internal or config.settings.searching.removeDuplicatesExternal:
            logger.debug("Searching for duplicates")
            countBefore = len(search_results)
            grouped_by_sameness = find_duplicates(search_results)
            allresults = []
            for group in grouped_by_sameness:
                if search_request.internal:
                    for i in group:
                        # We give each group of results a unique value by which they can be identified later
                        i.hash = hash(group[0].details_link)
                        allresults.append(i)

                else:
                    # We sort by age first and then by indexerscore so the newest result with the highest indexer score is chosen
                    group = sorted(group, key=lambda x: x.epoch, reverse=True)
                    group = sorted(group, key=lambda x: x.indexerscore, reverse=True)
                    allresults.append(group[0])
            search_results = allresults
            if not search_request.internal:
                countAfter = len(search_results)
                countRemoved = countBefore - countAfter
                logger.info("Removed %d duplicates from %d results" % (countRemoved, countBefore))
        search_results = sorted(search_results, key=lambda x: x.epoch, reverse=True)

        cache_entry["results"].extend(search_results)
        cache_entry["offset"] += limit

    if search_request.internal:
        logger.debug("We have %d cached results and return them all because we search internally" % len(cache_entry["results"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:])
    else:
        logger.debug("We have %d cached results and return %d-%d of %d total available accounting for the limit set for the API search" % (len(cache_entry["results"]), external_offset, external_offset + limit, cache_entry["total"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:(external_offset + limit)])
    cache_entry["last_access"] = arrow.utcnow()
    logger.info("Returning %d results" % len(nzb_search_results))
    return {"results": nzb_search_results, "indexer_infos": cache_entry["indexer_infos"], "dbsearchid": cache_entry["dbsearch"].id, "total": cache_entry["total"], "offset": external_offset}
Example #18
                indexer_status = database.IndexerStatus(indexer=indexer)
            indexer_status.save()
            iaa.save()
        except Exception as e:
            print(str(e))


def rndstr(n):
    return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(n))


database.db.init("c:\\temp\\playground.db")
database.db.connect()
database.Indexer.drop_table()
database.Indexer.create_table()
SearchResult.drop_table()
SearchResult.create_table()
TvIdCache.drop_table()
TvIdCache.create_table()


indexer1, created = database.Indexer.get_or_create(name="indexer1")
indexer2, created = database.Indexer.get_or_create(name="indexer2")
indexer3, created = database.Indexer.get_or_create(name="indexer3")
indexer4, created = database.Indexer.get_or_create(name="indexer4")
indexer5, created = database.Indexer.get_or_create(name="indexer5")
indexers = [indexer1, indexer2, indexer3, indexer4, indexer5]


now = time.time()
with database.db.atomic():
Example #19
def search(search_request):
    # type: (nzbhydra.search.SearchRequest) -> Dict[unicode, future.types.newint.newint]
    if search_request.maxage is None and config.settings.searching.maxAge:
        search_request.maxage = config.settings.searching.maxAge
        logger.info("Will ignore results older than %d days" % search_request.maxage)

    # Clean up cache
    for k in list(pseudo_cache.keys()):
        if pseudo_cache[k]["last_access"].replace(minutes=+5) < arrow.utcnow():
            pseudo_cache.pop(k)

    # Clean up old search results. We do this here because we don't have any background jobs and this is the function most regularly called
    keepFor = config.settings.main.keepSearchResultsForDays
    oldSearchResultsCount = SearchResult.select().where(SearchResult.firstFound < (datetime.date.today() - datetime.timedelta(days=keepFor))).count()
    if oldSearchResultsCount > 0:
        logger.info("Deleting %d search results from database that are older than %d days" % (oldSearchResultsCount, keepFor))
        SearchResult.delete().where(SearchResult.firstFound < (datetime.date.today() - datetime.timedelta(days=keepFor))).execute()

    limit = search_request.limit
    external_offset = int(search_request.offset)
    search_hash = search_request.search_hash
    if search_hash not in pseudo_cache.keys() or search_request.offset == 0:  # If it's a new search (which starts with offset 0) do it again instead of using the cached results
        logger.debug("Didn't find this query in cache or want to do a new search")
        cache_entry = {"results": [], "indexer_infos": {}, "total": 0, "last_access": arrow.utcnow(), "offset": 0}
        indexers_to_call = pick_indexers(search_request)
        for p in indexers_to_call:
            cache_entry["indexer_infos"][p] = {"has_more": True, "search_request": search_request, "total_included": False}
        categoryResult = categories.getCategoryByAnyInput(search_request.category)
        search_request.category = categoryResult
        category = categoryResult.category
        dbsearch = Search(internal=search_request.internal, query=search_request.query, category=categoryResult.category.pretty, identifier_key=search_request.identifier_key, identifier_value=search_request.identifier_value, season=search_request.season, episode=search_request.episode, type=search_request.type,
                          username=search_request.username)
        # dbsearch.save()
        cache_entry["dbsearch"] = dbsearch

        # Find ignored words and parse query for ignored words
        search_request.forbiddenWords = []
        search_request.requiredWords = []
        if config.settings.searching.forbiddenWords:
            logger.debug("Using configured global forbidden words: %s" % config.settings.searching.forbiddenWords)
            search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.forbiddenWords.split(",")))])
        if config.settings.searching.requiredWords:
            logger.debug("Using configured global required words: %s" % config.settings.searching.requiredWords)
            search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.requiredWords.split(",")))])
        if category.applyRestrictions == "both" or (category.applyRestrictions == "internal" and search_request.internal) or (category.applyRestrictions == "external" and not search_request.internal):
            if category.forbiddenWords:
                logger.debug("Using configured forbidden words for category %s: %s" % (category.pretty, category.forbiddenWords))
                search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, category.forbiddenWords.split(",")))])
            if category.requiredWords:
                logger.debug("Using configured required words for category %s: %s" % (category.pretty, category.requiredWords))
                search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, category.requiredWords.split(",")))])
        
        
        if search_request.query:
            forbiddenWords = [str(x[1]) for x in re.findall(r"[\s|\b](\-\-|!)(?P<term>\w+)", search_request.query)]
            if len(forbiddenWords) > 0:
                logger.debug("Query before removing NOT terms: %s" % search_request.query)
                search_request.query = re.sub(r"[\s|\b](\-\-|!)(?P<term>\w+)", "", search_request.query)
                logger.debug("Query after removing NOT terms: %s" % search_request.query)
                logger.debug("Found NOT terms: %s" % ",".join(forbiddenWords))

                search_request.forbiddenWords.extend(forbiddenWords)

        pseudo_cache[search_hash] = cache_entry
    else:
        cache_entry = pseudo_cache[search_hash]
        indexers_to_call = [indexer for indexer, info in cache_entry["indexer_infos"].items() if info["has_more"]]
        dbsearch = cache_entry["dbsearch"]
        logger.debug("Found search in cache")

        logger.debug("Will search at indexers as long as we don't have enough results for the current offset+limit and any indexer has more results.")
    while len(cache_entry["results"]) < external_offset + limit and len(indexers_to_call) > 0:
        logger.debug("We want %d results but have only %d so far" % ((external_offset + limit), len(cache_entry["results"])))
        logger.debug("%d indexers still have results" % len(indexers_to_call))
        search_request.offset = cache_entry["offset"]
        
        logger.debug("Searching indexers with offset %d" % search_request.offset)
        result = search_and_handle_db(dbsearch, {x: search_request for x in indexers_to_call})
        
        search_results = []
        indexers_to_call = []

        for indexer, queries_execution_result in result["results"].items():
            with db.atomic():
                for result in queries_execution_result.results:
                    if result.title is None or result.link is None or result.indexerguid is None:
                        logger.info("Skipping result with missing data: %s" % result)
                        continue
                    searchResult = SearchResult().get_or_create(indexer=indexer.indexer, title=result.title, link=result.link, details=result.details_link, guid=result.indexerguid)
                    searchResult = searchResult[0]  # Second is a boolean determining if the search result was created
                    result.searchResultId = searchResult.id
                    search_results.append(result)

            logger.debug("%s returned %d results" % (indexer, len(queries_execution_result.results)))
            cache_entry["indexer_infos"][indexer].update(
                {"did_search": queries_execution_result.didsearch, "indexer": indexer.name, "search_request": search_request, "has_more": queries_execution_result.has_more, "total": queries_execution_result.total, "total_known": queries_execution_result.total_known,
                 "indexer_search": queries_execution_result.indexerSearchEntry})
            if queries_execution_result.has_more:
                indexers_to_call.append(indexer)
                logger.debug("%s still has more results so we could use it the next round" % indexer)

            if queries_execution_result.total_known:
                if not cache_entry["indexer_infos"][indexer]["total_included"]:
                    cache_entry["total"] += queries_execution_result.total
                    logger.debug("%s reports %d total results. We'll include in the total this time only" % (indexer, queries_execution_result.total))
                    cache_entry["indexer_infos"][indexer]["total_included"] = True
            elif queries_execution_result.has_more:
                logger.debug("%s doesn't report an exact number of results so let's just add another 100 to the total" % indexer)
                cache_entry["total"] += 100

        if search_request.internal or config.settings.searching.removeDuplicatesExternal:
            countBefore = len(search_results)
            grouped_by_sameness = find_duplicates(search_results)
            allresults = []
            for group in grouped_by_sameness:
                if search_request.internal:
                    for i in group:
                        # We give each group of results a unique value by which they can be identified later
                        i.hash = hash(group[0].guid)
                        allresults.append(i)

                else:
                    # We sort by age first and then by indexerscore so the newest result with the highest indexer score is chosen
                    group = sorted(group, key=lambda x: x.epoch, reverse=True)
                    group = sorted(group, key=lambda x: x.indexerscore, reverse=True)
                    allresults.append(group[0])
            search_results = allresults
            if not search_request.internal:
                countAfter = len(search_results)
                countRemoved = countBefore - countAfter
                logger.info("Removed %d duplicates from %d results" % (countRemoved, countBefore))
        search_results = sorted(search_results, key=lambda x: x.epoch, reverse=True)

        cache_entry["results"].extend(search_results)
        cache_entry["offset"] += limit

    if search_request.internal:
        logger.debug("We have %d cached results and return them all because we search internally" % len(cache_entry["results"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:])
    else:
        logger.debug("We have %d cached results and return %d-%d of %d total available accounting for the limit set for the API search" % (len(cache_entry["results"]), external_offset, external_offset + limit, cache_entry["total"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:(external_offset + limit)])
    cache_entry["last_access"] = arrow.utcnow()

    return {"results": nzb_search_results, "indexer_infos": cache_entry["indexer_infos"], "dbsearchid": cache_entry["dbsearch"].id, "total": cache_entry["total"], "offset": external_offset}
Example #20
database.db.create_table(database.SearchResult)
indexer1, created = database.Indexer.get_or_create(name="indexer1")
indexer2, created = database.Indexer.get_or_create(name="indexer2")
indexer3, created = database.Indexer.get_or_create(name="indexer3")
indexer4, created = database.Indexer.get_or_create(name="indexer4")
indexer5, created = database.Indexer.get_or_create(name="indexer5")
indexers = [indexer1, indexer2, indexer3, indexer4, indexer5]

database.SearchResult.delete().execute()
now = time.time()
with database.db.atomic():
    # Prefill with 10000
    for x in range(1, 6):
        for i in range(0, 2000):
            SearchResult.create(indexer=indexers[x - 1],
                                title="%s%d" % (rndstr(80), i),
                                guid="%s%d" % (rndstr(100), i),
                                link="%s%d" % (rndstr(120), i))
            #SearchResult.create(indexer=indexers[x - 1], title="%s" % i, guid="%s" % i, link="%s" % i)
after = time.time()
print(after - now)

now = time.time()
rows = []
with database.db.atomic():
    for i in range(0, 100):
        for x in range(1, 6):
            SearchResult.create_or_get(indexer=indexers[x - 1],
                                       title="%s%d" % (rndstr(80), i),
                                       guid="%s%d" % (rndstr(100), i),
                                       link="%s%d" % (rndstr(120), i))
            #SearchResult.get_or_create(indexer=indexers[x - 1], title="%s%d" % (rndstr(80), i), guid="%s%d" % (rndstr(100), i), link="%s%d" % (rndstr(120), i))