def internalapi_addnzb(args):
    logger.debug("Add NZB request with args %s" % args)
    searchResultIds = json.loads(args["searchresultids"])
    try:
        downloader = getDownloaderInstanceByName(args["downloader"])
    except DownloaderNotFoundException as e:
        logger.error(e.message)
        return jsonify({"success": False})
    added = 0
    for searchResultId in searchResultIds:
        try:
            searchResult = SearchResult.get(SearchResult.id == searchResultId)
        except SearchResult.DoesNotExist:
            logger.error("Unable to find search result with ID %d in database" % searchResultId)
            continue
        link = get_nzb_link_and_guid(searchResultId, True, downloader=downloader.setting.name)
        if downloader.setting.nzbAddingType == config.NzbAddingTypeSelection.link:
            # We send a link to the downloader. The link either points to us (where it gets answered or
            # redirected, and then later getnzb will be called) or directly to the indexer
            add_success = downloader.add_link(link, searchResult.title, args["category"])
        else:
            # We download the NZB and send it to the downloader
            nzbdownloadresult = download_nzb_and_log(searchResultId)
            if nzbdownloadresult is not None:
                add_success = downloader.add_nzb(nzbdownloadresult.content, searchResult.title, args["category"])
            else:
                add_success = False
        if add_success:
            added += 1
    if added:
        return jsonify({"success": True, "added": added, "of": len(searchResultIds)})
    else:
        return jsonify({"success": False})

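# A minimal usage sketch for internalapi_addnzb, assuming an existing downloader named
# "sabnzbd"; the keys mirror the args dict read above, all values are hypothetical.
import json

example_args = {
    "searchresultids": json.dumps([1234, 5678]),  # JSON-encoded list, decoded by json.loads above
    "downloader": "sabnzbd",                      # must match a configured downloader name
    "category": "tv",                             # category passed through to the downloader
}
# internalapi_addnzb(example_args) would return JSON like {"success": True, "added": 2, "of": 2}
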
def tryGetOrCreateSearchResultDbEntry(searchResultId, indexerId, result):
    try:
        return SearchResult.get(SearchResult.id == searchResultId)
    except SearchResult.DoesNotExist:
        return SearchResult.create(id=searchResultId, indexer_id=indexerId, guid=result.indexerguid, title=result.title, link=result.link, details=result.details_link,
                                   firstFound=datetime.datetime.utcnow())

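# The caller derives searchResultId deterministically from the indexer's database ID and
# the result GUID (see the first search() variant below), so repeated searches map the
# same remote result to the same row. A sketch of that derivation; the helper name is
# made up for illustration:
import hashlib

def make_search_result_id(indexer_id, indexerguid):
    # Same input as in search(): the indexer's database ID concatenated with the result GUID
    return hashlib.sha1((str(indexer_id) + indexerguid).encode("utf-8")).hexdigest()
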
def api(args):
    logger.debug(request.url)
    logger.debug("API request: %s" % args)
    # Map newznab API parameters to the internal names
    args["category"] = args["cat"]
    args["episode"] = args["ep"]
    if args["q"] is not None and args["q"] != "":
        args["query"] = args["q"]  # Because internally we work with "query" instead of "q"
    if config.settings.main.apikey and ("apikey" not in args or args["apikey"] != config.settings.main.apikey):
        logger.error("Tried API access with invalid or missing API key")
        raise Unauthorized("API key not provided or invalid")
    elif args["t"] in ("search", "tvsearch", "movie", "book"):
        return api_search(args)
    elif args["t"] == "get":
        searchResultId = int(args["id"][len("nzbhydrasearchresult"):])
        searchResult = SearchResult.get(SearchResult.id == searchResultId)
        if config.settings.main.logging.logIpAddresses:
            logger.info("API request from %s to download %s from %s" % (getIp(), searchResult.title, searchResult.indexer.name))
        else:
            logger.info("API request to download %s from %s" % (searchResult.title, searchResult.indexer.name))
        return extract_nzb_infos_and_return_response(searchResultId)
    elif args["t"] == "caps":
        xml = render_template("caps.html")
        return Response(xml, mimetype="text/xml")
    elif args["t"] == "details":
        searchResultId = int(args["id"][len("nzbhydrasearchresult"):])
        searchResult = SearchResult.get(SearchResult.id == searchResultId)
        logger.info("API request to get details for %s from %s" % (searchResult.title, searchResult.indexer.name))
        item = get_entry_by_id(searchResult.indexer.name, searchResult.guid, searchResult.title)
        if item is None:
            logger.error("Unable to find or parse details for %s" % searchResult.title)
            return "Unable to get details", 500
        item.link = get_nzb_link_and_guid(searchResultId, False)[0]  # We need to make sure the link in the details refers to us
        return render_search_results_for_api([item], None, None, output=args["o"])
    elif args["t"] == "getnfo":
        searchResultId = int(args["id"][len("nzbhydrasearchresult"):])
        result = get_nfo(searchResultId)
        if result["has_nfo"]:
            if args["raw"] == 1:
                return result["nfo"]
            else:
                # TODO Return as JSON if requested
                return render_template("nfo.html", nfo=result["nfo"])
        else:
            return Response('<error code="300" description="No such item"/>', mimetype="text/xml")
    else:
        logger.error("Unknown API request. Supported functions: search, tvsearch, movie, get, caps, details, getnfo")
        return "Unknown API request. Supported functions: search, tvsearch, movie, get, caps, details, getnfo", 500

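# For illustration: a typical newznab query as it would arrive at this endpoint, e.g.
#   GET /api?apikey=...&t=tvsearch&q=show&cat=5030&ep=2&o=json
# (values hypothetical). After the mapping at the top of api() the handler sees
# args["query"] == "show", args["category"] == "5030" and args["episode"] == "2".
example_newznab_args = {"t": "tvsearch", "q": "show", "cat": "5030", "ep": "2", "o": "json"}
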
def get_nzb_response(searchResultId):
    try:
        searchResult = SearchResult.get(SearchResult.id == searchResultId)
    except SearchResult.DoesNotExist:
        logger.error("Unable to find search result with ID %s" % searchResultId)
        return "Unable to find search result with ID %s" % searchResultId, 500
    nzbdownloadresult = download_nzb_and_log(searchResultId)
    if nzbdownloadresult is not None:
        bio = BytesIO(nzbdownloadresult.content)
        filename = searchResult.title + ".nzb" if searchResult.title is not None else "nzbhydra.nzb"
        response = send_file(bio, mimetype="application/x-nzb", as_attachment=True, attachment_filename=filename, add_etags=False)
        response.headers["content-length"] = len(nzbdownloadresult.content)
        for header in nzbdownloadresult.headers.keys():
            if header.lower().startswith("x-dnzb") or header.lower() in ("content-disposition", "content-type"):
                response.headers[header] = nzbdownloadresult.headers[header]
        logger.info("Returning downloaded NZB %s from %s" % (searchResult.title, searchResult.indexer.name))
        return response
    else:
        logger.error("Error while trying to download NZB %s from %s" % (searchResult.title, searchResult.indexer.name))
        return "Unable to download NZB", 500

def get_details(guid):
    searchResultId = int(guid[len("nzbhydrasearchresult"):])
    searchResult = SearchResult.get(SearchResult.id == searchResultId)
    details_link = get_details_link(searchResult.indexer.name, searchResult.guid)
    if details_link:
        return redirect(details_link)
    return "Unable to find details", 500

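# Internal GUIDs are the numeric search result IDs prefixed with "nzbhydrasearchresult",
# so the database ID is recovered by stripping the prefix. Quick illustration with a
# made-up ID:
guid = "nzbhydrasearchresult1234"
assert int(guid[len("nzbhydrasearchresult"):]) == 1234
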
def download_nzb_and_log(searchResultId):
    link, papiaccess, _ = get_indexer_nzb_link(searchResultId, "serve", True)
    indexerName = None
    try:
        indexerName = SearchResult.get(SearchResult.id == searchResultId).indexer.name
        indexer = indexers.getIndexerByName(indexerName)
        r = indexer.get(link, timeout=10)
        r.raise_for_status()
        papiaccess.response_successful = True
        # elapsed.microseconds only contains the sub-second component; use total_seconds()
        # so responses slower than one second are measured correctly
        papiaccess.response_time = int(r.elapsed.total_seconds() * 1000)
        return IndexerNzbDownloadResult(content=r.content, headers=r.headers)
    except IndexerNotFoundException:
        if indexerName:
            logger.error("Unable to find indexer with name %s" % indexerName)
        else:
            logger.error("Unable to find indexer for search result id %s" % searchResultId)
        return None
    except SearchResult.DoesNotExist:
        logger.error("Unable to find search result with ID %s" % searchResultId)
        return None
    except RequestException as e:
        logger.error("Error while connecting to URL %s: %s" % (link, str(e)))
        papiaccess.error = str(e)
        return None
    finally:
        papiaccess.save()

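# IndexerNzbDownloadResult is not defined in this excerpt; given how .content and
# .headers are used here and in get_nzb_response(), a plausible minimal shape would be:
from collections import namedtuple

IndexerNzbDownloadResult = namedtuple("IndexerNzbDownloadResult", ["content", "headers"])
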
def get_indexer_nzb_link(searchResultId, mode, log_api_access, internal=False):
    """
    Build a link that leads to the actual NZB of the indexer using the given information.
    We log this as indexer API access and NZB download because this is only called when
    the NZB will actually be downloaded later (by us or a downloader).

    :return: (link, IndexerApiAccess, IndexerNzbDownload) tuple
    """
    searchResult = SearchResult.get(SearchResult.id == searchResultId)
    indexerName = searchResult.indexer.name
    indexer = indexers.getIndexerByName(indexerName)
    link = searchResult.link

    # Log to database
    papiaccess = IndexerApiAccess(indexer=indexer.indexer, type="nzb", url=link, response_successful=None) if log_api_access else None
    if papiaccess is not None:
        try:
            papiaccess.username = request.authorization.username if request.authorization is not None else None
        except RuntimeError:
            pass
        papiaccess.save()
    pnzbdl = IndexerNzbDownload(searchResult=searchResult, apiAccess=papiaccess, mode=mode, title=searchResult.title, internal=internal)
    pnzbdl.save()
    return link, papiaccess, pnzbdl

def getnzb(args):
    logger.debug("Get NZB request with args %s" % args)
    searchResult = SearchResult.get(SearchResult.id == args["searchresultid"])
    if config.settings.main.logging.logIpAddresses:
        logger.info("API request from %s to download %s from %s" % (getIp(), searchResult.title, searchResult.indexer.name))
    else:
        logger.info("API request to download %s from %s" % (searchResult.title, searchResult.indexer.name))
    return extract_nzb_infos_and_return_response(args["searchresultid"], args["downloader"])

def get_nfo(searchresultid):
    try:
        searchResult = SearchResult.get(SearchResult.id == searchresultid)
        indexer = indexers.getIndexerByName(searchResult.indexer.name)
        has_nfo, nfo, message = indexer.get_nfo(searchResult.guid)
        return {"has_nfo": has_nfo, "nfo": nfo, "message": message}
    except IndexerNotFoundException as e:
        logger.error(e.message)
        return {"has_nfo": False, "error": "Unable to find indexer"}

def getNzbById(searchResultId):
    # type: (int) -> (IndexerNzbDownloadResult, SearchResult)
    """
    :rtype: (IndexerNzbDownloadResult, SearchResult)
    """
    try:
        searchResult = SearchResult.get(SearchResult.id == searchResultId)
    except SearchResult.DoesNotExist:
        logger.error("Unable to find search result with ID %s" % searchResultId)
        raise NzbDownloadException("Unable to find search result with ID %s" % searchResultId)
    nzbdownloadresult = download_nzb_and_log(searchResultId)
    if nzbdownloadresult is None:
        logger.error("Error while trying to download NZB %s from %s" % (searchResult.title, searchResult.indexer.name))
        raise NzbDownloadException("Unable to download NZB")
    return nzbdownloadresult, searchResult

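# NzbDownloadException is raised above but not defined in this excerpt; a minimal
# definition consistent with its use would be:
class NzbDownloadException(Exception):
    pass
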
def countOldSearchResults(keepFor):
    return SearchResult.select().where(SearchResult.firstFound < (datetime.date.today() - datetime.timedelta(days=keepFor))).count()

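# Usage sketch: pair the count with the matching delete, exactly as the search()
# variants below do with the configured retention period.
keepFor = config.settings.main.keepSearchResultsForDays
if countOldSearchResults(keepFor) > 0:
    SearchResult.delete().where(SearchResult.firstFound < (datetime.date.today() - datetime.timedelta(days=keepFor))).execute()
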
def search(search_request):
    logger.info("Starting new search: %s" % search_request)
    if search_request.maxage is None and config.settings.searching.maxAge:
        search_request.maxage = config.settings.searching.maxAge
        logger.info("Will ignore results older than %d days" % search_request.maxage)

    # Clean up cache
    for k in list(pseudo_cache.keys()):
        if pseudo_cache[k]["last_access"].replace(minutes=+5) < arrow.utcnow():
            pseudo_cache.pop(k)

    # Clean up old search results. We do this here because we don't have any background jobs
    # and this is the function called most regularly
    keepFor = config.settings.main.keepSearchResultsForDays
    oldSearchResultsCount = countOldSearchResults(keepFor)
    if oldSearchResultsCount > 0:
        logger.info("Deleting %d search results from database that are older than %d days" % (oldSearchResultsCount, keepFor))
        SearchResult.delete().where(SearchResult.firstFound < (datetime.date.today() - datetime.timedelta(days=keepFor))).execute()
    elif logger.getEffectiveLevel() == logging.DEBUG:
        logger.debug("%d search results stored in database" % SearchResult.select().count())

    limit = search_request.limit
    external_offset = int(search_request.offset)
    search_hash = search_request.search_hash
    categoryResult = categories.getCategoryByAnyInput(search_request.category)
    search_request.category = categoryResult
    if search_hash not in pseudo_cache.keys() or search_request.offset == 0:
        # If it's a new search (which starts with offset 0) do it again instead of using the cached results
        logger.debug("Didn't find this query in cache or want to do a new search")
        cache_entry = {"results": [], "indexer_infos": {}, "total": 0, "last_access": arrow.utcnow(), "offset": 0, "rejected": SearchModule.getRejectedCountDict()}
        category = categoryResult.category
        indexers_to_call = pick_indexers(search_request)
        for p in indexers_to_call:
            cache_entry["indexer_infos"][p] = {"has_more": True, "search_request": search_request, "total_included": False}
        dbsearch = Search(internal=search_request.internal, query=search_request.query, category=categoryResult.category.pretty, identifier_key=search_request.identifier_key,
                          identifier_value=search_request.identifier_value, season=search_request.season, episode=search_request.episode, type=search_request.type,
                          title=search_request.title, author=search_request.author, username=search_request.username)
        saveSearch(dbsearch)
        cache_entry["dbsearch"] = dbsearch

        # Find ignored words and parse the query for NOT terms
        search_request.forbiddenWords = []
        search_request.requiredWords = []
        applyRestrictionsGlobal = config.settings.searching.applyRestrictions == "both" or (config.settings.searching.applyRestrictions == "internal" and search_request.internal) or (config.settings.searching.applyRestrictions == "external" and not search_request.internal)
        applyRestrictionsCategory = category.applyRestrictions == "both" or (category.applyRestrictions == "internal" and search_request.internal) or (category.applyRestrictions == "external" and not search_request.internal)
        if config.settings.searching.forbiddenWords and applyRestrictionsGlobal:
            logger.debug("Using configured global forbidden words: %s" % config.settings.searching.forbiddenWords)
            search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.forbiddenWords.split(",")))])
        if config.settings.searching.requiredWords and applyRestrictionsGlobal:
            logger.debug("Using configured global required words: %s" % config.settings.searching.requiredWords)
            search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.requiredWords.split(",")))])
        if category.forbiddenWords and applyRestrictionsCategory:
            logger.debug("Using configured forbidden words for category %s: %s" % (category.pretty, category.forbiddenWords))
            search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, category.forbiddenWords.split(",")))])
        if category.requiredWords and applyRestrictionsCategory:
            logger.debug("Using configured required words for category %s: %s" % (category.pretty, category.requiredWords))
            search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, category.requiredWords.split(",")))])
        if search_request.query:
            forbiddenWords = [str(x[1]) for x in re.findall(r"[\s|\b](\-\-|!)(?P<term>\w+)", search_request.query)]
            if len(forbiddenWords) > 0:
                logger.debug("Query before removing NOT terms: %s" % search_request.query)
                search_request.query = re.sub(r"[\s|\b](\-\-|!)(?P<term>\w+)", "", search_request.query)
                logger.debug("Query after removing NOT terms: %s" % search_request.query)
                logger.debug("Found NOT terms: %s" % ",".join(forbiddenWords))
                search_request.forbiddenWords.extend(forbiddenWords)
        cache_entry["forbiddenWords"] = search_request.forbiddenWords
        cache_entry["requiredWords"] = search_request.requiredWords
        cache_entry["query"] = search_request.query
        pseudo_cache[search_hash] = cache_entry
    else:
        cache_entry = pseudo_cache[search_hash]
        indexers_to_call = [indexer for indexer, info in cache_entry["indexer_infos"].items() if info["has_more"]]
        dbsearch = cache_entry["dbsearch"]
        search_request.forbiddenWords = cache_entry["forbiddenWords"]
        search_request.requiredWords = cache_entry["requiredWords"]
        search_request.query = cache_entry["query"]
        logger.debug("Found search in cache")

    logger.debug("Will search at indexers as long as we don't have enough results for the current offset+limit and any indexer has more results.")
    if search_request.loadAll:
        logger.debug("Requested to load all results. Will continue to search until all indexers are exhausted")
    while (len(cache_entry["results"]) < external_offset + limit or search_request.loadAll) and len(indexers_to_call) > 0:
        if len(cache_entry["results"]) < external_offset + limit:
            logger.debug("We want %d results but have only %d so far" % ((external_offset + limit), len(cache_entry["results"])))
        elif search_request.loadAll:
            logger.debug("All results requested. Continuing to search.")
        logger.debug("%d indexers still have results" % len(indexers_to_call))
        search_request.offset = cache_entry["offset"]
        logger.debug("Searching indexers with offset %d" % search_request.offset)
        result = search_and_handle_db(dbsearch, {x: search_request for x in indexers_to_call})
        logger.debug("All search calls to indexers completed")
        search_results = []
        indexers_to_call = []
        waslocked = False
        before = arrow.now()
        if databaseLock.locked():
            logger.debug("Database accesses locked by other search. Will wait for our turn.")
            waslocked = True
        databaseLock.acquire()
        if waslocked:
            after = arrow.now()
            took = (after - before).seconds * 1000 + (after - before).microseconds / 1000
            logger.debug("Waited %dms for database lock" % took)
        for indexer, queries_execution_result in result["results"].items():
            with db.atomic():
                logger.info("%s returned %d results" % (indexer, len(queries_execution_result.results)))
                for result in queries_execution_result.results:
                    if result.title is None or result.link is None or result.indexerguid is None:
                        logger.info("Skipping result with missing data: %s" % result)
                        continue
                    try:
                        searchResultId = hashlib.sha1(str(indexer.indexer.id) + result.indexerguid).hexdigest()
                        tryGetOrCreateSearchResultDbEntry(searchResultId, indexer.indexer.id, result)
                        result.searchResultId = searchResultId
                        search_results.append(result)
                    except (IntegrityError, OperationalError) as e:
                        logger.error("Error while trying to save search result to database. Skipping it. Error: %s" % e)
            cache_entry["indexer_infos"][indexer].update(
                {"did_search": queries_execution_result.didsearch, "indexer": indexer.name, "search_request": search_request, "has_more": queries_execution_result.has_more,
                 "total": queries_execution_result.total, "total_known": queries_execution_result.total_known, "indexer_search": queries_execution_result.indexerSearchEntry,
                 "rejected": queries_execution_result.rejected, "processed_results": queries_execution_result.loaded_results})
            if queries_execution_result.has_more:
                indexers_to_call.append(indexer)
                logger.debug("%s still has more results so we could use it the next round" % indexer)
            if queries_execution_result.total_known:
                if not cache_entry["indexer_infos"][indexer]["total_included"]:
                    cache_entry["total"] += queries_execution_result.total
                    logger.debug("%s reports %d total results. We'll include it in the total this time only" % (indexer, queries_execution_result.total))
                    cache_entry["indexer_infos"][indexer]["total_included"] = True
            elif queries_execution_result.has_more:
                logger.debug("%s doesn't report an exact number of results so let's just add another 100 to the total" % indexer)
                cache_entry["total"] += 100
            for rejectKey in cache_entry["rejected"].keys():
                if rejectKey in cache_entry["indexer_infos"][indexer]["rejected"].keys():
                    cache_entry["rejected"][rejectKey] += cache_entry["indexer_infos"][indexer]["rejected"][rejectKey]
        databaseLock.release()

        logger.debug("Searching for duplicates")
        numberResultsBeforeDuplicateRemoval = len(search_results)
        grouped_by_sameness, uniqueResultsPerIndexer = find_duplicates(search_results)
        allresults = []
        for group in grouped_by_sameness:
            if search_request.internal:
                for i in group:
                    # We give each group of results a unique value by which they can be identified later
                    i.hash = hash(group[0].details_link)
                    allresults.append(i)
            else:
                # We sort by age first and then by indexer score so the newest result with the highest indexer score is chosen
                group = sorted(group, key=lambda x: x.epoch, reverse=True)
                group = sorted(group, key=lambda x: x.indexerscore, reverse=True)
                allresults.append(group[0])
        search_results = allresults
        with databaseLock:
            for indexer, infos in cache_entry["indexer_infos"].items():
                if indexer.name in uniqueResultsPerIndexer.keys():  # If the search failed it isn't contained in the duplicates list
                    uniqueResultsCount = uniqueResultsPerIndexer[infos["indexer"]]
                    processedResults = infos["processed_results"]
                    logger.debug("Indexer %s had a unique results share of %d%% (%d of %d total results were only provided by this indexer)" % (indexer.name, 100 * uniqueResultsCount / numberResultsBeforeDuplicateRemoval, uniqueResultsCount, numberResultsBeforeDuplicateRemoval))
                    infos["indexer_search"].uniqueResults = uniqueResultsCount
                    infos["indexer_search"].processedResults = processedResults
                    infos["indexer_search"].save()
        if not search_request.internal:
            countAfter = len(search_results)
            countRemoved = numberResultsBeforeDuplicateRemoval - countAfter
            logger.info("Removed %d duplicates from %d results" % (countRemoved, numberResultsBeforeDuplicateRemoval))
        search_results = sorted(search_results, key=lambda x: x.epoch, reverse=True)
        cache_entry["results"].extend(search_results)
        cache_entry["offset"] += limit
        if len(indexers_to_call) == 0:
            logger.info("All indexers exhausted")
        elif len(cache_entry["results"]) >= external_offset + limit:
            logger.debug("Loaded a total of %d results which is enough for the %d requested. Stopping search." % (len(cache_entry["results"]), (external_offset + limit)))

    if search_request.internal:
        logger.debug("We have %d cached results and return them all because we search internally" % len(cache_entry["results"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:])
    else:
        logger.debug("We have %d cached results and return %d-%d of %d total available accounting for the limit set for the API search" % (len(cache_entry["results"]), external_offset, external_offset + limit, cache_entry["total"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:(external_offset + limit)])
    cache_entry["last_access"] = arrow.utcnow()
    for k, v in cache_entry["rejected"].items():
        if v > 0:
            logger.info("Rejected %d because: %s" % (v, k))
    logger.info("Returning %d results" % len(nzb_search_results))
    return {"results": nzb_search_results, "indexer_infos": cache_entry["indexer_infos"], "dbsearchid": cache_entry["dbsearch"].id, "total": cache_entry["total"], "offset": external_offset, "rejected": cache_entry["rejected"].items()}

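# Illustration of the NOT-term parsing used by the search() variants: "--word" or
# "!word" in a query is stripped from the query string and collected as a forbidden word.
import re

query = "some show --german !repack"
not_terms = [x[1] for x in re.findall(r"[\s|\b](\-\-|!)(?P<term>\w+)", query)]
cleaned = re.sub(r"[\s|\b](\-\-|!)(?P<term>\w+)", "", query)
assert not_terms == ["german", "repack"]
assert cleaned == "some show"
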
def search(search_request):
    if search_request.maxage is None and config.settings.searching.maxAge:
        search_request.maxage = config.settings.searching.maxAge
        logger.info("Will ignore results older than %d days" % search_request.maxage)

    # Clean up cache
    for k in list(pseudo_cache.keys()):
        if pseudo_cache[k]["last_access"].replace(minutes=+5) < arrow.utcnow():
            pseudo_cache.pop(k)

    # Clean up old search results. We do this here because we don't have any background jobs
    # and this is the function called most regularly
    keepFor = config.settings.main.keepSearchResultsForDays
    oldSearchResultsCount = SearchResult.select().where(SearchResult.firstFound < (datetime.date.today() - datetime.timedelta(days=keepFor))).count()
    if oldSearchResultsCount > 0:
        logger.info("Deleting %d search results from database that are older than %d days" % (oldSearchResultsCount, keepFor))
        SearchResult.delete().where(SearchResult.firstFound < (datetime.date.today() - datetime.timedelta(days=keepFor))).execute()
    elif logger.getEffectiveLevel() == logging.DEBUG:
        logger.debug("%d search results stored in database" % SearchResult.select().count())

    limit = search_request.limit
    external_offset = int(search_request.offset)
    search_hash = search_request.search_hash
    categoryResult = categories.getCategoryByAnyInput(search_request.category)
    search_request.category = categoryResult
    if search_hash not in pseudo_cache.keys() or search_request.offset == 0:
        # If it's a new search (which starts with offset 0) do it again instead of using the cached results
        logger.debug("Didn't find this query in cache or want to do a new search")
        cache_entry = {"results": [], "indexer_infos": {}, "total": 0, "last_access": arrow.utcnow(), "offset": 0}
        category = categoryResult.category
        indexers_to_call = pick_indexers(search_request)
        for p in indexers_to_call:
            cache_entry["indexer_infos"][p] = {"has_more": True, "search_request": search_request, "total_included": False}
        dbsearch = Search(internal=search_request.internal, query=search_request.query, category=categoryResult.category.pretty, identifier_key=search_request.identifier_key,
                          identifier_value=search_request.identifier_value, season=search_request.season, episode=search_request.episode, type=search_request.type,
                          username=search_request.username)
        # dbsearch.save()
        cache_entry["dbsearch"] = dbsearch

        # Find ignored words and parse the query for NOT terms
        search_request.forbiddenWords = []
        search_request.requiredWords = []
        applyRestrictionsGlobal = config.settings.searching.applyRestrictions == "both" or (config.settings.searching.applyRestrictions == "internal" and search_request.internal) or (config.settings.searching.applyRestrictions == "external" and not search_request.internal)
        applyRestrictionsCategory = category.applyRestrictions == "both" or (category.applyRestrictions == "internal" and search_request.internal) or (category.applyRestrictions == "external" and not search_request.internal)
        if config.settings.searching.forbiddenWords and applyRestrictionsGlobal:
            logger.debug("Using configured global forbidden words: %s" % config.settings.searching.forbiddenWords)
            search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.forbiddenWords.split(",")))])
        if config.settings.searching.requiredWords and applyRestrictionsGlobal:
            logger.debug("Using configured global required words: %s" % config.settings.searching.requiredWords)
            search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.requiredWords.split(",")))])
        if category.forbiddenWords and applyRestrictionsCategory:
            logger.debug("Using configured forbidden words for category %s: %s" % (category.pretty, category.forbiddenWords))
            search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, category.forbiddenWords.split(",")))])
        if category.requiredWords and applyRestrictionsCategory:
            logger.debug("Using configured required words for category %s: %s" % (category.pretty, category.requiredWords))
            search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, category.requiredWords.split(",")))])
        if search_request.query:
            forbiddenWords = [str(x[1]) for x in re.findall(r"[\s|\b](\-\-|!)(?P<term>\w+)", search_request.query)]
            if len(forbiddenWords) > 0:
                logger.debug("Query before removing NOT terms: %s" % search_request.query)
                search_request.query = re.sub(r"[\s|\b](\-\-|!)(?P<term>\w+)", "", search_request.query)
                logger.debug("Query after removing NOT terms: %s" % search_request.query)
                logger.debug("Found NOT terms: %s" % ",".join(forbiddenWords))
                search_request.forbiddenWords.extend(forbiddenWords)
        pseudo_cache[search_hash] = cache_entry
    else:
        cache_entry = pseudo_cache[search_hash]
        indexers_to_call = [indexer for indexer, info in cache_entry["indexer_infos"].items() if info["has_more"]]
        dbsearch = cache_entry["dbsearch"]
        logger.debug("Found search in cache")

    logger.debug("Will search at indexers as long as we don't have enough results for the current offset+limit and any indexer has more results.")
    while len(cache_entry["results"]) < external_offset + limit and len(indexers_to_call) > 0:
        logger.debug("We want %d results but have only %d so far" % ((external_offset + limit), len(cache_entry["results"])))
        logger.debug("%d indexers still have results" % len(indexers_to_call))
        search_request.offset = cache_entry["offset"]
        logger.debug("Searching indexers with offset %d" % search_request.offset)
        result = search_and_handle_db(dbsearch, {x: search_request for x in indexers_to_call})
        logger.debug("All search calls to indexers completed")
        search_results = []
        indexers_to_call = []
        for indexer, queries_execution_result in result["results"].items():
            # Drastically improves db access time but means that if one database write fails all fail. That's a risk we need to take
            with db.atomic():
                logger.debug("%s returned %d results. Writing them to database..." % (indexer, len(queries_execution_result.results)))
                for result in queries_execution_result.results:
                    if result.title is None or result.link is None or result.indexerguid is None:
                        logger.info("Skipping result with missing data: %s" % result)
                        continue
                    searchResult, _ = SearchResult.create_or_get(indexer=indexer.indexer, guid=result.indexerguid, title=result.title, link=result.link, details=result.details_link)
                    result.searchResultId = searchResult.id
                    search_results.append(result)
                logger.debug("Wrote results to database")
            cache_entry["indexer_infos"][indexer].update(
                {"did_search": queries_execution_result.didsearch, "indexer": indexer.name, "search_request": search_request, "has_more": queries_execution_result.has_more,
                 "total": queries_execution_result.total, "total_known": queries_execution_result.total_known, "indexer_search": queries_execution_result.indexerSearchEntry})
            if queries_execution_result.has_more:
                indexers_to_call.append(indexer)
                logger.debug("%s still has more results so we could use it the next round" % indexer)
            if queries_execution_result.total_known:
                if not cache_entry["indexer_infos"][indexer]["total_included"]:
                    cache_entry["total"] += queries_execution_result.total
                    logger.debug("%s reports %d total results. We'll include it in the total this time only" % (indexer, queries_execution_result.total))
                    cache_entry["indexer_infos"][indexer]["total_included"] = True
            elif queries_execution_result.has_more:
                logger.debug("%s doesn't report an exact number of results so let's just add another 100 to the total" % indexer)
                cache_entry["total"] += 100

        if search_request.internal or config.settings.searching.removeDuplicatesExternal:
            logger.debug("Searching for duplicates")
            countBefore = len(search_results)
            grouped_by_sameness = find_duplicates(search_results)
            allresults = []
            for group in grouped_by_sameness:
                if search_request.internal:
                    for i in group:
                        # We give each group of results a unique value by which they can be identified later
                        i.hash = hash(group[0].details_link)
                        allresults.append(i)
                else:
                    # We sort by age first and then by indexer score so the newest result with the highest indexer score is chosen
                    group = sorted(group, key=lambda x: x.epoch, reverse=True)
                    group = sorted(group, key=lambda x: x.indexerscore, reverse=True)
                    allresults.append(group[0])
            search_results = allresults
            if not search_request.internal:
                countAfter = len(search_results)
                countRemoved = countBefore - countAfter
                logger.info("Removed %d duplicates from %d results" % (countRemoved, countBefore))
        search_results = sorted(search_results, key=lambda x: x.epoch, reverse=True)
        cache_entry["results"].extend(search_results)
        cache_entry["offset"] += limit

    if search_request.internal:
        logger.debug("We have %d cached results and return them all because we search internally" % len(cache_entry["results"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:])
    else:
        logger.debug("We have %d cached results and return %d-%d of %d total available accounting for the limit set for the API search" % (len(cache_entry["results"]), external_offset, external_offset + limit, cache_entry["total"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:(external_offset + limit)])
    cache_entry["last_access"] = arrow.utcnow()
    logger.info("Returning %d results" % len(nzb_search_results))
    return {"results": nzb_search_results, "indexer_infos": cache_entry["indexer_infos"], "dbsearchid": cache_entry["dbsearch"].id, "total": cache_entry["total"], "offset": external_offset}

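# Illustration of the duplicate selection above with a throwaway result type: two
# stable sorts (newest first, then highest indexer score first) leave the newest result
# among those with the top score at group[0].
from collections import namedtuple

FakeResult = namedtuple("FakeResult", ["title", "epoch", "indexerscore"])
group = [FakeResult("a", 100, 1), FakeResult("b", 200, 2), FakeResult("c", 300, 2)]
group = sorted(group, key=lambda x: x.epoch, reverse=True)
group = sorted(group, key=lambda x: x.indexerscore, reverse=True)
assert group[0].title == "c"  # highest score, newest among the score-2 results
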
# Playground/benchmark fragment: persist the indexer status and API access entries, ignoring errors
try:
    indexer_status = database.IndexerStatus(indexer=indexer)
    indexer_status.save()
    iaa.save()
except Exception as e:
    print(str(e))


def rndstr(n):
    return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(n))


database.db.init("c:\\temp\\playground.db")
database.db.connect()
database.Indexer.drop_table()
database.Indexer.create_table()
SearchResult.drop_table()
SearchResult.create_table()
TvIdCache.drop_table()
TvIdCache.create_table()

indexer1, created = database.Indexer.get_or_create(name="indexer1")
indexer2, created = database.Indexer.get_or_create(name="indexer2")
indexer3, created = database.Indexer.get_or_create(name="indexer3")
indexer4, created = database.Indexer.get_or_create(name="indexer4")
indexer5, created = database.Indexer.get_or_create(name="indexer5")
indexers = [indexer1, indexer2, indexer3, indexer4, indexer5]

now = time.time()
with database.db.atomic():
    pass  # the rest of this fragment is truncated in the source

def search(search_request):
    # type: (nzbhydra.search.SearchRequest) -> Dict[unicode, future.types.newint.newint]
    if search_request.maxage is None and config.settings.searching.maxAge:
        search_request.maxage = config.settings.searching.maxAge
        logger.info("Will ignore results older than %d days" % search_request.maxage)

    # Clean up cache
    for k in list(pseudo_cache.keys()):
        if pseudo_cache[k]["last_access"].replace(minutes=+5) < arrow.utcnow():
            pseudo_cache.pop(k)

    # Clean up old search results. We do this here because we don't have any background jobs
    # and this is the function called most regularly
    keepFor = config.settings.main.keepSearchResultsForDays
    oldSearchResultsCount = SearchResult.select().where(SearchResult.firstFound < (datetime.date.today() - datetime.timedelta(days=keepFor))).count()
    if oldSearchResultsCount > 0:
        logger.info("Deleting %d search results from database that are older than %d days" % (oldSearchResultsCount, keepFor))
        SearchResult.delete().where(SearchResult.firstFound < (datetime.date.today() - datetime.timedelta(days=keepFor))).execute()

    limit = search_request.limit
    external_offset = int(search_request.offset)
    search_hash = search_request.search_hash
    if search_hash not in pseudo_cache.keys() or search_request.offset == 0:
        # If it's a new search (which starts with offset 0) do it again instead of using the cached results
        logger.debug("Didn't find this query in cache or want to do a new search")
        cache_entry = {"results": [], "indexer_infos": {}, "total": 0, "last_access": arrow.utcnow(), "offset": 0}
        indexers_to_call = pick_indexers(search_request)
        for p in indexers_to_call:
            cache_entry["indexer_infos"][p] = {"has_more": True, "search_request": search_request, "total_included": False}
        categoryResult = categories.getCategoryByAnyInput(search_request.category)
        search_request.category = categoryResult
        category = categoryResult.category
        dbsearch = Search(internal=search_request.internal, query=search_request.query, category=categoryResult.category.pretty, identifier_key=search_request.identifier_key,
                          identifier_value=search_request.identifier_value, season=search_request.season, episode=search_request.episode, type=search_request.type,
                          username=search_request.username)
        # dbsearch.save()
        cache_entry["dbsearch"] = dbsearch

        # Find ignored words and parse the query for NOT terms
        search_request.forbiddenWords = []
        search_request.requiredWords = []
        if config.settings.searching.forbiddenWords:
            logger.debug("Using configured global forbidden words: %s" % config.settings.searching.forbiddenWords)
            search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.forbiddenWords.split(",")))])
        if config.settings.searching.requiredWords:
            logger.debug("Using configured global required words: %s" % config.settings.searching.requiredWords)
            search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, config.settings.searching.requiredWords.split(",")))])
        if category.applyRestrictions == "both" or (category.applyRestrictions == "internal" and search_request.internal) or (category.applyRestrictions == "external" and not search_request.internal):
            if category.forbiddenWords:
                logger.debug("Using configured forbidden words for category %s: %s" % (category.pretty, category.forbiddenWords))
                search_request.forbiddenWords.extend([x.lower().strip() for x in list(filter(bool, category.forbiddenWords.split(",")))])
            if category.requiredWords:
                logger.debug("Using configured required words for category %s: %s" % (category.pretty, category.requiredWords))
                search_request.requiredWords.extend([x.lower().strip() for x in list(filter(bool, category.requiredWords.split(",")))])
        if search_request.query:
            forbiddenWords = [str(x[1]) for x in re.findall(r"[\s|\b](\-\-|!)(?P<term>\w+)", search_request.query)]
            if len(forbiddenWords) > 0:
                logger.debug("Query before removing NOT terms: %s" % search_request.query)
                search_request.query = re.sub(r"[\s|\b](\-\-|!)(?P<term>\w+)", "", search_request.query)
                logger.debug("Query after removing NOT terms: %s" % search_request.query)
                logger.debug("Found NOT terms: %s" % ",".join(forbiddenWords))
                search_request.forbiddenWords.extend(forbiddenWords)
        pseudo_cache[search_hash] = cache_entry
    else:
        cache_entry = pseudo_cache[search_hash]
        indexers_to_call = [indexer for indexer, info in cache_entry["indexer_infos"].items() if info["has_more"]]
        dbsearch = cache_entry["dbsearch"]
        logger.debug("Found search in cache")

    logger.debug("Will search at indexers as long as we don't have enough results for the current offset+limit and any indexer has more results.")
    while len(cache_entry["results"]) < external_offset + limit and len(indexers_to_call) > 0:
        logger.debug("We want %d results but have only %d so far" % ((external_offset + limit), len(cache_entry["results"])))
        logger.debug("%d indexers still have results" % len(indexers_to_call))
        search_request.offset = cache_entry["offset"]
        logger.debug("Searching indexers with offset %d" % search_request.offset)
        result = search_and_handle_db(dbsearch, {x: search_request for x in indexers_to_call})
        search_results = []
        indexers_to_call = []
        for indexer, queries_execution_result in result["results"].items():
            with db.atomic():
                for result in queries_execution_result.results:
                    if result.title is None or result.link is None or result.indexerguid is None:
                        logger.info("Skipping result with missing data: %s" % result)
                        continue
                    # Second element of the returned tuple is a boolean indicating whether the entry was created
                    searchResult, _ = SearchResult.get_or_create(indexer=indexer.indexer, title=result.title, link=result.link, details=result.details_link, guid=result.indexerguid)
                    result.searchResultId = searchResult.id
                    search_results.append(result)
            logger.debug("%s returned %d results" % (indexer, len(queries_execution_result.results)))
            cache_entry["indexer_infos"][indexer].update(
                {"did_search": queries_execution_result.didsearch, "indexer": indexer.name, "search_request": search_request, "has_more": queries_execution_result.has_more,
                 "total": queries_execution_result.total, "total_known": queries_execution_result.total_known, "indexer_search": queries_execution_result.indexerSearchEntry})
            if queries_execution_result.has_more:
                indexers_to_call.append(indexer)
                logger.debug("%s still has more results so we could use it the next round" % indexer)
            if queries_execution_result.total_known:
                if not cache_entry["indexer_infos"][indexer]["total_included"]:
                    cache_entry["total"] += queries_execution_result.total
                    logger.debug("%s reports %d total results. We'll include it in the total this time only" % (indexer, queries_execution_result.total))
                    cache_entry["indexer_infos"][indexer]["total_included"] = True
            elif queries_execution_result.has_more:
                logger.debug("%s doesn't report an exact number of results so let's just add another 100 to the total" % indexer)
                cache_entry["total"] += 100

        if search_request.internal or config.settings.searching.removeDuplicatesExternal:
            countBefore = len(search_results)
            grouped_by_sameness = find_duplicates(search_results)
            allresults = []
            for group in grouped_by_sameness:
                if search_request.internal:
                    for i in group:
                        # We give each group of results a unique value by which they can be identified later
                        i.hash = hash(group[0].guid)
                        allresults.append(i)
                else:
                    # We sort by age first and then by indexer score so the newest result with the highest indexer score is chosen
                    group = sorted(group, key=lambda x: x.epoch, reverse=True)
                    group = sorted(group, key=lambda x: x.indexerscore, reverse=True)
                    allresults.append(group[0])
            search_results = allresults
            if not search_request.internal:
                countAfter = len(search_results)
                countRemoved = countBefore - countAfter
                logger.info("Removed %d duplicates from %d results" % (countRemoved, countBefore))
        search_results = sorted(search_results, key=lambda x: x.epoch, reverse=True)
        cache_entry["results"].extend(search_results)
        cache_entry["offset"] += limit

    if search_request.internal:
        logger.debug("We have %d cached results and return them all because we search internally" % len(cache_entry["results"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:])
    else:
        logger.debug("We have %d cached results and return %d-%d of %d total available accounting for the limit set for the API search" % (len(cache_entry["results"]), external_offset, external_offset + limit, cache_entry["total"]))
        nzb_search_results = copy.deepcopy(cache_entry["results"][external_offset:(external_offset + limit)])
    cache_entry["last_access"] = arrow.utcnow()
    return {"results": nzb_search_results, "indexer_infos": cache_entry["indexer_infos"], "dbsearchid": cache_entry["dbsearch"].id, "total": cache_entry["total"], "offset": external_offset}

database.db.create_table(database.SearchResult)
indexer1, created = database.Indexer.get_or_create(name="indexer1")
indexer2, created = database.Indexer.get_or_create(name="indexer2")
indexer3, created = database.Indexer.get_or_create(name="indexer3")
indexer4, created = database.Indexer.get_or_create(name="indexer4")
indexer5, created = database.Indexer.get_or_create(name="indexer5")
indexers = [indexer1, indexer2, indexer3, indexer4, indexer5]
database.SearchResult.delete().execute()

now = time.time()
with database.db.atomic():
    # Prefill with 10000 rows (5 indexers x 2000 results)
    for x in range(1, 6):
        for i in range(0, 2000):
            SearchResult.create(indexer=indexers[x - 1], title="%s%d" % (rndstr(80), i), guid="%s%d" % (rndstr(100), i), link="%s%d" % (rndstr(120), i))
            # SearchResult.create(indexer=indexers[x - 1], title="%s" % i, guid="%s" % i, link="%s" % i)
after = time.time()
print(after - now)

now = time.time()
rows = []
with database.db.atomic():
    for i in range(0, 100):
        for x in range(1, 6):
            SearchResult.create_or_get(indexer=indexers[x - 1], title="%s%d" % (rndstr(80), i), guid="%s%d" % (rndstr(100), i), link="%s%d" % (rndstr(120), i))
            # SearchResult.get_or_create(indexer=indexers[x - 1], title="%s%d" % (rndstr(80), i), guid="%s%d" % (rndstr(100), i), link="%s%d" % (rndstr(120), i))