def processResultList(resultlist, author, title, book):
    """Match RSS feed results against a wanted book and snatch the first good match.

    Bit of a misnomer now: rss can search both tor and nzb rss feeds.
    """
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
                '"': '', ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '',
                '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '',
                '8': '', '9': '', '\'': '', ':': '', '!': '', '-': ' ', '\s\s': ' '}
    # ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ',
    # ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '
    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)

    for tor in resultlist:
        tor_Title = formatter.latinToAscii(formatter.replace_all(tor['tor_title'], dictrepl)).strip()
        tor_Title = re.sub(r"\s\s+", " ", tor_Title)  # remove extra whitespace

        tor_Author_match = fuzz.token_set_ratio(author, tor_Title)
        tor_Title_match = fuzz.token_set_ratio(title, tor_Title)
        logger.debug("RSS Author/Title Match: %s/%s for %s" % (tor_Author_match, tor_Title_match, tor_Title))

        rejected = False
        for word in reject_list:
            # compare against the cleaned author/title strings, not the db row
            if word in tor_Title.lower() and word not in author.lower() and word not in title.lower():
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (tor_Title, word))
                break

        if tor_Title_match >= match_ratio and tor_Author_match >= match_ratio and not rejected:
            logger.debug(u'Found RSS: %s' % tor['tor_title'])
            bookid = book['bookid']
            tor_Title = (book["authorName"] + ' - ' + book['bookName'] +
                         ' LL.(' + book['bookid'] + ')').strip()
            tor_url = tor['tor_url']
            tor_prov = tor['tor_prov']
            tor_feed = tor['tor_feed']
            tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
            if tor_size_temp is None:
                tor_size_temp = 1000
            tor_size = str(round(float(tor_size_temp) / 1048576, 2)) + ' MB'

            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor_prov,
                "BookID": bookid,
                "NZBdate": formatter.now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }
            myDB.upsert("wanted", newValueDict, controlValueDict)

            snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                        bookid).fetchone()
            if not snatchedbooks:  # check if one of the other downloaders got there first
                if '.nzb' in tor_url:
                    snatch = NZBDownloadMethod(bookid, tor_prov, tor_Title, tor_url)
                else:
                    # http://baconbits.org/torrents.php?action=download&authkey=<authkey>&torrent_pass=<password.hashed>&id=185398
                    if not tor_url.startswith('magnet'):  # magnets don't use auth
                        pwd = lazylibrarian.RSS_PROV[tor_feed]['PASS']
                        auth = lazylibrarian.RSS_PROV[tor_feed]['AUTH']
                        # don't know what form of password hash is required, try sha1
                        # hexdigest() gives the hash as a string for the url substitution
                        tor_url = tor_url.replace('<authkey>', auth).replace(
                            '<password.hashed>', sha1(pwd).hexdigest())
                    snatch = TORDownloadMethod(bookid, tor_prov, tor_Title, tor_url)

                if snatch:
                    notifiers.notify_snatch(formatter.latinToAscii(tor_Title) + ' at ' + formatter.now())
                    common.schedule_job(action='Start', target='processDir')
                    return True

    logger.debug("No RSS found for " + (book["authorName"] + ' ' + book['bookName']).strip())
    return False
def processResultList(resultlist, book, searchtype):
    """Score NZB results for a wanted book and snatch the best match."""
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
                '"': '', ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '',
                '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '',
                '8': '', '9': '', '\'': '', ':': '', '!': '', '-': ' ', '\s\s': ' '}
    # ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ',
    # ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '
    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
           '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)
    author = formatter.latinToAscii(formatter.replace_all(book['authorName'], dic))
    title = formatter.latinToAscii(formatter.replace_all(book['bookName'], dic))

    matches = []
    for nzb in resultlist:
        nzb_Title = formatter.latinToAscii(formatter.replace_all(nzb['nzbtitle'], dictrepl)).strip()
        nzb_Title = re.sub(r"\s\s+", " ", nzb_Title)  # remove extra whitespace

        nzbAuthor_match = fuzz.token_set_ratio(author, nzb_Title)
        nzbBook_match = fuzz.token_set_ratio(title, nzb_Title)
        logger.debug(u"NZB author/book Match: %s/%s for %s" % (nzbAuthor_match, nzbBook_match, nzb_Title))

        rejected = False
        for word in reject_list:
            if word in nzb_Title.lower() and word not in author.lower() and word not in title.lower():
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (nzb_Title, word))
                break

        nzbsize_temp = nzb['nzbsize']  # Need to cater for when this is NONE (Issue 35)
        if nzbsize_temp is None:
            nzbsize_temp = 1000
        nzbsize = round(float(nzbsize_temp) / 1048576, 2)
        maxsize = formatter.check_int(lazylibrarian.REJECT_MAXSIZE, 0)
        if maxsize and nzbsize > maxsize:
            rejected = True
            logger.debug("Rejecting %s, too large" % nzb_Title)

        if nzbAuthor_match >= match_ratio and nzbBook_match >= match_ratio and not rejected:
            # logger.debug(u'Found NZB: %s using %s search' % (nzb['nzbtitle'], searchtype))
            bookid = book['bookid']
            nzbTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()
            nzburl = nzb['nzburl']
            nzbprov = nzb['nzbprov']
            nzbdate_temp = nzb['nzbdate']
            nzbdate = formatter.nzbdate2format(nzbdate_temp)
            nzbmode = nzb['nzbmode']
            controlValueDict = {"NZBurl": nzburl}
            newValueDict = {
                "NZBprov": nzbprov,
                "BookID": bookid,
                "NZBdate": formatter.now(),  # when we asked for it
                "NZBsize": nzbsize,
                "NZBtitle": nzbTitle,
                "NZBmode": nzbmode,
                "Status": "Skipped"
            }

            score = (nzbBook_match + nzbAuthor_match) / 2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(formatter.getList(nzb_Title))
            words -= len(formatter.getList(author))
            words -= len(formatter.getList(title))
            score -= abs(words)
            matches.append([score, nzb_Title, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]
        logger.info(u"Best match NZB (%s%%): %s using %s search" % (score, nzb_Title, searchtype))
        myDB.upsert("wanted", newValueDict, controlValueDict)

        snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                    newValueDict["BookID"]).fetchone()
        if not snatchedbooks:
            if nzbmode == "torznab":
                snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            else:
                snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            if snatch:
                notifiers.notify_snatch(newValueDict["NZBtitle"] + ' at ' + formatter.now())
                common.schedule_job(action='Start', target='processDir')
                return True

    logger.debug("No nzbs found for " + (book["authorName"] + ' ' + book['bookName']).strip() +
                 " using searchtype " + searchtype)
    return False
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab
    myDB = database.DBConnection()
    searchlist = []
    threading.currentThread().name = "SEARCHMAGS"

    if mags is None:  # backlog search
        searchmags = myDB.select('SELECT Title, LastAcquired, IssueDate from magazines WHERE Status="Active"')
    else:
        searchmags = []
        for magazine in mags:
            searchmags_temp = myDB.select(
                'SELECT Title, LastAcquired, IssueDate from magazines WHERE Title="%s" AND Status="Active"' %
                magazine['bookid'])
            for terms in searchmags_temp:
                searchmags.append(terms)

    if len(searchmags) == 1:
        logger.info('Searching for one magazine')
    else:
        logger.info('Searching for %i magazines' % len(searchmags))

    for searchmag in searchmags:
        bookid = searchmag[0]
        searchterm = searchmag[0]
        # frequency = searchmag[1]
        # last_acquired = searchmag[2]
        # issue_date = searchmag[3]
        dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}
        searchterm = formatter.latinToAscii(formatter.replace_all(searchterm, dic))
        searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode('utf-8')
        searchlist.append({"bookid": bookid, "searchterm": searchterm})

    if not searchlist:
        logger.warn('There is nothing to search for. Mark some magazines as active.')

    for book in searchlist:
        resultlist = []
        tor_resultlist = []
        if lazylibrarian.USE_NZB():
            resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'mag')
            if not nproviders:
                logger.warn('No nzb providers are set. Check config for NEWZNAB or TORZNAB providers')

        if lazylibrarian.USE_TOR():
            tor_resultlist, nproviders = providers.IterateOverTorrentSites(book, 'mag')
            if not nproviders:
                logger.warn('No torrent providers are set. Check config for TORRENT providers')

            for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                resultlist.append({
                    'bookid': item['bookid'],
                    'nzbprov': item['tor_prov'],
                    'nzbtitle': item['tor_title'],
                    'nzburl': item['tor_url'],
                    'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                    'nzbsize': item['tor_size'],
                    'nzbmode': 'torrent'
                })

        if not resultlist:
            logger.debug("Adding magazine %s to queue." % book['searchterm'])
        else:
            bad_regex = 0
            bad_date = 0
            old_date = 0
            total_nzbs = 0
            new_date = 0
            to_snatch = 0
            maglist = []
            issues = []
            reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)
            for nzb in resultlist:
                total_nzbs = total_nzbs + 1
                bookid = nzb['bookid']
                nzbtitle = (u'%s' % nzb['nzbtitle'])
                nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress quotes in titles
                nzburl = nzb['nzburl']
                nzbprov = nzb['nzbprov']
                nzbdate_temp = nzb['nzbdate']
                nzbsize_temp = nzb['nzbsize']
                if nzbsize_temp is None:  # not all torrents returned by torznab have a size
                    nzbsize_temp = 1000
                nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + ' MB'
                nzbdate = formatter.nzbdate2format(nzbdate_temp)
                nzbmode = nzb['nzbmode']

                checkifmag = myDB.select('SELECT * from magazines WHERE Title="%s"' % bookid)
                if checkifmag:
                    for results in checkifmag:
                        control_date = results['IssueDate']
                        # frequency = results['Frequency']
                        # regex = results['Regex']

                    nzbtitle_formatted = nzbtitle.replace('.', ' ').replace('-', ' ').replace('/', ' ').replace(
                        '+', ' ').replace('_', ' ').replace('(', '').replace(')', '').strip()
                    # Need to make sure that substrings of magazine titles don't get found
                    # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this
                    # keyword_check = nzbtitle_formatted.replace(bookid, '')
                    # remove extra spaces if they're in a row
                    nzbtitle_exploded_temp = " ".join(nzbtitle_formatted.split())
                    nzbtitle_exploded = nzbtitle_exploded_temp.split(' ')

                    if ' ' in bookid:
                        bookid_exploded = bookid.split(' ')
                    else:
                        bookid_exploded = [bookid]

                    # check nzb starts with magazine title, and ends with a date
                    # eg The MagPI Issue 22 - July 2015
                    # do something like check left n words match title
                    # then check last n words are a date
                    name_match = 1  # assume name matches for now
                    if len(nzbtitle_exploded) > len(bookid_exploded):
                        # needs to be longer as it has to include a date
                        # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz
                        mag_title_match = fuzz.token_set_ratio(
                            common.remove_accents(bookid),
                            common.remove_accents(nzbtitle_formatted))
                        if mag_title_match < lazylibrarian.MATCH_RATIO:
                            logger.debug(u"Magazine token set Match failed: " +
                                         str(mag_title_match) + "% for " + nzbtitle_formatted)
                            name_match = 0

                    lower_title = common.remove_accents(nzbtitle_formatted).lower()
                    lower_bookid = common.remove_accents(bookid).lower()
                    for word in reject_list:
                        if word in lower_title and word not in lower_bookid:
                            name_match = 0
                            logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                            break

                    if name_match:
                        # some magazine torrent uploaders add their sig in [] or {}
                        # Fortunately for us, they always seem to add it at the end
                        # also some magazine torrent titles are "magazine_name some_form_of_date pdf"
                        # so strip all the trailing junk...
                        while nzbtitle_exploded[len(nzbtitle_exploded) - 1][0] in '[{' or \
                                nzbtitle_exploded[len(nzbtitle_exploded) - 1].lower() == 'pdf':
                            nzbtitle_exploded.pop()  # gotta love the function names

                        # need at least one word magazine title and two date components
                        if len(nzbtitle_exploded) > 2:
                            # regexA = DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            regexA_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                            regexA_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                            regexA_month = formatter.month2num(common.remove_accents(regexA_month_temp))
                            if not regexA_year.isdigit() or int(regexA_year) < 1900 or int(regexA_year) > 2100:
                                regexA_year = 'fail'  # force date failure

                            # if frequency == "Weekly" or frequency == "BiWeekly":
                            regexA_day = nzbtitle_exploded[len(nzbtitle_exploded) - 3].rstrip(',').zfill(2)
                            if regexA_day.isdigit():
                                if int(regexA_day) > 31:  # probably issue number nn
                                    regexA_day = '01'
                            else:
                                regexA_day = '01'  # just MonthName YYYY
                            # else:
                            #     regexA_day = '01'  # monthly, or less frequent

                            try:
                                newdatish = regexA_year + '-' + regexA_month + '-' + regexA_day
                                # try to make sure the year/month/day are valid, exception if not
                                # ie don't accept day > 31, or 30 in some months
                                # also handles multiple date format named issues eg Jan 2014, 01 2014
                                # datetime will give a ValueError if not a good date or a param is not int
                                date1 = datetime.date(int(regexA_year), int(regexA_month), int(regexA_day))
                            except ValueError:
                                # regexB = MonthName DD YYYY or MonthName DD, YYYY
                                regexB_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                regexB_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                regexB_month = formatter.month2num(common.remove_accents(regexB_month_temp))
                                regexB_day = nzbtitle_exploded[len(nzbtitle_exploded) - 2].rstrip(',').zfill(2)
                                if not regexB_year.isdigit() or int(regexB_year) < 1900 or int(regexB_year) > 2100:
                                    regexB_year = 'fail'

                                try:
                                    newdatish = regexB_year + '-' + regexB_month + '-' + regexB_day
                                    # datetime will give a ValueError if not a good date or a param is not int
                                    date1 = datetime.date(int(regexB_year), int(regexB_month), int(regexB_day))
                                except ValueError:
                                    # regexC = YYYY MM or YYYY MM DD
                                    # (can't get MM/DD if named YYYY Issue nn)
                                    # First try YYYY MM
                                    regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                                    if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100:
                                        regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2)
                                        regexC_day = '01'
                                    else:  # try YYYY MM DD
                                        regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                        if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100:
                                            regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 2].zfill(2)
                                            regexC_day = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2)
                                        else:
                                            regexC_year = 'fail'

                                    try:
                                        newdatish = regexC_year + '-' + regexC_month + '-' + regexC_day
                                        # datetime will give a ValueError if not a good date or a param is not int
                                        date1 = datetime.date(int(regexC_year), int(regexC_month), int(regexC_day))
                                    except Exception:
                                        logger.debug('Magazine %s not in proper date format.' % nzbtitle_formatted)
                                        bad_date = bad_date + 1
                                        # allow issues with good name but bad date to be included
                                        # so user can manually select them, incl those with issue numbers
                                        newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                        else:
                            continue

                        # store all the _new_ matching results, marking as "skipped" for now
                        # we change the status to "wanted" on the ones we want to snatch later
                        # don't add a new entry if this issue has been found on an earlier search
                        # because status might have been user-set
                        mag_entry = myDB.select('SELECT * from wanted WHERE NZBtitle="%s" and NZBprov="%s"' %
                                                (nzbtitle, nzbprov))
                        if not mag_entry:
                            controlValueDict = {
                                "NZBtitle": nzbtitle,
                                "NZBprov": nzbprov
                            }
                            newValueDict = {
                                "NZBurl": nzburl,
                                "BookID": bookid,
                                "NZBdate": nzbdate,
                                "AuxInfo": newdatish,
                                "Status": "Skipped",
                                "NZBsize": nzbsize,
                                "NZBmode": nzbmode
                            }
                            myDB.upsert("wanted", newValueDict, controlValueDict)

                        if control_date is None:  # we haven't got any copies of this magazine yet
                            # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                            # could perhaps calc differently for weekly, biweekly etc
                            start_time = time.time()
                            start_time -= 31 * 24 * 60 * 60  # number of seconds in 31 days
                            control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))

                        # only grab a copy if it's newer than the most recent we have,
                        # or newer than a month ago if we have none
                        comp_date = formatter.datecompare(newdatish, control_date)
                        if comp_date > 0:
                            # Should probably only upsert when downloaded and processed in case snatch fails
                            # keep track of what we're going to download so we don't download dupes
                            new_date = new_date + 1
                            issue = bookid + ',' + newdatish
                            if issue not in issues:
                                maglist.append({
                                    'bookid': bookid,
                                    'nzbprov': nzbprov,
                                    'nzbtitle': nzbtitle,
                                    'nzburl': nzburl,
                                    'nzbmode': nzbmode
                                })
                                logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                                to_snatch = to_snatch + 1
                                issues.append(issue)
                                controlValueDict = {"NZBurl": nzburl}
                                newValueDict = {
                                    "NZBdate": formatter.now(),  # when we asked for it
                                    "Status": "Wanted"
                                }
                                myDB.upsert("wanted", newValueDict, controlValueDict)
                            else:
                                logger.debug('This issue of %s is already flagged for download' % issue)
                        else:
                            if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                                old_date = old_date + 1
                    else:
                        logger.debug('Magazine [%s] does not completely match search term [%s].' % (
                            nzbtitle_formatted, bookid))
                        bad_regex = bad_regex + 1

            logger.info('Found %i results for %s. %i new, %i old, %i fail date, %i fail name: %i to download' % (
                total_nzbs, bookid, new_date, old_date, bad_date, bad_regex, to_snatch))

            for items in maglist:
                if items['nzbmode'] in ('torznab', 'torrent'):  # both go to the torrent downloader
                    snatch = TORDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl'])
                else:
                    snatch = NZBDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl'])
                if snatch:
                    notifiers.notify_snatch(formatter.latinToAscii(items['nzbtitle']) + ' at ' + formatter.now())
                    common.schedule_job(action='Start', target='processDir')
            maglist = []

    if reset:
        common.schedule_job(action='Restart', target='search_magazines')

    logger.info("Search for magazines complete")
def search_tor_book(books=None, reset=False):
    if not lazylibrarian.USE_TOR():
        logger.warn('No Torrent providers set, check config')
        return
    # rename this thread
    threading.currentThread().name = "SEARCHTORBOOKS"
    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select(
            'SELECT BookID, AuthorName, Bookname, BookAdded from books WHERE Status="Wanted" order by BookAdded desc')
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            searchbook = myDB.select(
                'SELECT BookID, AuthorName, BookName from books WHERE BookID="%s" AND Status="Wanted"' %
                book['bookid'])
            for terms in searchbook:
                searchbooks.append(terms)

    if len(searchbooks) == 0:
        logger.debug("TOR search requested for no books or invalid BookID")
        return
    elif len(searchbooks) == 1:
        logger.info('TOR Searching for one book')
    else:
        logger.info('TOR Searching for %i books' % len(searchbooks))

    for searchbook in searchbooks:
        bookid = searchbook['BookID']
        author = searchbook['AuthorName']
        book = searchbook['BookName']

        dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
               '"': '', ',': '', '*': '', ':': '', ';': ''}
        dicSearchFormatting = {'.': ' +', ' + ': ' '}

        author = formatter.latinToAscii(formatter.replace_all(author, dic))
        book = formatter.latinToAscii(formatter.replace_all(book, dic))

        # TRY SEARCH TERM just using author name and book type
        author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting))
        searchterm = author + ' ' + book  # + ' ' + lazylibrarian.EBOOK_TYPE
        searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode('utf-8')
        searchterm = re.sub(r'\(.*?\)', '', searchterm).encode('utf-8')
        searchterm = re.sub(r"\s\s+", " ", searchterm)  # strip any double white space
        searchlist.append({"bookid": bookid, "bookName": searchbook[2], "authorName": searchbook[1],
                           "searchterm": searchterm.strip()})

    tor_count = 0
    for book in searchlist:
        resultlist, nproviders = providers.IterateOverTorrentSites(book, 'book')
        if not nproviders:
            logger.warn('No torrent providers are set, check config')
            return  # No point in continuing

        found = processResultList(resultlist, book, "book")

        # if you can't find the book, try author/title without any "(extended details, series etc)"
        if not found and '(' in book['bookName']:
            resultlist, nproviders = providers.IterateOverTorrentSites(book, 'shortbook')
            found = processResultList(resultlist, book, "shortbook")

        # if you can't find the book under "books", you might find under general search
        if not found:
            resultlist, nproviders = providers.IterateOverTorrentSites(book, 'general')
            found = processResultList(resultlist, book, "general")

        # if you still can't find the book, try with author only
        if not found:
            resultlist, nproviders = providers.IterateOverTorrentSites(book, 'author')
            found = processResultList(resultlist, book, "author")

        if not found:
            logger.debug("Searches returned no results. Adding book %s to queue." % book['searchterm'])
        else:
            tor_count = tor_count + 1

    if tor_count == 1:
        logger.info("TORSearch for Wanted items complete, found %s book" % tor_count)
    else:
        logger.info("TORSearch for Wanted items complete, found %s books" % tor_count)

    if reset:
        common.schedule_job(action='Restart', target='search_tor_book')
def search_rss_book(books=None, reset=False):
    if not lazylibrarian.USE_RSS():
        logger.warn('RSS search is disabled')
        common.schedule_job(action='Stop', target='search_rss_book')
        return
    # rename this thread
    threading.currentThread().name = "SEARCHRSSBOOKS"
    myDB = database.DBConnection()

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select(
            'SELECT BookID, AuthorName, Bookname, BookAdded from books WHERE Status="Wanted" order by BookAdded desc')
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            searchbook = myDB.select(
                'SELECT BookID, AuthorName, BookName from books WHERE BookID="%s" AND Status="Wanted"' %
                book['bookid'])
            for terms in searchbook:
                searchbooks.append(terms)

    if len(searchbooks) == 0:
        logger.debug("RSS search requested for no books or invalid BookID")
        return
    elif len(searchbooks) == 1:
        logger.info('RSS Searching for one book')
    else:
        logger.info('RSS Searching for %i books' % len(searchbooks))

    resultlist, nproviders = providers.IterateOverRSSSites()
    if not nproviders:
        logger.warn('No rss providers are set, check config')
        return  # No point in continuing

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
           '"': '', ',': '', '*': '', ':': '', ';': ''}

    rss_count = 0
    for book in searchbooks:
        bookid = book['BookID']
        author = book['AuthorName']
        title = book['BookName']

        author = formatter.latinToAscii(formatter.replace_all(author, dic))
        title = formatter.latinToAscii(formatter.replace_all(title, dic))

        found = processResultList(resultlist, author, title, book)

        # if you can't find the book, try author without initials,
        # and title without any "(extended details, series etc)"
        if not found:
            # guard the index so a single-character author name can't raise IndexError
            if (len(author) > 1 and author[1] in '. ') or '(' in title:  # anything to shorten?
                while len(author) > 1 and author[1] in '. ':  # strip any initials
                    author = author[2:].strip()  # and leading whitespace
                if '(' in title:
                    title = title.split('(')[0]
                found = processResultList(resultlist, author, title, book)

        if not found:
            logger.debug("Searches returned no results. Adding book %s - %s to queue." % (author, title))
        else:
            rss_count = rss_count + 1

    plural = "s"
    if rss_count == 1:
        plural = ""
    logger.info("RSS Search for Wanted items complete, found %s book%s" % (rss_count, plural))

    if reset:
        common.schedule_job(action='Restart', target='search_rss_book')
def start():
    global __INITIALIZED__, started

    if __INITIALIZED__:
        # Crons and scheduled jobs go here
        # list is duplicated in webServe so we can reschedule them
        SCHED.start()
        common.schedule_job("Start", "processDir")
        common.schedule_job("Start", "search_nzb_book")
        common.schedule_job("Start", "search_tor_book")
        common.schedule_job("Start", "search_rss_book")
        common.schedule_job("Start", "search_magazines")
        common.schedule_job("Start", "checkForUpdates")
        started = True
def processDir(force=False, reset=False):
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
        return False

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if force is False and len(snatched) == 0:
        logger.info('Nothing marked as snatched. Stopping postprocessor job.')
        common.schedule_job(action='Stop', target='processDir')
    elif len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
    else:
        logger.debug("Checking %s downloads for %s snatched files" % (len(downloads), len(snatched)))
        ppcount = 0
        for book in snatched:
            found = False
            for fname in downloads:
                if not fname.endswith('.fail'):  # has this failed before?
                    # this is to get round differences in torrent filenames.
                    # Torrents aren't always returned with the name we searched for,
                    # there might be a better way...
                    if isinstance(fname, str):
                        matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                    else:
                        matchname = fname
                    if ' LL.(' in matchname:
                        matchname = matchname.split(' LL.(')[0]
                    matchtitle = book['NZBtitle']
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                    if match >= 95:
                        fname = matchname
                        if os.path.isfile(os.path.join(processpath, fname)):
                            # handle single file downloads here: move the file into
                            # its own folder so the rest of the logic sees a directory
                            if formatter.is_valid_booktype(fname, booktype="book") \
                                    or formatter.is_valid_booktype(fname, booktype="mag"):
                                dirname = os.path.join(processpath, os.path.splitext(fname)[0])
                                if not os.path.exists(dirname):
                                    try:
                                        os.makedirs(dirname)
                                    except OSError as why:
                                        logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror))
                                if os.path.exists(dirname):
                                    try:
                                        shutil.move(os.path.join(processpath, fname), os.path.join(dirname, fname))
                                        fname = os.path.splitext(fname)[0]
                                    except Exception as why:
                                        logger.debug("Failed to move file %s to %s, %s" % (fname, dirname, str(why)))
                        if os.path.isdir(os.path.join(processpath, fname)):
                            pp_path = os.path.join(processpath, fname)
                            logger.debug('Found folder %s for %s' % (pp_path, book['NZBtitle']))
                            found = True
                            break
                    else:
                        logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle))
                else:
                    logger.debug('Skipping %s' % fname)

            if found:
                data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID'])
                if data:
                    authorname = data[0]['AuthorName']
                    bookname = data[0]['BookName']
                    if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                        logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                        lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

                    # Default destination path, should be allowed change per config file.
                    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = common.remove_accents(global_name)
                    # dest_path = authorname+'/'+bookname
                    # global_name = bookname + ' - ' + authorname
                    # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere?
                    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                    dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
                    dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                        lazylibrarian.SYS_ENCODING)
                else:
                    data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                    if data:
                        # AuxInfo was added for magazine release date, normally housed in 'magazines',
                        # but if multiple files are downloading there will be an error in
                        # post-processing, trying to go to the same directory.
                        mostrecentissue = data[0]['IssueDate']  # keep for processing issues arriving out of order
                        # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                        # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere?
                        dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                               ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                        mag_name = formatter.latinToAscii(formatter.replace_all(book['BookID'], dic))
                        # book auxinfo is a cleaned date, eg 2015-01-01
                        dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                            '$IssueDate', book['AuxInfo']).replace('$Title', mag_name)
                        # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                        if lazylibrarian.MAG_RELATIVE:
                            if dest_path[0] not in '._':
                                dest_path = '_' + dest_path
                            dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                                lazylibrarian.SYS_ENCODING)
                        else:
                            dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                        authorname = None
                        bookname = None
                        global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                            '$Title', mag_name)
                        global_name = common.remove_accents(global_name)
                        # global_name = book['AuxInfo']+' - '+title
                    else:
                        logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                        continue
            else:
                logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
                continue

            # try:
            #     os.chmod(dest_path, 0777)
            # except Exception, e:
            #     logger.debug("Could not chmod post-process directory: " + str(dest_path))

            processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

            if processBook:
                logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
                # update nzbs, only update the snatched ones in case multiple matches for same book/magazine issue
                controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Processed", "NZBDate": formatter.now()}  # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)

                if bookname is not None:  # it's a book, if None it's a magazine
                    processExtras(myDB, dest_path, global_name, data)
                else:
                    # update mags
                    controlValueDict = {"Title": book['BookID']}
                    if mostrecentissue > book['AuxInfo']:  # check this in case processing issues arriving out of order
                        newValueDict = {"LastAcquired": formatter.today(), "IssueStatus": "Open"}
                    else:
                        newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": formatter.today(),
                                        "IssueStatus": "Open"}
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # dest_path is where we put the magazine after processing, but we don't have the full filename
                    # so look for any "book" in that directory
                    dest_file = book_file(dest_path, booktype='mag')
                    controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                    newValueDict = {"IssueAcquired": formatter.today(),
                                    "IssueFile": dest_file,
                                    "IssueID": magazinescan.create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                    }
                    myDB.upsert("issues", newValueDict, controlValueDict)

                    # create a thumbnail cover for the new issue
                    magazinescan.create_cover(dest_file)

                logger.info('Successfully processed: %s' % global_name)
                ppcount = ppcount + 1
                notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now())
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s.fail' % pp_path)
                # as it failed we should move it, or it will get postprocessed
                # again (and fail again) on the next run
                try:
                    os.rename(pp_path, pp_path + '.fail')
                except OSError:
                    logger.debug("Unable to rename %s" % pp_path)

        downloads = os.listdir(processpath)  # check in case we processed/deleted some above
        for directory in downloads:
            if "LL.(" in directory and not directory.endswith('.fail'):
                bookID = str(directory).split("LL.(")[1].split(")")[0]
                logger.debug("Book with id: " + str(bookID) + " is in downloads")
                pp_path = os.path.join(processpath, directory)
                if os.path.isfile(pp_path):
                    # single file rather than a folder: fall back to the download dir itself
                    pp_path = os.path.join(processpath)
                if os.path.isdir(pp_path):
                    logger.debug('Found LL folder %s.' % pp_path)
                    if import_book(pp_path, bookID):
                        ppcount = ppcount + 1

        if ppcount:
            logger.info('%s books/mags have been processed.' % ppcount)
        else:
            logger.info('No snatched books/mags have been found')

    if reset:
        common.schedule_job(action='Restart', target='processDir')
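
# --- Illustrative sketch (not part of the original module) -----------------
# The matching loop in processDir strips the ' LL.(bookid)' marker from both
# the download name and the snatched NZBtitle, then fuzzy-compares what is
# left. A minimal standalone version, assuming the fuzzywuzzy package that
# provides the fuzz module used throughout this file; 95 is the hard-coded
# threshold from processDir, not a configurable value.
from fuzzywuzzy import fuzz as _fuzz_demo

def matches_snatched_demo(fname, nzbtitle, threshold=95):
    # both sides may carry an ' LL.(bookid)' marker; compare the part before it
    matchname = fname.split(' LL.(')[0]
    matchtitle = nzbtitle.split(' LL.(')[0]
    # token_set_ratio ignores word order and repeated tokens, which copes with
    # torrents that come back under a different name than we searched for
    return _fuzz_demo.token_set_ratio(matchtitle, matchname) >= threshold

# matches_snatched_demo('John Doe - My Book LL.(1234)', 'John Doe - My Book LL.(1234)')  -> True
# matches_snatched_demo('Some.Unrelated.Release', 'John Doe - My Book LL.(1234)')        -> False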

def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '',
                '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '',
                ':': '', '!': '', '-': ' ', '\s\s': ' '}
    # ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ',
    # ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
           ',': '', '*': '', ':': '', ';': ''}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)

    matches = []

    for tor in resultlist:
        torTitle = formatter.latinToAscii(formatter.replace_all(str(tor['tor_title']), dictrepl)).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        author = formatter.latinToAscii(formatter.replace_all(book['authorName'], dic))
        title = formatter.latinToAscii(formatter.replace_all(book['bookName'], dic))

        torAuthor_match = fuzz.token_set_ratio(author, torTitle)
        torBook_match = fuzz.token_set_ratio(title, torTitle)
        logger.debug(u"TOR author/book Match: %s/%s for %s" % (torAuthor_match, torBook_match, torTitle))

        rejected = False
        for word in reject_list:
            if word in torTitle.lower() and word not in author.lower() and word not in title.lower():
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (torTitle, word))
                break

        if torAuthor_match >= match_ratio and torBook_match >= match_ratio and not rejected:
            # logger.debug(u'Found Torrent: %s using %s search' % (tor['tor_title'], searchtype))
            bookid = book['bookid']
            tor_Title = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()
            tor_url = tor['tor_url']
            tor_prov = tor['tor_prov']

            tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
            if tor_size_temp is None:
                tor_size_temp = 1000
            tor_size = str(round(float(tor_size_temp) / 1048576, 2)) + ' MB'

            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor_prov,
                "BookID": bookid,
                "NZBdate": formatter.now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }

            score = (torBook_match + torAuthor_match) / 2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(formatter.getList(torTitle))
            words -= len(formatter.getList(author))
            words -= len(formatter.getList(title))
            score -= abs(words)
            matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]
        logger.info(u'Best match TOR (%s%%): %s using %s search' % (score, nzb_Title, searchtype))

        myDB.upsert("wanted", newValueDict, controlValueDict)

        snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                    newValueDict["BookID"]).fetchone()
        if not snatchedbooks:
            snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                       newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            if snatch:
                notifiers.notify_snatch(newValueDict["NZBtitle"] + ' at ' + formatter.now())
                common.schedule_job(action='Start', target='processDir')
                return True

    logger.debug("No torrents found for " + (book["authorName"] + ' ' + book['bookName']).strip() +
                 " using searchtype " + searchtype)
    return False
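
# --- Illustrative sketch (not part of the original module) -----------------
# Best-match scoring as used above: average the author and title
# token_set_ratio results, then subtract a point for each word the release
# title has over (or under) the expected author + title word count, so the
# closest-length candidate wins. word_list() is a stand-in for
# formatter.getList, assumed here to be a plain whitespace split.
from fuzzywuzzy import fuzz as _fuzz_score_demo

def word_list(text):
    return text.split()

def score_result_demo(release_title, author, title):
    score = (_fuzz_score_demo.token_set_ratio(author, release_title) +
             _fuzz_score_demo.token_set_ratio(title, release_title)) / 2
    extra = len(word_list(release_title)) - len(word_list(author)) - len(word_list(title))
    return score - abs(extra)

# With both ratios at 100:
# score_result_demo('John Doe - My Book', 'John Doe', 'My Book')                           -> 99
# score_result_demo('John Doe - My Book Extended Anniversary Pack', 'John Doe', 'My Book') -> 96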

def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '',
                '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '',
                ':': '', '!': '', '-': ' ', '\s\s': ' '}
    # ' the ': ' ', ' a ': ' ', ' and ': ' ',
    # ' to ': ' ', ' of ': ' ', ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
           ',': '', '*': '', ':': '', ';': ''}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)

    for nzb in resultlist:
        nzbTitle = formatter.latinToAscii(formatter.replace_all(nzb['nzbtitle'], dictrepl)).strip()
        nzbTitle = re.sub(r"\s\s+", " ", nzbTitle)  # remove extra whitespace

        author = formatter.latinToAscii(formatter.replace_all(book['authorName'], dic))
        title = formatter.latinToAscii(formatter.replace_all(book['bookName'], dic))

        # nzbTitle_match = fuzz.token_set_ratio(book['searchterm'], nzbTitle)
        # logger.debug(u"NZB Title sort Match %: " + str(nzbTitle_match) + " for " + nzbTitle)
        nzbAuthor_match = fuzz.token_set_ratio(author, nzbTitle)
        nzbBook_match = fuzz.token_set_ratio(title, nzbTitle)
        logger.debug(u"NZB author/book Match: %s/%s for %s" % (nzbAuthor_match, nzbBook_match, nzbTitle))

        rejected = False
        for word in reject_list:
            if word in nzbTitle.lower() and word not in author.lower() and word not in title.lower():
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (nzbTitle, word))
                break

        if nzbAuthor_match >= match_ratio and nzbBook_match >= match_ratio and not rejected:
            logger.debug(u'Found NZB: %s using %s search' % (nzb['nzbtitle'], searchtype))
            bookid = book['bookid']
            nzbTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()
            nzburl = nzb['nzburl']
            nzbprov = nzb['nzbprov']
            nzbdate_temp = nzb['nzbdate']

            nzbsize_temp = nzb['nzbsize']  # Need to cater for when this is NONE (Issue 35)
            if nzbsize_temp is None:
                nzbsize_temp = 1000
            nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + ' MB'

            nzbdate = formatter.nzbdate2format(nzbdate_temp)
            nzbmode = nzb['nzbmode']

            controlValueDict = {"NZBurl": nzburl}
            newValueDict = {
                "NZBprov": nzbprov,
                "BookID": bookid,
                "NZBdate": formatter.now(),  # when we asked for it
                "NZBsize": nzbsize,
                "NZBtitle": nzbTitle,
                "NZBmode": nzbmode,
                "Status": "Skipped"
            }
            myDB.upsert("wanted", newValueDict, controlValueDict)

            snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                        bookid).fetchone()
            if not snatchedbooks:
                if nzbmode == "torznab":
                    snatch = TORDownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
                else:
                    snatch = NZBDownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
                if snatch:
                    notifiers.notify_snatch(nzbTitle + ' at ' + formatter.now())
                    common.schedule_job(action='Start', target='processDir')
                    return True

    logger.debug("No nzbs found for " + (book["authorName"] + ' ' + book['bookName']).strip() +
                 " using searchtype " + searchtype)
    return False
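
# --- Illustrative sketch (not part of the original module) -----------------
# The size handling repeated in every result loop: torznab/torrent results may
# report no size at all (Issue 35), so a 1000-byte placeholder is substituted
# before converting bytes to the 'NN.NN MB' display string stored in NZBsize.
def format_size_demo(size_bytes):
    if size_bytes is None:  # not all results return a size
        size_bytes = 1000
    return str(round(float(size_bytes) / 1048576, 2)) + ' MB'

# format_size_demo(None)      -> '0.0 MB'
# format_size_demo(52428800)  -> '50.0 MB'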

def processResultList(resultlist, author, title, book):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '',
                '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '',
                ':': '', '!': '', '-': ' ', '\s\s': ' '}
    # ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ',
    # ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)

    matches = []

    # bit of a misnomer now, rss can search both tor and nzb rss feeds
    for tor in resultlist:
        torTitle = formatter.latinToAscii(formatter.replace_all(tor['tor_title'], dictrepl)).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        tor_Author_match = fuzz.token_set_ratio(author, torTitle)
        tor_Title_match = fuzz.token_set_ratio(title, torTitle)
        logger.debug("RSS Author/Title Match: %s/%s for %s" % (tor_Author_match, tor_Title_match, torTitle))

        rejected = False
        for word in reject_list:
            # author and title are the cleaned strings here; book is the search dict,
            # so compare against title, not book
            if word in torTitle.lower() and word not in author.lower() and word not in title.lower():
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (torTitle, word))
                break

        tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
        if tor_size_temp is None:
            tor_size_temp = 1000
        tor_size = round(float(tor_size_temp) / 1048576, 2)

        maxsize = formatter.check_int(lazylibrarian.REJECT_MAXSIZE, 0)
        if maxsize and tor_size > maxsize:
            rejected = True
            logger.debug("Rejecting %s, too large" % torTitle)

        if tor_Title_match >= match_ratio and tor_Author_match >= match_ratio and not rejected:
            # logger.debug(u'Found RSS: %s' % tor['tor_title'])
            bookid = book['bookid']
            tor_Title = (book["authorName"] + ' - ' + book['bookName'] +
                         ' LL.(' + book['bookid'] + ')').strip()
            tor_url = tor['tor_url']
            tor_prov = tor['tor_prov']
            tor_feed = tor['tor_feed']  # kept for the disabled auth block below

            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor_prov,
                "BookID": bookid,
                "NZBdate": formatter.now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }

            score = (tor_Title_match + tor_Author_match) / 2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(formatter.getList(torTitle))
            words -= len(formatter.getList(author))
            words -= len(formatter.getList(title))
            score -= abs(words)
            matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]
        # rss searches have no searchtype, so don't report one
        logger.info(u'Best match RSS (%s%%): %s' % (score, nzb_Title))

        myDB.upsert("wanted", newValueDict, controlValueDict)

        snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                    newValueDict["BookID"]).fetchone()
        if not snatchedbooks:  # check if one of the other downloaders got there first
            tor_url = controlValueDict["NZBurl"]
            if '.nzb' in tor_url:
                snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            else:
                """
                # http://baconbits.org/torrents.php?action=download&authkey=<authkey>&torrent_pass=<password.hashed>&id=185398
                if not tor_url.startswith('magnet'):  # magnets don't use auth
                    pwd = lazylibrarian.RSS_PROV[tor_feed]['PASS']
                    auth = lazylibrarian.RSS_PROV[tor_feed]['AUTH']
                    # don't know what form of password hash is required, try sha1
                    tor_url = tor_url.replace('<authkey>', auth).replace('<password.hashed>', sha1(pwd))
                """
                snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], tor_url)
            if snatch:
                notifiers.notify_snatch(newValueDict["NZBtitle"] + ' at ' + formatter.now())
                common.schedule_job(action='Start', target='processDir')
                return True

    logger.debug("No RSS found for " + (book["authorName"] + ' ' + book['bookName']).strip())
    return False
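
# --- Illustrative sketch (not part of the original module) -----------------
# The disabled block above templates a private RSS feed's download URL by
# substituting <authkey> and <password.hashed>. The original calls sha1(pwd)
# directly and itself notes it does not know which hash the tracker expects;
# with hashlib, sha1 needs bytes in and .hexdigest() out, so this is one
# plausible reading, not a confirmed scheme. The URL below is a made-up example.
from hashlib import sha1 as _sha1_demo

def fill_auth_demo(tor_url, auth, pwd):
    if tor_url.startswith('magnet'):  # magnets don't use auth
        return tor_url
    hashed = _sha1_demo(pwd.encode('utf-8')).hexdigest()
    return tor_url.replace('<authkey>', auth).replace('<password.hashed>', hashed)

# fill_auth_demo('http://tracker.example/torrents.php?authkey=<authkey>'
#                '&torrent_pass=<password.hashed>&id=185398', 'abc123', 'secret')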

def search_nzb_book(books=None, reset=False):
    if not lazylibrarian.USE_NZB():
        logger.warn('No NEWZNAB/TORZNAB providers set, check config')
        return
    # rename this thread
    threading.currentThread().name = "SEARCHNZBBOOKS"
    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select('SELECT BookID, AuthorName, BookName, BookAdded from books \
                                  WHERE Status="Wanted" order by BookAdded desc')
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            searchbook = myDB.select('SELECT BookID, AuthorName, BookName from books WHERE BookID="%s" \
                                     AND Status="Wanted"' % book['bookid'])
            for terms in searchbook:
                searchbooks.append(terms)

    if len(searchbooks) == 0:
        logger.debug("NZB search requested for no books or invalid BookID")
        return
    elif len(searchbooks) == 1:
        logger.info('NZB Searching for one book')
    else:
        logger.info('NZB Searching for %i books' % len(searchbooks))

    for searchbook in searchbooks:
        bookid = searchbook[0]
        author = searchbook[1]
        book = searchbook[2]

        dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
               ',': '', '*': '', ':': '', ';': ''}
        dicSearchFormatting = {'.': ' +', ' + ': ' '}

        author = formatter.latinToAscii(formatter.replace_all(author, dic))
        book = formatter.latinToAscii(formatter.replace_all(book, dic))

        if '(' in book:  # may have title (series/extended info)
            book = book.split('(')[0]

        # TRY SEARCH TERM just using author name and book
        author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting))
        searchterm = author + ' ' + book
        searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode('utf-8')
        searchterm = re.sub(r'\(.*?\)', '', searchterm).encode('utf-8')
        searchterm = re.sub(r"\s\s+", " ", searchterm)  # strip any double white space
        searchlist.append({"bookid": bookid, "bookName": searchbook[2],
                           "authorName": searchbook[1], "searchterm": searchterm.strip()})

    if not lazylibrarian.SAB_HOST and not lazylibrarian.NZB_DOWNLOADER_BLACKHOLE and not lazylibrarian.NZBGET_HOST:
        logger.warn('No download method is set, use SABnzbd/NZBGet or blackhole, check config')

    nzb_count = 0
    for book in searchlist:
        # first attempt, try author/title in category "book"
        resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'book')
        if not nproviders:
            logger.warn('No NewzNab or TorzNab providers are set, check config')
            return  # no point in continuing

        found = processResultList(resultlist, book, "book")

        # if you can't find the book, try author/title without any "(extended details, series etc)"
        if not found and '(' in book['bookName']:
            resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'shortbook')
            found = processResultList(resultlist, book, "shortbook")

        # if you can't find the book under "books", you might find it under general search
        if not found:
            resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'general')
            found = processResultList(resultlist, book, "general")

        # if you still can't find the book, try with author only
        if not found:
            resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'author')
            found = processResultList(resultlist, book, "author")

        if not found:
            logger.debug("NZB Searches returned no results. Adding book %s to queue." % book['searchterm'])
        else:
            nzb_count = nzb_count + 1

    if nzb_count == 1:
        logger.info("NZBSearch for Wanted items complete, found %s book" % nzb_count)
    else:
        logger.info("NZBSearch for Wanted items complete, found %s books" % nzb_count)

    if reset:
        common.schedule_job(action='Restart', target='search_nzb_book')
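
# --- Illustrative sketch (not part of the original module) -----------------
# The escalating search strategy above as a standalone function: try the most
# specific category first and only widen when nothing matched. iterate() and
# process() are hypothetical stand-ins for providers.IterateOverNewzNabSites
# and processResultList.
def search_with_fallback_demo(book, iterate, process):
    searchtypes = ['book']
    if '(' in book['bookName']:          # title carries (series/extended info)
        searchtypes.append('shortbook')  # retry without the bracketed part
    searchtypes += ['general', 'author']
    for searchtype in searchtypes:
        resultlist, nproviders = iterate(book, searchtype)
        if not nproviders:
            return False  # no providers configured, no point continuing
        if process(resultlist, book, searchtype):
            return True
    return False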

def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {
        "...": "", ".": " ", " & ": " ", " = ": " ", "?": "", "$": "s", " + ": " ", '"': "",
        ",": " ", "*": "", "(": "", ")": "", "[": "", "]": "", "#": "", "0": "", "1": "",
        "2": "", "3": "", "4": "", "5": "", "6": "", "7": "", "8": "", "9": "", "'": "",
        ":": "", "!": "", "-": " ", "\s\s": " ",
    }
    # ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ',
    # ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}
    dic = {
        "...": "", ".": " ", " & ": " ", " = ": " ", "?": "", "$": "s", " + ": " ", '"': "",
        ",": "", "*": "", ":": "", ";": "",
    }

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)

    for tor in resultlist:
        tor_Title = formatter.latinToAscii(formatter.replace_all(str(tor["tor_title"]), dictrepl)).strip()
        tor_Title = re.sub(r"\s\s+", " ", tor_Title)  # remove extra whitespace

        author = formatter.latinToAscii(formatter.replace_all(book["authorName"], dic))
        title = formatter.latinToAscii(formatter.replace_all(book["bookName"], dic))

        torAuthor_match = fuzz.token_set_ratio(author, tor_Title)
        torBook_match = fuzz.token_set_ratio(title, tor_Title)
        logger.debug(u"TOR author/book Match: %s/%s for %s" % (torAuthor_match, torBook_match, tor_Title))
        # tor_Title_match = fuzz.token_set_ratio(book['searchterm'], tor_Title)
        # logger.debug("Torrent Title Match %: " + str(tor_Title_match) + " for " + tor_Title)
        # if (tor_Title_match >= match_ratio):

        rejected = False
        for word in reject_list:
            # was title_lower(), which would raise a NameError
            if word in tor_Title.lower() and word not in author.lower() and word not in title.lower():
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (tor_Title, word))
                break

        if torAuthor_match >= match_ratio and torBook_match >= match_ratio and not rejected:
            logger.debug(u"Found Torrent: %s using %s search" % (tor["tor_title"], searchtype))
            bookid = book["bookid"]
            tor_Title = (author + " - " + title + " LL.(" + book["bookid"] + ")").strip()
            tor_url = tor["tor_url"]
            tor_prov = tor["tor_prov"]

            tor_size_temp = tor["tor_size"]  # Need to cater for when this is NONE (Issue 35)
            if tor_size_temp is None:
                tor_size_temp = 1000
            tor_size = str(round(float(tor_size_temp) / 1048576, 2)) + " MB"

            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor_prov,
                "BookID": bookid,
                "NZBdate": formatter.now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped",
            }
            myDB.upsert("wanted", newValueDict, controlValueDict)

            snatchedbooks = myDB.action(
                'SELECT * from books WHERE BookID="%s" and Status="Snatched"' % bookid
            ).fetchone()
            if not snatchedbooks:
                snatch = TORDownloadMethod(bookid, tor_prov, tor_Title, tor_url)
                if snatch:
                    notifiers.notify_snatch(formatter.latinToAscii(tor_Title) + " at " + formatter.now())
                    common.schedule_job(action="Start", target="processDir")
                    return True

    logger.debug(
        "No torrents found for "
        + (book["authorName"] + " " + book["bookName"]).strip()
        + " using searchtype "
        + searchtype
    )
    return False
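
# --- Illustrative sketch (not part of the original module) -----------------
# The reject-word filter shared by all the result loops above: a candidate is
# rejected when a configured word appears in the release title but not in the
# wanted author or title, so e.g. 'audio' can be blocked without rejecting
# books whose own title legitimately contains it.
def is_rejected_demo(release_title, author, title, reject_list):
    lowered = release_title.lower()
    for word in reject_list:
        if word in lowered and word not in author.lower() and word not in title.lower():
            return True
    return False

# is_rejected_demo('John Doe - My Book audiobook', 'John Doe', 'My Book', ['audio'])  -> True
# is_rejected_demo('Jane Audio - Sound Book', 'Jane Audio', 'Sound Book', ['audio'])  -> False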