def import_book(pp_path=None, bookID=None): # Separated this into a function so we can more easily import books from an alternate directory # and move them into LL folder structure given just the bookID, returns True or False # eg if import_book(source_directory, bookID): # ppcount = ppcount + 1 # myDB = database.DBConnection() data = myDB.select('SELECT * from books WHERE BookID="%s"' % bookID) if data: authorname = data[0]['AuthorName'] bookname = data[0]['BookName'] # try: # auth_dir = os.path.join(lazylibrarian.DESTINATION_DIR, authorname).encode(lazylibrarian.SYS_ENCODING) # os.chmod(auth_dir, 0777) # except Exception, e: # logger.debug("Could not chmod author directory: " + str(auth_dir)) if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER: logger.warn('Please check your EBOOK_DEST_FOLDER setting') lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\') dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace('$Title', bookname) global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace('$Title', bookname) global_name = common.remove_accents(global_name) # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere? dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''} dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic)) dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING) processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name) if processBook: # update nzbs controlValueDict = {"BookID": bookID} newValueDict = {"Status": "Processed", "NZBDate": formatter.now()} # say when we processed it myDB.upsert("wanted", newValueDict, controlValueDict) processExtras(myDB, dest_path, global_name, data) logger.info('Successfully processed: %s' % global_name) notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now()) return True else: logger.error('Postprocessing for %s has failed.' % global_name) logger.error('Warning - Residual files remain in %s.fail' % pp_path) try: os.rename(pp_path, pp_path + '.fail') except: logger.debug("Unable to rename %s" % pp_path) return False
def export_CSV(search_dir=None, status="Wanted"): """ Write a csv file to the search_dir containing all books marked as "Wanted" """ if not search_dir or os.path.isdir(search_dir) is False: logger.warn("Please check Alternate Directory setting") return False csvFile = os.path.join(search_dir, "%s - %s.csv" % (status, now().replace(':', '-'))) myDB = database.DBConnection() find_status = myDB.select('SELECT * FROM books WHERE Status = "%s"' % status) if not find_status: logger.warn(u"No books marked as %s" % status) else: count = 0 with open(csvFile, 'wb') as csvfile: csvwrite = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) # write headers, change AuthorName BookName BookIsbn to match import csv names (Author, Title, ISBN10) csvwrite.writerow(['BookID', 'Author', 'Title', 'ISBN', 'AuthorID']) for resulted in find_status: logger.debug(u"Exported CSV for book %s" % resulted['BookName']) row = ([resulted['BookID'], resulted['AuthorName'], resulted['BookName'], resulted['BookIsbn'], resulted['AuthorID']]) csvwrite.writerow([("%s" % s).encode(lazylibrarian.SYS_ENCODING) for s in row]) count = count + 1 logger.info(u"CSV exported %s book%s to %s" % (count, plural(count), csvFile))
def markMags(self, AuthorName=None, action=None, redirect=None, **args): myDB = database.DBConnection() if not redirect: redirect = "magazines" authorcheck = None maglist = [] for nzburl in args: # ouch dirty workaround... if not nzburl == 'book_table_length': controlValueDict = {'NZBurl': nzburl} newValueDict = {'Status': action} myDB.upsert("wanted", newValueDict, controlValueDict) title = myDB.select("SELECT * from wanted WHERE NZBurl = ?", [nzburl]) for item in title: bookid = item['BookID'] nzbprov = item['NZBprov'] nzbtitle = item['NZBtitle'] nzburl = item['NZBurl'] maglist.append({ 'bookid': bookid, 'nzbprov': nzbprov, 'nzbtitle': nzbtitle, 'nzburl': nzburl }) logger.info('Status set to %s for %s' % (action, nzbtitle)) # start searchthreads if action == 'Wanted': for items in maglist: logger.debug('Snatching %s' % items['nzbtitle']) snatch = DownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl']) notifiers.notify_snatch(items['nzbtitle']+' at '+formatter.now()) raise cherrypy.HTTPRedirect("magazines")
def log(self, message, level): logger = logging.getLogger('lazylibrarian') threadname = threading.currentThread().getName() if level != 'DEBUG' or lazylibrarian.LOGFULL is True: # Limit the size of the "in-memory" log, as gets slow if too long # Set a fairly arbitrary 500 message limit for now # TODO make this configurable? # Ensure messages are ascii as some author names contain accents and the web page doesnt like them lazylibrarian.LOGLIST.insert(0, (formatter.now(), formatter.latinToAscii(message), level, threadname)) if len(lazylibrarian.LOGLIST) > 500: del lazylibrarian.LOGLIST[-1] message = threadname + ' : ' + message if level == 'DEBUG': logger.debug(message) elif level == 'INFO': logger.info(message) elif level == 'WARNING': logger.warn(message) else: logger.error(message)
def log(self, message, level): logger = logging.getLogger('lazylibrarian') threadname = threading.currentThread().getName() # Ensure messages are correctly encoded as some author names contain accents and the web page doesnt like them message = formatter.safe_unicode(message).encode( lazylibrarian.SYS_ENCODING) if level != 'DEBUG' or lazylibrarian.LOGFULL is True: # Limit the size of the "in-memory" log, as gets slow if too long lazylibrarian.LOGLIST.insert(0, (formatter.now(), level, message)) if len(lazylibrarian.LOGLIST) > lazylibrarian.LOGLIMIT: del lazylibrarian.LOGLIST[-1] message = threadname + ' : ' + message if level == 'DEBUG': logger.debug(message) elif level == 'INFO': logger.info(message) elif level == 'WARNING': logger.warn(message) else: logger.error(message)
def exportCSV(search_dir=None, status="Wanted"): """ Write a csv file to the search_dir containing all books marked as "Wanted" """ if not search_dir: logger.warn("Alternate Directory must not be empty") return False csvFile = os.path.join(search_dir, "%s - %s.csv" % (status, formatter.now())) myDB = database.DBConnection() find_status = myDB.select('SELECT * FROM books WHERE Status = "%s"' % status) if not find_status: logger.warn("No books marked as %s" % status) else: with open(csvFile, 'wb') as csvfile: csvwrite = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) # write headers, change AuthorName BookName BookIsbn to match import csv names (Author, Title, ISBN10) csvwrite.writerow([ 'BookID', 'Author', 'Title', 'ISBN', 'AuthorID' ]) for resulted in find_status: logger.debug("Exported CSV for book %s" % resulted['BookName'].encode('utf-8')) row = ([ resulted['BookID'], resulted['AuthorName'], resulted['BookName'], resulted['BookIsbn'], resulted['AuthorID'] ]) csvwrite.writerow([("%s" % s).encode('utf-8') for s in row]) logger.info("CSV exported to %s" % csvFile)
def log(self, message, level): logger = logging.getLogger('lazylibrarian') threadname = threading.currentThread().getName() # Ensure messages are utf-8 as some author names contain accents and the web page doesnt like them message = formatter.safe_unicode(message).encode('utf-8') if level != 'DEBUG' or lazylibrarian.LOGFULL is True: # Limit the size of the "in-memory" log, as gets slow if too long lazylibrarian.LOGLIST.insert(0, (formatter.now(), level, message)) if len(lazylibrarian.LOGLIST) > lazylibrarian.LOGLIMIT: del lazylibrarian.LOGLIST[-1] message = threadname + ' : ' + message if level == 'DEBUG': logger.debug(message) elif level == 'INFO': logger.info(message) elif level == 'WARNING': logger.warn(message) else: logger.error(message)
def export_CSV(search_dir=None, status="Wanted", library='eBook'): """ Write a csv file to the search_dir containing all books marked as "Wanted" """ # noinspection PyBroadException try: if not search_dir: msg = "Alternate Directory not configured" logger.warn(msg) return msg elif not os.path.isdir(search_dir): msg = "Alternate Directory [%s] not found" % search_dir logger.warn(msg) return msg elif not os.access(search_dir, os.W_OK | os.X_OK): msg = "Alternate Directory [%s] not writable" % search_dir logger.warn(msg) return msg csvFile = os.path.join(search_dir, "%s %s - %s.csv" % (status, library, now().replace(':', '-'))) myDB = database.DBConnection() cmd = 'SELECT BookID,AuthorName,BookName,BookIsbn,books.AuthorID FROM books,authors ' if library == 'eBook': cmd += 'WHERE books.Status=? and books.AuthorID = authors.AuthorID' else: cmd += 'WHERE AudioStatus=? and books.AuthorID = authors.AuthorID' find_status = myDB.select(cmd, (status,)) if not find_status: msg = "No %s marked as %s" % (library, status) logger.warn(msg) else: count = 0 if PY2: fmode = 'wb' else: fmode = 'w' with open(csvFile, fmode) as csvfile: csvwrite = writer(csvfile, delimiter=',', quotechar='"', quoting=QUOTE_MINIMAL) # write headers, change AuthorName BookName BookIsbn to match import csv names csvwrite.writerow(['BookID', 'Author', 'Title', 'ISBN', 'AuthorID']) for resulted in find_status: logger.debug("Exported CSV for %s %s" % (library, resulted['BookName'])) row = ([resulted['BookID'], resulted['AuthorName'], resulted['BookName'], resulted['BookIsbn'], resulted['AuthorID']]) if PY2: csvwrite.writerow([("%s" % s).encode(lazylibrarian.SYS_ENCODING) for s in row]) else: csvwrite.writerow([("%s" % s) for s in row]) count += 1 msg = "CSV exported %s %s%s to %s" % (count, library, plural(count), csvFile) logger.info(msg) return msg except Exception: msg = 'Unhandled exception in exportCSV: %s' % traceback.format_exc() logger.error(msg) return msg
def export_CSV(search_dir=None, status="Wanted"): """ Write a csv file to the search_dir containing all books marked as "Wanted" """ # noinspection PyBroadException try: if not search_dir: msg = "Alternate Directory not configured" logger.warn(msg) return msg elif not os.path.isdir(search_dir): msg = "Alternate Directory [%s] not found" % search_dir logger.warn(msg) return msg elif not os.access(search_dir, os.W_OK | os.X_OK): msg = "Alternate Directory [%s] not writable" % search_dir logger.warn(msg) return msg csvFile = os.path.join(search_dir, "%s - %s.csv" % (status, now().replace(':', '-'))) myDB = database.DBConnection() cmd = 'SELECT BookID,AuthorName,BookName,BookIsbn,books.AuthorID FROM books,authors ' cmd += 'WHERE books.Status=? and books.AuthorID = authors.AuthorID' find_status = myDB.select(cmd, (status,)) if not find_status: msg = "No books marked as %s" % status logger.warn(msg) else: count = 0 if PY2: fmode = 'wb' else: fmode = 'w' with open(csvFile, fmode) as csvfile: csvwrite = writer(csvfile, delimiter=',', quotechar='"', quoting=QUOTE_MINIMAL) # write headers, change AuthorName BookName BookIsbn to match import csv names csvwrite.writerow(['BookID', 'Author', 'Title', 'ISBN', 'AuthorID']) for resulted in find_status: logger.debug("Exported CSV for book %s" % resulted['BookName']) row = ([resulted['BookID'], resulted['AuthorName'], resulted['BookName'], resulted['BookIsbn'], resulted['AuthorID']]) if PY2: csvwrite.writerow([("%s" % s).encode(lazylibrarian.SYS_ENCODING) for s in row]) else: csvwrite.writerow([("%s" % s) for s in row]) count += 1 msg = "CSV exported %s book%s to %s" % (count, plural(count), csvFile) logger.info(msg) return msg except Exception: msg = 'Unhandled exception in exportCSV: %s' % traceback.format_exc() logger.error(msg) return msg
def log(self, message, level): logger = logging.getLogger('lazylibrarian') threadname = threading.currentThread().getName() # Weblog: Same behaviour as the file log # Todo: Seperate option in settings/log page if lazylibrarian.LOGLEVEL <= 1: if level != 'DEBUG': lazylibrarian.LOGLIST.insert(0, (formatter.now(), message, level, threadname)) else: lazylibrarian.LOGLIST.insert(0, (formatter.now(), message, level, threadname)) message = threadname + ' : ' + message if level == 'DEBUG': logger.debug(message) elif level == 'INFO': logger.info(message) elif level == 'WARNING': logger.warn(message) else: logger.error(message)
def log(message, level): logger = logging.getLogger('lazylibrarian') threadname = threading.currentThread().getName() # Get the frame data of the method that made the original logger call if len(inspect.stack()) > 2: frame = inspect.getframeinfo(inspect.stack()[2][0]) program = os.path.basename(frame.filename) method = frame.function lineno = frame.lineno else: program = "" method = "" lineno = "" if 'windows' in platform.system().lower( ): # windows cp1252 can't handle some accents message = formatter.unaccented(message) elif PY2: message = formatter.safe_unicode(message) message = message.encode(lazylibrarian.SYS_ENCODING) if level != 'DEBUG' or lazylibrarian.LOGLEVEL >= 2: # Limit the size of the "in-memory" log, as gets slow if too long lazylibrarian.LOGLIST.insert(0, (formatter.now(), level, threadname, program, method, lineno, message)) if len(lazylibrarian.LOGLIST) > formatter.check_int( lazylibrarian.CONFIG['LOGLIMIT'], 500): del lazylibrarian.LOGLIST[-1] message = "%s : %s:%s:%s : %s" % (threadname, program, method, lineno, message) if level == 'DEBUG': logger.debug(message) elif level == 'INFO': logger.info(message) elif level == 'WARNING': logger.warning(message) else: logger.error(message)
def log(self, message, level): logger = logging.getLogger('lazylibrarian') threadname = threading.currentThread().getName() if level != 'DEBUG': lazylibrarian.LOGLIST.insert(0, (formatter.now(), message, level, threadname)) message = threadname + ' : ' + message if level == 'DEBUG': logger.debug(message) elif level == 'INFO': logger.info(message) elif level == 'WARNING': logger.warn(message) else: logger.error(message)
def log(self, message, level): logger = logging.getLogger('lazylibrarian') threadname = threading.currentThread().getName() if level != 'DEBUG': lazylibrarian.LOGLIST.insert( 0, (formatter.now(), message, level, threadname)) message = threadname + ' : ' + message if level == 'DEBUG': logger.debug(message) elif level == 'INFO': logger.info(message) elif level == 'WARNING': logger.warn(message) else: logger.error(message)
def log(message, level): logger = logging.getLogger('lazylibrarian') threadname = threading.currentThread().getName() # Get the frame data of the method that made the original logger call if len(inspect.stack()) > 2: frame = inspect.getframeinfo(inspect.stack()[2][0]) program = os.path.basename(frame.filename) method = frame.function lineno = frame.lineno else: program = "" method = "" lineno = "" # Ensure messages are correctly encoded as some author names contain accents and the web page doesnt like them message = formatter.safe_unicode(message) message = message.encode(lazylibrarian.SYS_ENCODING) if level != 'DEBUG' or lazylibrarian.LOGLEVEL >= 2: # Limit the size of the "in-memory" log, as gets slow if too long lazylibrarian.LOGLIST.insert(0, (formatter.now(), level, threadname, program, method, lineno, message)) if len(lazylibrarian.LOGLIST) > lazylibrarian.CONFIG['LOGLIMIT']: del lazylibrarian.LOGLIST[-1] message = "%s : %s:%s:%s : %s" % (threadname, program, method, lineno, message) if level == 'DEBUG': logger.debug(message) elif level == 'INFO': logger.info(message) elif level == 'WARNING': logger.warning(message) else: logger.error(message)
def log(message, level): logger = logging.getLogger('lazylibrarian') threadname = threading.currentThread().getName() # Get the frame data of the method that made the original logger call if len(inspect.stack()) > 2: frame = inspect.getframeinfo(inspect.stack()[2][0]) program = os.path.basename(frame.filename) method = frame.function lineno = frame.lineno else: program = "" method = "" lineno = "" if 'windows' in platform.system().lower(): # windows cp1252 can't handle some accents message = formatter.unaccented(message) elif PY2: message = formatter.safe_unicode(message) message = message.encode(lazylibrarian.SYS_ENCODING) if level != 'DEBUG' or lazylibrarian.LOGLEVEL >= 2: # Limit the size of the "in-memory" log, as gets slow if too long lazylibrarian.LOGLIST.insert(0, (formatter.now(), level, threadname, program, method, lineno, message)) if len(lazylibrarian.LOGLIST) > formatter.check_int(lazylibrarian.CONFIG['LOGLIMIT'], 500): del lazylibrarian.LOGLIST[-1] message = "%s : %s:%s:%s : %s" % (threadname, program, method, lineno, message) if level == 'DEBUG': logger.debug(message) elif level == 'INFO': logger.info(message) elif level == 'WARNING': logger.warning(message) else: logger.error(message)
def search_magazines(mags=None, reset=False): # produce a list of magazines to search for, tor, nzb, torznab, rss # noinspection PyBroadException try: threadname = threading.currentThread().name if "Thread-" in threadname: if not mags: threading.currentThread().name = "SEARCHALLMAG" else: threading.currentThread().name = "SEARCHMAG" myDB = database.DBConnection() searchlist = [] if not mags: # backlog search searchmags = myDB.select('SELECT Title, Regex, DateType, LastAcquired, \ IssueDate from magazines WHERE Status="Active"') else: searchmags = [] for magazine in mags: searchmags_temp = myDB.select('SELECT Title,Regex,DateType,LastAcquired,IssueDate from magazines \ WHERE Title=? AND Status="Active"', (magazine['bookid'],)) for terms in searchmags_temp: searchmags.append(terms) if len(searchmags) == 0: threading.currentThread().name = "WEBSERVER" return # should clear old search results as might not be available any more # ie torrent not available, changed providers, out of news server retention etc. # Only delete the "skipped" ones, not wanted/snatched/processed/ignored # logger.debug("Removing old magazine search results") # myDB.action('DELETE from pastissues WHERE Status="Skipped"') logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags)))) for searchmag in searchmags: bookid = searchmag['Title'] searchterm = searchmag['Regex'] datetype = searchmag['DateType'] if not datetype: datetype = '' if not searchterm: dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''} # strip accents from the magazine title for easier name-matching searchterm = unaccented_str(searchmag['Title']) if not searchterm: # unless there are no ascii characters left searchterm = searchmag['Title'] searchterm = replace_all(searchterm, dic) searchterm = re.sub('[.\-/]', ' ', searchterm) if PY2: searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING) searchlist.append({"bookid": bookid, "searchterm": searchterm, "datetype": datetype}) if not searchlist: logger.warn('There is nothing to search for. Mark some magazines as active.') for book in searchlist: resultlist = [] if lazylibrarian.USE_NZB(): resultlist, nproviders = IterateOverNewzNabSites(book, 'mag') if not nproviders: # don't nag. Show warning message no more than every 20 mins timenow = int(time.time()) if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow: logger.warn('No nzb providers are available. Check config and blocklist') lazylibrarian.NO_NZB_MSG = timenow if lazylibrarian.USE_DIRECT(): dir_resultlist, nproviders = IterateOverDirectSites(book, 'mag') if not nproviders: # don't nag. Show warning message no more than every 20 mins timenow = int(time.time()) if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow: logger.warn('No direct providers are available. Check config and blocklist') lazylibrarian.NO_DIRECT_MSG = timenow if dir_resultlist: for item in dir_resultlist: # reformat the results so they look like nzbs resultlist.append({ 'bookid': item['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100', # fake date as none returned 'nzbsize': item['tor_size'], 'nzbmode': 'direct' }) if lazylibrarian.USE_TOR(): tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag') if not nproviders: # don't nag. Show warning message no more than every 20 mins timenow = int(time.time()) if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow: logger.warn('No tor providers are available. Check config and blocklist') lazylibrarian.NO_TOR_MSG = timenow if tor_resultlist: for item in tor_resultlist: # reformat the torrent results so they look like nzbs resultlist.append({ 'bookid': item['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100', # fake date as none returned from torrents 'nzbsize': item['tor_size'], 'nzbmode': 'torrent' }) if lazylibrarian.USE_RSS(): rss_resultlist, nproviders, dltypes = IterateOverRSSSites() if not nproviders or 'M' not in dltypes: # don't nag. Show warning message no more than every 20 mins timenow = int(time.time()) if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow: logger.warn('No rss providers are available. Check config and blocklist') lazylibrarian.NO_RSS_MSG = timenow if rss_resultlist: for item in rss_resultlist: # reformat the rss results so they look like nzbs if 'M' in item['types']: resultlist.append({ 'bookid': book['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': item['tor_date'], # may be fake date as none returned from rss torrents 'nzbsize': item['tor_size'], 'nzbmode': item['tor_type'] }) if not resultlist: logger.debug("No results for magazine %s" % book['searchterm']) else: bad_name = 0 bad_date = 0 old_date = 0 rejects = 0 total_nzbs = 0 new_date = 0 maglist = [] issues = [] bookid = '' for nzb in resultlist: total_nzbs += 1 bookid = nzb['bookid'] # strip accents from the magazine title for easier name-matching nzbtitle = unaccented_str(nzb['nzbtitle']) if not nzbtitle: # unless it's not a latin-1 encodable name nzbtitle = nzb['nzbtitle'] nzbtitle = nzbtitle.replace('"', '').replace("'", "") # suppress " in titles nzburl = nzb['nzburl'] nzbprov = nzb['nzbprov'] nzbdate_temp = nzb['nzbdate'] nzbsize_temp = nzb['nzbsize'] nzbsize_temp = check_int(nzbsize_temp, 1000) # not all torrents returned by torznab have a size nzbsize = round(float(nzbsize_temp) / 1048576, 2) nzbdate = nzbdate2format(nzbdate_temp) nzbmode = nzb['nzbmode'] # Need to make sure that substrings of magazine titles don't get found # (e.g. Maxim USA will find Maximum PC USA) so split into "words" dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '', '[': ' ', ']': ' ', '#': '# '} nzbtitle_formatted = replace_all(nzbtitle, dic) # remove extra spaces if they're in a row nzbtitle_formatted = " ".join(nzbtitle_formatted.split()) nzbtitle_exploded = nzbtitle_formatted.split() results = myDB.match('SELECT * from magazines WHERE Title=?', (bookid,)) if not results: logger.debug('Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid)) bad_name += 1 else: rejected = False maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0) if maxsize and nzbsize > maxsize: logger.debug("Rejecting %s, too large" % nzbtitle) rejected = True if not rejected: minsize = check_int(lazylibrarian.CONFIG['REJECT_MAGMIN'], 0) if minsize and nzbsize < minsize: logger.debug("Rejecting %s, too small" % nzbtitle) rejected = True if not rejected: bookid_exploded = replace_all(bookid, dic).split() # Check nzb has magazine title and a date/issue nr # eg The MagPI July 2015 if len(nzbtitle_exploded) > len(bookid_exploded): # needs to be longer as it has to include a date # check all the words in the mag title are in the nzbtitle rejected = False wlist = [] for word in nzbtitle_exploded: word = unaccented(word).lower() if word: wlist.append(word) for word in bookid_exploded: word = unaccented(word).lower() if word and word not in wlist: logger.debug("Rejecting %s, missing %s" % (nzbtitle, word)) rejected = True break if rejected: logger.debug( "Magazine title match failed " + bookid + " for " + nzbtitle_formatted) else: logger.debug( "Magazine title matched " + bookid + " for " + nzbtitle_formatted) else: logger.debug("Magazine name too short (%s)" % len(nzbtitle_exploded)) rejected = True if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']: blocked = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (nzburl,)) if blocked: logger.debug("Rejecting %s, blacklisted at %s" % (nzbtitle_formatted, blocked['NZBprov'])) rejected = True if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']: blocked = myDB.match('SELECT * from wanted WHERE NZBurl=?', (nzburl,)) if blocked: logger.debug("Rejecting %s, blacklisted at %s" % (nzbtitle_formatted, blocked['NZBprov'])) rejected = True if not rejected: reject_list = getList(str(results['Reject']).lower()) reject_list += getList(lazylibrarian.CONFIG['REJECT_MAGS'], ',') lower_title = unaccented(nzbtitle_formatted).lower() lower_bookid = unaccented(bookid).lower() if reject_list: if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag: logger.debug('Reject: %s' % str(reject_list)) logger.debug('Title: %s' % lower_title) logger.debug('Bookid: %s' % lower_bookid) for word in reject_list: if word in lower_title and word not in lower_bookid: rejected = True logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word)) break if rejected: rejects += 1 else: regex_pass, issuedate, year = get_issue_date(nzbtitle_exploded) if regex_pass: logger.debug('Issue %s (regex %s) for %s ' % (issuedate, regex_pass, nzbtitle_formatted)) datetype_ok = True datetype = book['datetype'] if datetype: # check all wanted parts are in the regex result # Day Month Year Vol Iss (MM needs two months) if 'M' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 12]: datetype_ok = False elif 'D' in datetype and regex_pass not in [3, 5, 6]: datetype_ok = False elif 'MM' in datetype and regex_pass not in [1]: # bi monthly datetype_ok = False elif 'V' in datetype and 'I' in datetype and regex_pass not in [8, 9, 17, 18]: datetype_ok = False elif 'V' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 17, 18]: datetype_ok = False elif 'I' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 16, 17, 18]: datetype_ok = False elif 'Y' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 13, 15, 16, 18]: datetype_ok = False else: datetype_ok = False logger.debug('Magazine %s not in a recognised date format.' % nzbtitle_formatted) bad_date += 1 # allow issues with good name but bad date to be included # so user can manually select them, incl those with issue numbers issuedate = "1970-01-01" # provide a fake date for bad-date issues # wanted issues go into wanted table marked "Wanted" # the rest into pastissues table marked "Skipped" or "Have" insert_table = "pastissues" comp_date = 0 if datetype_ok: control_date = results['IssueDate'] logger.debug("Control date: [%s]" % control_date) if not control_date: # we haven't got any copies of this magazine yet # get a rough time just over MAX_AGE days ago to compare to, in format yyyy-mm-dd # could perhaps calc differently for weekly, biweekly etc # For magazines with only an issue number use zero as we can't tell age if str(issuedate).isdigit(): logger.debug('Magazine comparing issue numbers (%s)' % issuedate) control_date = 0 elif re.match('\d+-\d\d-\d\d', str(issuedate)): start_time = time.time() start_time -= int( lazylibrarian.CONFIG['MAG_AGE']) * 24 * 60 * 60 # number of seconds in days if start_time < 0: # limit of unixtime (1st Jan 1970) start_time = 0 control_date = time.strftime("%Y-%m-%d", time.localtime(start_time)) logger.debug('Magazine date comparing to %s' % control_date) else: logger.debug('Magazine unable to find comparison type [%s]' % issuedate) control_date = 0 if str(control_date).isdigit() and str(issuedate).isdigit(): # for issue numbers, check if later than last one we have if regex_pass in [10, 12, 13] and year: issuedate = "%s%04d" % (year, int(issuedate)) else: issuedate = str(issuedate).zfill(4) if not control_date: comp_date = 1 else: comp_date = int(issuedate) - int(control_date) elif re.match('\d+-\d\d-\d\d', str(control_date)) and \ re.match('\d+-\d\d-\d\d', str(issuedate)): # only grab a copy if it's newer than the most recent we have, # or newer than a month ago if we have none comp_date = datecompare(issuedate, control_date) else: # invalid comparison of date and issue number comp_date = 0 if re.match('\d+-\d\d-\d\d', str(control_date)): if regex_pass > 9 and year: # we assumed it was an issue number, but it could be a date year = check_int(year, 0) if regex_pass in [10, 12, 13]: issuedate = int(issuedate[:4]) issuenum = check_int(issuedate, 0) if year and 1 <= issuenum <= 12: issuedate = "%04d-%02d-01" % (year, issuenum) comp_date = datecompare(issuedate, control_date) if not comp_date: logger.debug('Magazine %s failed: Expecting a date' % nzbtitle_formatted) else: logger.debug('Magazine %s failed: Expecting issue number' % nzbtitle_formatted) if not comp_date: bad_date += 1 issuedate = "1970-01-01" if issuedate == "1970-01-01": logger.debug('This issue of %s is unknown age; skipping.' % nzbtitle_formatted) elif not datetype_ok: logger.debug('This issue of %s not in a wanted date format.' % nzbtitle_formatted) elif comp_date > 0: # keep track of what we're going to download so we don't download dupes new_date += 1 issue = bookid + ',' + issuedate if issue not in issues: maglist.append({ 'bookid': bookid, 'nzbprov': nzbprov, 'nzbtitle': nzbtitle, 'nzburl': nzburl, 'nzbmode': nzbmode }) logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted) issues.append(issue) logger.debug('Magazine request number %s' % len(issues)) if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag: logger.debug(str(issues)) insert_table = "wanted" nzbdate = now() # when we asked for it else: logger.debug('This issue of %s is already flagged for download' % issue) else: if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag: logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted) old_date += 1 # store only the _new_ matching results # Don't add a new entry if this issue has been found on an earlier search # and status has been user-set ( we only delete the "Skipped" ones ) # In "wanted" table it might be already snatched/downloading/processing mag_entry = myDB.match('SELECT Status from %s WHERE NZBtitle=? and NZBprov=?' % insert_table, (nzbtitle, nzbprov)) if mag_entry: if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag: logger.debug('%s is already in %s marked %s' % (nzbtitle, insert_table, mag_entry['Status'])) else: controlValueDict = { "NZBtitle": nzbtitle, "NZBprov": nzbprov } if insert_table == 'pastissues': # try to mark ones we've already got match = myDB.match("SELECT * from issues WHERE Title=? AND IssueDate=?", (bookid, issuedate)) if match: insert_status = "Have" else: insert_status = "Skipped" else: insert_status = "Wanted" newValueDict = { "NZBurl": nzburl, "BookID": bookid, "NZBdate": nzbdate, "AuxInfo": issuedate, "Status": insert_status, "NZBsize": nzbsize, "NZBmode": nzbmode } myDB.upsert(insert_table, newValueDict, controlValueDict) if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag: logger.debug('Added %s to %s marked %s' % (nzbtitle, insert_table, insert_status)) msg = 'Found %i result%s for %s. %i new,' % (total_nzbs, plural(total_nzbs), bookid, new_date) msg += ' %i old, %i fail date, %i fail name,' % (old_date, bad_date, bad_name) msg += ' %i rejected: %i to download' % (rejects, len(maglist)) logger.info(msg) for magazine in maglist: if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]: snatch, res = TORDownloadMethod( magazine['bookid'], magazine['nzbtitle'], magazine['nzburl'], 'magazine') elif magazine['nzbmode'] == 'direct': snatch, res = DirectDownloadMethod( magazine['bookid'], magazine['nzbtitle'], magazine['nzburl'], 'magazine') elif magazine['nzbmode'] == 'nzb': snatch, res = NZBDownloadMethod( magazine['bookid'], magazine['nzbtitle'], magazine['nzburl'], 'magazine') else: res = 'Unhandled NZBmode [%s] for %s' % (magazine['nzbmode'], magazine["nzburl"]) logger.error(res) snatch = 0 if snatch: logger.info('Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"])) custom_notify_snatch("%s %s" % (magazine['bookid'], magazine['nzburl'])) notify_snatch("Magazine %s from %s at %s" % (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now())) scheduleJob(action='Start', target='PostProcessor') else: myDB.action('UPDATE wanted SET status="Failed",DLResult=? WHERE NZBurl=?', (res, magazine["nzburl"])) if reset: scheduleJob(action='Restart', target='search_magazines') logger.info("Search for magazines complete") except Exception: logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc()) finally: threading.currentThread().name = "WEBSERVER"
def import_book(pp_path=None, bookID=None): try: # Move a book into LL folder structure given just the folder and bookID, returns True or False # Called from "import_alternate" or if we find an "ll.(xxx)" folder that doesn't match a snatched book/mag # myDB = database.DBConnection() data = myDB.match('SELECT * from books WHERE BookID="%s"' % bookID) if data: authorname = data['AuthorName'] bookname = data['BookName'] processpath = lazylibrarian.DIRECTORY('Destination') if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER: logger.warn('Please check your EBOOK_DEST_FOLDER setting') lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\') dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace('$Title', bookname) global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace('$Title', bookname) global_name = unaccented(global_name) # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere? dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''} dest_path = unaccented_str(replace_all(dest_path, dic)) dest_path = os.path.join(processpath, dest_path).encode(lazylibrarian.SYS_ENCODING) processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name) if processBook: # update nzbs was_snatched = myDB.match('SELECT BookID, NZBprov FROM wanted WHERE BookID="%s"' % bookID) if was_snatched: controlValueDict = {"BookID": bookID} newValueDict = {"Status": "Processed", "NZBDate": now()} # say when we processed it myDB.upsert("wanted", newValueDict, controlValueDict) if bookname: if len(lazylibrarian.IMP_CALIBREDB): logger.debug('Calibre should have created the extras') else: processExtras(myDB, dest_path, global_name, data) if not lazylibrarian.DESTINATION_COPY and pp_path != processpath: if os.path.isdir(pp_path): # calibre might have already deleted it? try: shutil.rmtree(pp_path) except Exception as why: logger.debug("Unable to remove %s, %s" % (pp_path, str(why))) logger.info('Successfully processed: %s' % global_name) notify_download("%s from %s at %s" % (global_name, was_snatched['NZBprov'], now())) return True else: logger.error('Postprocessing for %s has failed.' % global_name) logger.error('Warning - Residual files remain in %s.fail' % pp_path) was_snatched = len(myDB.select('SELECT BookID FROM wanted WHERE BookID="%s"' % bookID)) if was_snatched: controlValueDict = {"BookID": bookID} newValueDict = {"Status": "Failed", "NZBDate": now()} myDB.upsert("wanted", newValueDict, controlValueDict) # reset status so we try for a different version myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % bookID) try: os.rename(pp_path, pp_path + '.fail') except Exception as e: logger.debug("Unable to rename %s, %s" % (pp_path, str(e))) return False except Exception as e: logger.error('Unhandled exception in importBook: %s' % traceback.format_exc())
def search_magazines(mags=None, reset=False): # produce a list of magazines to search for, tor, nzb, torznab myDB = database.DBConnection() searchlist = [] threading.currentThread().name = "SEARCHMAGS" if mags is None: # backlog search searchmags = myDB.select('SELECT Title, LastAcquired, \ IssueDate from magazines WHERE Status="Active"') else: searchmags = [] for magazine in mags: searchmags_temp = myDB.select('SELECT Title, LastAcquired, IssueDate from magazines \ WHERE Title="%s" AND Status="Active"' % (magazine['bookid'])) for terms in searchmags_temp: searchmags.append(terms) if len(searchmags) == 1: logger.info('Searching for one magazine') else: logger.info('Searching for %i magazines' % len(searchmags)) for searchmag in searchmags: bookid = searchmag[0] searchterm = searchmag[0] # frequency = searchmag[1] # last_acquired = searchmag[2] # issue_date = searchmag[3] dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''} searchterm = formatter.latinToAscii(formatter.replace_all(searchterm, dic)) searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode('utf-8') searchlist.append({"bookid": bookid, "searchterm": searchterm}) if searchlist == []: logger.warn('There is nothing to search for. Mark some magazines as active.') for book in searchlist: resultlist = [] tor_resultlist = [] if lazylibrarian.USE_NZB(): resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'mag') if not nproviders: logger.warn('No nzb providers are set. Check config for NEWZNAB or TORZNAB providers') if lazylibrarian.USE_TOR(): tor_resultlist, nproviders = providers.IterateOverTorrentSites(book, 'mag') if not nproviders: logger.warn('No torrent providers are set. Check config for TORRENT providers') for item in tor_resultlist: # reformat the torrent results so they look like nzbs resultlist.append({ 'bookid': item['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100', # fake date as none returned from torrents 'nzbsize': item['tor_size'], 'nzbmode': 'torrent' }) if not resultlist: logger.debug("Adding magazine %s to queue." % book['searchterm']) else: bad_regex = 0 bad_date = 0 old_date = 0 total_nzbs = 0 new_date = 0 to_snatch = 0 maglist = [] issues = [] reject_list = formatter.getList(lazylibrarian.REJECT_WORDS) for nzb in resultlist: total_nzbs = total_nzbs + 1 bookid = nzb['bookid'] nzbtitle = (u'%s' % nzb['nzbtitle']) nzbtitle = nzbtitle.replace('"', '').replace("'", "") # suppress " in titles nzburl = nzb['nzburl'] nzbprov = nzb['nzbprov'] nzbdate_temp = nzb['nzbdate'] nzbsize_temp = nzb['nzbsize'] if nzbsize_temp is None: # not all torrents returned by torznab have a size nzbsize_temp = 1000 nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + ' MB' nzbdate = formatter.nzbdate2format(nzbdate_temp) nzbmode = nzb['nzbmode'] checkifmag = myDB.select('SELECT * from magazines WHERE Title="%s"' % bookid) if checkifmag: for results in checkifmag: control_date = results['IssueDate'] # frequency = results['Frequency'] # regex = results['Regex'] nzbtitle_formatted = nzbtitle.replace('.', ' ').replace('-', ' ').replace('/', ' ').replace( '+', ' ').replace('_', ' ').replace('(', '').replace(')', '').strip() # Need to make sure that substrings of magazine titles don't get found # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this # keyword_check = nzbtitle_formatted.replace(bookid, '') # remove extra spaces if they're in a row nzbtitle_exploded_temp = " ".join(nzbtitle_formatted.split()) nzbtitle_exploded = nzbtitle_exploded_temp.split(' ') if ' ' in bookid: bookid_exploded = bookid.split(' ') else: bookid_exploded = [bookid] # check nzb starts with magazine title, and ends with a date # eg The MagPI Issue 22 - July 2015 # do something like check left n words match title # then check last n words are a date name_match = 1 # assume name matches for now if len(nzbtitle_exploded) > len(bookid_exploded): # needs to be longer as it has to include a date # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz mag_title_match = fuzz.token_set_ratio(common.remove_accents(bookid), common.remove_accents(nzbtitle_formatted)) if mag_title_match < lazylibrarian.MATCH_RATIO: logger.debug(u"Magazine token set Match failed: " + str(mag_title_match) + "% for " + nzbtitle_formatted) name_match = 0 lower_title = common.remove_accents(nzbtitle_formatted).lower() lower_bookid = common.remove_accents(bookid).lower() for word in reject_list: if word in lower_title and not word in lower_bookid: name_match = 0 logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word)) break if name_match: # some magazine torrent uploaders add their sig in [] or {} # Fortunately for us, they always seem to add it at the end # also some magazine torrent titles are "magazine_name some_form_of_date pdf" # so strip all the trailing junk... while nzbtitle_exploded[len(nzbtitle_exploded) - 1][0] in '[{' or \ nzbtitle_exploded[len(nzbtitle_exploded) - 1].lower() == 'pdf': nzbtitle_exploded.pop() # gotta love the function names # need at least one word magazine title and two date components if len(nzbtitle_exploded) > 2: # regexA = DD MonthName YYYY OR MonthName YYYY or Issue nn MonthName YYYY regexA_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1] regexA_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 2] regexA_month = formatter.month2num(common.remove_accents(regexA_month_temp)) if not regexA_year.isdigit() or int(regexA_year) < 1900 or int(regexA_year) > 2100: regexA_year = 'fail' # force date failure #if frequency == "Weekly" or frequency == "BiWeekly": regexA_day = nzbtitle_exploded[len(nzbtitle_exploded) - 3].zfill(2) if regexA_day.isdigit(): if int(regexA_day) > 31: # probably issue number nn regexA_day = '01' else: regexA_day = '01' # just MonthName YYYY #else: # regexA_day = '01' # monthly, or less frequent try: newdatish = regexA_year + '-' + regexA_month + '-' + regexA_day # try to make sure the year/month/day are valid, exception if not # ie don't accept day > 31, or 30 in some months # also handles multiple date format named issues eg Jan 2014, 01 2014 # datetime will give a ValueError if not a good date or a param is not int date1 = datetime.date(int(regexA_year), int(regexA_month), int(regexA_day)) except ValueError: # regexB = MonthName DD YYYY regexB_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1] regexB_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 3] regexB_month = formatter.month2num(common.remove_accents(regexB_month_temp)) regexB_day = nzbtitle_exploded[len(nzbtitle_exploded) - 2].zfill(2) if not regexB_year.isdigit() or int(regexB_year) < 1900 or int(regexB_year) > 2100: regexB_year = 'fail' try: newdatish = regexB_year + '-' + regexB_month + '-' + regexB_day # datetime will give a ValueError if not a good date or a param is not int date1 = datetime.date(int(regexB_year), int(regexB_month), int(regexB_day)) except ValueError: # regexC = YYYY MM or YYYY MM DD # (can't get MM/DD if named YYYY Issue nn) # First try YYYY MM regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 2] if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100: regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2) regexC_day = '01' else: # try YYYY MM DD regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 3] if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100: regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 2].zfill(2) regexC_day = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2) else: regexC_year = 'fail' try: newdatish = regexC_year + '-' + regexC_month + '-' + regexC_day # datetime will give a ValueError if not a good date or a param is not int date1 = datetime.date(int(regexC_year), int(regexC_month), int(regexC_day)) except: logger.debug('Magazine %s not in proper date format.' % nzbtitle_formatted) bad_date = bad_date + 1 # allow issues with good name but bad date to be included # so user can manually select them, incl those with issue numbers newdatish = "1970-01-01" # provide a fake date for bad-date issues # continue else: continue if control_date is None: # we haven't got any copies of this magazine yet # get a rough time just over a month ago to compare to, in format yyyy-mm-dd # could perhaps calc differently for weekly, biweekly etc start_time = time.time() start_time -= 31 * 24 * 60 * 60 # number of seconds in 31 days control_date = time.strftime("%Y-%m-%d", time.localtime(start_time)) # only grab a copy if it's newer than the most recent we have, # or newer than a month ago if we have none comp_date = formatter.datecompare(newdatish, control_date) if comp_date > 0: # Should probably only upsert when downloaded and processed in case snatch fails # keep track of what we're going to download so we don't download dupes new_date = new_date + 1 issue = bookid + ',' + newdatish if issue not in issues: maglist.append({ 'bookid': bookid, 'nzbprov': nzbprov, 'nzbtitle': nzbtitle, 'nzburl': nzburl, 'nzbmode': nzbmode }) logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted) to_snatch = to_snatch + 1 issues.append(issue) controlValueDict = {"NZBurl": nzburl} newValueDict = { "NZBprov": nzbprov, "BookID": bookid, "NZBdate": formatter.now(), # when we asked for it "NZBtitle": nzbtitle, "AuxInfo": newdatish, "Status": "Wanted", "NZBsize": nzbsize, "NZBmode": nzbmode } myDB.upsert("wanted", newValueDict, controlValueDict) else: logger.debug('This issue of %s is already flagged for download' % issue) else: if newdatish != "1970-01-01": # this is our fake date for ones we can't decipher logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted) old_date = old_date + 1 else: logger.debug('Magazine [%s] does not completely match search term [%s].' % ( nzbtitle_formatted, bookid)) bad_regex = bad_regex + 1 logger.info('Found %s results for %s. %s are new, %s are old, %s fail date, %s fail name matching' % ( total_nzbs, bookid, new_date, old_date, bad_date, bad_regex)) logger.info("%s, %s issues to download" % (bookid, to_snatch)) for items in maglist: if items['nzbmode'] == "torznab": snatch = TORDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl']) elif items['nzbmode'] == "torrent": snatch = TORDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl']) else: snatch = NZBDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl']) if snatch: notifiers.notify_snatch(formatter.latinToAscii(items['nzbtitle']) + ' at ' + formatter.now()) common.schedule_job(action='Start', target='processDir') maglist = [] if reset == True: common.schedule_job(action='Restart', target='search_magazines') logger.info("Search for magazines complete")
def test_notify(self): return self._notifyTwitter( "This is a test notification from LazyLibrarian / " + formatter.now(), force=True)
def processResultList(resultlist, author, title, book): myDB = database.DBConnection() dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '', '-': ' ', '\s\s': ' '} # ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ', # ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '} match_ratio = int(lazylibrarian.MATCH_RATIO) reject_list = formatter.getList(lazylibrarian.REJECT_WORDS) # bit of a misnomer now, rss can search both tor and nzb rss feeds for tor in resultlist: tor_Title = formatter.latinToAscii(formatter.replace_all(tor['tor_title'], dictrepl)).strip() tor_Title = re.sub(r"\s\s+", " ", tor_Title) # remove extra whitespace tor_Author_match = fuzz.token_set_ratio(author, tor_Title) tor_Title_match = fuzz.token_set_ratio(title, tor_Title) logger.debug("RSS Author/Title Match: %s/%s for %s" %(tor_Author_match, tor_Title_match, tor_Title)) rejected = False for word in reject_list: if word in tor_Title.lower() and not word in author.lower() and not word in book.lower(): rejected = True logger.debug("Rejecting %s, contains %s" % (tor_Title, word)) break if (tor_Title_match >= match_ratio and tor_Author_match >= match_ratio and not rejected): logger.debug(u'Found RSS: %s' % tor['tor_title']) bookid = book['bookid'] tor_Title = (book["authorName"] + ' - ' + book['bookName'] + ' LL.(' + book['bookid'] + ')').strip() tor_url = tor['tor_url'] tor_prov = tor['tor_prov'] tor_feed = tor['tor_feed'] tor_size_temp = tor['tor_size'] # Need to cater for when this is NONE (Issue 35) if tor_size_temp is None: tor_size_temp = 1000 tor_size = str(round(float(tor_size_temp) / 1048576, 2)) + ' MB' controlValueDict = {"NZBurl": tor_url} newValueDict = { "NZBprov": tor_prov, "BookID": bookid, "NZBdate": formatter.now(), # when we asked for it "NZBsize": tor_size, "NZBtitle": tor_Title, "NZBmode": "torrent", "Status": "Skipped" } myDB.upsert("wanted", newValueDict, controlValueDict) snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' % bookid).fetchone() if not snatchedbooks: # check if one of the other downloaders got there first if '.nzb' in tor_url: snatch = NZBDownloadMethod(bookid, tor_prov, tor_Title, tor_url) else: # http://baconbits.org/torrents.php?action=download&authkey=<authkey>&torrent_pass=<password.hashed>&id=185398 if not tor_url.startswith('magnet'): # magnets don't use auth pwd = lazylibrarian.RSS_PROV[tor_feed]['PASS'] auth = lazylibrarian.RSS_PROV[tor_feed]['AUTH'] # don't know what form of password hash is required, try sha1 tor_url = tor_url.replace('<authkey>', auth).replace('<password.hashed>', sha1(pwd)) snatch = TORDownloadMethod(bookid, tor_prov, tor_Title, tor_url) if snatch: notifiers.notify_snatch(formatter.latinToAscii(tor_Title) + ' at ' + formatter.now()) common.schedule_job(action='Start', target='processDir') return True logger.debug("No RSS found for " + (book["authorName"] + ' ' + book['bookName']).strip()) return False
def processResultList(resultlist, book, searchtype): myDB = database.DBConnection() dictrepl = { "...": "", ".": " ", " & ": " ", " = ": " ", "?": "", "$": "s", " + ": " ", '"': "", ",": " ", "*": "", "(": "", ")": "", "[": "", "]": "", "#": "", "0": "", "1": "", "2": "", "3": "", "4": "", "5": "", "6": "", "7": "", "8": "", "9": "", "'": "", ":": "", "!": "", "-": " ", "\s\s": " ", } # ' the ': ' ', ' a ': ' ', ' and ': ' ', # ' to ': ' ', ' of ': ' ', ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '} dic = { "...": "", ".": " ", " & ": " ", " = ": " ", "?": "", "$": "s", " + ": " ", '"': "", ",": "", "*": "", ":": "", ";": "", "'": "", } match_ratio = int(lazylibrarian.MATCH_RATIO) reject_list = formatter.getList(lazylibrarian.REJECT_WORDS) author = formatter.latinToAscii(formatter.replace_all(book["authorName"], dic)) title = formatter.latinToAscii(formatter.replace_all(book["bookName"], dic)) matches = [] for nzb in resultlist: nzb_Title = formatter.latinToAscii(formatter.replace_all(nzb["nzbtitle"], dictrepl)).strip() nzb_Title = re.sub(r"\s\s+", " ", nzb_Title) # remove extra whitespace nzbAuthor_match = fuzz.token_set_ratio(author, nzb_Title) nzbBook_match = fuzz.token_set_ratio(title, nzb_Title) logger.debug(u"NZB author/book Match: %s/%s for %s" % (nzbAuthor_match, nzbBook_match, nzb_Title)) rejected = False for word in reject_list: if word in nzb_Title.lower() and not word in author.lower() and not word in title.lower(): rejected = True logger.debug("Rejecting %s, contains %s" % (nzb_Title, word)) break nzbsize_temp = nzb["nzbsize"] # Need to cater for when this is NONE (Issue 35) if nzbsize_temp is None: nzbsize_temp = 1000 nzbsize = round(float(nzbsize_temp) / 1048576, 2) maxsize = formatter.check_int(lazylibrarian.REJECT_MAXSIZE, 0) if maxsize and nzbsize > maxsize: rejected = True logger.debug("Rejecting %s, too large" % nzb_Title) if nzbAuthor_match >= match_ratio and nzbBook_match >= match_ratio and not rejected: # logger.debug(u'Found NZB: %s using %s search' % (nzb['nzbtitle'], searchtype)) bookid = book["bookid"] nzbTitle = (author + " - " + title + " LL.(" + book["bookid"] + ")").strip() nzburl = nzb["nzburl"] nzbprov = nzb["nzbprov"] nzbdate_temp = nzb["nzbdate"] nzbdate = formatter.nzbdate2format(nzbdate_temp) nzbmode = nzb["nzbmode"] controlValueDict = {"NZBurl": nzburl} newValueDict = { "NZBprov": nzbprov, "BookID": bookid, "NZBdate": formatter.now(), # when we asked for it "NZBsize": nzbsize, "NZBtitle": nzbTitle, "NZBmode": nzbmode, "Status": "Skipped", } score = (nzbBook_match + nzbAuthor_match) / 2 # as a percentage # lose a point for each extra word in the title so we get the closest match words = len(formatter.getList(nzb_Title)) words -= len(formatter.getList(author)) words -= len(formatter.getList(title)) score -= abs(words) matches.append([score, nzb_Title, newValueDict, controlValueDict]) if matches: highest = max(matches, key=lambda x: x[0]) score = highest[0] nzb_Title = highest[1] newValueDict = highest[2] controlValueDict = highest[3] logger.info(u"Best match NZB (%s%%): %s using %s search" % (score, nzb_Title, searchtype)) myDB.upsert("wanted", newValueDict, controlValueDict) snatchedbooks = myDB.action( 'SELECT * from books WHERE BookID="%s" and Status="Snatched"' % newValueDict["BookID"] ).fetchone() if not snatchedbooks: if nzbmode == "torznab": snatch = TORDownloadMethod( newValueDict["BookID"], newValueDict["NZBprov"], newValueDict["NZBtitle"], controlValueDict["NZBurl"], ) else: snatch = NZBDownloadMethod( newValueDict["BookID"], newValueDict["NZBprov"], newValueDict["NZBtitle"], controlValueDict["NZBurl"], ) if snatch: notifiers.notify_snatch(newValueDict["NZBtitle"] + " at " + formatter.now()) common.schedule_job(action="Start", target="processDir") return True logger.debug( "No nzb's found for " + (book["authorName"] + " " + book["bookName"]).strip() + " using searchtype " + searchtype ) return False
def test_notify(self): return self._notifyTwitter("This is a test notification from LazyLibrarian / " + formatter.now(), force=True)
def downloadResult(match, book): """ match: best result from search providers book: book we are downloading (needed for reporting author name) return: 0 if failed to snatch 1 if already snatched 2 if we snatched it """ # noinspection PyBroadException try: myDB = database.DBConnection() newValueDict = match[1] controlValueDict = match[2] # It's possible to get book and wanted tables "Snatched" status out of sync # for example if a user marks a book as "Wanted" after a search task snatches it and before postprocessor runs # so check status in both tables here snatched = myDB.match( 'SELECT BookID from wanted WHERE BookID=? and AuxInfo=? and Status="Snatched"', (newValueDict["BookID"], newValueDict["AuxInfo"])) if snatched: logger.debug('%s %s %s already marked snatched in wanted table' % (newValueDict["AuxInfo"], book['authorName'], book['bookName'])) return 1 # someone else already found it if newValueDict["AuxInfo"] == 'eBook': snatched = myDB.match( 'SELECT BookID from books WHERE BookID=? and Status="Snatched"', (newValueDict["BookID"], )) else: snatched = myDB.match( 'SELECT BookID from books WHERE BookID=? and AudioStatus="Snatched"', (newValueDict["BookID"], )) if snatched: logger.debug('%s %s %s already marked snatched in book table' % (newValueDict["AuxInfo"], book['authorName'], book['bookName'])) return 1 # someone else already found it myDB.upsert("wanted", newValueDict, controlValueDict) if newValueDict['NZBmode'] == 'direct': snatch = DirectDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"], controlValueDict["NZBurl"], newValueDict["AuxInfo"]) elif newValueDict['NZBmode'] in ["torznab", "torrent", "magnet"]: snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"], controlValueDict["NZBurl"], newValueDict["AuxInfo"]) elif newValueDict['NZBmode'] == 'nzb': snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"], controlValueDict["NZBurl"], newValueDict["AuxInfo"]) else: logger.error('Unhandled NZBmode [%s] for %s' % (newValueDict['NZBmode'], controlValueDict["NZBurl"])) snatch = 0 if snatch: logger.info('Downloading %s %s from %s' % (newValueDict["AuxInfo"], newValueDict["NZBtitle"], newValueDict["NZBprov"])) custom_notify_snatch( "%s %s" % (newValueDict["BookID"], newValueDict['AuxInfo'])) notify_snatch("%s %s from %s at %s" % (newValueDict["AuxInfo"], newValueDict["NZBtitle"], newValueDict["NZBprov"], now())) # at this point we could add NZBprov to the blocklist with a short timeout, a second or two? # This would implement a round-robin search system. Blocklist with an incremental counter. # If number of active providers == number blocklisted, so no unblocked providers are left, # either sleep for a while, or unblock the one with the lowest counter. scheduleJob(action='Start', target='processDir') return 2 # we found it return 0 except Exception: logger.error('Unhandled exception in downloadResult: %s' % traceback.format_exc()) return 0
def processDir(): # rename this thread threading.currentThread().name = "POSTPROCESS" processpath = lazylibrarian.DOWNLOAD_DIR logger.debug(' Checking [%s] for files to post process' % processpath) #TODO - try exception on os.listdir - it throws debug level #exception if dir doesn't exist - bloody hard to catch try: downloads = os.listdir(processpath) except OSError: logger.error('Could not access [%s] directory ' % processpath) myDB = database.DBConnection() snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"') if snatched is None: logger.info('No books are snatched. Nothing to process.') elif downloads is None: logger.info('No downloads are found. Nothing to process.') else: ppcount = 0 for book in snatched: if book['NZBtitle'] in downloads: pp_path = os.path.join(processpath, book['NZBtitle']) logger.info('Found folder %s.' % pp_path) data = myDB.select("SELECT * from books WHERE BookID='%s'" % book['BookID']) if data: for metadata in data: authorname = metadata['AuthorName'] authorimg = metadata['AuthorLink'] bookname = metadata['BookName'] bookdesc = metadata['BookDesc'] bookisbn = metadata['BookIsbn'] bookrate = metadata['BookRate'] bookimg = metadata['BookImg'] bookpage = metadata['BookPages'] booklink = metadata['BookLink'] bookdate = metadata['BookDate'] booklang = metadata['BookLang'] bookpub = metadata['BookPub'] #Default destination path, should be allowed change per config file. dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace( '$Author', authorname).replace('$Title', bookname) #dest_path = authorname+'/'+bookname global_name = lazylibrarian.EBOOK_DEST_FILE.replace( '$Author', authorname).replace('$Title', bookname) #global_name = bookname + ' - ' + authorname else: data = myDB.select( "SELECT * from magazines WHERE Title='%s'" % book['BookID']) for metadata in data: title = metadata['Title'] #AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple #files are downloading, there will be an error in post-processing, trying to go to the #same directory. dest_path = lazylibrarian.MAG_DEST_FOLDER.replace( '$IssueDate', book['AuxInfo']).replace('$Title', title) #dest_path = '_Magazines/'+title+'/'+book['AuxInfo'] authorname = None bookname = None global_name = lazylibrarian.MAG_DEST_FILE.replace( '$IssueDate', book['AuxInfo']).replace('$Title', title) #global_name = book['AuxInfo']+' - '+title else: logger.info("Snatched NZB %s is not in download directory" % (book['NZBtitle'])) continue try: os.chmod( os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING), 0777) except Exception, e: logger.debug("Could not chmod post-process directory") dic = { '<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': '' } dest_path = formatter.latinToAscii( formatter.replace_all(dest_path, dic)) dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode( lazylibrarian.SYS_ENCODING) processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name) if processBook: ppcount = ppcount + 1 # If you use auto add by Calibre you need the book in a single directory, not nested #So take the file you Copied/Moved to Dest_path and copy it to a Calibre auto add folder. if lazylibrarian.IMP_AUTOADD: processAutoAdd(dest_path) #update nzbs controlValueDict = {"NZBurl": book['NZBurl']} newValueDict = {"Status": "Processed"} myDB.upsert("wanted", newValueDict, controlValueDict) # try image if bookname is not None: processIMG(dest_path, bookimg, global_name) # try metadata processOPF(dest_path, authorname, bookname, bookisbn, book['BookID'], bookpub, bookdate, bookdesc, booklang, global_name) #update books controlValueDict = {"BookID": book['BookID']} newValueDict = {"Status": "Open"} myDB.upsert("books", newValueDict, controlValueDict) #update authors query = 'SELECT COUNT(*) FROM books WHERE AuthorName="%s" AND (Status="Have" OR Status="Open")' % authorname countbooks = myDB.action(query).fetchone() havebooks = int(countbooks[0]) controlValueDict = {"AuthorName": authorname} newValueDict = {"HaveBooks": havebooks} author_query = 'SELECT * FROM authors WHERE AuthorName="%s"' % authorname countauthor = myDB.action(author_query).fetchone() if countauthor: myDB.upsert("authors", newValueDict, controlValueDict) else: #update mags controlValueDict = {"Title": book['BookID']} newValueDict = {"IssueStatus": "Open"} myDB.upsert("magazines", newValueDict, controlValueDict) logger.info('Successfully processed: %s' % (global_name)) notifiers.notify_download(global_name + ' at ' + formatter.now()) else: logger.error( 'Postprocessing for %s has failed. Warning - AutoAdd will be repeated' % global_name) if ppcount: logger.debug('%s books are downloaded and processed.' % ppcount) else: logger.debug('No snatched books have been found')
def search_magazines(mags=None): # produce a list of magazines to search for, tor, nzb, torznab myDB = database.DBConnection() searchlist = [] threading.currentThread().name = "SEARCHMAGS" if mags is None: # backlog search searchmags = myDB.select('SELECT Title, Frequency, LastAcquired, \ IssueDate from magazines WHERE Status="Active"') else: searchmags = [] for magazine in mags: searchmags_temp = myDB.select('SELECT Title, Frequency, LastAcquired, IssueDate from magazines \ WHERE Title="%s" AND Status="Active"' % (magazine['bookid'])) for terms in searchmags_temp: searchmags.append(terms) if len(searchmags) == 1: logger.info('Searching for one magazine') else: logger.info('Searching for %i magazines' % len(searchmags)) for searchmag in searchmags: bookid = searchmag[0] searchterm = searchmag[0] frequency = searchmag[1] # last_acquired = searchmag[2] # issue_date = searchmag[3] dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''} searchterm = formatter.latinToAscii(formatter.replace_all(searchterm, dic)) searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode('utf-8') searchlist.append({"bookid": bookid, "searchterm": searchterm}) if searchlist == []: logger.warn('There is nothing to search for. Mark some magazines as active.') for book in searchlist: resultlist = [] tor_resultlist = [] if lazylibrarian.USE_NZB: resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'mag') if not nproviders: logger.warn('No nzb providers are set. Check config for NEWZNAB or TORZNAB providers') if lazylibrarian.USE_TOR: tor_resultlist, nproviders = providers.IterateOverTorrentSites(book, 'mag') if not nproviders: logger.warn('No torrent providers are set. Check config for TORRENT providers') for item in tor_resultlist: # reformat the torrent results so they look like nzbs resultlist.append({ 'bookid': item['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100', # fake date as none returned from torrents 'nzbsize': item['tor_size'], 'nzbmode': 'torrent' }) if not resultlist: logger.debug("Adding magazine %s to queue." % book['searchterm']) else: bad_regex = 0 bad_date = 0 old_date = 0 total_nzbs = 0 new_date = 0 to_snatch = 0 maglist = [] issues = [] for nzb in resultlist: total_nzbs = total_nzbs + 1 bookid = nzb['bookid'] nzbtitle = (u'%s' % nzb['nzbtitle']) nzbtitle = nzbtitle.replace('"', '').replace("'", "") # suppress " in titles nzburl = nzb['nzburl'] nzbprov = nzb['nzbprov'] nzbdate_temp = nzb['nzbdate'] nzbsize_temp = nzb['nzbsize'] if nzbsize_temp is None: # not all torrents returned by torznab have a size nzbsize_temp = 1000 nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + ' MB' nzbdate = formatter.nzbdate2format(nzbdate_temp) nzbmode = nzb['nzbmode'] checkifmag = myDB.select('SELECT * from magazines WHERE Title="%s"' % bookid) if checkifmag: for results in checkifmag: control_date = results['IssueDate'] frequency = results['Frequency'] # regex = results['Regex'] nzbtitle_formatted = nzbtitle.replace('.', ' ').replace('-', ' ').replace('/', ' ').replace( '+', ' ').replace('_', ' ').replace('(', '').replace(')', '').strip() # Need to make sure that substrings of magazine titles don't get found # (e.g. Maxim USA will find Maximum PC USA) # keyword_check = nzbtitle_formatted.replace(bookid, '') # remove extra spaces if they're in a row nzbtitle_exploded_temp = " ".join(nzbtitle_formatted.split()) nzbtitle_exploded = nzbtitle_exploded_temp.split(' ') if ' ' in bookid: bookid_exploded = bookid.split(' ') else: bookid_exploded = [bookid] # check nzb starts with magazine title, and ends with a date # eg The MagPI Issue 22 - July 2015 # do something like check left n words match title # then check last n words are a date name_match = 1 # assume name matches for now name_len = len(bookid_exploded) if len(nzbtitle_exploded) > name_len: # needs to be longer as it should include a date while name_len: name_len = name_len - 1 # fuzzy check on each word in the magazine name with any accents stripped # fuzz.ratio doesn't lowercase for us ratio = fuzz.ratio(common.remove_accents(nzbtitle_exploded[name_len].lower()), common.remove_accents(bookid_exploded[name_len].lower())) if ratio < 80: # hard coded fuzz ratio for now, works for close matches logger.debug("Magazine fuzz ratio failed [%d] [%s] [%s]" % ( ratio, bookid, nzbtitle_formatted)) name_match = 0 # name match failed if name_match: # some magazine torrent uploaders add their sig in [] or {} # Fortunately for us, they always seem to add it at the end # some magazine torrent titles are "magazine_name some_form_of_date pdf" # so strip all the trailing junk... while nzbtitle_exploded[len(nzbtitle_exploded) - 1][0] in '[{' or \ nzbtitle_exploded[len(nzbtitle_exploded) - 1].lower() == 'pdf': nzbtitle_exploded.pop() # gotta love the function names if len(nzbtitle_exploded) > 1: # regexA = DD MonthName YYYY OR MonthName YYYY or nn MonthName YYYY regexA_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1] if regexA_year.isdigit(): if int(regexA_year) < 1900 or int(regexA_year) > 2100: regexA_year = 'Invalid' regexA_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 2] regexA_month = formatter.month2num(common.remove_accents(regexA_month_temp)) if frequency == "Weekly" or frequency == "BiWeekly": regexA_day = nzbtitle_exploded[len(nzbtitle_exploded) - 3].zfill(2) if regexA_day.isdigit(): if int(regexA_day) > 31: # probably issue number nn regexA_day = '01' else: regexA_day = '01' # just MonthName YYYY else: regexA_day = '01' # monthly, or less frequent newdatish_regexA = regexA_year + regexA_month + regexA_day try: int(newdatish_regexA) newdatish = regexA_year + '-' + regexA_month + '-' + regexA_day except: # regexB = MonthName DD YYYY regexB_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1] regexB_day = nzbtitle_exploded[len(nzbtitle_exploded) - 2].zfill(2) regexB_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 3] regexB_month = formatter.month2num(common.remove_accents(regexB_month_temp)) newdatish_regexB = regexB_year + regexB_month + regexB_day try: int(newdatish_regexB) newdatish = regexB_year + '-' + regexB_month + '-' + regexB_day except: # regexC = YYYY MM or YYYY MM DD or Issue nn YYYY # (can't get MM/DD if named Issue nn) newdatish_regexC = 'Invalid' # invalid unless works out otherwise regexC_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 2] if regexC_temp.isdigit(): if int(regexC_temp) > 1900 and int(regexC_temp) < 2100: # YYYY MM or YYYY nn regexC_year = regexC_temp regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2) regexC_day = '01' if regexC_month.isdigit(): # could be YYYY nn where nn is issue number if int(regexC_month) < 13: # if issue number > 12 date matching will fail newdatish_regexC = regexC_year + regexC_month + regexC_day else: regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 3] if regexC_year.isdigit(): if int(regexC_year) > 1900 and int(regexC_year) < 2100: # YYYY MM DD or YYYY nn-nn regexC_month = regexC_temp.zfill(2) if int(regexC_month) < 13: # if issue number > 12 date matching will fail regexC_day = nzbtitle_exploded[len( nzbtitle_exploded) - 1].zfill(2) newdatish_regexC = regexC_year + regexC_month + regexC_day try: int(newdatish_regexC) newdatish = regexC_year + '-' + regexC_month + '-' + regexC_day except: logger.debug('Magazine %s not in proper date format.' % nzbtitle_formatted) bad_date = bad_date + 1 # allow issues with good name but bad date to be included # so user can manually select them newdatish = "1970-01-01" # provide a fake date for bad-date issues # continue else: continue # Don't want to overwrite status = Skipped for NZBs that have been previously found wanted_status = myDB.select('SELECT * from wanted WHERE NZBtitle="%s"' % nzbtitle) if wanted_status: for results in wanted_status: status = results['Status'] else: status = "Skipped" controlValueDict = {"NZBurl": nzburl} newValueDict = { "NZBprov": nzbprov, "BookID": bookid, "NZBdate": nzbdate, "NZBtitle": nzbtitle, "AuxInfo": newdatish, "Status": status, "NZBsize": nzbsize, "NZBmode": nzbmode } myDB.upsert("wanted", newValueDict, controlValueDict) if control_date is None: # we haven't got any copies of this magazine yet # get a rough time just over a month ago to compare to, in format yyyy-mm-dd # could perhaps calc differently for weekly, biweekly etc start_time = time.time() start_time -= 31 * 24 * 60 * 60 # number of seconds in 31 days control_date = time.strftime("%Y-%m-%d", time.localtime(start_time)) # only grab a copy if it's newer than the most recent we have, # or newer than a month ago if we have none comp_date = formatter.datecompare(newdatish, control_date) if comp_date > 0: # Should probably only upsert when downloaded and processed in case snatch fails # keep track of what we're going to download so we don't download dupes new_date = new_date + 1 issue = bookid + ',' + newdatish if issue not in issues: maglist.append({ 'bookid': bookid, 'nzbprov': nzbprov, 'nzbtitle': nzbtitle, 'nzburl': nzburl, 'nzbmode': nzbmode }) logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted) to_snatch = to_snatch + 1 issues.append(issue) else: logger.debug('This issue of %s is already flagged for download' % issue) else: if newdatish != "1970-01-01": # this is our fake date for ones we can't decipher logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted) old_date = old_date + 1 else: logger.debug('Magazine [%s] does not completely match search term [%s].' % ( nzbtitle_formatted, bookid)) bad_regex = bad_regex + 1 logger.info('Found %s results for %s. %s are new, %s are old, %s fail date, %s fail name matching' % ( total_nzbs, bookid, new_date, old_date, bad_date, bad_regex)) logger.info("%s, %s issues to download" % (bookid, to_snatch)) for items in maglist: if items['nzbmode'] == "torznab": snatch = TORDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl']) elif items['nzbmode'] == "torrent": snatch = TORDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl']) else: snatch = NZBDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl']) if snatch: notifiers.notify_snatch(formatter.latinToAscii(items['nzbtitle']) + ' at ' + formatter.now()) postprocess.schedule_processor(action='Start') maglist = [] logger.info("Search for magazines complete")
def downloadResult(match, book): """ match: best result from search providers book: book we are downloading (needed for reporting author name) return: 0 if failed to snatch 1 if already snatched 2 if we snatched it """ # noinspection PyBroadException try: myDB = database.DBConnection() newValueDict = match[1] controlValueDict = match[2] # It's possible to get book and wanted tables "Snatched" status out of sync # for example if a user marks a book as "Wanted" after a search task snatches it and before postprocessor runs # so check status in both tables here snatched = myDB.match('SELECT BookID from wanted WHERE BookID=? and AuxInfo=? and Status="Snatched"', (newValueDict["BookID"], newValueDict["AuxInfo"])) if snatched: logger.debug('%s %s %s already marked snatched in wanted table' % (newValueDict["AuxInfo"], book['authorName'], book['bookName'])) return 1 # someone else already found it if newValueDict["AuxInfo"] == 'eBook': snatched = myDB.match('SELECT BookID from books WHERE BookID=? and Status="Snatched"', (newValueDict["BookID"],)) else: snatched = myDB.match('SELECT BookID from books WHERE BookID=? and AudioStatus="Snatched"', (newValueDict["BookID"],)) if snatched: logger.debug('%s %s %s already marked snatched in book table' % (newValueDict["AuxInfo"], book['authorName'], book['bookName'])) return 1 # someone else already found it myDB.upsert("wanted", newValueDict, controlValueDict) if newValueDict['NZBmode'] == 'direct': snatch = DirectDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"], controlValueDict["NZBurl"], newValueDict["AuxInfo"]) elif newValueDict['NZBmode'] in ["torznab", "torrent", "magnet"]: snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"], controlValueDict["NZBurl"], newValueDict["AuxInfo"]) elif newValueDict['NZBmode'] == 'nzb': snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"], controlValueDict["NZBurl"], newValueDict["AuxInfo"]) else: logger.error('Unhandled NZBmode [%s] for %s' % (newValueDict['NZBmode'], controlValueDict["NZBurl"])) snatch = 0 if snatch: logger.info('Downloading %s %s from %s' % (newValueDict["AuxInfo"], newValueDict["NZBtitle"], newValueDict["NZBprov"])) custom_notify_snatch("%s %s" % (newValueDict["BookID"], newValueDict['AuxInfo'])) notify_snatch("%s %s from %s at %s" % (newValueDict["AuxInfo"], newValueDict["NZBtitle"], newValueDict["NZBprov"], now())) # at this point we could add NZBprov to the blocklist with a short timeout, a second or two? # This would implement a round-robin search system. Blocklist with an incremental counter. # If number of active providers == number blocklisted, so no unblocked providers are left, # either sleep for a while, or unblock the one with the lowest counter. scheduleJob(action='Start', target='processDir') return 2 # we found it return 0 except Exception: logger.error('Unhandled exception in downloadResult: %s' % traceback.format_exc()) return 0
def search_magazines(mags=None, reset=False): # produce a list of magazines to search for, tor, nzb, torznab, rss threadname = threading.currentThread().name if "Thread-" in threadname: threading.currentThread().name = "SEARCHMAG" myDB = database.DBConnection() searchlist = [] if mags is None: # backlog search searchmags = myDB.select('SELECT Title, LastAcquired, \ IssueDate from magazines WHERE Status="Active"' ) else: searchmags = [] for magazine in mags: searchmags_temp = myDB.select( 'SELECT Title, LastAcquired, IssueDate from magazines \ WHERE Title="%s" AND Status="Active"' % (magazine['bookid'])) for terms in searchmags_temp: searchmags.append(terms) if len(searchmags) == 0: return # should clear old search results as might not be available any more # ie torrent not available, changed providers, out of news server retention etc. # Only delete the "skipped" ones, not wanted/snatched/processed/ignored logger.debug(u"Removing old magazine search results") myDB.action('DELETE from pastissues WHERE Status="Skipped"') logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags)))) for searchmag in searchmags: bookid = searchmag['Title'] searchterm = searchmag['Title'] # frequency = searchmag[1] # last_acquired = searchmag[2] # issue_date = searchmag[3] dic = { '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '' } searchterm = unaccented_str(replace_all(searchterm, dic)) searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode(lazylibrarian.SYS_ENCODING) searchlist.append({"bookid": bookid, "searchterm": searchterm}) if searchlist == []: logger.warn( 'There is nothing to search for. Mark some magazines as active.') for book in searchlist: resultlist = [] tor_resultlist = [] if lazylibrarian.USE_NZB(): resultlist, nproviders = IterateOverNewzNabSites(book, 'mag') if not nproviders: logger.warn( 'No nzb providers are set. Check config for NEWZNAB or TORZNAB providers' ) if lazylibrarian.USE_TOR(): tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag') if not nproviders: logger.warn( 'No torrent providers are set. Check config for TORRENT providers' ) if tor_resultlist: for item in tor_resultlist: # reformat the torrent results so they look like nzbs resultlist.append({ 'bookid': item['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100', # fake date as none returned from torrents 'nzbsize': item['tor_size'], 'nzbmode': 'torrent' }) if lazylibrarian.USE_RSS(): rss_resultlist, nproviders = IterateOverRSSSites(book, 'mag') if not nproviders: logger.warn( 'No rss providers are set. Check config for RSS providers') if rss_resultlist: for item in rss_resultlist: # reformat the rss results so they look like nzbs resultlist.append({ 'bookid': book['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': item[ 'tor_date'], # may be fake date as none returned from rss torrents, only rss nzb 'nzbsize': item['tor_size'], 'nzbmode': item['tor_type'] }) if not resultlist: logger.debug("Adding magazine %s to queue." % book['searchterm']) else: bad_regex = 0 bad_date = 0 old_date = 0 total_nzbs = 0 new_date = 0 maglist = [] issues = [] for nzb in resultlist: total_nzbs = total_nzbs + 1 bookid = nzb['bookid'] nzbtitle = unaccented_str(nzb['nzbtitle']) nzbtitle = nzbtitle.replace('"', '').replace( "'", "") # suppress " in titles nzburl = nzb['nzburl'] nzbprov = nzb['nzbprov'] nzbdate_temp = nzb['nzbdate'] nzbsize_temp = nzb['nzbsize'] if nzbsize_temp is None: # not all torrents returned by torznab have a size nzbsize_temp = 1000 nzbsize = round(float(nzbsize_temp) / 1048576, 2) nzbdate = nzbdate2format(nzbdate_temp) nzbmode = nzb['nzbmode'] results = myDB.match( 'SELECT * from magazines WHERE Title="%s"' % bookid) if not results: logger.debug( 'Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid)) bad_regex = bad_regex + 1 else: control_date = results['IssueDate'] reject_list = getList(results['Regex']) dic = { '.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '' } nzbtitle_formatted = replace_all(nzbtitle, dic).strip() # Need to make sure that substrings of magazine titles don't get found # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this # remove extra spaces if they're in a row nzbtitle_exploded_temp = " ".join( nzbtitle_formatted.split()) nzbtitle_exploded = nzbtitle_exploded_temp.split(' ') if ' ' in bookid: bookid_exploded = bookid.split(' ') else: bookid_exploded = [bookid] # check nzb starts with magazine title, and ends with a date # eg The MagPI Issue 22 - July 2015 # do something like check left n words match title # then check last n words are a date rejected = False if len(nzbtitle_exploded) > len( bookid_exploded ): # needs to be longer as it has to include a date # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz mag_title_match = fuzz.token_set_ratio( unaccented(bookid), unaccented(nzbtitle_formatted)) if mag_title_match < lazylibrarian.MATCH_RATIO: logger.debug(u"Magazine token set Match failed: " + str(mag_title_match) + "% for " + nzbtitle_formatted) rejected = True else: rejected = True if not rejected: already_failed = myDB.match( 'SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' % nzburl) if already_failed: logger.debug("Rejecting %s, blacklisted at %s" % (nzbtitle_formatted, already_failed['NZBprov'])) rejected = True if not rejected: lower_title = unaccented(nzbtitle_formatted).lower() lower_bookid = unaccented(bookid).lower() for word in reject_list: if word in lower_title and word not in lower_bookid: rejected = True logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word)) break # maxsize = formatter.check_int(lazylibrarian.REJECT_MAXSIZE, 0) # if maxsize and nzbsize > maxsize: # rejected = True # logger.debug("Rejecting %s, too large" % nzbtitle_formatted) if not rejected: # some magazine torrent uploaders add their sig in [] or {} # Fortunately for us, they always seem to add it at the end # also some magazine torrent titles are "magazine_name some_form_of_date pdf" # so strip all the trailing junk... while nzbtitle_exploded[len(nzbtitle_exploded) - 1][0] in '[{' or \ nzbtitle_exploded[len(nzbtitle_exploded) - 1].lower() in ['pdf', 'true', 'truepdf']: nzbtitle_exploded.pop( ) # gotta love the function names # need at least one word magazine title and two date components if len(nzbtitle_exploded) > 2: # regexA = DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY regexA_year = nzbtitle_exploded[ len(nzbtitle_exploded) - 1] regexA_month_temp = nzbtitle_exploded[ len(nzbtitle_exploded) - 2] regexA_month = month2num( unaccented(regexA_month_temp)) if not regexA_year.isdigit() or int( regexA_year) < 1900 or int( regexA_year) > 2100: regexA_year = 'fail' # force date failure # if frequency == "Weekly" or frequency == "BiWeekly": regexA_day = nzbtitle_exploded[ len(nzbtitle_exploded) - 3].rstrip(',').zfill(2) if regexA_day.isdigit(): if int(regexA_day ) > 31: # probably issue number nn regexA_day = '01' else: regexA_day = '01' # just MonthName YYYY # else: # regexA_day = '01' # monthly, or less frequent try: newdatish = regexA_year + '-' + regexA_month + '-' + regexA_day # try to make sure the year/month/day are valid, exception if not # ie don't accept day > 31, or 30 in some months # also handles multiple date format named issues eg Jan 2014, 01 2014 # datetime will give a ValueError if not a good date or a param is not int date1 = datetime.date(int(regexA_year), int(regexA_month), int(regexA_day)) except ValueError: # regexB = MonthName DD YYYY or MonthName DD, YYYY regexB_year = nzbtitle_exploded[ len(nzbtitle_exploded) - 1] regexB_month_temp = nzbtitle_exploded[ len(nzbtitle_exploded) - 3] regexB_month = month2num( unaccented(regexB_month_temp)) regexB_day = nzbtitle_exploded[ len(nzbtitle_exploded) - 2].rstrip(',').zfill(2) if not regexB_year.isdigit() or int( regexB_year) < 1900 or int( regexB_year) > 2100: regexB_year = 'fail' try: newdatish = regexB_year + '-' + regexB_month + '-' + regexB_day # datetime will give a ValueError if not a good date or a param is not int date1 = datetime.date( int(regexB_year), int(regexB_month), int(regexB_day)) except ValueError: # regexC = YYYY MM or YYYY MM DD # (can't get MM/DD if named YYYY Issue nn) # First try YYYY MM regexC_year = nzbtitle_exploded[ len(nzbtitle_exploded) - 2] if regexC_year.isdigit( ) and int(regexC_year) > 1900 and int( regexC_year) < 2100: regexC_month = nzbtitle_exploded[ len(nzbtitle_exploded) - 1].zfill(2) regexC_day = '01' else: # try YYYY MM DD regexC_year = nzbtitle_exploded[ len(nzbtitle_exploded) - 3] regexC_month = 0 regexC_day = 0 if regexC_year.isdigit( ) and int(regexC_year) > 1900 and int( regexC_year) < 2100: regexC_month = nzbtitle_exploded[ len(nzbtitle_exploded) - 2].zfill(2) regexC_day = nzbtitle_exploded[ len(nzbtitle_exploded) - 1].zfill(2) else: regexC_year = 'fail' try: newdatish = regexC_year + '-' + regexC_month + '-' + regexC_day # datetime will give a ValueError if not a good date or a param is not int date1 = datetime.date( int(regexC_year), int(regexC_month), int(regexC_day)) except Exception: # regexD Issue/No/Vol nn, YYYY or Issue/No/Vol nn try: IssueLabel = nzbtitle_exploded[ len(nzbtitle_exploded) - 2] if IssueLabel.lower() in [ "issue", "no", "vol" ]: # issue nn regexD_issue = nzbtitle_exploded[ len(nzbtitle_exploded) - 1] if regexD_issue.isdigit(): newdatish = str( int(regexD_issue) ) # 4 == 04 == 004 else: IssueLabel = nzbtitle_exploded[ len(nzbtitle_exploded) - 3] if IssueLabel.lower() in [ "issue", "no", "vol" ]: # issue nn, YYYY regexD_issue = nzbtitle_exploded[ len(nzbtitle_exploded) - 2] regexD_issue = regexD_issue.strip( ',') if regexD_issue.isdigit(): newdatish = str( int(regexD_issue) ) # 4 == 04 == 004 else: raise ValueError regexD_year = nzbtitle_exploded[ len(nzbtitle_exploded) - 1] if regexD_year.isdigit(): if int( regexD_year ) < int(datetime.date. today().year): newdatish = 0 # it's old else: raise ValueError except Exception: logger.debug( 'Magazine %s not in proper date format.' % nzbtitle_formatted) bad_date = bad_date + 1 # allow issues with good name but bad date to be included # so user can manually select them, incl those with issue numbers newdatish = "1970-01-01" # provide a fake date for bad-date issues # continue else: logger.debug( 'Magazine [%s] does not match the search term [%s].' % (nzbtitle_formatted, bookid)) bad_regex = bad_regex + 1 continue # wanted issues go into wanted table marked "Wanted" # the rest into pastissues table marked "Skipped" insert_table = "pastissues" insert_status = "Skipped" if control_date is None: # we haven't got any copies of this magazine yet # get a rough time just over a month ago to compare to, in format yyyy-mm-dd # could perhaps calc differently for weekly, biweekly etc # or for magazines with only an issue number, use zero if '-' in str(newdatish): start_time = time.time() start_time -= 31 * 24 * 60 * 60 # number of seconds in 31 days control_date = time.strftime( "%Y-%m-%d", time.localtime(start_time)) else: control_date = 0 if '-' in str(control_date) and '-' in str(newdatish): # only grab a copy if it's newer than the most recent we have, # or newer than a month ago if we have none comp_date = datecompare(newdatish, control_date) elif '-' not in str(control_date) and '-' not in str( newdatish): # for issue numbers, check if later than last one we have comp_date = int(newdatish) - int(control_date) newdatish = "%s" % newdatish newdatish = newdatish.zfill( 4) # pad so we sort correctly else: # invalid comparison of date and issue number logger.debug( 'Magazine %s incorrect date or issue format.' % nzbtitle_formatted) bad_date = bad_date + 1 newdatish = "1970-01-01" # this is our fake date for ones we can't decipher comp_date = 0 if comp_date > 0: # keep track of what we're going to download so we don't download dupes new_date = new_date + 1 issue = bookid + ',' + newdatish if issue not in issues: maglist.append({ 'bookid': bookid, 'nzbprov': nzbprov, 'nzbtitle': nzbtitle, 'nzburl': nzburl, 'nzbmode': nzbmode }) logger.debug( 'This issue of %s is new, downloading' % nzbtitle_formatted) issues.append(issue) insert_table = "wanted" insert_status = "Wanted" nzbdate = now() # when we asked for it else: logger.debug( 'This issue of %s is already flagged for download' % issue) else: if newdatish != "1970-01-01": # this is our fake date for ones we can't decipher logger.debug( 'This issue of %s is old; skipping.' % nzbtitle_formatted) old_date = old_date + 1 # store only the _new_ matching results # Don't add a new entry if this issue has been found on an earlier search # and status has been user-set ( we only delete the "Skipped" ones ) # In "wanted" table it might be already snatched/downloading/processing mag_entry = myDB.select( 'SELECT * from %s WHERE NZBtitle="%s" and NZBprov="%s"' % (insert_table, nzbtitle, nzbprov)) if not mag_entry: controlValueDict = { "NZBtitle": nzbtitle, "NZBprov": nzbprov } newValueDict = { "NZBurl": nzburl, "BookID": bookid, "NZBdate": nzbdate, "AuxInfo": newdatish, "Status": insert_status, "NZBsize": nzbsize, "NZBmode": nzbmode } myDB.upsert(insert_table, newValueDict, controlValueDict) else: # logger.debug('Magazine [%s] was rejected.' % nzbtitle_formatted) bad_regex = bad_regex + 1 logger.info( 'Found %i result%s for %s. %i new, %i old, %i fail date, %i fail name: %i to download' % (total_nzbs, plural(total_nzbs), bookid, new_date, old_date, bad_date, bad_regex, len(maglist))) for magazine in maglist: if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]: snatch = TORDownloadMethod(magazine['bookid'], magazine['nzbprov'], magazine['nzbtitle'], magazine['nzburl']) else: snatch = NZBDownloadMethod(magazine['bookid'], magazine['nzbprov'], magazine['nzbtitle'], magazine['nzburl']) if snatch: logger.info('Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"])) notify_snatch("%s from %s at %s" % (unaccented( magazine['nzbtitle']), magazine["nzbprov"], now())) scheduleJob(action='Start', target='processDir') maglist = [] if reset: scheduleJob(action='Restart', target='search_magazines') logger.info("Search for magazines complete")
def findBestResult(resultlist, book, searchtype, source): """ resultlist: collated results from search providers book: the book we want to find searchtype: book, magazine, shortbook, audiobook etc. source: nzb, tor, rss, direct return: highest scoring match, or None if no match """ # noinspection PyBroadException try: myDB = database.DBConnection() dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '', '-': ' ', '\s\s': ' '} dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '.', ';': '', '\'': ''} if source == 'rss': author, title = get_searchterm(book, searchtype) else: author = unaccented_str(replace_all(book['authorName'], dic)) title = unaccented_str(replace_all(book['bookName'], dic)) if book['library'] == 'AudioBook': reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO']) maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXAUDIO'], 0) minsize = check_int(lazylibrarian.CONFIG['REJECT_MINAUDIO'], 0) auxinfo = 'AudioBook' else: # elif book['library'] == 'eBook': reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS']) maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXSIZE'], 0) minsize = check_int(lazylibrarian.CONFIG['REJECT_MINSIZE'], 0) auxinfo = 'eBook' if source == 'nzb': prefix = 'nzb' else: # rss and libgen return same names as torrents prefix = 'tor_' logger.debug('Searching %s %s results for best %s match' % (len(resultlist), source, auxinfo)) matches = [] for res in resultlist: resultTitle = unaccented_str(replace_all(res[prefix + 'title'], dictrepl)).strip() resultTitle = re.sub(r"\s\s+", " ", resultTitle) # remove extra whitespace Author_match = fuzz.token_set_ratio(author, resultTitle) Book_match = fuzz.token_set_ratio(title, resultTitle) if lazylibrarian.LOGLEVEL & lazylibrarian.log_fuzz: logger.debug("%s author/book Match: %s/%s %s at %s" % (source.upper(), Author_match, Book_match, resultTitle, res[prefix + 'prov'])) rejected = False url = res[prefix + 'url'] if url is None: rejected = True logger.debug("Rejecting %s, no URL found" % resultTitle) if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']: already_failed = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (url,)) if already_failed: logger.debug("Rejecting %s, blacklisted at %s" % (resultTitle, already_failed['NZBprov'])) rejected = True if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']: already_failed = myDB.match('SELECT * from wanted WHERE NZBurl=?', (url,)) if already_failed: logger.debug("Rejecting %s, blacklisted at %s" % (resultTitle, already_failed['NZBprov'])) rejected = True if not rejected and not url.startswith('http') and not url.startswith('magnet'): rejected = True logger.debug("Rejecting %s, invalid URL [%s]" % (resultTitle, url)) if not rejected: for word in reject_list: if word in getList(resultTitle.lower()) and word not in getList(author.lower()) \ and word not in getList(title.lower()): rejected = True logger.debug("Rejecting %s, contains %s" % (resultTitle, word)) break size_temp = check_int(res[prefix + 'size'], 1000) # Need to cater for when this is NONE (Issue 35) size = round(float(size_temp) / 1048576, 2) if not rejected and maxsize and size > maxsize: rejected = True logger.debug("Rejecting %s, too large" % resultTitle) if not rejected and minsize and size < minsize: rejected = True logger.debug("Rejecting %s, too small" % resultTitle) if not rejected: bookid = book['bookid'] # newTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip() # newTitle = resultTitle + ' LL.(' + book['bookid'] + ')' if source == 'nzb': mode = res['nzbmode'] # nzb, torznab else: mode = res['tor_type'] # torrent, magnet, nzb(from rss), direct controlValueDict = {"NZBurl": url} newValueDict = { "NZBprov": res[prefix + 'prov'], "BookID": bookid, "NZBdate": now(), # when we asked for it "NZBsize": size, "NZBtitle": resultTitle, "NZBmode": mode, "AuxInfo": auxinfo, "Status": "Skipped" } score = (Book_match + Author_match) / 2 # as a percentage # lose a point for each unwanted word in the title so we get the closest match # but for RSS ignore anything at the end in square braces [keywords, genres etc] if source == 'rss': wordlist = getList(resultTitle.rsplit('[', 1)[0].lower()) else: wordlist = getList(resultTitle.lower()) words = [x for x in wordlist if x not in getList(author.lower())] words = [x for x in words if x not in getList(title.lower())] typelist = '' if newValueDict['AuxInfo'] == 'eBook': words = [x for x in words if x not in getList(lazylibrarian.CONFIG['EBOOK_TYPE'])] typelist = getList(lazylibrarian.CONFIG['EBOOK_TYPE']) elif newValueDict['AuxInfo'] == 'AudioBook': words = [x for x in words if x not in getList(lazylibrarian.CONFIG['AUDIOBOOK_TYPE'])] typelist = getList(lazylibrarian.CONFIG['AUDIOBOOK_TYPE']) score -= len(words) # prioritise titles that include the ebook types we want # add more points for booktypes nearer the left in the list # eg if epub, mobi, pdf add 3 points if epub found, 2 for mobi, 1 for pdf booktypes = [x for x in wordlist if x in typelist] if booktypes: typelist = list(reversed(typelist)) for item in booktypes: for i in [i for i, x in enumerate(typelist) if x == item]: score += i + 1 matches.append([score, newValueDict, controlValueDict, res['priority']]) if matches: highest = max(matches, key=lambda s: (s[0], s[3])) score = highest[0] newValueDict = highest[1] # controlValueDict = highest[2] dlpriority = highest[3] if score < int(lazylibrarian.CONFIG['MATCH_RATIO']): logger.info('Nearest match (%s%%): %s using %s search for %s %s' % (score, newValueDict['NZBtitle'], searchtype, book['authorName'], book['bookName'])) else: logger.info('Best match (%s%%): %s using %s search, %s priority %s' % (score, newValueDict['NZBtitle'], searchtype, newValueDict['NZBprov'], dlpriority)) return highest else: logger.debug("No %s found for [%s] using searchtype %s" % (source, book["searchterm"], searchtype)) return None except Exception: logger.error('Unhandled exception in findBestResult: %s' % traceback.format_exc())
def processResultList(resultlist, book, searchtype): myDB = database.DBConnection() dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '', '-': ' ', '\s\s': ' '} dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': ''} match_ratio = int(lazylibrarian.MATCH_RATIO) reject_list = getList(lazylibrarian.REJECT_WORDS) author = unaccented_str(replace_all(book['authorName'], dic)) title = unaccented_str(replace_all(book['bookName'], dic)) matches = [] for tor in resultlist: torTitle = unaccented_str(tor['tor_title']) torTitle = replace_all(torTitle, dictrepl).strip() torTitle = re.sub(r"\s\s+", " ", torTitle) # remove extra whitespace torAuthor_match = fuzz.token_set_ratio(author, torTitle) torBook_match = fuzz.token_set_ratio(title, torTitle) logger.debug(u"TOR author/book Match: %s/%s for %s" % (torAuthor_match, torBook_match, torTitle)) tor_url = tor['tor_url'] rejected = False already_failed = myDB.match('SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' % tor_url) if already_failed: logger.debug("Rejecting %s, blacklisted at %s" % (torTitle, already_failed['NZBprov'])) rejected = True if not rejected: for word in reject_list: if word in torTitle.lower() and word not in author.lower() and word not in title.lower(): rejected = True logger.debug("Rejecting %s, contains %s" % (torTitle, word)) break tor_size_temp = tor['tor_size'] # Need to cater for when this is NONE (Issue 35) tor_size_temp = check_int(tor_size_temp, 1000) tor_size = round(float(tor_size_temp) / 1048576, 2) maxsize = check_int(lazylibrarian.REJECT_MAXSIZE, 0) if not rejected: if maxsize and tor_size > maxsize: rejected = True logger.debug("Rejecting %s, too large" % torTitle) if not rejected: bookid = book['bookid'] tor_Title = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip() controlValueDict = {"NZBurl": tor_url} newValueDict = { "NZBprov": tor['tor_prov'], "BookID": bookid, "NZBdate": now(), # when we asked for it "NZBsize": tor_size, "NZBtitle": tor_Title, "NZBmode": "torrent", "Status": "Skipped" } score = (torBook_match + torAuthor_match) / 2 # as a percentage # lose a point for each extra word in the title so we get the closest match words = len(getList(torTitle)) words -= len(getList(author)) words -= len(getList(title)) score -= abs(words) matches.append([score, torTitle, newValueDict, controlValueDict]) if matches: highest = max(matches, key=lambda x: x[0]) score = highest[0] nzb_Title = highest[1] newValueDict = highest[2] controlValueDict = highest[3] if score < match_ratio: logger.info(u'Nearest TOR match (%s%%): %s using %s search for %s %s' % (score, nzb_Title, searchtype, author, title)) return False logger.info(u'Best TOR match (%s%%): %s using %s search' % (score, nzb_Title, searchtype)) snatchedbooks = myDB.match('SELECT * from books WHERE BookID="%s" and Status="Snatched"' % newValueDict["BookID"]) if snatchedbooks: logger.debug('%s already marked snatched' % nzb_Title) return True # someone else found it, not us else: myDB.upsert("wanted", newValueDict, controlValueDict) if newValueDict["NZBprov"] == 'libgen': # for libgen we use direct download links snatch = DirectDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"], newValueDict["NZBtitle"], controlValueDict["NZBurl"], nzb_Title) else: snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"], newValueDict["NZBtitle"], controlValueDict["NZBurl"]) if snatch: logger.info('Downloading %s from %s' % (newValueDict["NZBtitle"], newValueDict["NZBprov"])) notify_snatch("%s from %s at %s" % (newValueDict["NZBtitle"], newValueDict["NZBprov"], now())) scheduleJob(action='Start', target='processDir') return True + True # we found it else: logger.debug("No torrent's found for [%s] using searchtype %s" % (book["searchterm"], searchtype)) return False
def search_nzb_book(books=None, mags=None): if not (lazylibrarian.USE_NZB): return # rename this thread threading.currentThread().name = "SEARCHNZBBOOKS" myDB = database.DBConnection() searchlist = [] searchlist1 = [] if books is None: # We are performing a backlog search searchbooks = myDB.select('SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"') # Clear cache if os.path.exists(".ProviderCache"): for f in os.listdir(".ProviderCache"): os.unlink("%s/%s" % (".ProviderCache", f)) # Clearing throttling timeouts t = SimpleCache.ThrottlingProcessor() t.lastRequestTime.clear() else: # The user has added a new book searchbooks = [] if books != False: for book in books: searchbook = myDB.select( 'SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"', [book["bookid"]], ) for terms in searchbook: searchbooks.append(terms) for searchbook in searchbooks: bookid = searchbook[0] author = searchbook[1] book = searchbook[2] dic = { "...": "", ".": " ", " & ": " ", " = ": " ", "?": "", "$": "s", " + ": " ", '"': "", ",": "", "*": "", ":": "", ";": "", } dicSearchFormatting = {".": " +", " + ": " "} author = formatter.latinToAscii(formatter.replace_all(author, dic)) book = formatter.latinToAscii(formatter.replace_all(book, dic)) # TRY SEARCH TERM just using author name and book type author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting)) searchterm = author + " " + book # + ' ' + lazylibrarian.EBOOK_TYPE searchterm = re.sub("[\.\-\/]", " ", searchterm).encode("utf-8") searchterm = re.sub(r"\(.*?\)", "", searchterm).encode("utf-8") searchterm = re.sub(r"\s\s+", " ", searchterm) # strip any double white space searchlist.append( {"bookid": bookid, "bookName": searchbook[2], "authorName": searchbook[1], "searchterm": searchterm.strip()} ) if not lazylibrarian.SAB_HOST and not lazylibrarian.NZB_DOWNLOADER_BLACKHOLE: logger.info("No download method is set, use SABnzbd or blackhole") # TODO - Move the newznab test to providers.py if not lazylibrarian.NEWZNAB and not lazylibrarian.NEWZNAB2 and not lazylibrarian.USENETCRAWLER: logger.info("No providers are set. try use NEWZNAB.") counter = 0 for book in searchlist: # print book.keys() resultlist = providers.IterateOverNewzNabSites(book, "book") # if you can't find teh book specifically, you might find under general search if not resultlist: logger.info("Searching for type book failed to find any books...moving to general search") resultlist = providers.IterateOverNewzNabSites(book, "general") if not resultlist: logger.debug("Adding book %s to queue." % book["searchterm"]) else: dictrepl = { "...": "", ".": " ", " & ": " ", " = ": " ", "?": "", "$": "s", " + ": " ", '"': "", ",": "", "*": "", "(": "", ")": "", "[": "", "]": "", "#": "", "0": "", "1": "", "2": "", "3": "", "4": "", "5": "", "6": "", "7": "", "8": "", "9": "", "'": "", ":": "", "!": "", "-": "", "\s\s": " ", " the ": " ", " a ": " ", " and ": " ", " to ": " ", " of ": " ", " for ": " ", " my ": " ", " in ": " ", " at ": " ", " with ": " ", } logger.debug(u"searchterm %s" % book["searchterm"]) addedCounter = 0 for nzb in resultlist: nzbTitle = formatter.latinToAscii(formatter.replace_all(str(nzb["nzbtitle"]).lower(), dictrepl)).strip() nzbTitle = re.sub(r"\s\s+", " ", nzbTitle) # remove extra whitespace logger.debug(u"nzbName %s" % nzbTitle) match_ratio = int(lazylibrarian.MATCH_RATIO) nzbTitle_match = fuzz.token_sort_ratio(book["searchterm"].lower(), nzbTitle) logger.debug("NZB Title Match %: " + str(nzbTitle_match)) if nzbTitle_match > match_ratio: logger.info(u"Found NZB: %s" % nzb["nzbtitle"]) addedCounter = addedCounter + 1 bookid = book["bookid"] nzbTitle = (book["authorName"] + " - " + book["bookName"] + " LL.(" + book["bookid"] + ")").strip() nzburl = nzb["nzburl"] nzbprov = nzb["nzbprov"] nzbdate_temp = nzb["nzbdate"] nzbsize_temp = nzb["nzbsize"] # Need to cater for when this is NONE (Issue 35) if nzbsize_temp is None: nzbsize_temp = 1000 nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + " MB" nzbdate = formatter.nzbdate2format(nzbdate_temp) controlValueDict = {"NZBurl": nzburl} newValueDict = { "NZBprov": nzbprov, "BookID": bookid, "NZBdate": nzbdate, "NZBsize": nzbsize, "NZBtitle": nzbTitle, "Status": "Skipped", } myDB.upsert("wanted", newValueDict, controlValueDict) snatchedbooks = myDB.action( 'SELECT * from books WHERE BookID=? and Status="Snatched"', [bookid] ).fetchone() if not snatchedbooks: snatch = DownloadMethod(bookid, nzbprov, nzbTitle, nzburl) notifiers.notify_snatch(nzbTitle + " at " + formatter.now()) break if addedCounter == 0: logger.info( "No nzb's found for " + (book["authorName"] + " " + book["bookName"]).strip() + ". Adding book to queue." ) counter = counter + 1 if not books or books == False: snatched = searchmag.searchmagazines(mags) for items in snatched: snatch = DownloadMethod(items["bookid"], items["nzbprov"], items["nzbtitle"], items["nzburl"]) notifiers.notify_snatch(items["nzbtitle"] + " at " + formatter.now()) logger.info("Search for Wanted items complete")
def search_magazines(mags=None, reset=False): # produce a list of magazines to search for, tor, nzb, torznab, rss # noinspection PyBroadException try: threadname = threading.currentThread().name if "Thread-" in threadname: if mags is None: threading.currentThread().name = "SEARCHALLMAG" else: threading.currentThread().name = "SEARCHMAG" myDB = database.DBConnection() searchlist = [] if mags is None: # backlog search searchmags = myDB.select('SELECT Title, Regex, DateType, LastAcquired, \ IssueDate from magazines WHERE Status="Active"') else: searchmags = [] for magazine in mags: searchmags_temp = myDB.select('SELECT Title,Regex,DateType,LastAcquired,IssueDate from magazines \ WHERE Title=? AND Status="Active"', (magazine['bookid'],)) for terms in searchmags_temp: searchmags.append(terms) if len(searchmags) == 0: threading.currentThread().name = "WEBSERVER" return # should clear old search results as might not be available any more # ie torrent not available, changed providers, out of news server retention etc. # Only delete the "skipped" ones, not wanted/snatched/processed/ignored # logger.debug("Removing old magazine search results") # myDB.action('DELETE from pastissues WHERE Status="Skipped"') logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags)))) for searchmag in searchmags: bookid = searchmag['Title'] searchterm = searchmag['Regex'] datetype = searchmag['DateType'] if not datetype: datetype = '' if not searchterm: dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''} # strip accents from the magazine title for easier name-matching searchterm = unaccented_str(searchmag['Title']) if not searchterm: # unless there are no ascii characters left searchterm = searchmag['Title'] searchterm = replace_all(searchterm, dic) searchterm = re.sub('[.\-/]', ' ', searchterm) if PY2: searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING) searchlist.append({"bookid": bookid, "searchterm": searchterm, "datetype": datetype}) if not searchlist: logger.warn('There is nothing to search for. Mark some magazines as active.') for book in searchlist: resultlist = [] if lazylibrarian.USE_NZB(): resultlist, nproviders = IterateOverNewzNabSites(book, 'mag') if not nproviders: # don't nag. Show warning message no more than every 20 mins timenow = int(time.time()) if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow: logger.warn('No nzb providers are available. Check config and blocklist') lazylibrarian.NO_NZB_MSG = timenow if lazylibrarian.USE_DIRECT(): dir_resultlist, nproviders = IterateOverDirectSites(book, 'mag') if not nproviders: # don't nag. Show warning message no more than every 20 mins timenow = int(time.time()) if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow: logger.warn('No direct providers are available. Check config and blocklist') lazylibrarian.NO_DIRECT_MSG = timenow if dir_resultlist: for item in dir_resultlist: # reformat the results so they look like nzbs resultlist.append({ 'bookid': item['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100', # fake date as none returned 'nzbsize': item['tor_size'], 'nzbmode': 'torrent' }) if lazylibrarian.USE_TOR(): tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag') if not nproviders: # don't nag. Show warning message no more than every 20 mins timenow = int(time.time()) if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow: logger.warn('No tor providers are available. Check config and blocklist') lazylibrarian.NO_TOR_MSG = timenow if tor_resultlist: for item in tor_resultlist: # reformat the torrent results so they look like nzbs resultlist.append({ 'bookid': item['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100', # fake date as none returned from torrents 'nzbsize': item['tor_size'], 'nzbmode': 'torrent' }) if lazylibrarian.USE_RSS(): rss_resultlist, nproviders = IterateOverRSSSites() if not nproviders: # don't nag. Show warning message no more than every 20 mins timenow = int(time.time()) if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow: logger.warn('No rss providers are available. Check config and blocklist') lazylibrarian.NO_RSS_MSG = timenow if rss_resultlist: for item in rss_resultlist: # reformat the rss results so they look like nzbs resultlist.append({ 'bookid': book['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': item['tor_date'], # may be fake date as none returned from rss torrents, only rss nzb 'nzbsize': item['tor_size'], 'nzbmode': item['tor_type'] }) if not resultlist: logger.debug("No results for magazine %s" % book['searchterm']) else: bad_name = 0 bad_date = 0 old_date = 0 rejects = 0 total_nzbs = 0 new_date = 0 maglist = [] issues = [] bookid = '' for nzb in resultlist: total_nzbs += 1 bookid = nzb['bookid'] # strip accents from the magazine title for easier name-matching nzbtitle = unaccented_str(nzb['nzbtitle']) if not nzbtitle: # unless it's not a latin-1 encodable name nzbtitle = nzb['nzbtitle'] nzbtitle = nzbtitle.replace('"', '').replace("'", "") # suppress " in titles nzburl = nzb['nzburl'] nzbprov = nzb['nzbprov'] nzbdate_temp = nzb['nzbdate'] nzbsize_temp = nzb['nzbsize'] nzbsize_temp = check_int(nzbsize_temp, 1000) # not all torrents returned by torznab have a size nzbsize = round(float(nzbsize_temp) / 1048576, 2) nzbdate = nzbdate2format(nzbdate_temp) nzbmode = nzb['nzbmode'] # Need to make sure that substrings of magazine titles don't get found # (e.g. Maxim USA will find Maximum PC USA) so split into "words" dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '', '[': ' ', ']': ' ', '#': '# '} nzbtitle_formatted = replace_all(nzbtitle, dic).strip() # remove extra spaces if they're in a row nzbtitle_formatted = " ".join(nzbtitle_formatted.split()) nzbtitle_exploded = nzbtitle_formatted.split(' ') results = myDB.match('SELECT * from magazines WHERE Title=?', (bookid,)) if not results: logger.debug('Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid)) bad_name += 1 else: rejected = False maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0) if maxsize and nzbsize > maxsize: logger.debug("Rejecting %s, too large" % nzbtitle) rejected = True if not rejected: minsize = check_int(lazylibrarian.CONFIG['REJECT_MAGMIN'], 0) if minsize and nzbsize < minsize: logger.debug("Rejecting %s, too small" % nzbtitle) rejected = True if not rejected: if ' ' in bookid: bookid_exploded = bookid.split(' ') else: bookid_exploded = [bookid] # Check nzb has magazine title and a date/issue nr # eg The MagPI July 2015 if len(nzbtitle_exploded) > len(bookid_exploded): # needs to be longer as it has to include a date # check all the words in the mag title are in the nzbtitle rejected = False wlist = [] for word in nzbtitle_exploded: word = unaccented(word).lower() if word: wlist.append(word) for word in bookid_exploded: word = unaccented(word).lower() if word and word not in wlist: logger.debug("Rejecting %s, missing %s" % (nzbtitle, word)) rejected = True break if rejected: logger.debug( "Magazine title match failed " + bookid + " for " + nzbtitle_formatted) else: logger.debug( "Magazine title matched " + bookid + " for " + nzbtitle_formatted) else: logger.debug("Magazine name too short (%s)" % len(nzbtitle_exploded)) rejected = True if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']: blocked = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (nzburl,)) if blocked: logger.debug("Rejecting %s, blacklisted at %s" % (nzbtitle_formatted, blocked['NZBprov'])) rejected = True if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']: blocked = myDB.match('SELECT * from wanted WHERE NZBurl=?', (nzburl,)) if blocked: logger.debug("Rejecting %s, blacklisted at %s" % (nzbtitle_formatted, blocked['NZBprov'])) rejected = True if not rejected: reject_list = getList(str(results['Reject']).lower()) reject_list += getList(lazylibrarian.CONFIG['REJECT_MAGS'], ',') lower_title = unaccented(nzbtitle_formatted).lower() lower_bookid = unaccented(bookid).lower() if reject_list: if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag: logger.debug('Reject: %s' % str(reject_list)) logger.debug('Title: %s' % lower_title) logger.debug('Bookid: %s' % lower_bookid) for word in reject_list: if word in lower_title and word not in lower_bookid: rejected = True logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word)) break if rejected: rejects += 1 else: regex_pass, issuedate, year = get_issue_date(nzbtitle_exploded) if regex_pass: logger.debug('Issue %s (regex %s) for %s ' % (issuedate, regex_pass, nzbtitle_formatted)) datetype_ok = True datetype = book['datetype'] if datetype: # check all wanted parts are in the regex result # Day Month Year Vol Iss (MM needs two months) if 'M' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 12]: datetype_ok = False elif 'D' in datetype and regex_pass not in [3, 5, 6]: datetype_ok = False elif 'MM' in datetype and regex_pass not in [1]: # bi monthly datetype_ok = False elif 'V' in datetype and 'I' in datetype and regex_pass not in [8, 9, 17, 18]: datetype_ok = False elif 'V' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 17, 18]: datetype_ok = False elif 'I' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 16, 17, 18]: datetype_ok = False elif 'Y' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 13, 15, 16, 18]: datetype_ok = False else: datetype_ok = False logger.debug('Magazine %s not in a recognised date format.' % nzbtitle_formatted) bad_date += 1 # allow issues with good name but bad date to be included # so user can manually select them, incl those with issue numbers issuedate = "1970-01-01" # provide a fake date for bad-date issues # wanted issues go into wanted table marked "Wanted" # the rest into pastissues table marked "Skipped" or "Have" insert_table = "pastissues" comp_date = 0 if datetype_ok: control_date = results['IssueDate'] logger.debug("Control date: [%s]" % control_date) if not control_date: # we haven't got any copies of this magazine yet # get a rough time just over MAX_AGE days ago to compare to, in format yyyy-mm-dd # could perhaps calc differently for weekly, biweekly etc # For magazines with only an issue number use zero as we can't tell age if str(issuedate).isdigit(): logger.debug('Magazine comparing issue numbers (%s)' % issuedate) control_date = 0 elif re.match('\d+-\d\d-\d\d', str(issuedate)): start_time = time.time() start_time -= int( lazylibrarian.CONFIG['MAG_AGE']) * 24 * 60 * 60 # number of seconds in days if start_time < 0: # limit of unixtime (1st Jan 1970) start_time = 0 control_date = time.strftime("%Y-%m-%d", time.localtime(start_time)) logger.debug('Magazine date comparing to %s' % control_date) else: logger.debug('Magazine unable to find comparison type [%s]' % issuedate) control_date = 0 if str(control_date).isdigit() and str(issuedate).isdigit(): # for issue numbers, check if later than last one we have if regex_pass in [10, 12, 13] and year: issuedate = "%s%04d" % (year, int(issuedate)) else: issuedate = str(issuedate).zfill(4) if not control_date: comp_date = 1 else: comp_date = int(issuedate) - int(control_date) elif re.match('\d+-\d\d-\d\d', str(control_date)) and \ re.match('\d+-\d\d-\d\d', str(issuedate)): # only grab a copy if it's newer than the most recent we have, # or newer than a month ago if we have none comp_date = datecompare(issuedate, control_date) else: # invalid comparison of date and issue number comp_date = 0 if re.match('\d+-\d\d-\d\d', str(control_date)): if regex_pass > 9 and year: # we assumed it was an issue number, but it could be a date year = check_int(year, 0) if regex_pass in [10, 12, 13]: issuedate = int(issuedate[:4]) issuenum = check_int(issuedate, 0) if year and 1 <= issuenum <= 12: issuedate = "%04d-%02d-01" % (year, issuenum) comp_date = datecompare(issuedate, control_date) if not comp_date: logger.debug('Magazine %s failed: Expecting a date' % nzbtitle_formatted) else: logger.debug('Magazine %s failed: Expecting issue number' % nzbtitle_formatted) if not comp_date: bad_date += 1 issuedate = "1970-01-01" if issuedate == "1970-01-01": logger.debug('This issue of %s is unknown age; skipping.' % nzbtitle_formatted) elif not datetype_ok: logger.debug('This issue of %s not in a wanted date format.' % nzbtitle_formatted) elif comp_date > 0: # keep track of what we're going to download so we don't download dupes new_date += 1 issue = bookid + ',' + issuedate if issue not in issues: maglist.append({ 'bookid': bookid, 'nzbprov': nzbprov, 'nzbtitle': nzbtitle, 'nzburl': nzburl, 'nzbmode': nzbmode }) logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted) issues.append(issue) logger.debug('Magazine request number %s' % len(issues)) if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag: logger.debug(str(issues)) insert_table = "wanted" nzbdate = now() # when we asked for it else: logger.debug('This issue of %s is already flagged for download' % issue) else: if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag: logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted) old_date += 1 # store only the _new_ matching results # Don't add a new entry if this issue has been found on an earlier search # and status has been user-set ( we only delete the "Skipped" ones ) # In "wanted" table it might be already snatched/downloading/processing mag_entry = myDB.match('SELECT Status from %s WHERE NZBtitle=? and NZBprov=?' % insert_table, (nzbtitle, nzbprov)) if mag_entry: if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag: logger.debug('%s is already in %s marked %s' % (nzbtitle, insert_table, mag_entry['Status'])) else: controlValueDict = { "NZBtitle": nzbtitle, "NZBprov": nzbprov } if insert_table == 'pastissues': # try to mark ones we've already got match = myDB.match("SELECT * from issues WHERE Title=? AND IssueDate=?", (bookid, issuedate)) if match: insert_status = "Have" else: insert_status = "Skipped" else: insert_status = "Wanted" newValueDict = { "NZBurl": nzburl, "BookID": bookid, "NZBdate": nzbdate, "AuxInfo": issuedate, "Status": insert_status, "NZBsize": nzbsize, "NZBmode": nzbmode } myDB.upsert(insert_table, newValueDict, controlValueDict) if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag: logger.debug('Added %s to %s marked %s' % (nzbtitle, insert_table, insert_status)) msg = 'Found %i result%s for %s. %i new,' % (total_nzbs, plural(total_nzbs), bookid, new_date) msg += ' %i old, %i fail date, %i fail name,' % (old_date, bad_date, bad_name) msg += ' %i rejected: %i to download' % (rejects, len(maglist)) logger.info(msg) for magazine in maglist: if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]: snatch, res = TORDownloadMethod( magazine['bookid'], magazine['nzbtitle'], magazine['nzburl'], 'Magazine') elif magazine['nzbmode'] == 'direct': snatch, res = DirectDownloadMethod( magazine['bookid'], magazine['nzbtitle'], magazine['nzburl'], 'Magazine') elif magazine['nzbmode'] == 'nzb': snatch, res = NZBDownloadMethod( magazine['bookid'], magazine['nzbtitle'], magazine['nzburl'], 'Magazine') else: res = 'Unhandled NZBmode [%s] for %s' % (magazine['nzbmode'], magazine["nzburl"]) logger.error(res) snatch = 0 if snatch: logger.info('Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"])) custom_notify_snatch("%s %s" % (magazine['bookid'], magazine['nzburl'])) notify_snatch("Magazine %s from %s at %s" % (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now())) scheduleJob(action='Start', target='PostProcessor') else: myDB.action('UPDATE wanted SET status="Failed",DLResult=? WHERE NZBurl=?', (res, magazine["nzburl"])) if reset: scheduleJob(action='Restart', target='search_magazines') logger.info("Search for magazines complete") except Exception: logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc()) finally: threading.currentThread().name = "WEBSERVER"
controlValueDict = {"NZBurl": tor_url} newValueDict = { "NZBprov": tor_prov, "BookID": bookid, "NZBsize": tor_size, "NZBtitle": tor_Title, "NZBmode": "torrent", "Status": "Skipped" } myDB.upsert("wanted", newValueDict, controlValueDict) snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' % bookid).fetchone() if not snatchedbooks: TORDownloadMethod(bookid, tor_prov, tor_Title, tor_url) notifiers.notify_snatch(formatter.latinToAscii(tor_Title) + ' at ' + formatter.now()) break if addedCounter == 0: logger.debug("No torrent's found for " + (book["authorName"] + ' ' + book['bookName']).strip() + ". Adding book to queue.") counter = counter + 1 logger.info("TORSearch for Wanted items complete") def TORDownloadMethod(bookid=None, tor_prov=None, tor_title=None, tor_url=None): myDB = database.DBConnection() download = False full_url = tor_url # keep the url as stored in "wanted" table if (lazylibrarian.USE_TOR) and (lazylibrarian.TOR_DOWNLOADER_DELUGE or lazylibrarian.TOR_DOWNLOADER_UTORRENT or lazylibrarian.TOR_DOWNLOADER_BLACKHOLE or lazylibrarian.TOR_DOWNLOADER_TRANSMISSION): if tor_url.startswith('magnet'):
def search_magazines(mags=None, reset=False): # produce a list of magazines to search for, tor, nzb, torznab, rss # noinspection PyBroadException try: threadname = threading.currentThread().name if "Thread-" in threadname: if mags is None: threading.currentThread().name = "SEARCHALLMAG" else: threading.currentThread().name = "SEARCHMAG" myDB = database.DBConnection() searchlist = [] if mags is None: # backlog search searchmags = myDB.select('SELECT Title, Regex, LastAcquired, \ IssueDate from magazines WHERE Status="Active"' ) else: searchmags = [] for magazine in mags: searchmags_temp = myDB.select( 'SELECT Title, Regex, LastAcquired, IssueDate from magazines \ WHERE Title=? AND Status="Active"', (magazine['bookid'], )) for terms in searchmags_temp: searchmags.append(terms) if len(searchmags) == 0: threading.currentThread().name = "WEBSERVER" return # should clear old search results as might not be available any more # ie torrent not available, changed providers, out of news server retention etc. # Only delete the "skipped" ones, not wanted/snatched/processed/ignored logger.debug("Removing old magazine search results") myDB.action('DELETE from pastissues WHERE Status="Skipped"') logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags)))) for searchmag in searchmags: bookid = searchmag['Title'] searchterm = searchmag['Regex'] if not searchterm: dic = { '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '' } # strip accents from the magazine title for easier name-matching searchterm = unaccented_str(searchmag['Title']) if not searchterm: # unless there are no ascii characters left searchterm = searchmag['Title'] searchterm = replace_all(searchterm, dic) searchterm = re.sub('[.\-/]', ' ', searchterm) searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING) searchlist.append({"bookid": bookid, "searchterm": searchterm}) if not searchlist: logger.warn( 'There is nothing to search for. Mark some magazines as active.' ) for book in searchlist: resultlist = [] if lazylibrarian.USE_NZB(): resultlist, nproviders = IterateOverNewzNabSites(book, 'mag') if not nproviders: # don't nag. Show warning message no more than every 20 mins timenow = int(time.time()) if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow: logger.warn( 'No nzb providers are available. Check config and blocklist' ) lazylibrarian.NO_NZB_MSG = timenow if lazylibrarian.USE_DIRECT(): dir_resultlist, nproviders = IterateOverDirectSites( book, 'mag') if not nproviders: # don't nag. Show warning message no more than every 20 mins timenow = int(time.time()) if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow: logger.warn( 'No direct providers are available. Check config and blocklist' ) lazylibrarian.NO_DIRECT_MSG = timenow if dir_resultlist: for item in dir_resultlist: # reformat the results so they look like nzbs resultlist.append({ 'bookid': item['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100', # fake date as none returned 'nzbsize': item['tor_size'], 'nzbmode': 'torrent' }) if lazylibrarian.USE_TOR(): tor_resultlist, nproviders = IterateOverTorrentSites( book, 'mag') if not nproviders: # don't nag. Show warning message no more than every 20 mins timenow = int(time.time()) if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow: logger.warn( 'No tor providers are available. Check config and blocklist' ) lazylibrarian.NO_TOR_MSG = timenow if tor_resultlist: for item in tor_resultlist: # reformat the torrent results so they look like nzbs resultlist.append({ 'bookid': item['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100', # fake date as none returned from torrents 'nzbsize': item['tor_size'], 'nzbmode': 'torrent' }) if lazylibrarian.USE_RSS(): rss_resultlist, nproviders = IterateOverRSSSites() if not nproviders: # don't nag. Show warning message no more than every 20 mins timenow = int(time.time()) if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow: logger.warn( 'No rss providers are available. Check config and blocklist' ) lazylibrarian.NO_RSS_MSG = timenow if rss_resultlist: for item in rss_resultlist: # reformat the rss results so they look like nzbs resultlist.append({ 'bookid': book['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': item[ 'tor_date'], # may be fake date as none returned from rss torrents, only rss nzb 'nzbsize': item['tor_size'], 'nzbmode': item['tor_type'] }) if not resultlist: logger.debug("No results for magazine %s" % book['searchterm']) else: bad_name = 0 bad_date = 0 old_date = 0 rejects = 0 total_nzbs = 0 new_date = 0 maglist = [] issues = [] bookid = '' for nzb in resultlist: total_nzbs += 1 bookid = nzb['bookid'] # strip accents from the magazine title for easier name-matching nzbtitle = unaccented_str(nzb['nzbtitle']) if not nzbtitle: # unless it's not a latin-1 encodable name nzbtitle = nzb['nzbtitle'] nzbtitle = nzbtitle.replace('"', '').replace( "'", "") # suppress " in titles nzburl = nzb['nzburl'] nzbprov = nzb['nzbprov'] nzbdate_temp = nzb['nzbdate'] nzbsize_temp = nzb['nzbsize'] nzbsize_temp = check_int( nzbsize_temp, 1000 ) # not all torrents returned by torznab have a size nzbsize = round(float(nzbsize_temp) / 1048576, 2) nzbdate = nzbdate2format(nzbdate_temp) nzbmode = nzb['nzbmode'] results = myDB.match( 'SELECT * from magazines WHERE Title=?', (bookid, )) if not results: logger.debug( 'Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid)) bad_name += 1 else: rejected = False maxsize = check_int( lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0) if maxsize and nzbsize > maxsize: logger.debug("Rejecting %s, too large" % nzbtitle) rejected = True if not rejected: minsize = check_int( lazylibrarian.CONFIG['REJECT_MAGMIN'], 0) if minsize and nzbsize < minsize: logger.debug("Rejecting %s, too small" % nzbtitle) rejected = True if not rejected: dic = { '.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '' } nzbtitle_formatted = replace_all(nzbtitle, dic).strip() # Need to make sure that substrings of magazine titles don't get found # (e.g. Maxim USA will find Maximum PC USA) # remove extra spaces if they're in a row if nzbtitle_formatted and nzbtitle_formatted[ 0] == '[' and nzbtitle_formatted[-1] == ']': nzbtitle_formatted = nzbtitle_formatted[1:-1] nzbtitle_exploded_temp = " ".join( nzbtitle_formatted.split()) nzbtitle_exploded = nzbtitle_exploded_temp.split( ' ') if ' ' in bookid: bookid_exploded = bookid.split(' ') else: bookid_exploded = [bookid] # check nzb has magazine title and a date/issue nr # eg The MagPI July 2015 if len(nzbtitle_exploded) > len(bookid_exploded): # needs to be longer as it has to include a date # check all the words in the mag title are in the nzbtitle rejected = False wlist = [] for word in nzbtitle_exploded: wlist.append(unaccented(word).lower()) for word in bookid_exploded: if unaccented(word).lower() not in wlist: rejected = True break if rejected: logger.debug( u"Magazine title match failed " + bookid + " for " + nzbtitle_formatted) else: logger.debug(u"Magazine matched " + bookid + " for " + nzbtitle_formatted) else: logger.debug("Magazine name too short (%s)" % len(nzbtitle_exploded)) rejected = True if not rejected: blocked = myDB.match( 'SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (nzburl, )) if blocked: logger.debug( "Rejecting %s, blacklisted at %s" % (nzbtitle_formatted, blocked['NZBprov'])) rejected = True if not rejected: reject_list = getList( str(results['Reject']).lower()) reject_list += getList( lazylibrarian.CONFIG['REJECT_MAGS']) lower_title = unaccented( nzbtitle_formatted).lower() lower_bookid = unaccented(bookid).lower() if reject_list: if lazylibrarian.LOGLEVEL > 2: logger.debug('Reject: %s' % str(reject_list)) logger.debug('Title: %s' % lower_title) logger.debug('Bookid: %s' % lower_bookid) for word in reject_list: if word in lower_title and word not in lower_bookid: rejected = True logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word)) break regex_pass = 0 if not rejected: # Magazine names have many different styles of date # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY # MonthName DD YYYY or MonthName DD, YYYY # YYYY MM or YYYY MM DD # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn # nn YYYY issue number without "Nr" before it # issue and year as a single 6 digit string eg 222015 newdatish = "none" # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY pos = 0 while pos < len(nzbtitle_exploded): year = check_year(nzbtitle_exploded[pos]) if year and pos: month = month2num(nzbtitle_exploded[pos - 1]) if month: if pos - 1: day = check_int( nzbtitle_exploded[pos - 2], 1) if day > 31: # probably issue number nn day = 1 else: day = 1 newdatish = "%04d-%02d-%02d" % ( year, month, day) try: _ = datetime.date(year, month, day) regex_pass = 1 break except ValueError: regex_pass = 0 pos += 1 # MonthName DD YYYY or MonthName DD, YYYY if not regex_pass: pos = 0 while pos < len(nzbtitle_exploded): year = check_year(nzbtitle_exploded[pos]) if year and (pos - 1): month = month2num( nzbtitle_exploded[pos - 2]) if month: day = check_int( nzbtitle_exploded[ pos - 1].rstrip(','), 1) try: _ = datetime.date( year, month, day) newdatish = "%04d-%02d-%02d" % ( year, month, day) regex_pass = 2 break except ValueError: regex_pass = 0 pos += 1 # YYYY MM or YYYY MM DD if not regex_pass: pos = 0 while pos < len(nzbtitle_exploded): year = check_year(nzbtitle_exploded[pos]) if year and pos + 1 < len( nzbtitle_exploded): month = check_int( nzbtitle_exploded[pos + 1], 0) if month: if pos + 2 < len( nzbtitle_exploded): day = check_int( nzbtitle_exploded[pos + 2], 1) else: day = 1 try: _ = datetime.date( year, month, day) newdatish = "%04d-%02d-%02d" % ( year, month, day) regex_pass = 3 break except ValueError: regex_pass = 0 pos += 1 # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn if not regex_pass: pos = 0 while pos < len(nzbtitle_exploded): if nzbtitle_exploded[pos].lower() in [ "issue", "no", "nr", "vol" ]: if pos + 1 < len(nzbtitle_exploded): issue = check_int( nzbtitle_exploded[pos + 1], 0) if issue: newdatish = str( issue) # 4 == 04 == 004 if pos + 2 < len( nzbtitle_exploded): year = check_year( nzbtitle_exploded[pos + 2]) if year and year < int( datetime.date. today().year): newdatish = '0' # it's old regex_pass = 4 # Issue/No/Nr/Vol nn, YYYY else: regex_pass = 5 # Issue/No/Nr/Vol nn break pos += 1 # nn YYYY issue number without "Nr" before it if not regex_pass: pos = 1 while pos < len(nzbtitle_exploded): year = check_year(nzbtitle_exploded[pos]) if year: issue = check_int( nzbtitle_exploded[pos - 1], 0) if issue: newdatish = str( issue) # 4 == 04 == 004 regex_pass = 6 if year < int(datetime.date.today( ).year): newdatish = '0' # it's old break pos += 1 # issue and year as a single 6 digit string eg 222015 if not regex_pass: pos = 0 while pos < len(nzbtitle_exploded): issue = nzbtitle_exploded[pos] if issue.isdigit() and len(issue) == 6: year = int(issue[2:]) issue = int(issue[:2]) newdatish = str( issue) # 4 == 04 == 004 regex_pass = 7 if year < int( datetime.date.today().year): newdatish = '0' # it's old break pos += 1 if not regex_pass: logger.debug( 'Magazine %s not in a recognised date format.' % nzbtitle_formatted) bad_date += 1 # allow issues with good name but bad date to be included # so user can manually select them, incl those with issue numbers newdatish = "1970-01-01" # provide a fake date for bad-date issues regex_pass = 99 if rejected: rejects += 1 else: if lazylibrarian.LOGLEVEL > 2: logger.debug("regex %s [%s] %s" % (regex_pass, nzbtitle_formatted, newdatish)) # wanted issues go into wanted table marked "Wanted" # the rest into pastissues table marked "Skipped" insert_table = "pastissues" insert_status = "Skipped" control_date = results['IssueDate'] if control_date is None: # we haven't got any copies of this magazine yet # get a rough time just over a month ago to compare to, in format yyyy-mm-dd # could perhaps calc differently for weekly, biweekly etc # or for magazines with only an issue number, use zero if str(newdatish).isdigit(): logger.debug( 'Magazine comparing issue numbers (%s)' % newdatish) control_date = 0 elif re.match('\d+-\d\d-\d\d', str(newdatish)): start_time = time.time() start_time -= int( lazylibrarian.CONFIG['MAG_AGE'] ) * 24 * 60 * 60 # number of seconds in days if start_time < 0: # limit of unixtime (1st Jan 1970) start_time = 0 control_date = time.strftime( "%Y-%m-%d", time.localtime(start_time)) logger.debug( 'Magazine date comparing to %s' % control_date) else: logger.debug( 'Magazine unable to find comparison type [%s]' % newdatish) control_date = 0 if str(control_date).isdigit() and str( newdatish).isdigit(): # for issue numbers, check if later than last one we have comp_date = int(newdatish) - int(control_date) newdatish = "%s" % newdatish newdatish = newdatish.zfill( 4) # pad so we sort correctly elif re.match('\d+-\d\d-\d\d', str(control_date)) and \ re.match('\d+-\d\d-\d\d', str(newdatish)): # only grab a copy if it's newer than the most recent we have, # or newer than a month ago if we have none comp_date = datecompare( newdatish, control_date) else: # invalid comparison of date and issue number if re.match('\d+-\d\d-\d\d', str(control_date)): logger.debug( 'Magazine %s failed: Expecting a date' % nzbtitle_formatted) else: logger.debug( 'Magazine %s failed: Expecting issue number' % nzbtitle_formatted) bad_date += 1 newdatish = "1970-01-01" # this is our fake date for ones we can't decipher comp_date = 0 if comp_date > 0: # keep track of what we're going to download so we don't download dupes new_date += 1 issue = bookid + ',' + newdatish if issue not in issues: maglist.append({ 'bookid': bookid, 'nzbprov': nzbprov, 'nzbtitle': nzbtitle, 'nzburl': nzburl, 'nzbmode': nzbmode }) logger.debug( 'This issue of %s is new, downloading' % nzbtitle_formatted) issues.append(issue) logger.debug('Magazine request number %s' % len(issues)) if lazylibrarian.LOGLEVEL > 2: logger.debug(str(issues)) insert_table = "wanted" insert_status = "Wanted" nzbdate = now() # when we asked for it else: logger.debug( 'This issue of %s is already flagged for download' % issue) else: if newdatish != "1970-01-01": # this is our fake date for ones we can't decipher logger.debug( 'This issue of %s is old; skipping.' % nzbtitle_formatted) old_date += 1 # store only the _new_ matching results # Don't add a new entry if this issue has been found on an earlier search # and status has been user-set ( we only delete the "Skipped" ones ) # In "wanted" table it might be already snatched/downloading/processing mag_entry = myDB.match( 'SELECT * from %s WHERE NZBtitle=? and NZBprov=?' % insert_table, (nzbtitle, nzbprov)) if mag_entry: if lazylibrarian.LOGLEVEL > 2: logger.debug( '%s is already in %s marked %s' % (nzbtitle, insert_table, insert_status)) else: controlValueDict = { "NZBtitle": nzbtitle, "NZBprov": nzbprov } newValueDict = { "NZBurl": nzburl, "BookID": bookid, "NZBdate": nzbdate, "AuxInfo": newdatish, "Status": insert_status, "NZBsize": nzbsize, "NZBmode": nzbmode } myDB.upsert(insert_table, newValueDict, controlValueDict) if lazylibrarian.LOGLEVEL > 2: logger.debug('Added %s to %s marked %s' % (nzbtitle, insert_table, insert_status)) msg = 'Found %i result%s for %s. %i new,' % ( total_nzbs, plural(total_nzbs), bookid, new_date) msg += ' %i old, %i fail date, %i fail name,' % ( old_date, bad_date, bad_name) msg += ' %i rejected: %i to download' % (rejects, len(maglist)) logger.info(msg) for magazine in maglist: if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]: snatch = TORDownloadMethod(magazine['bookid'], magazine['nzbtitle'], magazine['nzburl'], 'magazine') else: snatch = NZBDownloadMethod(magazine['bookid'], magazine['nzbtitle'], magazine['nzburl'], 'magazine') if snatch: logger.info( 'Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"])) notify_snatch("Magazine %s from %s at %s" % (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now())) custom_notify_snatch(magazine['bookid']) scheduleJob(action='Start', target='processDir') if reset: scheduleJob(action='Restart', target='search_magazines') logger.info("Search for magazines complete") except Exception: logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc()) finally: threading.currentThread().name = "WEBSERVER"
def processDir(force=False, reset=False): # rename this thread threading.currentThread().name = "POSTPROCESS" if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR): processpath = os.getcwd() else: processpath = lazylibrarian.DOWNLOAD_DIR logger.debug(' Checking [%s] for files to post process' % processpath) try: downloads = os.listdir(processpath) except OSError as why: logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror)) return False myDB = database.DBConnection() snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"') if force is False and len(snatched) == 0: logger.info('Nothing marked as snatched. Stopping postprocessor job.') common.schedule_job(action='Stop', target='processDir') elif len(downloads) == 0: logger.info('No downloads are found. Nothing to process.') else: logger.debug("Checking %s downloads for %s snatched files" % (len(downloads), len(snatched))) ppcount = 0 for book in snatched: found = False for fname in downloads: if not fname.endswith('.fail'): # has this failed before? # this is to get round differences in torrent filenames. # Torrents aren't always returned with the name we searched for # there might be a better way... if isinstance(fname, str): matchname = fname.decode(lazylibrarian.SYS_ENCODING) else: matchname = fname if ' LL.(' in matchname: matchname = matchname.split(' LL.(')[0] matchtitle = book['NZBtitle'] if ' LL.(' in matchtitle: matchtitle = matchtitle.split(' LL.(')[0] match = fuzz.token_set_ratio(matchtitle, matchname) if match >= 95: fname = matchname if os.path.isfile(os.path.join(processpath, fname)): # handle single file downloads here... if formatter.is_valid_booktype(fname, booktype="book") \ or formatter.is_valid_booktype(fname, booktype="mag"): dirname = os.path.join(processpath, os.path.splitext(fname)[0]) if not os.path.exists(dirname): try: os.makedirs(dirname) except OSError as why: logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror)) if os.path.exists(dirname): try: shutil.move(os.path.join(processpath, fname), os.path.join(dirname, fname)) fname = os.path.splitext(fname)[0] except Exception as why: logger.debug("Failed to move file %s to %s, %s" % (fname, dirname, str(why))) if os.path.isdir(os.path.join(processpath, fname)): pp_path = os.path.join(processpath, fname) logger.debug('Found folder %s for %s' % (pp_path, book['NZBtitle'])) found = True break else: logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle)) else: logger.debug('Skipping %s' % fname) if found: data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID']) if data: authorname = data[0]['AuthorName'] bookname = data[0]['BookName'] if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER: logger.warn('Please check your EBOOK_DEST_FOLDER setting') lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\') # Default destination path, should be allowed change per config file. dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace( '$Title', bookname) global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace( '$Title', bookname) global_name = common.remove_accents(global_name) # dest_path = authorname+'/'+bookname # global_name = bookname + ' - ' + authorname # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere? dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''} dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic)) dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode( lazylibrarian.SYS_ENCODING) else: data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID']) if data: # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple # files are downloading, there will be an error in post-processing, trying to go to the # same directory. mostrecentissue = data[0]['IssueDate'] # keep for processing issues arriving out of order # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere? dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''} mag_name = formatter.latinToAscii(formatter.replace_all(book['BookID'], dic)) # book auxinfo is a cleaned date, eg 2015-01-01 dest_path = lazylibrarian.MAG_DEST_FOLDER.replace( '$IssueDate', book['AuxInfo']).replace('$Title', mag_name) # dest_path = '_Magazines/'+title+'/'+book['AuxInfo'] if lazylibrarian.MAG_RELATIVE: if dest_path[0] not in '._': dest_path = '_' + dest_path dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode( lazylibrarian.SYS_ENCODING) else: dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING) authorname = None bookname = None global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace( '$Title', mag_name) global_name = common.remove_accents(global_name) # global_name = book['AuxInfo']+' - '+title else: logger.debug("Snatched magazine %s is not in download directory" % (book['BookID'])) continue else: logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle'])) continue # try: # os.chmod(dest_path, 0777) # except Exception, e: # logger.debug("Could not chmod post-process directory: " + str(dest_path)) processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name) if processBook: logger.debug("Processing %s, %s" % (global_name, book['NZBurl'])) # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"} newValueDict = {"Status": "Processed", "NZBDate": formatter.now()} # say when we processed it myDB.upsert("wanted", newValueDict, controlValueDict) if bookname is not None: # it's a book, if None it's a magazine processExtras(myDB, dest_path, global_name, data) else: # update mags controlValueDict = {"Title": book['BookID']} if mostrecentissue > book['AuxInfo']: # check this in case processing issues arriving out of order newValueDict = {"LastAcquired": formatter.today(), "IssueStatus": "Open"} else: newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": formatter.today(), "IssueStatus": "Open"} myDB.upsert("magazines", newValueDict, controlValueDict) # dest_path is where we put the magazine after processing, but we don't have the full filename # so look for any "book" in that directory dest_file = book_file(dest_path, booktype='mag') controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']} newValueDict = {"IssueAcquired": formatter.today(), "IssueFile": dest_file, "IssueID": magazinescan.create_id("%s %s" % (book['BookID'], book['AuxInfo'])) } myDB.upsert("issues", newValueDict, controlValueDict) # create a thumbnail cover for the new issue magazinescan.create_cover(dest_file) logger.info('Successfully processed: %s' % global_name) ppcount = ppcount + 1 notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now()) else: logger.error('Postprocessing for %s has failed.' % global_name) logger.error('Warning - Residual files remain in %s.fail' % pp_path) # at this point, as it failed we should move it or it will get postprocessed # again (and fail again) try: os.rename(pp_path, pp_path + '.fail') except: logger.debug("Unable to rename %s" % pp_path) downloads = os.listdir(processpath) # check in case we processed/deleted some above for directory in downloads: if "LL.(" in directory and not directory.endswith('.fail'): bookID = str(directory).split("LL.(")[1].split(")")[0] logger.debug("Book with id: " + str(bookID) + " is in downloads") pp_path = os.path.join(processpath, directory) if os.path.isfile(pp_path): pp_path = os.path.join(processpath) if (os.path.isdir(pp_path)): logger.debug('Found LL folder %s.' % pp_path) if import_book(pp_path, bookID): ppcount = ppcount + 1 if ppcount: logger.info('%s books/mags have been processed.' % ppcount) else: logger.info('No snatched books/mags have been found') if reset: common.schedule_job(action='Restart', target='processDir')
def downloadResult(match, book): """ match: best result from search providers book: book we are downloading return: True if already snatched, False if failed to snatch, >True if we snatched it """ try: myDB = database.DBConnection() resultTitle = match[1] newValueDict = match[2] controlValueDict = match[3] if book['library'] == 'AudioBook': auxinfo = 'AudioBook' else: # elif book['library'] == 'eBook': auxinfo = 'eBook' if auxinfo == 'eBook': snatchedbooks = myDB.match( 'SELECT BookID from books WHERE BookID=? and Status="Snatched"', (newValueDict["BookID"], )) else: snatchedbooks = myDB.match( 'SELECT BookID from books WHERE BookID=? and AudioStatus="Snatched"', (newValueDict["BookID"], )) if snatchedbooks: logger.debug('%s %s already marked snatched' % (book['authorName'], book['bookName'])) return True # someone else already found it else: myDB.upsert("wanted", newValueDict, controlValueDict) if 'libgen' in newValueDict[ "NZBprov"]: # for libgen we use direct download links snatch = DirectDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"], controlValueDict["NZBurl"], resultTitle, auxinfo) elif newValueDict['NZBmode'] in ["torznab", "torrent", "magnet"]: snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"], controlValueDict["NZBurl"], auxinfo) elif newValueDict['NZBmode'] == 'nzb': snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"], controlValueDict["NZBurl"], auxinfo) else: logger.error( 'Unhandled NZBmode [%s] for %s' % (newValueDict['NZBmode'], controlValueDict["NZBurl"])) snatch = False if snatch: logger.info('Downloading %s %s from %s' % (auxinfo, newValueDict["NZBtitle"], newValueDict["NZBprov"])) notify_snatch("%s %s from %s at %s" % (auxinfo, newValueDict["NZBtitle"], newValueDict["NZBprov"], now())) custom_notify_snatch(newValueDict["BookID"]) # at this point we could add NZBprov to the blocklist with a short timeout, a second or two? # This would implement a round-robin search system. Blocklist with an incremental counter. # If number of active providers == number blocklisted, so no unblocked providers are left, # either sleep for a while, or unblock the one with the lowest counter. scheduleJob(action='Start', target='processDir') return True + True # we found it return False except Exception: logger.error('Unhandled exception in downloadResult: %s' % traceback.format_exc())
f.close() logger.info('NZB file saved to: ' + nzbpath) download = True except Exception, e: logger.error('%s not writable, NZB not saved. Error: %s' % (nzbpath, e)) download = False if download: myDB.action('UPDATE books SET status = "Snatched" WHERE BookID=?', [bookid]) controlValueDict = {"BookID": bookid} newValueDict = { "NZBprov": nzbprov, "NZBdate": formatter.now(), "NZBurl": nzburl, "NZBtitle": nzbtitle, "Status": "Snatched" } myDB.upsert("wanted", newValueDict, controlValueDict) else: controlValueDict = {"BookID": bookid} newValueDict = { "NZBprov": nzbprov, "NZBdate": formatter.now(), "NZBurl": nzburl, "NZBtitle": nzbtitle, "Status": "Failed"
def processResultList(resultlist, book, searchtype): myDB = database.DBConnection() dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '', '-': ' ', '\s\s': ' '} # ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ', # ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '} dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': ''} match_ratio = int(lazylibrarian.MATCH_RATIO) reject_list = formatter.getList(lazylibrarian.REJECT_WORDS) matches = [] for tor in resultlist: torTitle = formatter.latinToAscii(formatter.replace_all(str(tor['tor_title']), dictrepl)).strip() torTitle = re.sub(r"\s\s+", " ", torTitle) # remove extra whitespace author = formatter.latinToAscii(formatter.replace_all(book['authorName'], dic)) title = formatter.latinToAscii(formatter.replace_all(book['bookName'], dic)) torAuthor_match = fuzz.token_set_ratio(author, torTitle) torBook_match = fuzz.token_set_ratio(title, torTitle) logger.debug(u"TOR author/book Match: %s/%s for %s" % (torAuthor_match, torBook_match, torTitle)) rejected = False for word in reject_list: if word in torTitle.lower() and not word in author.lower() and not word in title.lower(): rejected = True logger.debug("Rejecting %s, contains %s" % (torTitle, word)) break if (torAuthor_match >= match_ratio and torBook_match >= match_ratio and not rejected): #logger.debug(u'Found Torrent: %s using %s search' % (tor['tor_title'], searchtype)) bookid = book['bookid'] tor_Title = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip() tor_url = tor['tor_url'] tor_prov = tor['tor_prov'] tor_size_temp = tor['tor_size'] # Need to cater for when this is NONE (Issue 35) if tor_size_temp is None: tor_size_temp = 1000 tor_size = str(round(float(tor_size_temp) / 1048576, 2)) + ' MB' controlValueDict = {"NZBurl": tor_url} newValueDict = { "NZBprov": tor_prov, "BookID": bookid, "NZBdate": formatter.now(), # when we asked for it "NZBsize": tor_size, "NZBtitle": tor_Title, "NZBmode": "torrent", "Status": "Skipped" } score = (torBook_match + torAuthor_match)/2 # as a percentage # lose a point for each extra word in the title so we get the closest match words = len(formatter.getList(torTitle)) words -= len(formatter.getList(author)) words -= len(formatter.getList(title)) score -= abs(words) matches.append([score, torTitle, newValueDict, controlValueDict]) if matches: highest = max(matches, key=lambda x: x[0]) score = highest[0] nzb_Title = highest[1] newValueDict = highest[2] controlValueDict = highest[3] logger.info(u'Best match TOR (%s%%): %s using %s search' % (score, nzb_Title, searchtype)) myDB.upsert("wanted", newValueDict, controlValueDict) snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' % newValueDict["BookID"]).fetchone() if not snatchedbooks: snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"], newValueDict["NZBtitle"], controlValueDict["NZBurl"]) if snatch: notifiers.notify_snatch(newValueDict["NZBtitle"] + ' at ' + formatter.now()) common.schedule_job(action='Start', target='processDir') return True logger.debug("No torrent's found for " + (book["authorName"] + ' ' + book['bookName']).strip() + " using searchtype " + searchtype) return False
def processResultList(resultlist, book, searchtype): myDB = database.DBConnection() dictrepl = { '...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '', '-': ' ', '\s\s': ' ' } # ' the ': ' ', ' a ': ' ', ' and ': ' ', # ' to ': ' ', ' of ': ' ', ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '} dic = { '...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': '' } match_ratio = int(lazylibrarian.MATCH_RATIO) reject_list = getList(lazylibrarian.REJECT_WORDS) author = unaccented_str(replace_all(book['authorName'], dic)) title = unaccented_str(replace_all(book['bookName'], dic)) matches = [] for nzb in resultlist: nzb_Title = unaccented_str(replace_all(nzb['nzbtitle'], dictrepl)).strip() nzb_Title = re.sub(r"\s\s+", " ", nzb_Title) # remove extra whitespace nzbAuthor_match = fuzz.token_set_ratio(author, nzb_Title) nzbBook_match = fuzz.token_set_ratio(title, nzb_Title) logger.debug(u"NZB author/book Match: %s/%s for %s" % (nzbAuthor_match, nzbBook_match, nzb_Title)) nzburl = nzb['nzburl'] rejected = False already_failed = myDB.action( 'SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' % nzburl).fetchone() if already_failed: logger.debug("Rejecting %s, blacklisted at %s" % (nzb_Title, already_failed['NZBprov'])) rejected = True if not rejected: for word in reject_list: if word in nzb_Title.lower( ) and not word in author.lower() and not word in title.lower(): rejected = True logger.debug("Rejecting %s, contains %s" % (nzb_Title, word)) break nzbsize_temp = nzb[ 'nzbsize'] # Need to cater for when this is NONE (Issue 35) if nzbsize_temp is None: nzbsize_temp = 1000 nzbsize = round(float(nzbsize_temp) / 1048576, 2) maxsize = check_int(lazylibrarian.REJECT_MAXSIZE, 0) if not rejected: if maxsize and nzbsize > maxsize: rejected = True logger.debug("Rejecting %s, too large" % nzb_Title) if not rejected: if nzbAuthor_match >= match_ratio and nzbBook_match >= match_ratio: bookid = book['bookid'] nzbTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip() nzbprov = nzb['nzbprov'] nzbdate_temp = nzb['nzbdate'] nzbdate = nzbdate2format(nzbdate_temp) nzbmode = nzb['nzbmode'] controlValueDict = {"NZBurl": nzburl} newValueDict = { "NZBprov": nzbprov, "BookID": bookid, "NZBdate": now(), # when we asked for it "NZBsize": nzbsize, "NZBtitle": nzbTitle, "NZBmode": nzbmode, "Status": "Skipped" } score = (nzbBook_match + nzbAuthor_match) / 2 # as a percentage # lose a point for each extra word in the title so we get the closest match words = len(getList(nzb_Title)) words -= len(getList(author)) words -= len(getList(title)) score -= abs(words) matches.append( [score, nzb_Title, newValueDict, controlValueDict]) if matches: highest = max(matches, key=lambda x: x[0]) score = highest[0] nzb_Title = highest[1] newValueDict = highest[2] controlValueDict = highest[3] logger.info(u'Best match NZB (%s%%): %s using %s search' % (score, nzb_Title, searchtype)) snatchedbooks = myDB.action( 'SELECT * from books WHERE BookID="%s" and Status="Snatched"' % newValueDict["BookID"]).fetchone() if snatchedbooks: logger.debug('%s already marked snatched' % nzb_Title) return True # someone else found it else: logger.debug('%s adding to wanted' % nzb_Title) myDB.upsert("wanted", newValueDict, controlValueDict) if nzbmode == "torznab": snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"], newValueDict["NZBtitle"], controlValueDict["NZBurl"]) else: snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"], newValueDict["NZBtitle"], controlValueDict["NZBurl"]) if snatch: notifiers.notify_snatch(newValueDict["NZBtitle"] + ' at ' + now()) scheduleJob(action='Start', target='processDir') return True + True # we found it else: logger.debug("No nzb's found for [%s] using searchtype %s" % (book["searchterm"], searchtype)) return False
def processDir(): # rename this thread threading.currentThread().name = "POSTPROCESS" processpath = lazylibrarian.DOWNLOAD_DIR logger.debug(' Checking [%s] for files to post process' % processpath) # TODO - try exception on os.listdir - it throws debug level # exception if dir doesn't exist - bloody hard to catch try: downloads = os.listdir(processpath) except OSError: logger.error('Could not access [%s] directory ' % processpath) return False myDB = database.DBConnection() snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"') if snatched is None: logger.info('No books are snatched. Nothing to process.') elif downloads is None: logger.info('No downloads are found. Nothing to process.') else: ppcount = 0 for book in snatched: if book['NZBtitle'] in downloads: pp_path = os.path.join(processpath, book['NZBtitle']) logger.debug('Found book/mag folder %s.' % pp_path) data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID']) if data: authorname = data[0]['AuthorName'] bookname = data[0]['BookName'] # Default destination path, should be allowed change per config file. dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace('$Title', bookname) global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace('$Title', bookname) # dest_path = authorname+'/'+bookname # global_name = bookname + ' - ' + authorname dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING) else: data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID']) if data: # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple # files are downloading, there will be an error in post-processing, trying to go to the # same directory. mostrecentissue = data[0]['IssueDate'] # keep this for processing issues arriving out of order dest_path = lazylibrarian.MAG_DEST_FOLDER.replace('$IssueDate', book['AuxInfo']).replace('$Title', book['BookID']) # dest_path = '_Magazines/'+title+'/'+book['AuxInfo'] if lazylibrarian.MAG_RELATIVE: if dest_path[0] not in '._': dest_path = '_' + dest_path dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING) else: dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING) authorname = None bookname = None global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace('$Title', book['BookID']) # global_name = book['AuxInfo']+' - '+title else: logger.debug("Snatched magazine %s is not in download directory" % (book['BookID'])) continue else: logger.debug("Snatched NZB %s is not in download directory" % (book['NZBtitle'])) continue dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''} dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic)) try: os.chmod(dest_path, 0777) except Exception, e: logger.debug("Could not chmod post-process directory: " + str(dest_path)) processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name, book['BookID']) if processBook: ppcount = ppcount + 1 # update nzbs controlValueDict = {"NZBurl": book['NZBurl']} newValueDict = {"Status": "Processed", "NZBDate": formatter.today()} # say when we processed it myDB.upsert("wanted", newValueDict, controlValueDict) if bookname is not None: # it's a book, if None it's a magazine processExtras(myDB, dest_path, global_name, data) else: # update mags controlValueDict = {"Title": book['BookID']} if mostrecentissue > book['AuxInfo']: # check this in case processing issues arriving out of order newValueDict = {"LastAcquired": formatter.today(), "IssueStatus": "Open"} else: newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": formatter.today(), "IssueStatus": "Open"} myDB.upsert("magazines", newValueDict, controlValueDict) # dest_path is where we put the magazine after processing, but we don't have the full filename # so look for any "book" in that directory dest_file = book_file(dest_path) controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']} newValueDict = {"IssueAcquired": formatter.today(), "IssueFile": dest_file} myDB.upsert("issues", newValueDict, controlValueDict) logger.info('Successfully processed: %s' % global_name) notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now()) else: logger.error('Postprocessing for %s has failed.' % global_name) logger.error('Warning - Residual files remain in %s' % pp_path) # # TODO Seems to be duplication here. Can we just scan once for snatched books # instead of scan for snatched and then scan for directories with "LL.(bookID)" in? # Should there be any directories with "LL.(bookID)" that aren't in snatched? # Maybe this was put in for manually downloaded books? # downloads = os.listdir(processpath) # check in case we processed/deleted some above for directory in downloads: if "LL.(" in directory: bookID = str(directory).split("LL.(")[1].split(")")[0] logger.debug("Book with id: " + str(bookID) + " is in downloads") pp_path = os.path.join(processpath, directory) if os.path.isfile(pp_path): pp_path = os.path.join(processpath) if (os.path.isdir(pp_path)): logger.debug('Found LL folder %s.' % pp_path) if import_book(pp_path, bookID): ppcount = ppcount + 1 if ppcount: logger.info('%s books/mags have been processed.' % ppcount) else: logger.info('No snatched books/mags have been found')
def processDir(reset=False): try: threadname = threading.currentThread().name if "Thread-" in threadname: threading.currentThread().name = "POSTPROCESS" processpath = lazylibrarian.DIRECTORY('Download') logger.debug('Checking [%s] for files to post process' % processpath) try: downloads = os.listdir(processpath) except OSError as why: logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror)) return myDB = database.DBConnection() snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"') if len(snatched) == 0: logger.info('Nothing marked as snatched.') scheduleJob(action='Stop', target='processDir') return if len(downloads) == 0: logger.info('No downloads are found. Nothing to process yet.') return logger.info("Checking %s download%s for %s snatched file%s" % (len(downloads), plural(len(downloads)), len(snatched), plural(len(snatched)))) ppcount = 0 for book in snatched: # if torrent, see if we can get current status from the downloader as the name # may have been changed once magnet resolved, or download started or completed # depending on torrent downloader. Usenet doesn't change the name. We like usenet. torrentname = '' try: logger.debug("%s was sent to %s" % (book['NZBtitle'], book['Source'])) if book['Source'] == 'TRANSMISSION': torrentname = transmission.getTorrentFolder(book['DownloadID']) elif book['Source'] == 'UTORRENT': torrentname = utorrent.nameTorrent(book['DownloadID']) elif book['Source'] == 'RTORRENT': torrentname = rtorrent.getName(book['DownloadID']) elif book['Source'] == 'QBITTORRENT': torrentname = qbittorrent.getName(book['DownloadID']) elif book['Source'] == 'SYNOLOGY_TOR': torrentname = synology.getName(book['DownloadID']) elif book['Source'] == 'DELUGEWEBUI': torrentname = deluge.getTorrentFolder(book['DownloadID']) elif book['Source'] == 'DELUGERPC': client = DelugeRPCClient(lazylibrarian.DELUGE_HOST, int(lazylibrarian.DELUGE_PORT), lazylibrarian.DELUGE_USER, lazylibrarian.DELUGE_PASS) try: client.connect() result = client.call('core.get_torrent_status', book['DownloadID'], {}) # for item in result: # logger.debug ('Deluge RPC result %s: %s' % (item, result[item])) if 'name' in result: torrentname = unaccented_str(result['name']) except Exception as e: logger.debug('DelugeRPC failed %s' % str(e)) except Exception as e: logger.debug("Failed to get updated torrent name from %s for %s: %s" % (book['Source'], book['DownloadID'], str(e))) matchtitle = unaccented_str(book['NZBtitle']) if torrentname and torrentname != matchtitle: logger.debug("%s Changing [%s] to [%s]" % (book['Source'], matchtitle, torrentname)) myDB.action('UPDATE wanted SET NZBtitle = "%s" WHERE NZBurl = "%s"' % (torrentname, book['NZBurl'])) matchtitle = torrentname # here we could also check percentage downloaded or eta or status? # If downloader says it hasn't completed, no need to look for it. matches = [] logger.info('Looking for %s in %s' % (matchtitle, processpath)) for fname in downloads: # skip if failed before or incomplete torrents, or incomplete btsync extn = os.path.splitext(fname)[1] if extn not in ['.fail', '.part', '.bts', '.!ut']: # This is to get round differences in torrent filenames. # Usenet is ok, but Torrents aren't always returned with the name we searched for # We ask the torrent downloader for the torrent name, but don't always get an answer # so we try to do a "best match" on the name, there might be a better way... if isinstance(fname, str): matchname = fname.decode(lazylibrarian.SYS_ENCODING) else: matchname = fname if ' LL.(' in matchname: matchname = matchname.split(' LL.(')[0] match = 0 if matchtitle: if ' LL.(' in matchtitle: matchtitle = matchtitle.split(' LL.(')[0] match = fuzz.token_set_ratio(matchtitle, matchname) if match and match >= lazylibrarian.DLOAD_RATIO: fname = matchname if os.path.isfile(os.path.join(processpath, fname)): # handle single file downloads here. Book/mag file in download root. # move the file into it's own subdirectory so we don't move/delete things that aren't ours logger.debug('filename [%s] is a file' % os.path.join(processpath, fname)) if is_valid_booktype(fname, booktype="book") \ or is_valid_booktype(fname, booktype="mag"): logger.debug('filename [%s] is a valid book/mag' % os.path.join(processpath, fname)) if bts_file(processpath): logger.debug("Skipping %s, found a .bts file" % processpath) else: fname = os.path.splitext(fname)[0] dirname = os.path.join(processpath, fname) if not os.path.exists(dirname): try: os.makedirs(dirname) setperm(dirname) except OSError as why: logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror)) if os.path.exists(dirname): # move the book and any related files too # ie other book formats, or opf, jpg with same title # can't move metadata.opf or cover.jpg or similar # as can't be sure they are ours # not sure if we need a new listdir here, or whether we can use the old one list_dir = os.listdir(processpath) for ourfile in list_dir: if ourfile.startswith(fname): if is_valid_booktype(ourfile, booktype="book") \ or is_valid_booktype(ourfile, booktype="mag") \ or os.path.splitext(ourfile)[1].lower() in ['.opf', '.jpg']: try: if lazylibrarian.DESTINATION_COPY: shutil.copyfile(os.path.join(processpath, ourfile), os.path.join(dirname, ourfile)) setperm(os.path.join(dirname, ourfile)) else: shutil.move(os.path.join(processpath, ourfile), os.path.join(dirname, ourfile)) setperm(os.path.join(dirname, ourfile)) except Exception as why: logger.debug("Failed to copy/move file %s to %s, %s" % (ourfile, dirname, str(why))) pp_path = os.path.join(processpath, fname) if os.path.isdir(pp_path): logger.debug('Found folder (%s%%) %s for %s' % (match, pp_path, matchtitle)) if not os.listdir(pp_path): logger.debug("Skipping %s, folder is empty" % pp_path) elif bts_file(pp_path): logger.debug("Skipping %s, found a .bts file" % pp_path) else: matches.append([match, pp_path, book]) else: pp_path = os.path.join(processpath, fname) matches.append([match, pp_path, book]) # so we can report closest match else: logger.debug('Skipping %s' % fname) match = 0 if matches: highest = max(matches, key=lambda x: x[0]) match = highest[0] pp_path = highest[1] book = highest[2] if match and match >= lazylibrarian.DLOAD_RATIO: logger.debug(u'Found match (%s%%): %s for %s' % (match, pp_path, book['NZBtitle'])) data = myDB.match('SELECT * from books WHERE BookID="%s"' % book['BookID']) if data: # it's a book logger.debug(u'Processing book %s' % book['BookID']) authorname = data['AuthorName'] bookname = data['BookName'] if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER: logger.warn('Please check your EBOOK_DEST_FOLDER setting') lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\') # Default destination path, should be allowed change per config file. dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace( '$Title', bookname) global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace( '$Title', bookname) global_name = unaccented(global_name) # dest_path = authorname+'/'+bookname # global_name = bookname + ' - ' + authorname # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere? dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''} dest_path = unaccented_str(replace_all(dest_path, dic)) dest_path = os.path.join(processpath, dest_path).encode(lazylibrarian.SYS_ENCODING) else: data = myDB.match('SELECT * from magazines WHERE Title="%s"' % book['BookID']) if data: # it's a magazine logger.debug(u'Processing magazine %s' % book['BookID']) # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple # files are downloading, there will be an error in post-processing, trying to go to the # same directory. mostrecentissue = data['IssueDate'] # keep for processing issues arriving out of order # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere? dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''} mag_name = unaccented_str(replace_all(book['BookID'], dic)) # book auxinfo is a cleaned date, eg 2015-01-01 dest_path = lazylibrarian.MAG_DEST_FOLDER.replace( '$IssueDate', book['AuxInfo']).replace('$Title', mag_name) if lazylibrarian.MAG_RELATIVE: if dest_path[0] not in '._': dest_path = '_' + dest_path dest_path = os.path.join(processpath, dest_path).encode( lazylibrarian.SYS_ENCODING) else: dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING) authorname = None bookname = None global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace( '$Title', mag_name) global_name = unaccented(global_name) else: # not recognised logger.debug('Nothing in database matching "%s"' % book['BookID']) continue else: logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle'])) if match: logger.debug(u'Closest match (%s%%): %s' % (match, pp_path)) #for match in matches: # logger.info('Match: %s%% %s' % (match[0], match[1])) continue processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name) if processBook: logger.debug("Processing %s, %s" % (global_name, book['NZBurl'])) # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue controlValueDict = {"BookID": book['BookID'], "NZBurl": book['NZBurl'], "Status": "Snatched"} newValueDict = {"Status": "Processed", "NZBDate": now()} # say when we processed it myDB.upsert("wanted", newValueDict, controlValueDict) if bookname: # it's a book, if None it's a magazine if len(lazylibrarian.IMP_CALIBREDB): logger.debug('Calibre should have created the extras for us') else: processExtras(myDB, dest_path, global_name, data) else: # update mags controlValueDict = {"Title": book['BookID']} if mostrecentissue: if mostrecentissue.isdigit() and str(book['AuxInfo']).isdigit(): older = int(mostrecentissue) > int(book['AuxInfo']) # issuenumber else: older = mostrecentissue > book['AuxInfo'] # YYYY-MM-DD else: older = False if older: # check this in case processing issues arriving out of order newValueDict = {"LastAcquired": today(), "IssueStatus": "Open"} else: newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": today(), "IssueStatus": "Open"} myDB.upsert("magazines", newValueDict, controlValueDict) # dest_path is where we put the magazine after processing, but we don't have the full filename # so look for any "book" in that directory dest_file = book_file(dest_path, booktype='mag') controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']} newValueDict = {"IssueAcquired": today(), "IssueFile": dest_file, "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo'])) } myDB.upsert("issues", newValueDict, controlValueDict) # create a thumbnail cover for the new issue create_cover(dest_file) # calibre or ll copied/moved the files we want, now delete source files to_delete = True if book['NZBmode'] in ['torrent', 'magnet']: # Only delete torrents if we don't want to keep seeding if lazylibrarian.KEEP_SEEDING: logger.warn('%s is seeding %s %s' % (book['Source'], book['NZBmode'], book['NZBtitle'])) to_delete = False else: # ask downloader to delete the torrent, but not the files # we may delete them later, depending on other settings if book['DownloadID'] != "unknown": logger.debug('Removing %s from %s' % (book['NZBtitle'], book['Source'].lower())) delete_task(book['Source'], book['DownloadID'], False) else: logger.warn("Unable to remove %s from %s, no DownloadID" % (book['NZBtitle'], book['Source'].lower())) if to_delete: # only delete the files if not in download root dir and if DESTINATION_COPY not set if not lazylibrarian.DESTINATION_COPY and (pp_path != processpath): if os.path.isdir(pp_path): # calibre might have already deleted it? try: shutil.rmtree(pp_path) except Exception as why: logger.debug("Unable to remove %s, %s" % (pp_path, str(why))) logger.info('Successfully processed: %s' % global_name) ppcount = ppcount + 1 notify_download("%s from %s at %s" % (global_name, book['NZBprov'], now())) else: logger.error('Postprocessing for %s has failed.' % global_name) logger.error('Warning - Residual files remain in %s.fail' % pp_path) controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"} newValueDict = {"Status": "Failed", "NZBDate": now()} myDB.upsert("wanted", newValueDict, controlValueDict) # if it's a book, reset status so we try for a different version # if it's a magazine, user can select a different one from pastissues table if bookname: myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % book['BookID']) # at this point, as it failed we should move it or it will get postprocessed # again (and fail again) try: os.rename(pp_path, pp_path + '.fail') except Exception as e: logger.debug("Unable to rename %s, %s" % (pp_path, str(e))) downloads = os.listdir(processpath) # check in case we processed/deleted some above for directory in downloads: dname, extn = os.path.splitext(directory) if "LL.(" in dname and extn not in ['.fail', '.part', '.bts', '.!ut']: bookID = str(directory).split("LL.(")[1].split(")")[0] logger.debug("Book with id: " + str(bookID) + " found in download directory") pp_path = os.path.join(processpath, directory) if os.path.isfile(pp_path): pp_path = os.path.join(processpath) if (os.path.isdir(pp_path)): if import_book(pp_path, bookID): ppcount = ppcount + 1 if ppcount == 0: logger.info('No snatched books/mags have been found') else: logger.info('%s book%s/mag%s processed.' % (ppcount, plural(ppcount), plural(ppcount))) # Now check for any that are still marked snatched... if lazylibrarian.TASK_AGE: snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"') if len(snatched) > 0: for snatch in snatched: # FUTURE: we could check percentage downloaded or eta? # if percentage is increasing, it's just slow try: when_snatched = time.strptime(snatch['NZBdate'], '%Y-%m-%d %H:%M:%S') when_snatched = time.mktime(when_snatched) diff = time.time() - when_snatched # time difference in seconds except: diff = 0 hours = int(diff / 3600) if hours >= lazylibrarian.TASK_AGE: logger.warn('%s was sent to %s %s hours ago, deleting failed task' % (snatch['NZBtitle'], snatch['Source'].lower(), hours)) # change status to "Failed", and ask downloader to delete task and files if snatch['BookID'] != 'unknown': myDB.action('UPDATE wanted SET Status="Failed" WHERE BookID="%s"' % snatch['BookID']) myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % snatch['BookID']) delete_task(snatch['Source'], snatch['DownloadID'], True) if reset: scheduleJob(action='Restart', target='processDir') except Exception as e: logger.error('Unhandled exception in processDir: %s' % traceback.format_exc())
def processDir(): # rename this thread threading.currentThread().name = "POSTPROCESS" processpath = lazylibrarian.DOWNLOAD_DIR logger.debug(' Checking [%s] for files to post process' % processpath) #TODO - try exception on os.listdir - it throws debug level #exception if dir doesn't exist - bloody hard to catch try : downloads = os.listdir(processpath) except OSError: logger.error('Could not access [%s] directory ' % processpath) myDB = database.DBConnection() snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"') if snatched is None: logger.info('No books are snatched. Nothing to process.') elif downloads is None: logger.info('No downloads are found. Nothing to process.') else: ppcount=0 for book in snatched: if book['NZBtitle'] in downloads: pp_path = os.path.join(processpath, book['NZBtitle']) logger.info('Found folder %s.' % pp_path) data = myDB.select("SELECT * from books WHERE BookID='%s'" % book['BookID']) if data: for metadata in data: authorname = metadata['AuthorName'] authorimg = metadata['AuthorLink'] bookname = metadata['BookName'] bookdesc = metadata['BookDesc'] bookisbn = metadata['BookIsbn'] bookrate = metadata['BookRate'] bookimg = metadata['BookImg'] bookpage = metadata['BookPages'] booklink = metadata['BookLink'] bookdate = metadata['BookDate'] booklang = metadata['BookLang'] bookpub = metadata['BookPub'] #Default destination path, should be allowed change per config file. dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace('$Title', bookname) #dest_path = authorname+'/'+bookname global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace('$Title', bookname) #global_name = bookname + ' - ' + authorname else: data = myDB.select("SELECT * from magazines WHERE Title='%s'" % book['BookID']) for metadata in data: title = metadata['Title'] #AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple #files are downloading, there will be an error in post-processing, trying to go to the #same directory. dest_path = lazylibrarian.MAG_DEST_FOLDER.replace('$IssueDate', book['AuxInfo']).replace('$Title', title) #dest_path = '_Magazines/'+title+'/'+book['AuxInfo'] authorname = None bookname = None global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace('$Title', title) #global_name = book['AuxInfo']+' - '+title else: logger.info("Snatched NZB %s is not in download directory" % (book['NZBtitle'])) continue try: os.chmod(os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING), 0777); except Exception, e: logger.debug("Could not chmod post-process directory"); dic = {'<':'', '>':'', '...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':'', ':':'', ';':'', '\'':''} dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic)) dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING) processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name) if processBook: ppcount = ppcount+1 # If you use auto add by Calibre you need the book in a single directory, not nested #So take the file you Copied/Moved to Dest_path and copy it to a Calibre auto add folder. if lazylibrarian.IMP_AUTOADD: processAutoAdd(dest_path) #update nzbs controlValueDict = {"NZBurl": book['NZBurl']} newValueDict = {"Status": "Processed"} myDB.upsert("wanted", newValueDict, controlValueDict) # try image if bookname is not None: processIMG(dest_path, bookimg, global_name) # try metadata processOPF(dest_path, authorname, bookname, bookisbn, book['BookID'], bookpub, bookdate, bookdesc, booklang, global_name) #update books controlValueDict = {"BookID": book['BookID']} newValueDict = {"Status": "Open"} myDB.upsert("books", newValueDict, controlValueDict) #update authors query = 'SELECT COUNT(*) FROM books WHERE AuthorName="%s" AND (Status="Have" OR Status="Open")' % authorname countbooks = myDB.action(query).fetchone() havebooks = int(countbooks[0]) controlValueDict = {"AuthorName": authorname} newValueDict = {"HaveBooks": havebooks} author_query = 'SELECT * FROM authors WHERE AuthorName="%s"' % authorname countauthor = myDB.action(author_query).fetchone() if countauthor: myDB.upsert("authors", newValueDict, controlValueDict) else: #update mags controlValueDict = {"Title": book['BookID']} newValueDict = {"IssueStatus": "Open"} myDB.upsert("magazines", newValueDict, controlValueDict) logger.info('Successfully processed: %s' % (global_name)) notifiers.notify_download(global_name+' at '+formatter.now()) else: logger.error('Postprocessing for %s has failed. Warning - AutoAdd will be repeated' % global_name) if ppcount: logger.debug('%s books are downloaded and processed.' % ppcount) else: logger.debug('No snatched books have been found')
def processDir(reset=False): threadname = threading.currentThread().name if "Thread-" in threadname: threading.currentThread().name = "POSTPROCESS" if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR): processpath = os.getcwd() else: processpath = lazylibrarian.DOWNLOAD_DIR logger.debug(' Checking [%s] for files to post process' % processpath) try: downloads = os.listdir(processpath) except OSError as why: logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror)) return myDB = database.DBConnection() snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"') if len(snatched) == 0: logger.info('Nothing marked as snatched.') scheduleJob(action='Stop', target='processDir') return if len(downloads) == 0: logger.info('No downloads are found. Nothing to process.') return logger.info("Checking %s download%s for %s snatched file%s" % (len(downloads), plural(len(downloads)), len(snatched), plural(len(snatched)))) ppcount = 0 for book in snatched: matches = [] for fname in downloads: if not fname.endswith('.fail'): # has this failed before? # this is to get round differences in torrent filenames. # Torrents aren't always returned with the name we searched for # there might be a better way... if isinstance(fname, str): matchname = fname.decode(lazylibrarian.SYS_ENCODING) else: matchname = fname if ' LL.(' in matchname: matchname = matchname.split(' LL.(')[0] matchtitle = book['NZBtitle'] match = 0 if matchtitle: if ' LL.(' in matchtitle: matchtitle = matchtitle.split(' LL.(')[0] match = fuzz.token_set_ratio(matchtitle, matchname) if match >= lazylibrarian.DLOAD_RATIO: fname = matchname if os.path.isfile(os.path.join(processpath, fname)): # handle single file downloads here... if is_valid_booktype(fname, booktype="book") \ or is_valid_booktype(fname, booktype="mag"): dirname = os.path.join(processpath, os.path.splitext(fname)[0]) if not os.path.exists(dirname): try: os.makedirs(dirname) except OSError as why: logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror)) if os.path.exists(dirname): try: shutil.move(os.path.join(processpath, fname), os.path.join(dirname, fname)) fname = os.path.splitext(fname)[0] except Exception as why: logger.debug("Failed to move file %s to %s, %s" % (fname, dirname, str(why))) if os.path.isdir(os.path.join(processpath, fname)): pp_path = os.path.join(processpath, fname) logger.debug('Found folder (%s%%) %s for %s' % (match, pp_path, book['NZBtitle'])) matches.append([match, pp_path, book]) else: logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle)) else: logger.debug('Skipping %s' % fname) if matches: highest = max(matches, key=lambda x: x[0]) match = highest[0] pp_path = highest[1] book = highest[2] logger.info(u'Best match (%s%%): %s for %s' % (match, pp_path, book['NZBtitle'])) data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID']) if data: authorname = data[0]['AuthorName'] bookname = data[0]['BookName'] if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER: logger.warn('Please check your EBOOK_DEST_FOLDER setting') lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\') # Default destination path, should be allowed change per config file. dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace( '$Title', bookname) global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace( '$Title', bookname) global_name = unaccented(global_name) # dest_path = authorname+'/'+bookname # global_name = bookname + ' - ' + authorname # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere? dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''} dest_path = unaccented_str(replace_all(dest_path, dic)) dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode( lazylibrarian.SYS_ENCODING) else: data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID']) if data: # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple # files are downloading, there will be an error in post-processing, trying to go to the # same directory. mostrecentissue = data[0]['IssueDate'] # keep for processing issues arriving out of order # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere? dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''} mag_name = unaccented_str(replace_all(book['BookID'], dic)) # book auxinfo is a cleaned date, eg 2015-01-01 dest_path = lazylibrarian.MAG_DEST_FOLDER.replace( '$IssueDate', book['AuxInfo']).replace('$Title', mag_name) # dest_path = '_Magazines/'+title+'/'+book['AuxInfo'] if lazylibrarian.MAG_RELATIVE: if dest_path[0] not in '._': dest_path = '_' + dest_path dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode( lazylibrarian.SYS_ENCODING) else: dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING) authorname = None bookname = None global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace( '$Title', mag_name) global_name = unaccented(global_name) # global_name = book['AuxInfo']+' - '+title else: logger.debug("Snatched magazine %s is not in download directory" % (book['BookID'])) continue else: logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle'])) continue processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name) if processBook: logger.debug("Processing %s, %s" % (global_name, book['NZBurl'])) # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"} newValueDict = {"Status": "Processed", "NZBDate": now()} # say when we processed it myDB.upsert("wanted", newValueDict, controlValueDict) if bookname is not None: # it's a book, if None it's a magazine if len(lazylibrarian.IMP_CALIBREDB): logger.debug('Calibre should have created the extras for us') else: processExtras(myDB, dest_path, global_name, data) else: # update mags controlValueDict = {"Title": book['BookID']} if mostrecentissue: if mostrecentissue.isdigit() and str(book['AuxInfo']).isdigit(): older = int(mostrecentissue) > int(book['AuxInfo']) # issuenumber else: older = mostrecentissue > book['AuxInfo'] # YYYY-MM-DD else: older = False if older: # check this in case processing issues arriving out of order newValueDict = {"LastAcquired": today(), "IssueStatus": "Open"} else: newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": today(), "IssueStatus": "Open"} myDB.upsert("magazines", newValueDict, controlValueDict) # dest_path is where we put the magazine after processing, but we don't have the full filename # so look for any "book" in that directory dest_file = book_file(dest_path, booktype='mag') controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']} newValueDict = {"IssueAcquired": today(), "IssueFile": dest_file, "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo'])) } myDB.upsert("issues", newValueDict, controlValueDict) # create a thumbnail cover for the new issue create_cover(dest_file) logger.info('Successfully processed: %s' % global_name) ppcount = ppcount + 1 notify_download("%s at %s" % (global_name, now())) else: logger.error('Postprocessing for %s has failed.' % global_name) logger.error('Warning - Residual files remain in %s.fail' % pp_path) controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"} newValueDict = {"Status": "Failed", "NZBDate": now()} myDB.upsert("wanted", newValueDict, controlValueDict) # if it's a book, reset status so we try for a different version # if it's a magazine, user can select a different one from pastissues table if bookname is not None: myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % book['BookID']) # at this point, as it failed we should move it or it will get postprocessed # again (and fail again) try: os.rename(pp_path, pp_path + '.fail') except: logger.debug("Unable to rename %s" % pp_path) downloads = os.listdir(processpath) # check in case we processed/deleted some above for directory in downloads: if "LL.(" in directory and not directory.endswith('.fail'): bookID = str(directory).split("LL.(")[1].split(")")[0] logger.debug("Book with id: " + str(bookID) + " is in downloads") pp_path = os.path.join(processpath, directory) if os.path.isfile(pp_path): pp_path = os.path.join(processpath) if (os.path.isdir(pp_path)): logger.debug('Found LL folder %s.' % pp_path) if import_book(pp_path, bookID): ppcount = ppcount + 1 if ppcount == 0: logger.info('No snatched books/mags have been found') else: logger.info('%s book%s/mag%s processed.' % (ppcount, plural(ppcount), plural(ppcount))) if reset: scheduleJob(action='Restart', target='processDir')
def processResultList(resultlist, book, searchtype): myDB = database.DBConnection() dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '', '-': ' ', '\s\s': ' '} dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '.', ';': ''} match_ratio = int(lazylibrarian.CONFIG['MATCH_RATIO']) reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS']) author = unaccented_str(replace_all(book['authorName'], dic)) title = unaccented_str(replace_all(book['bookName'], dic)) matches = [] for tor in resultlist: torTitle = unaccented_str(tor['tor_title']) torTitle = replace_all(torTitle, dictrepl).strip() torTitle = re.sub(r"\s\s+", " ", torTitle) # remove extra whitespace torAuthor_match = fuzz.token_set_ratio(author, torTitle) torBook_match = fuzz.token_set_ratio(title, torTitle) logger.debug(u"TOR author/book Match: %s/%s for %s" % (torAuthor_match, torBook_match, torTitle)) tor_url = tor['tor_url'] rejected = False already_failed = myDB.match('SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' % tor_url) if already_failed: logger.debug("Rejecting %s, blacklisted at %s" % (torTitle, already_failed['NZBprov'])) rejected = True if not rejected: for word in reject_list: if word in torTitle.lower() and word not in author.lower() and word not in title.lower(): rejected = True logger.debug("Rejecting %s, contains %s" % (torTitle, word)) break tor_size_temp = tor['tor_size'] # Need to cater for when this is NONE (Issue 35) tor_size_temp = check_int(tor_size_temp, 1000) tor_size = round(float(tor_size_temp) / 1048576, 2) maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXSIZE'], 0) if not rejected: if maxsize and tor_size > maxsize: rejected = True logger.debug("Rejecting %s, too large" % torTitle) minsize = check_int(lazylibrarian.CONFIG['REJECT_MINSIZE'], 0) if not rejected: if minsize and tor_size < minsize: rejected = True logger.debug("Rejecting %s, too small" % torTitle) if not rejected: bookid = book['bookid'] tor_Title = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip() controlValueDict = {"NZBurl": tor_url} newValueDict = { "NZBprov": tor['tor_prov'], "BookID": bookid, "NZBdate": now(), # when we asked for it "NZBsize": tor_size, "NZBtitle": tor_Title, "NZBmode": "torrent", "Status": "Skipped" } score = (torBook_match + torAuthor_match) / 2 # as a percentage # lose a point for each extra word in the title so we get the closest match words = len(getList(torTitle)) words -= len(getList(author)) words -= len(getList(title)) score -= abs(words) matches.append([score, torTitle, newValueDict, controlValueDict]) if matches: highest = max(matches, key=lambda x: x[0]) score = highest[0] nzb_Title = highest[1] newValueDict = highest[2] controlValueDict = highest[3] if score < match_ratio: logger.info(u'Nearest TOR match (%s%%): %s using %s search for %s %s' % (score, nzb_Title, searchtype, author, title)) return False logger.info(u'Best TOR match (%s%%): %s using %s search' % (score, nzb_Title, searchtype)) snatchedbooks = myDB.match('SELECT BookID from books WHERE BookID="%s" and Status="Snatched"' % newValueDict["BookID"]) if snatchedbooks: logger.debug('%s already marked snatched' % nzb_Title) return True # someone else found it, not us else: myDB.upsert("wanted", newValueDict, controlValueDict) if newValueDict["NZBprov"] == 'libgen': # for libgen we use direct download links snatch = DirectDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"], controlValueDict["NZBurl"], nzb_Title) else: snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"], controlValueDict["NZBurl"]) if snatch: logger.info('Downloading %s from %s' % (newValueDict["NZBtitle"], newValueDict["NZBprov"])) notify_snatch("%s from %s at %s" % (newValueDict["NZBtitle"], newValueDict["NZBprov"], now())) custom_notify_snatch(newValueDict["BookID"]) scheduleJob(action='Start', target='processDir') return True + True # we found it else: logger.debug("No torrent's found for [%s] using searchtype %s" % (book["searchterm"], searchtype)) return False
def search_tor_book(books=None, mags=None): if not(lazylibrarian.USE_TOR): return # rename this thread threading.currentThread().name = "SEARCHTORBOOKS" myDB = database.DBConnection() searchlist = [] searchlist1 = [] if books is None: # We are performing a backlog search searchbooks = myDB.select('SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"') # Clear cache if os.path.exists(".ProviderCache"): for f in os.listdir(".ProviderCache"): os.unlink("%s/%s" % (".ProviderCache", f)) # Clearing throttling timeouts t = SimpleCache.ThrottlingProcessor() t.lastRequestTime.clear() else: # The user has added a new book searchbooks = [] if books != False: for book in books: searchbook = myDB.select('SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"', [book['bookid']]) for terms in searchbook: searchbooks.append(terms) for searchbook in searchbooks: bookid = searchbook[0] author = searchbook[1] book = searchbook[2] dic = {'...':'', '.':' ', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':'', ':':'', ';':''} dicSearchFormatting = {'.':' +', ' + ':' '} author = formatter.latinToAscii(formatter.replace_all(author, dic)) book = formatter.latinToAscii(formatter.replace_all(book, dic)) # TRY SEARCH TERM just using author name and book type author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting)) searchterm = author + ' ' + book # + ' ' + lazylibrarian.EBOOK_TYPE searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode('utf-8') searchterm = re.sub(r'\(.*?\)', '', searchterm).encode('utf-8') searchterm = re.sub(r"\s\s+" , " ", searchterm) # strip any double white space searchlist.append({"bookid": bookid, "bookName":searchbook[2], "authorName":searchbook[1], "searchterm": searchterm.strip()}) if not lazylibrarian.KAT: logger.info('No download method is set, use SABnzbd or blackhole') counter = 0 for book in searchlist: #print book.keys() resultlist = providers.IterateOverTorrentSites(book,'book') #if you can't find teh book specifically, you might find under general search if not resultlist: logger.info("Searching for type book failed to find any books...moving to general search") resultlist = providers.IterateOverTorrentSites(book,'general') if not resultlist: logger.debug("No result found, Adding book %s to queue " % book['searchterm']) else: dictrepl = {'...':'', '.':' ', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':'', '(':'', ')':'', '[':'', ']':'', '#':'', '0':'', '1':'', '2':'', '3':'', '4':'', '5':'', '6':'', '7':'', '8':'' , '9':'', '\'':'', ':':'', '!':'', '-':'', '\s\s':' ', ' the ':' ', ' a ':' ', ' and ':' ', ' to ':' ', ' of ':' ', ' for ':' ', ' my ':' ', ' in ':' ', ' at ':' ', ' with ':' ' } logger.debug(u'searchterm %s' % book['searchterm']) addedCounter = 0 for tor in resultlist: tor_Title = formatter.latinToAscii(formatter.replace_all(str(tor['tor_title']).lower(), dictrepl)).strip() tor_Title = re.sub(r"\s\s+" , " ", tor_Title) #remove extra whitespace logger.debug(u'torName %s' % tor_Title) match_ratio = int(lazylibrarian.MATCH_RATIO) tor_Title_match = fuzz.token_sort_ratio(book['searchterm'].lower(), tor_Title) logger.debug("Torrent Title Match %: " + str(tor_Title_match)) if (tor_Title_match > match_ratio): logger.info(u'Found Torrent: %s' % tor['tor_title']) addedCounter = addedCounter + 1 bookid = book['bookid'] tor_Title = (book["authorName"] + ' - ' + book['bookName'] + ' LL.(' + book['bookid'] + ')').strip() tor_url = tor['tor_url'] tor_prov = tor['tor_prov'] tor_size_temp = tor['tor_size'] #Need to cater for when this is NONE (Issue 35) if tor_size_temp is None: tor_size_temp = 1000 tor_size = str(round(float(tor_size_temp) / 1048576,2))+' MB' controlValueDict = {"NZBurl": tor_url} newValueDict = { "NZBprov": tor_prov, "BookID": bookid, "NZBsize": tor_size, "NZBtitle": tor_Title, "Status": "Skipped" } myDB.upsert("wanted", newValueDict, controlValueDict) snatchedbooks = myDB.action('SELECT * from books WHERE BookID=? and Status="Snatched"', [bookid]).fetchone() if not snatchedbooks: snatch = DownloadMethod(bookid, tor_prov, tor_Title, tor_url) notifiers.notify_snatch(tor_Title+' at '+formatter.now()) break; if addedCounter == 0: logger.info("No torrent's found for " + (book["authorName"] + ' ' + book['bookName']).strip() + ". Adding book to queue.") counter = counter + 1 # if not books or books==False: # snatched = searchmag.searchmagazines(mags) # for items in snatched: # snatch = DownloadMethod(items['bookid'], items['tor_prov'], items['tor_title'], items['tor_url']) # notifiers.notify_snatch(items['tor_title']+' at '+formatter.now()) logger.info("Search for Wanted items complete")
def processDir(force=False, reset=False): # rename this thread threading.currentThread().name = "POSTPROCESS" if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR): processpath = os.getcwd() else: processpath = lazylibrarian.DOWNLOAD_DIR logger.debug(' Checking [%s] for files to post process' % processpath) try: downloads = os.listdir(processpath) except OSError: logger.error('Could not access [%s] directory ' % processpath) return False myDB = database.DBConnection() snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"') if force is False and len(snatched) == 0: logger.info('Nothing marked as snatched. Stopping postprocessor job.') common.schedule_job(action='Stop', target='processDir') elif len(downloads) == 0: logger.info('No downloads are found. Nothing to process.') else: ppcount = 0 for book in snatched: found = False for fname in downloads: if not fname.endswith('.fail'): # has this failed before? # this is to get round unicode differences in torrent filenames. # there might be a better way... if isinstance(fname, str): matchname = fname.decode('utf-8') else: matchname = fname if 'LL.(' in matchname: matchname = matchname.split('LL.(')[0] match = fuzz.token_set_ratio(matchname, book['NZBtitle']) if match >= 95: pp_path = os.path.join(processpath, fname) logger.debug('Found folder %s for %s' % (pp_path, book['NZBtitle'])) found = True break if found: data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID']) if data: authorname = data[0]['AuthorName'] bookname = data[0]['BookName'] if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER: logger.warn('Please check your EBOOK_DEST_FOLDER setting') lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\') # Default destination path, should be allowed change per config file. dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace( '$Title', bookname) global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace( '$Title', bookname) # dest_path = authorname+'/'+bookname # global_name = bookname + ' - ' + authorname # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere? dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''} dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic)) dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode( lazylibrarian.SYS_ENCODING) else: data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID']) if data: # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple # files are downloading, there will be an error in post-processing, trying to go to the # same directory. mostrecentissue = data[0]['IssueDate'] # keep for processing issues arriving out of order # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere? dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''} mag_name = formatter.latinToAscii(formatter.replace_all(book['BookID'], dic)) # book auxinfo is a cleaned date, eg 2015-01-01 dest_path = lazylibrarian.MAG_DEST_FOLDER.replace('$IssueDate', book['AuxInfo']).replace('$Title', mag_name) # dest_path = '_Magazines/'+title+'/'+book['AuxInfo'] if lazylibrarian.MAG_RELATIVE: if dest_path[0] not in '._': dest_path = '_' + dest_path dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode( lazylibrarian.SYS_ENCODING) else: dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING) authorname = None bookname = None global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace( '$Title', mag_name) # global_name = book['AuxInfo']+' - '+title else: logger.debug("Snatched magazine %s is not in download directory" % (book['BookID'])) continue else: logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle'])) continue # try: # os.chmod(dest_path, 0777) # except Exception, e: # logger.debug("Could not chmod post-process directory: " + str(dest_path)) processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name) if processBook: logger.debug("Processing %s, %s" % (global_name, book['NZBurl'])) # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"} newValueDict = {"Status": "Processed", "NZBDate": formatter.now()} # say when we processed it myDB.upsert("wanted", newValueDict, controlValueDict) if bookname is not None: # it's a book, if None it's a magazine processExtras(myDB, dest_path, global_name, data) else: # update mags controlValueDict = {"Title": book['BookID']} if mostrecentissue > book['AuxInfo']: # check this in case processing issues arriving out of order newValueDict = {"LastAcquired": formatter.today(), "IssueStatus": "Open"} else: newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": formatter.today(), "IssueStatus": "Open"} myDB.upsert("magazines", newValueDict, controlValueDict) # dest_path is where we put the magazine after processing, but we don't have the full filename # so look for any "book" in that directory dest_file = book_file(dest_path, booktype='mag') controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']} newValueDict = {"IssueAcquired": formatter.today(), "IssueFile": dest_file, "IssueID" : magazinescan.create_id("%s %s" % (book['BookID'], book['AuxInfo'])) } myDB.upsert("issues", newValueDict, controlValueDict) # create a thumbnail cover for the new issue magazinescan.create_cover(dest_file) logger.info('Successfully processed: %s' % global_name) ppcount = ppcount + 1 notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now()) else: logger.error('Postprocessing for %s has failed.' % global_name) logger.error('Warning - Residual files remain in %s.fail' % pp_path) # at this point, as it failed we should move it or it will get postprocessed # again (and fail again) try: os.rename(pp_path, pp_path + '.fail') except: logger.debug("Unable to rename %s" % pp_path) downloads = os.listdir(processpath) # check in case we processed/deleted some above for directory in downloads: if "LL.(" in directory and not directory.endswith('.fail'): bookID = str(directory).split("LL.(")[1].split(")")[0] logger.debug("Book with id: " + str(bookID) + " is in downloads") pp_path = os.path.join(processpath, directory) if os.path.isfile(pp_path): pp_path = os.path.join(processpath) if (os.path.isdir(pp_path)): logger.debug('Found LL folder %s.' % pp_path) if import_book(pp_path, bookID): ppcount = ppcount + 1 if ppcount: logger.info('%s books/mags have been processed.' % ppcount) else: logger.info('No snatched books/mags have been found') if reset == True: common.schedule_job(action='Restart', target='processDir')
def findBestResult(resultlist, book, searchtype, source): """ resultlist: collated results from search providers book: the book we want to find searchtype: book, magazine, shortbook, audiobook etc. source: nzb, tor, rss, direct return: highest scoring match, or None if no match """ # noinspection PyBroadException try: myDB = database.DBConnection() dictrepl = { '...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '', '-': ' ', '\s\s': ' ' } dic = { '...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '.', ';': '', '\'': '' } if source == 'rss': author, title = get_searchterm(book, searchtype) else: author = unaccented_str(replace_all(book['authorName'], dic)) title = unaccented_str(replace_all(book['bookName'], dic)) if book['library'] == 'AudioBook': reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO']) maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXAUDIO'], 0) minsize = check_int(lazylibrarian.CONFIG['REJECT_MINAUDIO'], 0) auxinfo = 'AudioBook' else: # elif book['library'] == 'eBook': reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS']) maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXSIZE'], 0) minsize = check_int(lazylibrarian.CONFIG['REJECT_MINSIZE'], 0) auxinfo = 'eBook' if source == 'nzb': prefix = 'nzb' else: # rss and libgen return same names as torrents prefix = 'tor_' logger.debug('Searching %s %s results for best %s match' % (len(resultlist), source, auxinfo)) matches = [] for res in resultlist: resultTitle = unaccented_str( replace_all(res[prefix + 'title'], dictrepl)).strip() resultTitle = re.sub(r"\s\s+", " ", resultTitle) # remove extra whitespace Author_match = fuzz.token_set_ratio(author, resultTitle) Book_match = fuzz.token_set_ratio(title, resultTitle) if lazylibrarian.LOGLEVEL & lazylibrarian.log_fuzz: logger.debug("%s author/book Match: %s/%s %s at %s" % (source.upper(), Author_match, Book_match, resultTitle, res[prefix + 'prov'])) rejected = False url = res[prefix + 'url'] if url is None: rejected = True logger.debug("Rejecting %s, no URL found" % resultTitle) if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']: already_failed = myDB.match( 'SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (url, )) if already_failed: logger.debug("Rejecting %s, blacklisted at %s" % (resultTitle, already_failed['NZBprov'])) rejected = True if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']: already_failed = myDB.match( 'SELECT * from wanted WHERE NZBurl=?', (url, )) if already_failed: logger.debug("Rejecting %s, blacklisted at %s" % (resultTitle, already_failed['NZBprov'])) rejected = True if not rejected and not url.startswith( 'http') and not url.startswith('magnet'): rejected = True logger.debug("Rejecting %s, invalid URL [%s]" % (resultTitle, url)) if not rejected: for word in reject_list: if word in getList(resultTitle.lower()) and word not in getList(author.lower()) \ and word not in getList(title.lower()): rejected = True logger.debug("Rejecting %s, contains %s" % (resultTitle, word)) break size_temp = check_int( res[prefix + 'size'], 1000) # Need to cater for when this is NONE (Issue 35) size = round(float(size_temp) / 1048576, 2) if not rejected and maxsize and size > maxsize: rejected = True logger.debug("Rejecting %s, too large" % resultTitle) if not rejected and minsize and size < minsize: rejected = True logger.debug("Rejecting %s, too small" % resultTitle) if not rejected: bookid = book['bookid'] # newTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip() # newTitle = resultTitle + ' LL.(' + book['bookid'] + ')' if source == 'nzb': mode = res['nzbmode'] # nzb, torznab else: mode = res[ 'tor_type'] # torrent, magnet, nzb(from rss), direct controlValueDict = {"NZBurl": url} newValueDict = { "NZBprov": res[prefix + 'prov'], "BookID": bookid, "NZBdate": now(), # when we asked for it "NZBsize": size, "NZBtitle": resultTitle, "NZBmode": mode, "AuxInfo": auxinfo, "Status": "Skipped" } score = (Book_match + Author_match) / 2 # as a percentage # lose a point for each unwanted word in the title so we get the closest match # but for RSS ignore anything at the end in square braces [keywords, genres etc] if source == 'rss': wordlist = getList(resultTitle.rsplit('[', 1)[0].lower()) else: wordlist = getList(resultTitle.lower()) words = [ x for x in wordlist if x not in getList(author.lower()) ] words = [x for x in words if x not in getList(title.lower())] typelist = '' if newValueDict['AuxInfo'] == 'eBook': words = [ x for x in words if x not in getList(lazylibrarian.CONFIG['EBOOK_TYPE']) ] typelist = getList(lazylibrarian.CONFIG['EBOOK_TYPE']) elif newValueDict['AuxInfo'] == 'AudioBook': words = [ x for x in words if x not in getList( lazylibrarian.CONFIG['AUDIOBOOK_TYPE']) ] typelist = getList(lazylibrarian.CONFIG['AUDIOBOOK_TYPE']) score -= len(words) # prioritise titles that include the ebook types we want # add more points for booktypes nearer the left in the list # eg if epub, mobi, pdf add 3 points if epub found, 2 for mobi, 1 for pdf booktypes = [x for x in wordlist if x in typelist] if booktypes: typelist = list(reversed(typelist)) for item in booktypes: for i in [ i for i, x in enumerate(typelist) if x == item ]: score += i + 1 matches.append( [score, newValueDict, controlValueDict, res['priority']]) if matches: highest = max(matches, key=lambda s: (s[0], s[3])) score = highest[0] newValueDict = highest[1] # controlValueDict = highest[2] dlpriority = highest[3] if score < int(lazylibrarian.CONFIG['MATCH_RATIO']): logger.info( 'Nearest match (%s%%): %s using %s search for %s %s' % (score, newValueDict['NZBtitle'], searchtype, book['authorName'], book['bookName'])) else: logger.info( 'Best match (%s%%): %s using %s search, %s priority %s' % (score, newValueDict['NZBtitle'], searchtype, newValueDict['NZBprov'], dlpriority)) return highest else: logger.debug("No %s found for [%s] using searchtype %s" % (source, book["searchterm"], searchtype)) return None except Exception: logger.error('Unhandled exception in findBestResult: %s' % traceback.format_exc())
def processResultList(resultlist, book, searchtype): myDB = database.DBConnection() dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '', '-': '', '\s\s': ' ', ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ', ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '} match_ratio = int(lazylibrarian.MATCH_RATIO) for nzb in resultlist: nzbTitle = formatter.latinToAscii(formatter.replace_all(nzb['nzbtitle'], dictrepl)).strip() nzbTitle = re.sub(r"\s\s+", " ", nzbTitle) # remove extra whitespace #nzbTitle_match = fuzz.token_set_ratio(book['searchterm'], nzbTitle) #logger.debug(u"NZB Title sort Match %: " + str(nzbTitle_match) + " for " + nzbTitle) if searchtype == 'book' or searchtype == 'shortbook': nzbTitle_match = fuzz.token_set_ratio(book['searchterm'], nzbTitle) logger.debug(u"NZB token set Match %: " + str(nzbTitle_match) + " for " + nzbTitle) elif searchtype == 'author': nzbTitle_match = fuzz.token_set_ratio(book['authorName'].encode('utf-8'), nzbTitle) logger.debug(u"NZB author Match %: " + str(nzbTitle_match) + " for " + nzbTitle) if nzbTitle_match > match_ratio: nzbTitle_match = fuzz.token_set_ratio(book['bookName'].encode('utf-8'), nzbTitle) logger.debug(u"NZB book Match %: " + str(nzbTitle_match) + " for " + nzbTitle) else: # searchtype == 'general': nzbTitle_match = fuzz.token_set_ratio(book['searchterm'], nzbTitle) logger.debug(u"NZB Title general Match %: " + str(nzbTitle_match) + " for " + nzbTitle) if (nzbTitle_match > match_ratio): logger.debug(u'Found NZB: %s using %s search' % (nzb['nzbtitle'], searchtype)) bookid = book['bookid'] nzbTitle = (book["authorName"] + ' - ' + book['bookName'] + ' LL.(' + book['bookid'] + ')').strip() nzburl = nzb['nzburl'] nzbprov = nzb['nzbprov'] nzbdate_temp = nzb['nzbdate'] nzbsize_temp = nzb['nzbsize'] # Need to cater for when this is NONE (Issue 35) if nzbsize_temp is None: nzbsize_temp = 1000 nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + ' MB' nzbdate = formatter.nzbdate2format(nzbdate_temp) nzbmode = nzb['nzbmode'] controlValueDict = {"NZBurl": nzburl} newValueDict = { "NZBprov": nzbprov, "BookID": bookid, "NZBdate": nzbdate, "NZBsize": nzbsize, "NZBtitle": nzbTitle, "NZBmode": nzbmode, "Status": "Skipped" } myDB.upsert("wanted", newValueDict, controlValueDict) snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' % bookid).fetchone() if not snatchedbooks: if nzbmode == "torznab": snatch = TORDownloadMethod(bookid, nzbprov, nzbTitle, nzburl) else: snatch = NZBDownloadMethod(bookid, nzbprov, nzbTitle, nzburl) if snatch: notifiers.notify_snatch(formatter.latinToAscii(nzbTitle) + ' at ' + formatter.now()) postprocess.schedule_processor(action='Start') return True logger.debug("No nzb's found for " + (book["authorName"] + ' ' + book['bookName']).strip() + " using searchtype " + searchtype) return False
def search_tor_book(books=None, mags=None): if not (lazylibrarian.USE_TOR): return # rename this thread threading.currentThread().name = "SEARCHTORBOOKS" myDB = database.DBConnection() searchlist = [] searchlist1 = [] if books is None: # We are performing a backlog search searchbooks = myDB.select( 'SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"' ) # Clear cache if os.path.exists(".ProviderCache"): for f in os.listdir(".ProviderCache"): os.unlink("%s/%s" % (".ProviderCache", f)) # Clearing throttling timeouts t = SimpleCache.ThrottlingProcessor() t.lastRequestTime.clear() else: # The user has added a new book searchbooks = [] if books != False: for book in books: searchbook = myDB.select( 'SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"', [book['bookid']]) for terms in searchbook: searchbooks.append(terms) for searchbook in searchbooks: bookid = searchbook[0] author = searchbook[1] book = searchbook[2] dic = { '...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '' } dicSearchFormatting = {'.': ' +', ' + ': ' '} author = formatter.latinToAscii(formatter.replace_all(author, dic)) book = formatter.latinToAscii(formatter.replace_all(book, dic)) # TRY SEARCH TERM just using author name and book type author = formatter.latinToAscii( formatter.replace_all(author, dicSearchFormatting)) searchterm = author + ' ' + book # + ' ' + lazylibrarian.EBOOK_TYPE searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode('utf-8') searchterm = re.sub(r'\(.*?\)', '', searchterm).encode('utf-8') searchterm = re.sub(r"\s\s+", " ", searchterm) # strip any double white space searchlist.append({ "bookid": bookid, "bookName": searchbook[2], "authorName": searchbook[1], "searchterm": searchterm.strip() }) if not lazylibrarian.KAT: logger.info('No download method is set, use SABnzbd or blackhole') counter = 0 for book in searchlist: #print book.keys() resultlist = providers.IterateOverTorrentSites(book, 'book') #if you can't find teh book specifically, you might find under general search if not resultlist: logger.info( "Searching for type book failed to find any books...moving to general search" ) resultlist = providers.IterateOverTorrentSites(book, 'general') if not resultlist: logger.debug("Adding book %s to queue." % book['searchterm']) else: dictrepl = { '...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '', '-': '', '\s\s': ' ', ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ', ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' ' } logger.debug(u'searchterm %s' % book['searchterm']) addedCounter = 0 for tor in resultlist: tor_Title = formatter.latinToAscii( formatter.replace_all( str(tor['tor_title']).lower(), dictrepl)).strip() tor_Title = re.sub(r"\s\s+", " ", tor_Title) #remove extra whitespace logger.debug(u'torName %s' % tor_Title) match_ratio = int(lazylibrarian.MATCH_RATIO) tor_Title_match = fuzz.token_sort_ratio( book['searchterm'].lower(), tor_Title) logger.debug("Torrent Title Match %: " + str(tor_Title_match)) if (tor_Title_match > match_ratio): logger.info(u'Found Torrent: %s' % tor['tor_title']) addedCounter = addedCounter + 1 bookid = book['bookid'] tor_Title = (book["authorName"] + ' - ' + book['bookName'] + ' LL.(' + book['bookid'] + ')').strip() tor_url = tor['tor_url'] tor_prov = tor['tor_prov'] tor_size_temp = tor[ 'tor_size'] #Need to cater for when this is NONE (Issue 35) if tor_size_temp is None: tor_size_temp = 1000 tor_size = str(round(float(tor_size_temp) / 1048576, 2)) + ' MB' controlValueDict = {"NZBurl": tor_url} newValueDict = { "NZBprov": tor_prov, "BookID": bookid, "NZBsize": tor_size, "NZBtitle": tor_Title, "Status": "Skipped" } myDB.upsert("wanted", newValueDict, controlValueDict) snatchedbooks = myDB.action( 'SELECT * from books WHERE BookID=? and Status="Snatched"', [bookid]).fetchone() if not snatchedbooks: snatch = DownloadMethod(bookid, tor_prov, tor_Title, tor_url) notifiers.notify_snatch(tor_Title + ' at ' + formatter.now()) break if addedCounter == 0: logger.info("No torrent's found for " + (book["authorName"] + ' ' + book['bookName']).strip() + ". Adding book to queue.") counter = counter + 1 # if not books or books==False: # snatched = searchmag.searchmagazines(mags) # for items in snatched: # snatch = DownloadMethod(items['bookid'], items['tor_prov'], items['tor_title'], items['tor_url']) # notifiers.notify_snatch(items['tor_title']+' at '+formatter.now()) logger.info("Search for Wanted items complete")
def processResultList(resultlist, author, title, book): myDB = database.DBConnection() dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '', '-': ' ', '\s\s': ' '} # ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ', # ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '} match_ratio = int(lazylibrarian.MATCH_RATIO) reject_list = formatter.getList(lazylibrarian.REJECT_WORDS) matches = [] # bit of a misnomer now, rss can search both tor and nzb rss feeds for tor in resultlist: torTitle = formatter.latinToAscii(formatter.replace_all(tor['tor_title'], dictrepl)).strip() torTitle = re.sub(r"\s\s+", " ", torTitle) # remove extra whitespace tor_Author_match = fuzz.token_set_ratio(author, torTitle) tor_Title_match = fuzz.token_set_ratio(title, torTitle) logger.debug("RSS Author/Title Match: %s/%s for %s" % (tor_Author_match, tor_Title_match, torTitle)) rejected = False for word in reject_list: if word in torTitle.lower() and not word in author.lower() and not word in book.lower(): rejected = True logger.debug("Rejecting %s, contains %s" % (torTitle, word)) break tor_size_temp = tor['tor_size'] # Need to cater for when this is NONE (Issue 35) if tor_size_temp is None: tor_size_temp = 1000 tor_size = round(float(tor_size_temp) / 1048576, 2) maxsize = formatter.check_int(lazylibrarian.REJECT_MAXSIZE, 0) if maxsize and tor_size > maxsize: rejected = True logger.debug("Rejecting %s, too large" % torTitle) if (tor_Title_match >= match_ratio and tor_Author_match >= match_ratio and not rejected): #logger.debug(u'Found RSS: %s' % tor['tor_title']) bookid = book['bookid'] tor_Title = (book["authorName"] + ' - ' + book['bookName'] + ' LL.(' + book['bookid'] + ')').strip() tor_url = tor['tor_url'] tor_prov = tor['tor_prov'] tor_feed = tor['tor_feed'] controlValueDict = {"NZBurl": tor_url} newValueDict = { "NZBprov": tor_prov, "BookID": bookid, "NZBdate": formatter.now(), # when we asked for it "NZBsize": tor_size, "NZBtitle": tor_Title, "NZBmode": "torrent", "Status": "Skipped" } score = (tor_Title_match + tor_Author_match)/2 # as a percentage # lose a point for each extra word in the title so we get the closest match words = len(formatter.getList(torTitle)) words -= len(formatter.getList(author)) words -= len(formatter.getList(title)) score -= abs(words) matches.append([score, torTitle, newValueDict, controlValueDict]) if matches: highest = max(matches, key=lambda x: x[0]) score = highest[0] nzb_Title = highest[1] newValueDict = highest[2] controlValueDict = highest[3] logger.info(u'Best match RSS (%s%%): %s using %s search' % (score, nzb_Title, searchtype)) myDB.upsert("wanted", newValueDict, controlValueDict) snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' % newValueDict["BookID"]).fetchone() if not snatchedbooks: # check if one of the other downloaders got there first tor_url = controlValueDict["NZBurl"] if '.nzb' in tor_url: snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"], newValueDict["NZBtitle"], controlValueDict["NZBurl"]) else: """ # http://baconbits.org/torrents.php?action=download&authkey=<authkey>&torrent_pass=<password.hashed>&id=185398 if not tor_url.startswith('magnet'): # magnets don't use auth pwd = lazylibrarian.RSS_PROV[tor_feed]['PASS'] auth = lazylibrarian.RSS_PROV[tor_feed]['AUTH'] # don't know what form of password hash is required, try sha1 tor_url = tor_url.replace('<authkey>', auth).replace('<password.hashed>', sha1(pwd)) """ snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"], newValueDict["NZBtitle"], tor_url) if snatch: notifiers.notify_snatch(newValueDict["NZBtitle"] + ' at ' + formatter.now()) common.schedule_job(action='Start', target='processDir') return True logger.debug("No RSS found for " + (book["authorName"] + ' ' + book['bookName']).strip()) return False
def processResultList(resultlist, authorname, bookname, book, searchtype): myDB = database.DBConnection() dictrepl = { '...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '', '-': ' ', '\s\s': ' ' } match_ratio = int(lazylibrarian.MATCH_RATIO) reject_list = getList(lazylibrarian.REJECT_WORDS) matches = [] # bit of a misnomer now, rss can search both tor and nzb rss feeds for tor in resultlist: torTitle = unaccented_str(replace_all(tor['tor_title'], dictrepl)).strip() torTitle = re.sub(r"\s\s+", " ", torTitle) # remove extra whitespace tor_Author_match = fuzz.token_set_ratio(authorname, torTitle) tor_Title_match = fuzz.token_set_ratio(bookname, torTitle) logger.debug("RSS Author/Title Match: %s/%s for %s" % (tor_Author_match, tor_Title_match, torTitle)) tor_url = tor['tor_url'] rejected = False already_failed = myDB.match( 'SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' % tor_url) if already_failed: logger.debug("Rejecting %s, blacklisted at %s" % (torTitle, already_failed['NZBprov'])) rejected = True if not rejected: for word in reject_list: if word in torTitle.lower() and word not in authorname.lower( ) and word not in bookname.lower(): rejected = True logger.debug("Rejecting %s, contains %s" % (torTitle, word)) break tor_size_temp = tor[ 'tor_size'] # Need to cater for when this is NONE (Issue 35) if tor_size_temp is None: tor_size_temp = 1000 tor_size = round(float(tor_size_temp) / 1048576, 2) maxsize = check_int(lazylibrarian.REJECT_MAXSIZE, 0) if not rejected: if maxsize and tor_size > maxsize: rejected = True logger.debug("Rejecting %s, too large" % torTitle) if not rejected: bookid = book['bookid'] tor_Title = (book["authorName"] + ' - ' + book['bookName'] + ' LL.(' + book['bookid'] + ')').strip() tor_prov = tor['tor_prov'] tor_feed = tor['tor_feed'] controlValueDict = {"NZBurl": tor_url} newValueDict = { "NZBprov": tor_prov, "BookID": bookid, "NZBdate": now(), # when we asked for it "NZBsize": tor_size, "NZBtitle": tor_Title, "NZBmode": "torrent", "Status": "Skipped" } score = (tor_Title_match + tor_Author_match) / 2 # as a percentage # lose a point for each extra word in the title so we get the closest match words = len(getList(torTitle)) words -= len(getList(authorname)) words -= len(getList(bookname)) score -= abs(words) matches.append([score, torTitle, newValueDict, controlValueDict]) if matches: highest = max(matches, key=lambda x: x[0]) score = highest[0] nzb_Title = highest[1] newValueDict = highest[2] controlValueDict = highest[3] if score < match_ratio: logger.debug( u'Nearest RSS match (%s%%): %s using %s search for %s %s' % (score, nzb_Title, searchtype, authorname, bookname)) return False logger.info(u'Best RSS match (%s%%): %s using %s search' % (score, nzb_Title, searchtype)) snatchedbooks = myDB.match( 'SELECT * from books WHERE BookID="%s" and Status="Snatched"' % newValueDict["BookID"]) if snatchedbooks: # check if one of the other downloaders got there first logger.info('%s already marked snatched' % nzb_Title) return True else: myDB.upsert("wanted", newValueDict, controlValueDict) tor_url = controlValueDict["NZBurl"] if '.nzb' in tor_url: snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"], newValueDict["NZBtitle"], controlValueDict["NZBurl"]) else: """ # http://baconbits.org/torrents.php?action=download&authkey=<authkey>&torrent_pass=<password.hashed>&id=185398 if not tor_url.startswith('magnet'): # magnets don't use auth pwd = lazylibrarian.RSS_PROV[tor_feed]['PASS'] auth = lazylibrarian.RSS_PROV[tor_feed]['AUTH'] # don't know what form of password hash is required, try sha1 tor_url = tor_url.replace('<authkey>', auth).replace('<password.hashed>', sha1(pwd)) """ snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"], newValueDict["NZBtitle"], tor_url) if snatch: logger.info( 'Downloading %s from %s' % (newValueDict["NZBtitle"], newValueDict["NZBprov"])) notify_snatch( "%s from %s at %s" % (newValueDict["NZBtitle"], newValueDict["NZBprov"], now())) scheduleJob(action='Start', target='processDir') return True + True # we found it else: logger.debug("No RSS found for " + (book["authorName"] + ' ' + book['bookName']).strip()) return False
def search_magazines(mags=None, reset=False): # produce a list of magazines to search for, tor, nzb, torznab myDB = database.DBConnection() searchlist = [] threading.currentThread().name = "SEARCHMAGS" if mags is None: # backlog search searchmags = myDB.select('SELECT Title, LastAcquired, \ IssueDate from magazines WHERE Status="Active"') else: searchmags = [] for magazine in mags: searchmags_temp = myDB.select('SELECT Title, LastAcquired, IssueDate from magazines \ WHERE Title="%s" AND Status="Active"' % (magazine['bookid'])) for terms in searchmags_temp: searchmags.append(terms) if len(searchmags) == 1: logger.info('Searching for one magazine') else: logger.info('Searching for %i magazines' % len(searchmags)) for searchmag in searchmags: bookid = searchmag[0] searchterm = searchmag[0] # frequency = searchmag[1] # last_acquired = searchmag[2] # issue_date = searchmag[3] dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''} searchterm = formatter.latinToAscii(formatter.replace_all(searchterm, dic)) searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode('utf-8') searchlist.append({"bookid": bookid, "searchterm": searchterm}) if searchlist == []: logger.warn('There is nothing to search for. Mark some magazines as active.') for book in searchlist: resultlist = [] tor_resultlist = [] if lazylibrarian.USE_NZB(): resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'mag') if not nproviders: logger.warn('No nzb providers are set. Check config for NEWZNAB or TORZNAB providers') if lazylibrarian.USE_TOR(): tor_resultlist, nproviders = providers.IterateOverTorrentSites(book, 'mag') if not nproviders: logger.warn('No torrent providers are set. Check config for TORRENT providers') for item in tor_resultlist: # reformat the torrent results so they look like nzbs resultlist.append({ 'bookid': item['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100', # fake date as none returned from torrents 'nzbsize': item['tor_size'], 'nzbmode': 'torrent' }) if not resultlist: logger.debug("Adding magazine %s to queue." % book['searchterm']) else: bad_regex = 0 bad_date = 0 old_date = 0 total_nzbs = 0 new_date = 0 to_snatch = 0 maglist = [] issues = [] reject_list = formatter.getList(lazylibrarian.REJECT_WORDS) for nzb in resultlist: total_nzbs = total_nzbs + 1 bookid = nzb['bookid'] nzbtitle = (u'%s' % nzb['nzbtitle']) nzbtitle = nzbtitle.replace('"', '').replace("'", "") # suppress " in titles nzburl = nzb['nzburl'] nzbprov = nzb['nzbprov'] nzbdate_temp = nzb['nzbdate'] nzbsize_temp = nzb['nzbsize'] if nzbsize_temp is None: # not all torrents returned by torznab have a size nzbsize_temp = 1000 nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + ' MB' nzbdate = formatter.nzbdate2format(nzbdate_temp) nzbmode = nzb['nzbmode'] checkifmag = myDB.select('SELECT * from magazines WHERE Title="%s"' % bookid) if checkifmag: for results in checkifmag: control_date = results['IssueDate'] # frequency = results['Frequency'] # regex = results['Regex'] nzbtitle_formatted = nzbtitle.replace('.', ' ').replace('-', ' ').replace('/', ' ').replace( '+', ' ').replace('_', ' ').replace('(', '').replace(')', '').strip() # Need to make sure that substrings of magazine titles don't get found # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this # keyword_check = nzbtitle_formatted.replace(bookid, '') # remove extra spaces if they're in a row nzbtitle_exploded_temp = " ".join(nzbtitle_formatted.split()) nzbtitle_exploded = nzbtitle_exploded_temp.split(' ') if ' ' in bookid: bookid_exploded = bookid.split(' ') else: bookid_exploded = [bookid] # check nzb starts with magazine title, and ends with a date # eg The MagPI Issue 22 - July 2015 # do something like check left n words match title # then check last n words are a date name_match = 1 # assume name matches for now if len(nzbtitle_exploded) > len(bookid_exploded): # needs to be longer as it has to include a date # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz mag_title_match = fuzz.token_set_ratio( common.remove_accents(bookid), common.remove_accents(nzbtitle_formatted)) if mag_title_match < lazylibrarian.MATCH_RATIO: logger.debug( u"Magazine token set Match failed: " + str( mag_title_match) + "% for " + nzbtitle_formatted) name_match = 0 lower_title = common.remove_accents(nzbtitle_formatted).lower() lower_bookid = common.remove_accents(bookid).lower() for word in reject_list: if word in lower_title and not word in lower_bookid: name_match = 0 logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word)) break if name_match: # some magazine torrent uploaders add their sig in [] or {} # Fortunately for us, they always seem to add it at the end # also some magazine torrent titles are "magazine_name some_form_of_date pdf" # so strip all the trailing junk... while nzbtitle_exploded[len(nzbtitle_exploded) - 1][0] in '[{' or \ nzbtitle_exploded[len(nzbtitle_exploded) - 1].lower() == 'pdf': nzbtitle_exploded.pop() # gotta love the function names # need at least one word magazine title and two date components if len(nzbtitle_exploded) > 2: # regexA = DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY regexA_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1] regexA_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 2] regexA_month = formatter.month2num(common.remove_accents(regexA_month_temp)) if not regexA_year.isdigit() or int(regexA_year) < 1900 or int(regexA_year) > 2100: regexA_year = 'fail' # force date failure # if frequency == "Weekly" or frequency == "BiWeekly": regexA_day = nzbtitle_exploded[len(nzbtitle_exploded) - 3].rstrip(',').zfill(2) if regexA_day.isdigit(): if int(regexA_day) > 31: # probably issue number nn regexA_day = '01' else: regexA_day = '01' # just MonthName YYYY # else: # regexA_day = '01' # monthly, or less frequent try: newdatish = regexA_year + '-' + regexA_month + '-' + regexA_day # try to make sure the year/month/day are valid, exception if not # ie don't accept day > 31, or 30 in some months # also handles multiple date format named issues eg Jan 2014, 01 2014 # datetime will give a ValueError if not a good date or a param is not int date1 = datetime.date(int(regexA_year), int(regexA_month), int(regexA_day)) except ValueError: # regexB = MonthName DD YYYY or MonthName DD, YYYY regexB_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1] regexB_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 3] regexB_month = formatter.month2num(common.remove_accents(regexB_month_temp)) regexB_day = nzbtitle_exploded[len(nzbtitle_exploded) - 2].rstrip(',').zfill(2) if not regexB_year.isdigit() or int(regexB_year) < 1900 or int(regexB_year) > 2100: regexB_year = 'fail' try: newdatish = regexB_year + '-' + regexB_month + '-' + regexB_day # datetime will give a ValueError if not a good date or a param is not int date1 = datetime.date(int(regexB_year), int(regexB_month), int(regexB_day)) except ValueError: # regexC = YYYY MM or YYYY MM DD # (can't get MM/DD if named YYYY Issue nn) # First try YYYY MM regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 2] if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100: regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2) regexC_day = '01' else: # try YYYY MM DD regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 3] if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100: regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 2].zfill(2) regexC_day = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2) else: regexC_year = 'fail' try: newdatish = regexC_year + '-' + regexC_month + '-' + regexC_day # datetime will give a ValueError if not a good date or a param is not int date1 = datetime.date(int(regexC_year), int(regexC_month), int(regexC_day)) except: logger.debug('Magazine %s not in proper date format.' % nzbtitle_formatted) bad_date = bad_date + 1 # allow issues with good name but bad date to be included # so user can manually select them, incl those with issue numbers newdatish = "1970-01-01" # provide a fake date for bad-date issues # continue else: continue # store all the _new_ matching results, marking as "skipped" for now # we change the status to "wanted" on the ones we want to snatch later # don't add a new entry if this issue has been found on an earlier search # because status might have been user-set mag_entry = myDB.select('SELECT * from wanted WHERE NZBtitle="%s" and NZBprov="%s"' % (nzbtitle, nzbprov)) if not mag_entry: controlValueDict = { "NZBtitle": nzbtitle, "NZBprov": nzbprov } newValueDict = { "NZBurl": nzburl, "BookID": bookid, "NZBdate": nzbdate, "AuxInfo": newdatish, "Status": "Skipped", "NZBsize": nzbsize, "NZBmode": nzbmode } myDB.upsert("wanted", newValueDict, controlValueDict) if control_date is None: # we haven't got any copies of this magazine yet # get a rough time just over a month ago to compare to, in format yyyy-mm-dd # could perhaps calc differently for weekly, biweekly etc start_time = time.time() start_time -= 31 * 24 * 60 * 60 # number of seconds in 31 days control_date = time.strftime("%Y-%m-%d", time.localtime(start_time)) # only grab a copy if it's newer than the most recent we have, # or newer than a month ago if we have none comp_date = formatter.datecompare(newdatish, control_date) if comp_date > 0: # Should probably only upsert when downloaded and processed in case snatch fails # keep track of what we're going to download so we don't download dupes new_date = new_date + 1 issue = bookid + ',' + newdatish if issue not in issues: maglist.append({ 'bookid': bookid, 'nzbprov': nzbprov, 'nzbtitle': nzbtitle, 'nzburl': nzburl, 'nzbmode': nzbmode }) logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted) to_snatch = to_snatch + 1 issues.append(issue) controlValueDict = {"NZBurl": nzburl} newValueDict = { "NZBdate": formatter.now(), # when we asked for it "Status": "Wanted" } myDB.upsert("wanted", newValueDict, controlValueDict) else: logger.debug('This issue of %s is already flagged for download' % issue) else: if newdatish != "1970-01-01": # this is our fake date for ones we can't decipher logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted) old_date = old_date + 1 else: logger.debug('Magazine [%s] does not completely match search term [%s].' % ( nzbtitle_formatted, bookid)) bad_regex = bad_regex + 1 logger.info('Found %i results for %s. %i new, %i old, %i fail date, %i fail name: %i to download' % ( total_nzbs, bookid, new_date, old_date, bad_date, bad_regex, to_snatch)) for items in maglist: if items['nzbmode'] == "torznab": snatch = TORDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl']) elif items['nzbmode'] == "torrent": snatch = TORDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl']) else: snatch = NZBDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl']) if snatch: notifiers.notify_snatch(formatter.latinToAscii(items['nzbtitle']) + ' at ' + formatter.now()) common.schedule_job(action='Start', target='processDir') maglist = [] if reset: common.schedule_job(action='Restart', target='search_magazines') logger.info("Search for magazines complete")
def processResultList(resultlist, book, searchtype): myDB = database.DBConnection() dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '', '-': ' ', '\s\s': ' '} # ' the ': ' ', ' a ': ' ', ' and ': ' ', # ' to ': ' ', ' of ': ' ', ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '} dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': ''} match_ratio = int(lazylibrarian.MATCH_RATIO) reject_list = formatter.getList(lazylibrarian.REJECT_WORDS) for nzb in resultlist: nzbTitle = formatter.latinToAscii(formatter.replace_all(nzb['nzbtitle'], dictrepl)).strip() nzbTitle = re.sub(r"\s\s+", " ", nzbTitle) # remove extra whitespace author = formatter.latinToAscii(formatter.replace_all(book['authorName'], dic)) title = formatter.latinToAscii(formatter.replace_all(book['bookName'], dic)) # nzbTitle_match = fuzz.token_set_ratio(book['searchterm'], nzbTitle) # logger.debug(u"NZB Title sort Match %: " + str(nzbTitle_match) + " for " + nzbTitle) nzbAuthor_match = fuzz.token_set_ratio(author, nzbTitle) nzbBook_match = fuzz.token_set_ratio(title, nzbTitle) logger.debug(u"NZB author/book Match: %s/%s for %s" % (nzbAuthor_match, nzbBook_match, nzbTitle)) rejected = False for word in reject_list: if word in nzbTitle.lower() and not word in author.lower() and not word in title.lower(): rejected = True logger.debug("Rejecting %s, contains %s" % (nzbTitle, word)) break if (nzbAuthor_match >= match_ratio and nzbBook_match >= match_ratio and not rejected): logger.debug(u'Found NZB: %s using %s search' % (nzb['nzbtitle'], searchtype)) bookid = book['bookid'] nzbTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip() nzburl = nzb['nzburl'] nzbprov = nzb['nzbprov'] nzbdate_temp = nzb['nzbdate'] nzbsize_temp = nzb['nzbsize'] # Need to cater for when this is NONE (Issue 35) if nzbsize_temp is None: nzbsize_temp = 1000 nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + ' MB' nzbdate = formatter.nzbdate2format(nzbdate_temp) nzbmode = nzb['nzbmode'] controlValueDict = {"NZBurl": nzburl} newValueDict = { "NZBprov": nzbprov, "BookID": bookid, "NZBdate": formatter.now(), # when we asked for it "NZBsize": nzbsize, "NZBtitle": nzbTitle, "NZBmode": nzbmode, "Status": "Skipped" } myDB.upsert("wanted", newValueDict, controlValueDict) snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' % bookid).fetchone() if not snatchedbooks: if nzbmode == "torznab": snatch = TORDownloadMethod(bookid, nzbprov, nzbTitle, nzburl) else: snatch = NZBDownloadMethod(bookid, nzbprov, nzbTitle, nzburl) if snatch: notifiers.notify_snatch(nzbTitle + ' at ' + formatter.now()) common.schedule_job(action='Start', target='processDir') return True logger.debug("No nzb's found for " + (book["authorName"] + ' ' + book['bookName']).strip() + " using searchtype " + searchtype) return False
def processDir(reset=False): threadname = threading.currentThread().name if "Thread-" in threadname: threading.currentThread().name = "POSTPROCESS" if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir( lazylibrarian.DOWNLOAD_DIR): processpath = os.getcwd() else: processpath = lazylibrarian.DOWNLOAD_DIR logger.debug(' Checking [%s] for files to post process' % processpath) try: downloads = os.listdir(processpath) except OSError as why: logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror)) return myDB = database.DBConnection() snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"') if len(snatched) == 0: logger.info('Nothing marked as snatched.') scheduleJob(action='Stop', target='processDir') return if len(downloads) == 0: logger.info('No downloads are found. Nothing to process.') return logger.info("Checking %s download%s for %s snatched file%s" % (len(downloads), plural( len(downloads)), len(snatched), plural(len(snatched)))) ppcount = 0 for book in snatched: matches = [] for fname in downloads: if not fname.endswith('.fail'): # has this failed before? # this is to get round differences in torrent filenames. # Torrents aren't always returned with the name we searched for # there might be a better way... if isinstance(fname, str): matchname = fname.decode(lazylibrarian.SYS_ENCODING) else: matchname = fname if ' LL.(' in matchname: matchname = matchname.split(' LL.(')[0] matchtitle = book['NZBtitle'] match = 0 if matchtitle: if ' LL.(' in matchtitle: matchtitle = matchtitle.split(' LL.(')[0] match = fuzz.token_set_ratio(matchtitle, matchname) if match >= lazylibrarian.DLOAD_RATIO: fname = matchname if os.path.isfile(os.path.join(processpath, fname)): # not a directory, handle single file downloads here. Book/mag file in download root. # move the file into it's own subdirectory so we don't move/delete things that aren't ours if is_valid_booktype(fname, booktype="book") \ or is_valid_booktype(fname, booktype="mag"): fname = os.path.splitext(fname)[0] dirname = os.path.join(processpath, fname) if not os.path.exists(dirname): try: os.makedirs(dirname) except OSError as why: logger.debug( 'Failed to create directory %s, %s' % (dirname, why.strerror)) if os.path.exists(dirname): # move the book and any related files too # ie other book formats, or opf, jpg with same title # can't move metadata.opf or cover.jpg or similar # as can't be sure they are ours # not sure if we need a new listdir here, or whether we can use the old one list_dir = os.listdir(processpath) for ourfile in list_dir: if ourfile.startswith(fname): if is_valid_booktype(ourfile, booktype="book") \ or is_valid_booktype(ourfile, booktype="mag") \ or os.path.splitext(ourfile)[1].lower() in ['.opf', '.jpg']: try: shutil.move( os.path.join( processpath, ourfile), os.path.join( dirname, ourfile)) except Exception as why: logger.debug( "Failed to move file %s to %s, %s" % (ourfile, dirname, str(why))) if os.path.isdir(os.path.join(processpath, fname)): pp_path = os.path.join(processpath, fname) logger.debug('Found folder (%s%%) %s for %s' % (match, pp_path, book['NZBtitle'])) matches.append([match, pp_path, book]) else: logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle)) else: logger.debug('Skipping %s' % fname) if matches: highest = max(matches, key=lambda x: x[0]) match = highest[0] pp_path = highest[1] book = highest[2] logger.debug(u'Best match (%s%%): %s for %s' % (match, pp_path, book['NZBtitle'])) data = myDB.match('SELECT * from books WHERE BookID="%s"' % book['BookID']) if data: logger.debug(u'Processing book %s' % book['BookID']) authorname = data['AuthorName'] bookname = data['BookName'] if 'windows' in platform.system().lower( ) and '/' in lazylibrarian.EBOOK_DEST_FOLDER: logger.warn('Please check your EBOOK_DEST_FOLDER setting') lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace( '/', '\\') # Default destination path, should be allowed change per config file. dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace( '$Author', authorname).replace('$Title', bookname) global_name = lazylibrarian.EBOOK_DEST_FILE.replace( '$Author', authorname).replace('$Title', bookname) global_name = unaccented(global_name) # dest_path = authorname+'/'+bookname # global_name = bookname + ' - ' + authorname # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere? dic = { '<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': '' } dest_path = unaccented_str(replace_all(dest_path, dic)) dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode( lazylibrarian.SYS_ENCODING) else: data = myDB.match('SELECT * from magazines WHERE Title="%s"' % book['BookID']) if data: logger.debug(u'Processing magazine %s' % book['BookID']) # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple # files are downloading, there will be an error in post-processing, trying to go to the # same directory. mostrecentissue = data[ 'IssueDate'] # keep for processing issues arriving out of order # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere? dic = { '<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': '' } mag_name = unaccented_str(replace_all(book['BookID'], dic)) # book auxinfo is a cleaned date, eg 2015-01-01 dest_path = lazylibrarian.MAG_DEST_FOLDER.replace( '$IssueDate', book['AuxInfo']).replace('$Title', mag_name) # dest_path = '_Magazines/'+title+'/'+book['AuxInfo'] if lazylibrarian.MAG_RELATIVE: if dest_path[0] not in '._': dest_path = '_' + dest_path dest_path = os.path.join( lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING) else: dest_path = dest_path.encode( lazylibrarian.SYS_ENCODING) authorname = None bookname = None global_name = lazylibrarian.MAG_DEST_FILE.replace( '$IssueDate', book['AuxInfo']).replace('$Title', mag_name) global_name = unaccented(global_name) # global_name = book['AuxInfo']+' - '+title else: logger.debug( "Snatched magazine %s is not in download directory" % (book['BookID'])) continue else: logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle'])) continue processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name, book['NZBmode']) if processBook: logger.debug("Processing %s, %s" % (global_name, book['NZBurl'])) # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"} newValueDict = { "Status": "Processed", "NZBDate": now() } # say when we processed it myDB.upsert("wanted", newValueDict, controlValueDict) if bookname is not None: # it's a book, if None it's a magazine if len(lazylibrarian.IMP_CALIBREDB): logger.debug( 'Calibre should have created the extras for us') else: processExtras(myDB, dest_path, global_name, data) else: # update mags controlValueDict = {"Title": book['BookID']} if mostrecentissue: if mostrecentissue.isdigit() and str( book['AuxInfo']).isdigit(): older = int(mostrecentissue) > int( book['AuxInfo']) # issuenumber else: older = mostrecentissue > book['AuxInfo'] # YYYY-MM-DD else: older = False if older: # check this in case processing issues arriving out of order newValueDict = { "LastAcquired": today(), "IssueStatus": "Open" } else: newValueDict = { "IssueDate": book['AuxInfo'], "LastAcquired": today(), "IssueStatus": "Open" } myDB.upsert("magazines", newValueDict, controlValueDict) # dest_path is where we put the magazine after processing, but we don't have the full filename # so look for any "book" in that directory dest_file = book_file(dest_path, booktype='mag') controlValueDict = { "Title": book['BookID'], "IssueDate": book['AuxInfo'] } newValueDict = { "IssueAcquired": today(), "IssueFile": dest_file, "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo'])) } myDB.upsert("issues", newValueDict, controlValueDict) # create a thumbnail cover for the new issue create_cover(dest_file) logger.info('Successfully processed: %s' % global_name) ppcount = ppcount + 1 notify_download("%s from %s at %s" % (global_name, book['NZBprov'], now())) else: logger.error('Postprocessing for %s has failed.' % global_name) logger.error('Warning - Residual files remain in %s.fail' % pp_path) controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"} newValueDict = {"Status": "Failed", "NZBDate": now()} myDB.upsert("wanted", newValueDict, controlValueDict) # if it's a book, reset status so we try for a different version # if it's a magazine, user can select a different one from pastissues table if bookname is not None: myDB.action( 'UPDATE books SET status = "Wanted" WHERE BookID="%s"' % book['BookID']) # at this point, as it failed we should move it or it will get postprocessed # again (and fail again) try: os.rename(pp_path, pp_path + '.fail') except Exception as e: logger.debug("Unable to rename %s, %s" % (pp_path, str(e))) downloads = os.listdir( processpath) # check in case we processed/deleted some above for directory in downloads: if "LL.(" in directory and not directory.endswith('.fail'): bookID = str(directory).split("LL.(")[1].split(")")[0] logger.debug("Book with id: " + str(bookID) + " is in downloads") pp_path = os.path.join(processpath, directory) if os.path.isfile(pp_path): pp_path = os.path.join(processpath) if (os.path.isdir(pp_path)): logger.debug('Found LL folder %s.' % pp_path) if import_book(pp_path, bookID): ppcount = ppcount + 1 if ppcount == 0: logger.info('No snatched books/mags have been found') else: logger.info('%s book%s/mag%s processed.' % (ppcount, plural(ppcount), plural(ppcount))) if reset: scheduleJob(action='Restart', target='processDir')