def _append_locale_months(lang):
    """Append the active locale's full and abbreviated month names to MONTHNAMES, labelled lang."""
    # calendar.month_name / month_abbr reflect whichever locale is active right now.
    # Row 0 of MONTHNAMES holds the language label, rows 1..12 the month names.
    MONTHNAMES[0].append(lang)
    for f in range(1, 13):
        MONTHNAMES[f].append(common.remove_accents(calendar.month_name[f]).lower())
    MONTHNAMES[0].append(lang)
    for f in range(1, 13):
        MONTHNAMES[f].append(common.remove_accents(calendar.month_abbr[f]).lower().strip("."))


def _log_locale_alternatives(lang):
    """Log the installed locales (from `locale -a`) that share the language part of lang."""
    wanted_lang = lang.split("_")[0]
    try:
        # universal_newlines=True returns text on both Python 2 and 3, so the
        # startswith() comparison below works against a str prefix
        all_locales = subprocess.check_output(["locale", "-a"], universal_newlines=True).split()
        locale_list = [a_locale for a_locale in all_locales if a_locale.startswith(wanted_lang)]
        if locale_list:
            logger.warn("Found these alternatives: " + str(locale_list))
        else:
            logger.warn("Unable to find an alternative")
    except Exception as e:
        # best effort only: `locale` may not exist on this platform (eg windows)
        logger.warn("Unable to get a list of alternatives")
        logger.debug("Listing locales failed: %s" % str(e))


def build_monthtable():
    """Extend MONTHNAMES with month names for the current locale and each IMP_MONTHLANG locale.

    Does nothing if no extra languages are configured or the current locale
    cannot be read. For every locale, two columns are appended: full month
    names and abbreviated month names (accents removed, lower-cased, trailing
    '.' stripped from abbreviations). The process locale is restored to its
    entry state after each requested language, whether or not it loaded.
    """
    wanted = formatter.getList(IMP_MONTHLANG)
    if not wanted:  # any extra languages wanted?
        return
    try:
        current_locale = locale.setlocale(locale.LC_ALL, "")  # read current state.
        # getdefaultlocale() doesnt seem to work as expected on windows, returns 'None'
    except locale.Error as e:
        logger.debug("Error getting current locale : %s" % str(e))
        return

    lang = str(current_locale)
    if not lang.startswith("en_"):  # en_ is preloaded
        _append_locale_months(lang)
        logger.info(
            "Added month names for locale [%s], %s, %s ..."
            % (lang, MONTHNAMES[1][-2], MONTHNAMES[1][-1])
        )

    for lang in wanted:
        try:
            if len(lang) > 1:
                locale.setlocale(locale.LC_ALL, lang)
                _append_locale_months(lang)
                locale.setlocale(locale.LC_ALL, current_locale)  # restore entry state
                logger.info(
                    "Added month names for locale [%s], %s, %s ..."
                    % (lang, MONTHNAMES[1][-2], MONTHNAMES[1][-1])
                )
        except Exception as e:
            # was a bare except: no longer traps KeyboardInterrupt/SystemExit
            locale.setlocale(locale.LC_ALL, current_locale)  # restore entry state
            logger.warn("Unable to load requested locale [%s]" % lang)
            logger.debug("Locale [%s] failed: %s" % (lang, str(e)))
            _log_locale_alternatives(lang)
            logger.info("Set locale back to entry state %s" % current_locale)
def get_searchterm(book, searchType):
    """Return (authorname, bookname) cleaned up for use as provider search terms.

    book is a mapping with 'authorName', 'bookName' and 'bookSub' entries.
    For 'book', 'audio' and any search type containing 'short', the title and
    author are simplified further; other search types get the cleaned names.
    """
    authorname = cleanName(book['authorName'], "'")
    bookname = cleanName(book['bookName'], "'")

    if not (searchType in ['book', 'audio'] or 'short' in searchType):
        return authorname, bookname

    # books like "Spike Milligan: Man of Letters"
    # where we split the title/subtitle on ':'
    if bookname == authorname and book['bookSub']:
        bookname = cleanName(book['bookSub'])

    # books like "Spike Milligan In his own words"
    # where we don't want to look for "Spike Milligan Spike Milligan In his own words"
    author_len = len(authorname)
    if bookname.startswith(authorname) and len(bookname) > author_len:
        bookname = bookname[author_len + 1:]
    bookname = bookname.strip()

    # no initials or extensions after surname eg L. E. Modesitt Jr. -> Modesitt
    # and Charles H. Elliott, Phd -> Charles Elliott
    # but Tom Holt -> Tom Holt
    # Calibre directories may have trailing '.' replaced by '_' eg Jr_
    if ' ' in authorname:
        postfix = getList(lazylibrarian.CONFIG['NAME_POSTFIX'])
        kept = []
        for raw in authorname.split(' '):
            word = raw.strip('.').strip('_')
            if len(word) > 1 and word.lower() not in postfix:
                kept.append(word)
        authorname = ' '.join(kept)

    # short searches drop any bracketed part of the title
    if 'short' in searchType and '(' in bookname:
        bookname = bookname.split('(')[0].strip()

    return authorname, bookname
def formatAuthorName(author):
    """Return the author name in a consistent "Forename Surname" format.

    "Surname, Forename" is swapped round unless the part after the comma is a
    known postfix (NAME_POSTFIX config, eg Jr, Phd), leading initials are
    compacted to "L.E. Surname", and runs of whitespace are collapsed.
    """
    if "," in author:
        postfix = getList(lazylibrarian.CONFIG['NAME_POSTFIX'])
        parts = author.split(',')
        if len(parts) == 2:
            before = parts[0].strip()
            after = parts[1].strip()
            # Need to handle names like "L. E. Modesitt, Jr." or "J. Springmann, Phd"
            # use an exceptions list for now, there might be a better way...
            if after.strip('.').strip('_').lower() in postfix:
                forename, surname = before, after
            else:
                # guess its "surname, forename" or "surname, initial(s)" so swap them round
                forename, surname = after, before
            rebuilt = forename + ' ' + surname
            if author != rebuilt:
                logger.debug('Formatted authorname [%s] to [%s %s]' % (author, forename, surname))
                author = rebuilt

    # reformat any initials, we want to end up with L.E. Modesitt Jr
    if len(author) > 2 and author[1] in '. ':
        remainder = author
        initials = []
        # peel off single letters followed by '.' or ' ' from the front
        while len(remainder) > 2 and remainder[1] in '. ':
            initials.append(remainder[0] + '.')
            remainder = remainder[2:].strip()
        forename = ''.join(initials)
        surname = remainder
        rebuilt = forename + ' ' + surname
        if author != rebuilt:
            logger.debug('Stripped authorname [%s] to [%s %s]' % (author, forename, surname))
            author = rebuilt

    # ensure no extra whitespace
    return ' '.join(author.split())
def _action(self, params, body=None, content_type=None):
    """Send one request to the uTorrent webUI.

    params is url-encoded onto <base_url>/gui/?token=<token>; body, if given,
    is sent as the request data with a Content-length header, and
    content_type, if given, as the Content-type header.

    Returns a (response code, decoded JSON) tuple. If the webUI raises an
    HTTPError the error is logged and the method falls off the end, so the
    result is None.
    """
    # noinspection PyTypeChecker
    url = self.base_url + '/gui/' + '?token=' + self.token + '&' + urlencode(params)
    request = Request(url)

    if lazylibrarian.CONFIG['PROXY_HOST']:
        for item in getList(lazylibrarian.CONFIG['PROXY_TYPE']):
            request.set_proxy(lazylibrarian.CONFIG['PROXY_HOST'], item)
    request.add_header('User-Agent', getUserAgent())

    if body:
        if PY2:
            request.add_data(body)
        else:
            # Request.data is an attribute on python3, not a method:
            # calling request.data(body) raised TypeError
            request.data = body
        request.add_header('Content-length', len(body))
    if content_type:
        request.add_header('Content-type', content_type)

    try:
        response = self.opener.open(request)
        return response.code, json.loads(response.read())
    except HTTPError as err:
        logger.debug('URL: %s' % url)
        logger.debug('uTorrent webUI raised the following error: ' + str(err))
def proxyList():
    """Return a {scheme: proxy host} dict for the configured proxy, or None.

    None is returned when no PROXY_HOST is configured. Only 'http' and
    'https' entries of PROXY_TYPE are included, so the dict may be empty.
    """
    if not lazylibrarian.CONFIG['PROXY_HOST']:
        return None
    return {
        scheme: lazylibrarian.CONFIG['PROXY_HOST']
        for scheme in getList(lazylibrarian.CONFIG['PROXY_TYPE'])
        if scheme in ['http', 'https']
    }
def processDestination(pp_path=None, dest_path=None, authorname=None, bookname=None, global_name=None, book_id=None): pp_path = pp_path.encode(lazylibrarian.SYS_ENCODING) # check we got a book in the downloaded files pp = False booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE) for bookfile in os.listdir(pp_path): if ((str(bookfile).split('.')[-1]) in booktype_list): pp = True if pp == False: # no book found in a format we wanted. Leave for the user to delete or convert manually logger.debug('Failed to locate a book in downloaded files, leaving for manual processing') return pp try: if not os.path.exists(dest_path): logger.debug('%s does not exist, so it\'s safe to create it' % dest_path) else: logger.debug('%s already exists. Removing existing tree.' % dest_path) shutil.rmtree(dest_path) logger.debug('Attempting to copy/move tree') if lazylibrarian.DESTINATION_COPY == 1 and lazylibrarian.DOWNLOAD_DIR != pp_path: shutil.copytree(pp_path, dest_path) logger.debug('Successfully copied %s to %s.' % (pp_path, dest_path)) elif lazylibrarian.DOWNLOAD_DIR == pp_path: booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE) for file3 in os.listdir(pp_path): if ((str(file3).split('.')[-1]) in booktype_list): bookID = str(file3).split("LL.(")[1].split(")")[0] if bookID == book_id: logger.debug('Processing %s' % bookID) if not os.path.exists(dest_path): try: os.makedirs(dest_path) except Exception, e: logger.debug(str(e)) if lazylibrarian.DESTINATION_COPY == 1: shutil.copyfile(os.path.join(pp_path, file3), os.path.join(dest_path, file3)) else: shutil.move(os.path.join(pp_path, file3), os.path.join(dest_path, file3)) else:
def _command(self, command, args=None, content_type=None, files=None):
    """Send a command to the qBittorrent WebAPI and interpret the reply.

    Returns decoded JSON (or '' for an empty body) when the reply is
    'application/json'; the reply text for 'version/api'; False when a text
    reply is anything other than empty or 'Ok.', or when a URLError is
    raised; True otherwise.
    """
    logger.debug('QBittorrent WebAPI Command: %s' % command)
    url = self.base_url + '/' + command

    if files or content_type == 'multipart/form-data':
        data, headers = encode_multipart(
            args, files, '-------------------------acebdf13572468')
    else:
        data = makeBytestr(urlencode(args)) if args else None
        headers = {'Content-Type': content_type} if content_type else dict()

    request = Request(url, data, headers)
    if lazylibrarian.CONFIG['PROXY_HOST']:
        for proxy_type in getList(lazylibrarian.CONFIG['PROXY_TYPE']):
            request.set_proxy(lazylibrarian.CONFIG['PROXY_HOST'], proxy_type)
    request.add_header('User-Agent', USER_AGENT)

    try:
        response = self.opener.open(request)
        try:
            contentType = response.headers['content-type']
        except KeyError:
            contentType = ''
        payload = response.read()

        # some commands return json
        if contentType == 'application/json':
            return json.loads(payload) if payload else ''

        # some commands return plain text
        text = makeUnicode(payload)
        logger.debug("QBitTorrent returned %s" % text)
        if command == 'version/api':
            return text
        # some just return Ok. or Fails.
        # some commands return nothing but response code (always 200)
        return not (text and text != 'Ok.')
    except URLError as err:
        logger.debug('Failed URL: %s' % url)
        logger.debug('QBitTorrent webUI raised the following error: %s' % err.reason)
        return False
def _command(self, command, args=None, content_type=None, files=None):
    """Issue a qBittorrent WebAPI command.

    The request is multipart when files are supplied or content_type is
    'multipart/form-data', otherwise args are url-encoded. The result is the
    decoded JSON reply ('' if the JSON body is empty), the raw text for
    'version/api', True for an empty or 'Ok.' text reply, and False for any
    other text reply or a URLError.
    """
    logger.debug('QBittorrent WebAPI Command: %s' % command)
    url = self.base_url + '/' + command

    data = None
    headers = dict()
    use_multipart = files or content_type == 'multipart/form-data'
    if use_multipart:
        data, headers = encode_multipart(args, files, '-------------------------acebdf13572468')
    elif args and content_type:
        data = makeBytestr(urlencode(args))
        headers['Content-Type'] = content_type
    elif args:
        data = makeBytestr(urlencode(args))
    elif content_type:
        headers['Content-Type'] = content_type

    request = Request(url, data, headers)
    if lazylibrarian.CONFIG['PROXY_HOST']:
        for scheme in getList(lazylibrarian.CONFIG['PROXY_TYPE']):
            request.set_proxy(lazylibrarian.CONFIG['PROXY_HOST'], scheme)
    request.add_header('User-Agent', getUserAgent())

    try:
        response = self.opener.open(request)
        try:
            contentType = response.headers['content-type']
        except KeyError:
            contentType = ''
        resp = response.read()

        if contentType == 'application/json':
            # some commands return json
            if not resp:
                return ''
            return json.loads(resp)

        # some commands return plain text
        resp = makeUnicode(resp)
        logger.debug("QBitTorrent returned %s" % resp)
        if command == 'version/api':
            result = resp
        elif resp and resp != 'Ok.':
            # some just return Ok. or Fails.
            result = False
        else:
            # some commands return nothing but response code (always 200)
            result = True
        return result
    except URLError as err:
        logger.debug('Failed URL: %s' % url)
        logger.debug('QBitTorrent webUI raised the following error: %s' % err.reason)
        return False
def multiLink(self, bookfile, bookid):
    """Return OPDS acquisition <link> tags when a book exists in several formats.

    Looks beside bookfile for the same basename with each configured
    EBOOK_TYPE extension. One link per existing format is returned as a
    single string, or '' when fewer than two formats are found.
    """
    stem = os.path.splitext(bookfile)[0]
    found = [ext for ext in getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
             if os.path.isfile(stem + '.' + ext)]
    if len(found) <= 1:
        return ''

    links = []
    for fmt in found:
        href = '%s?cmd=Serve&bookid=%s&fmt=%s' % (
            self.opdsroot, quote_plus(bookid), fmt)
        links.append('<link href="' + href +
                     '" rel="http://opds-spec.org/acquisition" type="' + mimeType(
                         '.' + fmt) + '"/>')
    return ''.join(links)
def seriesInfo(bookid):
    """ Return series info for a bookid as a dict of formatted strings
        The strings are configurable, but by default...
        Full returns ( Lord of the Rings 2 )
        Name returns Lord of the Rings (with added Num part if that's not numeric, eg Lord of the Rings Book One)
        Num  returns Book #1 -    (or empty string if no numeric part)
        so you can combine to make Book #1 - Lord of the Rings
        All three values are empty strings if the book is not a series member """
    mydict = {'Name': '', 'Full': '', 'Num': ''}
    myDB = database.DBConnection()
    member = myDB.match('SELECT SeriesID,SeriesNum from member WHERE bookid=?', (bookid,))
    if not member:
        return mydict

    raw_num = member['SeriesNum']
    seriesnum = ''
    # might be "Book 3.5" or similar, just get the numeric part
    # (last numeric word wins, so scan from the end)
    for candidate in reversed(getList(raw_num)):
        try:
            float(candidate)
        except ValueError:
            continue
        seriesnum = candidate
        break

    seriesname = ''
    series = myDB.match('SELECT SeriesName from series WHERE seriesid=?', (member['SeriesID'],))
    if series:
        seriesname = series['SeriesName']
        # couldn't figure out number, keep everything we got, could be something like "Book Two"
        # add what we got back to end of series name
        if not seriesnum and raw_num:
            seriesname = "%s %s" % (seriesname, raw_num)

    name_fmt = lazylibrarian.CONFIG['FMT_SERNAME']
    mydict['Name'] = name_fmt.replace('$SerName', seriesname).replace('$$', ' ')
    num_fmt = lazylibrarian.CONFIG['FMT_SERNUM']
    mydict['Num'] = num_fmt.replace('$SerNum', seriesnum).replace('$$', ' ')
    full_fmt = lazylibrarian.CONFIG['FMT_SERIES']
    mydict['Full'] = full_fmt.replace('$SerNum', seriesnum).replace('$SerName', seriesname).replace('$$', ' ')
    return mydict
def _command(self, command, args=None, content_type=None, files=None):
    """Send a command to the qBittorrent WebAPI (python2 urllib2 version).

    Returns the decoded JSON when the reply content-type is
    'application/json'. Any other reply that carries a content-type is
    logged and reported as False, as is a URLError. A reply with no header
    info or no content-type returns True.
    """
    logger.debug('QBittorrent WebAPI Command: %s' % command)
    url = self.base_url + '/' + command

    if files:
        # Use Multipart form
        data, headers = encode_multipart(
            args, files, '-------------------------acebdf13572468')
    else:
        data = urllib.urlencode(args) if args else None
        headers = {'Content-Type': content_type} if content_type else dict()

    request = urllib2.Request(url, data, headers)
    if lazylibrarian.CONFIG['PROXY_HOST']:
        for proxy_type in getList(lazylibrarian.CONFIG['PROXY_TYPE']):
            request.set_proxy(lazylibrarian.CONFIG['PROXY_HOST'], proxy_type)
    request.add_header('User-Agent', USER_AGENT)

    try:
        response = self.opener.open(request)
        info = response.info()
        reply_type = info.getheader('content-type') if info else None
        if not reply_type:
            return True
        if reply_type == 'application/json':
            return json.loads(response.read())
        # response code is always 200, whether success or fail
        resp = ''.join(line for line in response)
        logger.debug("QBitTorrent returned %s" % resp)
        return False
    except urllib2.URLError as err:
        logger.debug('Failed URL: %s' % url)
        logger.debug('QBitTorrent webUI raised the following error: %s' % err.reason)
        return False
def get_author_info(self, authorid=None):
    """Fetch author details from goodreads for authorid.

    Returns a dict with keys authorid, authorlink, authorimg, authorborn,
    authordeath, totalbooks and authorname, or an empty dict if the request
    fails or no author element is returned.
    """
    URL = 'http://www.goodreads.com/author/show/' + authorid + '.xml?' + urllib.urlencode(self.params)
    author_dict = {}

    try:
        rootxml, _ = get_xml_request(URL)
    except Exception as e:
        logger.error("Error getting author info: %s" % str(e))
        return author_dict
    if rootxml is None:
        logger.debug("Error requesting author info")
        return author_dict

    resultxml = rootxml.find('author')
    # find() returns None when there is no <author> element, and len(None) would raise
    if resultxml is None or not len(resultxml):
        logger.warn('No author found with ID: ' + authorid)
        return author_dict

    # added authorname to author_dict - this holds the intact name preferred by GR
    # except GR messes up names like "L. E. Modesitt, Jr." where it returns <name>Jr., L. E. Modesitt</name>
    authorname = resultxml[1].text
    if "," in authorname:
        postfix = getList(lazylibrarian.CONFIG['NAME_POSTFIX'])
        words = authorname.split(',')
        if len(words) == 2:
            # lower() must be called: comparing the bound method itself to the
            # postfix list was always False, so the name was never swapped back
            if words[0].strip().strip('.').lower() in postfix:
                authorname = words[1].strip() + ' ' + words[0].strip()

    logger.debug("[%s] Processing info for authorID: %s" % (authorname, authorid))
    author_dict = {
        'authorid': resultxml[0].text,
        'authorlink': resultxml.find('link').text,
        'authorimg': resultxml.find('image_url').text,
        'authorborn': resultxml.find('born_at').text,
        'authordeath': resultxml.find('died_at').text,
        'totalbooks': resultxml.find('works_count').text,
        'authorname': ' '.join(authorname.split())  # remove any extra whitespace
    }
    return author_dict
def checkLink():
    """Test the SABnzbd connection and return a human readable status message.

    Checks in order: host/port reachable, apikey accepted, and (if SAB_CAT
    is set) that the configured category is one SABnzbd reports.
    """
    # connection test, check host/port
    if not SABnzbd(nzburl='auth'):
        return "Unable to talk to SABnzbd, check HOST/PORT"

    # check apikey is valid
    cats = SABnzbd(nzburl='get_cats')
    if not cats:
        return "Unable to talk to SABnzbd, check APIKEY"

    # check category exists
    if lazylibrarian.SAB_CAT:
        catlist = formatter.getList(cats)
        if lazylibrarian.SAB_CAT not in catlist:
            msg = "SABnzbd: Unknown category [%s]\n" % lazylibrarian.SAB_CAT
            if catlist:
                msg += "Valid categories:\n" + ''.join('%s\n' % cat for cat in catlist)
            else:
                msg += "SABnzbd seems to have no categories set"
            return msg

    return "SABnzbd connection successful"
def checkLink():
    """Return a status message describing whether SABnzbd can be used.

    The message reports the first failing check (host/port, apikey, unknown
    category with the list of valid ones) or success.
    """
    # connection test, check host/port
    auth = SABnzbd(nzburl='auth')
    if not auth:
        return "Unable to talk to SABnzbd, check HOST/PORT"

    # check apikey is valid
    cats = SABnzbd(nzburl='get_cats')
    if not cats:
        return "Unable to talk to SABnzbd, check APIKEY"

    # check category exists
    if not lazylibrarian.SAB_CAT:
        return "SABnzbd connection successful"
    catlist = formatter.getList(cats)
    if lazylibrarian.SAB_CAT in catlist:
        return "SABnzbd connection successful"

    parts = ["SABnzbd: Unknown category [%s]\n" % lazylibrarian.SAB_CAT]
    if catlist:
        parts.append("Valid categories:\n")
        for cat in catlist:
            parts.append('%s\n' % cat)
    else:
        parts.append("SABnzbd seems to have no categories set")
    return ''.join(parts)
def _action(self, params, body=None, content_type=None):
    """Send one request to the uTorrent webUI.

    params is url-encoded onto <base_url>/gui/?token=<token>; body, if given,
    is sent as the request data with a Content-length header, and
    content_type, if given, as the Content-type header.

    Returns a (response code, decoded JSON) tuple. If the webUI raises an
    HTTPError the error is logged and the method falls off the end, so the
    result is None.
    """
    url = self.base_url + '/gui/' + '?token=' + self.token + '&' + urlencode(params)
    request = Request(url)

    if lazylibrarian.CONFIG['PROXY_HOST']:
        for item in getList(lazylibrarian.CONFIG['PROXY_TYPE']):
            request.set_proxy(lazylibrarian.CONFIG['PROXY_HOST'], item)
    request.add_header('User-Agent', USER_AGENT)

    if body:
        if PY2:
            request.add_data(body)
        else:
            # Request.data is an attribute on python3, not a method:
            # calling request.data(body) raised TypeError
            request.data = body
        request.add_header('Content-length', len(body))
    if content_type:
        request.add_header('Content-type', content_type)

    try:
        response = self.opener.open(request)
        return response.code, json.loads(response.read())
    except HTTPError as err:
        logger.debug('URL: %s' % url)
        logger.debug('uTorrent webUI raised the following error: ' + str(err))
def LibraryScan(dir=None):
    """Scan a directory tree for ebooks and mark matching books in the database as "Open".

    dir defaults to lazylibrarian.DOWNLOAD_DIR; nothing is done if neither is
    set or the directory does not exist. For each file with a valid book type
    the author/title are taken from embedded metadata (epub/mobi), then from
    an .opf file in the same directory (which overrides), and finally from a
    filename pattern built from EBOOK_DEST_FILE. Unknown authors are added via
    GoodReads when ADD_AUTHOR is set and the names match closely enough.
    The stats table is recreated at the start, and author book counts are
    refreshed at the end. Returns None.
    """
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        else:
            dir = lazylibrarian.DOWNLOAD_DIR
    if not os.path.isdir(dir):
        logger.warn("Cannot find directory: %s. Not scanning" % dir.decode(lazylibrarian.SYS_ENCODING, "replace"))
        return
    myDB = database.DBConnection()
    # start each scan with an empty stats table
    myDB.action("drop table if exists stats")
    myDB.action(
        "create table stats (authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, \
        GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )"
    )
    logger.info("Scanning ebook directory: %s" % dir.decode(lazylibrarian.SYS_ENCODING, "replace"))
    new_book_count = 0
    file_count = 0
    if lazylibrarian.FULL_SCAN:
        # full scan: any "Open" book whose file has gone is given the not-found status
        books = myDB.select('select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info("Missing books will be marked as %s" % status)
        for book in books:
            bookName = book["BookName"]
            bookAuthor = book["AuthorName"]
            bookID = book["BookID"]
            bookfile = book["BookFile"]
            if not (bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                logger.warn("Book %s - %s updated as not found on disk" % (bookAuthor, bookName))
    # guess this was meant to save repeat-scans of the same directory
    # if it contains multiple formats of the same book, but there was no code
    # that looked at the array. renamed from latest to processed to make
    # purpose clearer
    processed_subdirectories = []
    matchString = ""
    # backslash-escape every character of the filename template
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + "\\" + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ""
    count = -1
    booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
    # join the book types with "|"
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + "|" + book_type
    # swap the escaped $Author / $Title placeholders for named capture groups
    # NOTE(review): "[" + booktypes + "]" is a character class, so this matches a single
    # character after the dot rather than a whole extension - confirm a group was not intended
    matchString = (
        matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace("\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)")
        + "\.["
        + booktypes
        + "]"
    )
    pattern = re.compile(matchString, re.VERBOSE)
    for r, d, f in os.walk(dir):
        # prune directories in place (iterating a copy) so os.walk does not descend into them
        for directory in d[:]:
            if directory.startswith("."):
                d.remove(directory)
            # prevent magazine being scanned
            if directory.startswith("_"):
                d.remove(directory)
        for files in f:
            file_count += 1
            subdirectory = r.replace(dir, "")
            # Added new code to skip if we've done this directory before.
            # Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
                # If this is a book, try to get author/title/isbn/language
                # if epub or mobi, read metadata from the book
                # If metadata.opf exists, use that allowing it to override
                # embedded metadata. User may have edited metadata.opf
                # to merge author aliases together
                # If all else fails, try pattern match for author/title
                # and look up isbn/lang from LT or GR later
                match = 0
                if formatter.is_valid_booktype(files):
                    logger.debug(
                        "[%s] Now scanning subdirectory %s"
                        % (
                            dir.decode(lazylibrarian.SYS_ENCODING, "replace"),
                            subdirectory.decode(lazylibrarian.SYS_ENCODING, "replace"),
                        )
                    )
                    language = "Unknown"
                    isbn = ""
                    book = ""
                    author = ""
                    words = files.split(".")
                    extn = words[len(words) - 1]
                    # if it's an epub or a mobi we can try to read metadata from it
                    if (extn == "epub") or (extn == "mobi"):
                        book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                        try:
                            res = get_book_info(book_filename)
                        except:
                            res = {}
                        if "title" in res and "creator" in res:  # this is the minimum we need
                            match = 1
                            book = res["title"]
                            author = res["creator"]
                            if "language" in res:
                                language = res["language"]
                            if "identifier" in res:
                                isbn = res["identifier"]
                            if "type" in res:
                                extn = res["type"]
                            logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, extn))
                        else:
                            logger.debug("Book meta incomplete in %s" % book_filename)
                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know
                    # LL preferred authorname/bookname at this point.
                    # Allow metadata in file to override book contents as may be users pref
                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if "title" in res and "creator" in res:  # this is the minimum we need
                        match = 1
                        book = res["title"]
                        author = res["creator"]
                        if "language" in res:
                            language = res["language"]
                        if "identifier" in res:
                            isbn = res["identifier"]
                        logger.debug("file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)
                    if not match:  # no author/book from metadata file, and not embedded either
                        # from here on match holds a regex match object (or None) rather than 0/1
                        match = pattern.match(files)
                        if match:
                            author = match.group("author")
                            book = match.group("book")
                        else:
                            logger.debug("Pattern match failed [%s]" % files)
                    if match:
                        # flag that we found a book in this subdirectory
                        processed_subdirectories.append(subdirectory)
                        # If we have a valid looking isbn, and language != "Unknown", add it to cache
                        if language != "Unknown" and formatter.is_valid_isbn(isbn):
                            logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                            # we need to add it to language cache if not already
                            # there, is_valid_isbn has checked length is 10 or 13
                            if len(isbn) == 10:
                                isbnhead = isbn[0:3]
                            else:
                                isbnhead = isbn[3:6]
                            # match is reused here for the cache lookup result
                            match = myDB.action('SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone()
                            if not match:
                                myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, language))
                                logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                            else:
                                logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                        # get authors name in a consistent format
                        if "," in author:  # "surname, forename"
                            words = author.split(",")
                            author = words[1].strip() + " " + words[0].strip()  # "forename surname"
                        # NOTE(review): author[1] raises IndexError if author has fewer than two characters
                        if author[1] == " ":
                            author = author.replace(" ", ".")
                            author = author.replace("..", ".")
                        # Check if the author exists, and import the author if not,
                        # before starting any complicated book-name matching to save repeating the search
                        #
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName="%s"' % author
                        ).fetchone()
                        if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                            # no match for supplied author, but we're allowed to
                            # add new ones
                            GR = GoodReads(author)
                            try:
                                author_gr = GR.find_author_id()
                            except:
                                logger.warn("Error finding author id for [%s]" % author)
                                continue
                            # only try to add if GR data matches found author data
                            if author_gr:
                                authorname = author_gr["authorname"]
                                # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                                match_auth = author.replace(".", "_")
                                match_auth = match_auth.replace(" ", "_")
                                match_auth = match_auth.replace("__", "_")
                                match_name = authorname.replace(".", "_")
                                match_name = match_name.replace(" ", "_")
                                match_name = match_name.replace("__", "_")
                                match_name = common.remove_accents(match_name)
                                match_auth = common.remove_accents(match_auth)
                                # allow a degree of fuzziness to cater for different accented character handling.
                                # some author names have accents,
                                # filename may have the accented or un-accented version of the character
                                # The currently non-configurable value of fuzziness might need to go in config
                                # We stored GoodReads unmodified author name in
                                # author_gr, so store in LL db under that
                                # fuzz.ratio doesn't lowercase for us
                                match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                                if match_fuzz < 90:
                                    logger.debug("Failed to match author [%s] fuzz [%d]" % (author, match_fuzz))
                                    logger.debug(
                                        "Failed to match author [%s] to authorname [%s]" % (match_auth, match_name)
                                    )
                                # To save loading hundreds of books by unknown
                                # authors at GR or GB, ignore if author "Unknown"
                                if (author != "Unknown") and (match_fuzz >= 90):
                                    # use "intact" name for author that we stored in
                                    # GR author_dict, not one of the various mangled versions
                                    # otherwise the books appear to be by a different author!
                                    author = author_gr["authorname"]
                                    # this new authorname may already be in the
                                    # database, so check again
                                    check_exist_author = myDB.action(
                                        'SELECT * FROM authors where AuthorName="%s"' % author
                                    ).fetchone()
                                    if not check_exist_author:
                                        logger.debug("Adding new author [%s]" % author)
                                        try:
                                            importer.addAuthorToDB(author)
                                            check_exist_author = myDB.action(
                                                'SELECT * FROM authors where AuthorName="%s"' % author
                                            ).fetchone()
                                        except:
                                            continue
                        # check author exists in db, either newly loaded or already there
                        if not check_exist_author:
                            logger.debug("Failed to match author [%s] in database" % author)
                        else:
                            # author exists, check if this book by this author is in our database
                            # metadata might have quotes in book name
                            book = book.replace('"', "").replace("'", "")
                            bookid = find_book_in_db(myDB, author, book)
                            if bookid:
                                # check if book is already marked as "Open" (if so,
                                # we already had it)
                                check_status = myDB.action(
                                    'SELECT Status from books where BookID="%s"' % bookid
                                ).fetchone()
                                if check_status["Status"] != "Open":
                                    # update status as we've got this book
                                    myDB.action('UPDATE books set Status="Open" where BookID="%s"' % bookid)
                                    book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                                    # update book location so we can check if it
                                    # gets removed, or allow click-to-open
                                    myDB.action(
                                        'UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid)
                                    )
                                    new_book_count += 1
    cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone()
    logger.info("%s new/modified books found and added to the database" % new_book_count)
    logger.info("%s files processed" % file_count)
    stats = myDB.action(
        "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
        sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats"
    ).fetchone()
    if stats["sum(GR_book_hits)"] is not None:
        # only show stats if new books added
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug("GoogleBooks was hit %s times for books" % stats["sum(GR_book_hits)"])
            logger.debug("GoogleBooks language was changed %s times" % stats["sum(GB_lang_change)"])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug("GoodReads was hit %s times for books" % stats["sum(GR_book_hits)"])
            logger.debug("GoodReads was hit %s times for languages" % stats["sum(GR_lang_hits)"])
        logger.debug("LibraryThing was hit %s times for languages" % stats["sum(LT_lang_hits)"])
        logger.debug("Language cache was hit %s times" % stats["sum(cache_hits)"])
        logger.debug("Unwanted language removed %s books" % stats["sum(bad_lang)"])
        logger.debug("Unwanted characters removed %s books" % stats["sum(bad_char)"])
        logger.debug("Unable to cache %s books with missing ISBN" % stats["sum(uncached)"])
        logger.debug("Cache %s hits, %s miss" % (lazylibrarian.CACHE_HIT, lazylibrarian.CACHE_MISS))
        logger.debug("ISBN Language cache holds %s entries" % cachesize["counter"])
        # stats is reused here as a plain count of books with unknown language
        stats = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
        if stats:
            logger.warn("There are %s books in your library with unknown language" % stats)
    authors = myDB.select("select AuthorName from authors")
    # Update bookcounts for all authors, not just new ones - refresh may have located
    # new books for existing authors especially if switched provider gb/gr
    logger.debug("Updating bookcounts for %i authors" % len(authors))
    for author in authors:
        name = author["AuthorName"]
        havebooks = myDB.action(
            'SELECT count("BookID") as counter from books WHERE AuthorName="%s" AND (Status="Have" OR Status="Open")'
            % name
        ).fetchone()
        myDB.action('UPDATE authors set HaveBooks="%s" where AuthorName="%s"' % (havebooks["counter"], name))
        totalbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s" AND Status!="Ignored"' % name
        ).fetchone()
        myDB.action('UPDATE authors set UnignoredBooks="%s" where AuthorName="%s"' % (totalbooks["counter"], name))
    logger.info("Library scan complete")
def find_results(self, searchterm=None, queue=None):
    """ GoogleBooks performs much better if we search for author OR title
        not both at once, so if searchterm is not isbn, two searches needed.
        Lazylibrarian searches use <ll> to separate title from author in searchterm
        If this token isn't present, it's an isbn or searchterm as supplied by user

        Results are collected as a list of dicts (one per accepted book, including
        author/book/isbn fuzzy match scores) and put on queue; nothing is returned.
        Any unhandled exception is logged, and in that case nothing is put on queue.
    """
    try:
        myDB = database.DBConnection()
        resultlist = []
        # See if we should check ISBN field, otherwise ignore it
        api_strings = ['inauthor:', 'intitle:']
        if is_valid_isbn(searchterm):
            api_strings = ['isbn:']

        api_hits = 0
        ignored = 0
        total_count = 0
        no_author_count = 0
        title = ''
        authorname = ''

        if ' <ll> ' in searchterm:  # special token separates title from author
            title, authorname = searchterm.split(' <ll> ')

        fullterm = searchterm.replace(' <ll> ', ' ')
        logger.debug('Now searching Google Books API with searchterm: %s' % fullterm)

        # one pass per api keyword, each with its own url and paging
        for api_value in api_strings:
            set_url = self.url
            if api_value == "isbn:":
                set_url = set_url + quote(api_value + searchterm)
            elif api_value == 'intitle:':
                searchterm = fullterm
                if title:  # just search for title
                    # noinspection PyUnresolvedReferences
                    title = title.split(' (')[0]  # without any series info
                    searchterm = title
                searchterm = searchterm.replace("'", "").replace('"', '').strip()  # and no quotes
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
                set_url = set_url + quote(api_value + '"' + searchterm + '"')
            elif api_value == 'inauthor:':
                searchterm = fullterm
                if authorname:
                    searchterm = authorname  # just search for author
                searchterm = searchterm.strip()
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
                set_url = set_url + quote_plus(api_value + '"' + searchterm + '"')

            # these counters restart for every api keyword, so the totals logged
            # after the loop only describe the last keyword searched
            startindex = 0
            resultcount = 0
            ignored = 0
            number_results = 1
            total_count = 0
            no_author_count = 0
            try:
                # number_results starts at 1 so the first page is always requested,
                # then is replaced by the 'totalItems' value from the reply
                while startindex < number_results:

                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urlencode(self.params)

                    try:
                        jsonresults, in_cache = gb_json_request(URL)
                        if jsonresults is None:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                            logger.debug('Searching url: ' + URL)
                        if number_results == 0:
                            logger.warn('Found no results for %s with value: %s' % (api_value, searchterm))
                            break
                        else:
                            pass
                    except Exception as err:
                        if hasattr(err, 'reason'):
                            errmsg = err.reason
                        else:
                            errmsg = str(err)
                        logger.warn(
                            'Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                        break

                    # pages are stepped 40 results at a time
                    startindex += 40

                    # a reply without an 'items' key raises KeyError, which ends this keyword's search below
                    for item in jsonresults['items']:
                        total_count += 1

                        book = bookdict(item)
                        if not book['author']:
                            logger.debug('Skipped a result without authorfield.')
                            no_author_count += 1
                            continue

                        if not book['name']:
                            logger.debug('Skipped a result without title.')
                            continue

                        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
                        if "All" not in valid_langs:  # don't care about languages, accept all
                            try:
                                # skip if language is not in valid list -
                                booklang = book['lang']
                                if booklang not in valid_langs:
                                    logger.debug(
                                        'Skipped %s with language %s' % (book['name'], booklang))
                                    ignored += 1
                                    continue
                            except KeyError:
                                ignored += 1
                                logger.debug('Skipped %s where no language is found' % book['name'])
                                continue

                        # score the result against the author and title we were asked for
                        if authorname:
                            author_fuzz = fuzz.ratio(book['author'], authorname)
                        else:
                            author_fuzz = fuzz.ratio(book['author'], fullterm)

                        if title:
                            book_fuzz = fuzz.token_set_ratio(book['name'], title)
                            # lose a point for each extra word in the fuzzy matches so we get the closest match
                            words = len(getList(book['name']))
                            words -= len(getList(title))
                            book_fuzz -= abs(words)
                        else:
                            book_fuzz = fuzz.token_set_ratio(book['name'], fullterm)

                        isbn_fuzz = 0
                        if is_valid_isbn(fullterm):
                            isbn_fuzz = 100

                        highest_fuzz = max((author_fuzz + book_fuzz) / 2, isbn_fuzz)

                        dic = {':': '.', '"': '', '\'': ''}
                        bookname = replace_all(book['name'], dic)

                        bookname = unaccented(bookname)
                        bookname = bookname.strip()  # strip whitespace

                        AuthorID = ''
                        if book['author']:
                            # NOTE(review): the author name is passed as a bound parameter, so doubling
                            # the quotes changes the value being compared - confirm this is intended
                            match = myDB.match(
                                'SELECT AuthorID FROM authors WHERE AuthorName=?', (
                                    book['author'].replace('"', '""'),))
                            if match:
                                AuthorID = match['AuthorID']

                        resultlist.append({
                            'authorname': book['author'],
                            'authorid': AuthorID,
                            'bookid': item['id'],
                            'bookname': bookname,
                            'booksub': book['sub'],
                            'bookisbn': book['isbn'],
                            'bookpub': book['pub'],
                            'bookdate': book['date'],
                            'booklang': book['lang'],
                            'booklink': book['link'],
                            'bookrate': float(book['rate']),
                            'bookrate_count': book['rate_count'],
                            'bookimg': book['img'],
                            'bookpages': book['pages'],
                            'bookgenre': book['genre'],
                            'bookdesc': book['desc'],
                            'author_fuzz': author_fuzz,
                            'book_fuzz': book_fuzz,
                            'isbn_fuzz': isbn_fuzz,
                            'highest_fuzz': highest_fuzz,
                            'num_reviews': book['ratings']
                        })

                        resultcount += 1

            except KeyError:
                # leaves the api keyword loop, so any remaining keywords are not searched
                break

            logger.debug("Returning %s result%s for (%s) with keyword: %s" %
                         (resultcount, plural(resultcount), api_value, searchterm))

        logger.debug("Found %s result%s" % (total_count, plural(total_count)))
        logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
        logger.debug("Removed %s book%s with no author" % (no_author_count, plural(no_author_count)))
        logger.debug('The Google Books API was hit %s time%s for searchterm: %s' %
                     (api_hits, plural(api_hits), fullterm))
        queue.put(resultlist)

    except Exception:
        # NOTE(review): nothing is put on queue in this case - confirm callers do not block waiting for a result
        logger.error('Unhandled exception in GB.find_results: %s' % traceback.format_exc())
def setWorkID(books=None):
    """Set the goodreads WorkID for any books that don't already have one.

    books is a comma separated list of bookids; if empty, the books with a
    missing WorkID are selected from the database.
    Requests are paginated (50 bookids per request) to reduce api hits.
    Returns a message giving the number of ids updated.
    """
    myDB = database.DBConnection()
    page_size = 50
    pages = []
    if books:
        # caller supplied the list, send it as a single page
        pages.append(books)
    else:
        cmd = "select BookID,BookName from books where WorkID='' or WorkID is null"
        books = myDB.select(cmd)
        if books:
            logger.debug('Setting WorkID for %s book%s' % (len(books), plural(len(books))))
            bookids = []
            for book in books:
                bookid = book['BookID']
                if not bookid:
                    logger.debug("No bookid for %s" % book['BookName'])
                else:
                    bookids.append(bookid)
            # one comma separated string of bookids per request
            for start in range(0, len(bookids), page_size):
                pages.append(','.join(bookids[start:start + page_size]))

    counter = 0
    params = {"key": lazylibrarian.CONFIG['GR_API']}
    for page in pages:
        URL = 'https://www.goodreads.com/book/id_to_work_id/' + page + '?' + urlencode(params)
        try:
            rootxml, _ = gr_xml_request(URL, useCache=False)
            if rootxml is None:
                logger.debug("Error requesting id_to_work_id page")
                continue
            resultxml = rootxml.find('work-ids')
            if resultxml is None:
                # find() returns None when the element is absent; don't let len() raise
                logger.debug("No work-ids element in id_to_work_id page")
                continue
            if len(resultxml):
                # results are matched to the requested bookids by position
                books = getList(page)
                # Element.getiterator() was removed in python 3.9, iter() is the replacement
                for cnt, item in enumerate(resultxml.iter('item')):
                    workid = item.text
                    if not workid:
                        logger.debug("No workid returned for %s" % books[cnt])
                    else:
                        counter += 1
                        controlValueDict = {"BookID": books[cnt]}
                        newValueDict = {"WorkID": workid}
                        myDB.upsert("books", newValueDict, controlValueDict)
        except Exception as e:
            logger.error("%s parsing id_to_work_id page: %s" % (type(e).__name__, str(e)))

    msg = 'Updated %s id%s' % (counter, plural(counter))
    logger.debug("setWorkID complete: " + msg)
    return msg
def _set_calibre_column(bookids, map_ltoc, column, value):
    """Set calibre custom column to value for each lazylibrarian bookid, return number changed.

    map_ltoc maps lazylibrarian bookid to calibre id. value is 'true' to set
    the column or '' to clear it. Books with no calibre mapping are warned
    about and skipped, calibredb errors are logged and not counted.
    """
    changed = 0
    for bookid in bookids:
        if bookid not in map_ltoc:
            if value:
                logger.warn("Unable to set calibre %s true for %s" % (column, bookid))
            else:
                logger.warn("Unable to clear calibre %s for %s" % (column, bookid))
            continue
        res, err, rc = calibredb('set_custom', [column, map_ltoc[bookid], value], [])
        if rc:
            msg = "calibredb set_custom error: "
            if err:
                logger.error(msg + err)
            elif res:
                logger.error(msg + res)
            else:
                logger.error(msg + str(rc))
        else:
            changed += 1
    return changed


def syncCalibreList(col_read=None, col_toread=None, userid=None):
    """Sync a user's read/toread lists with the matching calibre custom columns.

    Get the lazylibrarian bookid for each read/toread calibre book so we can
    map our id to theirs, then sync current/supplied user's read/toread lists
    (or the supplied read/toread columns) with the calibre database.
    userid defaults to the 'll_uid' cookie of the current request, columns
    default to the user's stored CalibreRead/CalibreToRead settings.
    Only columns that are actually set are created, compared and synced.
    Return message giving totals, or an error message / calibredb result
    if the sync could not be started.
    """
    myDB = database.DBConnection()
    if not userid:
        cookie = cherrypy.request.cookie
        if cookie and 'll_uid' in cookie.keys():
            userid = cookie['ll_uid'].value
    if not userid:
        return "Error: Unable to find current userid"

    res = myDB.match('SELECT UserName,ToRead,HaveRead,CalibreRead,CalibreToRead,Perms from users where UserID=?',
                     (userid,))
    if not res:
        return "Error: Unable to get user column settings for %s" % userid

    username = res['UserName']
    if not col_read:
        col_read = res['CalibreRead']
    if not col_toread:
        col_toread = res['CalibreToRead']
    # suppress duplicates (just in case)
    toreadlist = list(set(getList(res['ToRead'])))
    readlist = list(set(getList(res['HaveRead'])))

    if not col_read and not col_toread:
        return "User %s has no calibre columns set" % username

    # check user columns exist in calibre and create if not
    res = calibredb('custom_columns')
    custom_columns = [column.split(' (')[0] for column in res[0].split('\n') if column]
    for wanted in (col_read, col_toread):
        # the user may only have one of the two columns set, don't try to create a column called ''
        if wanted and wanted not in custom_columns:
            added = calibredb('add_custom_column', [wanted, wanted, 'bool'])
            if "column created" not in added[0]:
                return added

    nomatch = 0
    map_ctol = {}
    map_ltoc = {}
    # calibreList entries carry custom columns under a '*' prefixed key
    readcol = '*' + col_read if col_read else ''
    toreadcol = '*' + col_toread if col_toread else ''

    calibre_list = calibreList(col_read, col_toread)
    if not isinstance(calibre_list, list):
        # got an error message from calibredb
        return '"%s"' % calibre_list

    for item in calibre_list:
        if toreadcol and toreadcol in item or readcol and readcol in item:
            authorname, authorid, added = addAuthorNameToDB(item['authors'], refresh=False, addbooks=False)
            if not authorname:
                logger.warn('Calibre Author [%s] not matched in lazylibrarian database' % (item['authors']))
                nomatch += 1
                continue
            if authorname != item['authors']:
                logger.debug("Changed authorname for [%s] from [%s] to [%s]" %
                             (item['title'], item['authors'], authorname))
                item['authors'] = authorname
            bookid = find_book_in_db(authorname, item['title'])
            if not bookid:
                # not in our database yet, search for it and import the first result if it is close enough
                searchterm = "%s <ll> %s" % (item['title'], authorname)
                results = search_for(unaccented(searchterm))
                if results:
                    result = results[0]
                    if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                            and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                        logger.debug("Found (%s%% %s%%) %s: %s" %
                                     (result['author_fuzz'], result['book_fuzz'],
                                      result['authorname'], result['bookname']))
                        bookid = result['bookid']
                        import_book(bookid)
            if bookid:
                # NOTE: calibre bookid is always an integer, lazylibrarian bookid is a string
                # (goodreads could be used as an int, but googlebooks can't as it's alphanumeric)
                # so convert all dict items to strings for ease of matching.
                map_ctol[str(item['id'])] = str(bookid)
                map_ltoc[str(bookid)] = str(item['id'])
            else:
                logger.warn('Calibre Book [%s] by [%s] is not in lazylibrarian database' %
                            (item['title'], authorname))
                nomatch += 1

    # Now check current users lazylibrarian read/toread against the calibre library, warn about missing ones
    # which might be books calibre doesn't have, or might be minor differences in author or title
    for label, booklist in [("Read", readlist), ("To_Read", toreadlist)]:
        for bookid in booklist:
            cmd = "SELECT AuthorID,BookName from books where BookID=?"
            book = myDB.match(cmd, (bookid,))
            if not book:
                logger.error('Error finding bookid %s' % bookid)
                continue
            cmd = "SELECT AuthorName from authors where AuthorID=?"
            author = myDB.match(cmd, (book['AuthorID'],))
            if not author:
                logger.error('Error finding authorid %s' % book['AuthorID'])
                continue

            match = False
            high = 0
            highname = ''
            highid = ''
            for item in calibre_list:
                if item['authors'] == author['AuthorName'] and item['title'] == book['BookName']:
                    logger.debug("Exact match for %s [%s]" % (label, book['BookName']))
                    map_ctol[str(item['id'])] = str(bookid)
                    map_ltoc[str(bookid)] = str(item['id'])
                    match = True
                    break
            if not match:
                # no exact title, take the closest title by the same author
                for item in calibre_list:
                    if item['authors'] == author['AuthorName']:
                        n = fuzz.token_sort_ratio(item['title'], book['BookName'])
                        if n > high:
                            high = n
                            highname = item['title']
                            highid = item['id']
                if high > 95:
                    logger.debug("Found ratio match %s%% [%s] for %s [%s]" %
                                 (high, highname, label, book['BookName']))
                    map_ctol[str(highid)] = str(bookid)
                    map_ltoc[str(bookid)] = str(highid)
                    match = True
            if not match:
                logger.warn("No match for %s %s by %s in calibre database, closest match %s%% [%s]" %
                            (label, book['BookName'], author['AuthorName'], high, highname))
                nomatch += 1

    logger.debug("BookID mapping complete, %s match %s, nomatch %s" % (username, len(map_ctol), nomatch))

    # now sync the lists
    last_read = []
    last_toread = []
    calibre_read = []
    calibre_toread = []
    cmd = 'select SyncList from sync where UserID=? and Label=?'
    if col_read:
        res = myDB.match(cmd, (userid, col_read))
        if res:
            last_read = getList(res['SyncList'])
    if col_toread:
        res = myDB.match(cmd, (userid, col_toread))
        if res:
            last_toread = getList(res['SyncList'])

    for item in calibre_list:
        if toreadcol and toreadcol in item and item[toreadcol]:  # only if True
            if str(item['id']) in map_ctol:
                calibre_toread.append(map_ctol[str(item['id'])])
            else:
                logger.warn("Calibre to_read book %s:%s has no lazylibrarian bookid" %
                            (item['authors'], item['title']))
        if readcol and readcol in item and item[readcol]:  # only if True
            if str(item['id']) in map_ctol:
                calibre_read.append(map_ctol[str(item['id'])])
            else:
                logger.warn("Calibre read book %s:%s has no lazylibrarian bookid" %
                            (item['authors'], item['title']))

    logger.debug("Found %s calibre read, %s calibre toread" % (len(calibre_read), len(calibre_toread)))
    logger.debug("Found %s lazylib read, %s lazylib toread" % (len(readlist), len(toreadlist)))

    # changes on each side are measured against the list stored at the last sync
    added_to_ll_toread = list(set(toreadlist) - set(last_toread))
    removed_from_ll_toread = list(set(last_toread) - set(toreadlist))
    added_to_ll_read = list(set(readlist) - set(last_read))
    removed_from_ll_read = list(set(last_read) - set(readlist))
    logger.debug("lazylibrarian changes to copy to calibre: %s %s %s %s" %
                 (len(added_to_ll_toread), len(removed_from_ll_toread),
                  len(added_to_ll_read), len(removed_from_ll_read)))

    added_to_calibre_toread = list(set(calibre_toread) - set(last_toread))
    removed_from_calibre_toread = list(set(last_toread) - set(calibre_toread))
    added_to_calibre_read = list(set(calibre_read) - set(last_read))
    removed_from_calibre_read = list(set(last_read) - set(calibre_read))
    logger.debug("calibre changes to copy to lazylibrarian: %s %s %s %s" %
                 (len(added_to_calibre_toread), len(removed_from_calibre_toread),
                  len(added_to_calibre_read), len(removed_from_calibre_read)))

    calibre_changes = 0
    for item in added_to_calibre_read:
        if item not in readlist:
            readlist.append(item)
            logger.debug("Lazylibrarian marked %s as read" % item)
            calibre_changes += 1
    for item in added_to_calibre_toread:
        if item not in toreadlist:
            toreadlist.append(item)
            logger.debug("Lazylibrarian marked %s as to_read" % item)
            calibre_changes += 1
    for item in removed_from_calibre_read:
        if item in readlist:
            readlist.remove(item)
            logger.debug("Lazylibrarian removed %s from read" % item)
            calibre_changes += 1
    for item in removed_from_calibre_toread:
        if item in toreadlist:
            toreadlist.remove(item)
            logger.debug("Lazylibrarian removed %s from to_read" % item)
            calibre_changes += 1
    if calibre_changes:
        myDB.action('UPDATE users SET ToRead=?,HaveRead=? WHERE UserID=?',
                    (', '.join(toreadlist), ', '.join(readlist), userid))

    # copy lazylibrarian changes to calibre, only for columns the user has set
    ll_changes = 0
    if col_toread:
        ll_changes += _set_calibre_column(added_to_ll_toread, map_ltoc, col_toread, 'true')
        ll_changes += _set_calibre_column(removed_from_ll_toread, map_ltoc, col_toread, '')
    if col_read:
        ll_changes += _set_calibre_column(added_to_ll_read, map_ltoc, col_read, 'true')
        ll_changes += _set_calibre_column(removed_from_ll_read, map_ltoc, col_read, '')

    # store current sync list as comparison for next sync
    if col_read:
        controlValueDict = {"UserID": userid, "Label": col_read}
        newValueDict = {"Date": str(time.time()), "Synclist": ', '.join(readlist)}
        myDB.upsert("sync", newValueDict, controlValueDict)
    if col_toread:
        controlValueDict = {"UserID": userid, "Label": col_toread}
        newValueDict = {"Date": str(time.time()), "Synclist": ', '.join(toreadlist)}
        myDB.upsert("sync", newValueDict, controlValueDict)

    msg = "%s sync updated: %s calibre, %s lazylibrarian" % (username, ll_changes, calibre_changes)
    return msg
def bts_file(search_dir=None):
    """Return any_file() result for a '.bts' file in search_dir, or '' if 'bts' is not in SKIPPED_EXT."""
    skipped_extensions = getList(lazylibrarian.CONFIG['SKIPPED_EXT'])
    if 'bts' in skipped_extensions:
        return any_file(search_dir, '.bts')
    return ''
def processResultList(resultlist, book, searchtype):
    """Pick the best matching nzb from resultlist for book and send it to a downloader.

    resultlist is a list of dicts with nzbtitle, nzbsize, nzburl, nzbprov,
    nzbdate and nzbmode keys. book is a dict with bookid, authorName and
    bookName keys. searchtype is only used in log messages.
    Results containing a reject word (that isn't part of the author or title),
    or larger than REJECT_MAXSIZE (in MB), or below MATCH_RATIO for either
    author or title are discarded. The highest scoring remaining result is
    stored in the wanted table and, unless the book is already Snatched,
    passed to the downloader for its mode.
    Returns True if a download was started, otherwise False.
    """
    myDB = database.DBConnection()
    # characters to lose from the nzb title before matching
    dictrepl = {
        "...": "", ".": " ", " & ": " ", " = ": " ", "?": "", "$": "s", " + ": " ", '"': "",
        ",": " ", "*": "", "(": "", ")": "", "[": "", "]": "", "#": "", "0": "", "1": "",
        "2": "", "3": "", "4": "", "5": "", "6": "", "7": "", "8": "", "9": "", "'": "",
        ":": "", "!": "", "-": " ",
        "\\s\\s": " ",  # same key as before, written with valid escapes
    }
    # ' the ': ' ', ' a ': ' ', ' and ': ' ',
    # ' to ': ' ', ' of ': ' ', ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}
    # characters to lose from the wanted author and title
    dic = {
        "...": "", ".": " ", " & ": " ", " = ": " ", "?": "", "$": "s", " + ": " ", '"': "",
        ",": "", "*": "", ":": "", ";": "", "'": "",
    }

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)
    maxsize = formatter.check_int(lazylibrarian.REJECT_MAXSIZE, 0)
    author = formatter.latinToAscii(formatter.replace_all(book["authorName"], dic))
    title = formatter.latinToAscii(formatter.replace_all(book["bookName"], dic))
    author_lower = author.lower()
    title_lower = title.lower()

    matches = []
    for nzb in resultlist:
        nzb_Title = formatter.latinToAscii(formatter.replace_all(nzb["nzbtitle"], dictrepl)).strip()
        nzb_Title = re.sub(r"\s\s+", " ", nzb_Title)  # remove extra whitespace

        nzbAuthor_match = fuzz.token_set_ratio(author, nzb_Title)
        nzbBook_match = fuzz.token_set_ratio(title, nzb_Title)
        logger.debug(u"NZB author/book Match: %s/%s for %s" % (nzbAuthor_match, nzbBook_match, nzb_Title))

        rejected = False
        nzb_lower = nzb_Title.lower()
        for word in reject_list:
            if word in nzb_lower and word not in author_lower and word not in title_lower:
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (nzb_Title, word))
                break

        nzbsize_temp = nzb["nzbsize"]  # Need to cater for when this is NONE (Issue 35)
        if nzbsize_temp is None:
            nzbsize_temp = 1000
        nzbsize = round(float(nzbsize_temp) / 1048576, 2)  # bytes to MB

        if maxsize and nzbsize > maxsize:
            rejected = True
            logger.debug("Rejecting %s, too large" % nzb_Title)

        if nzbAuthor_match >= match_ratio and nzbBook_match >= match_ratio and not rejected:
            controlValueDict = {"NZBurl": nzb["nzburl"]}
            newValueDict = {
                "NZBprov": nzb["nzbprov"],
                "BookID": book["bookid"],
                "NZBdate": formatter.now(),  # when we asked for it
                "NZBsize": nzbsize,
                "NZBtitle": (author + " - " + title + " LL.(" + book["bookid"] + ")").strip(),
                "NZBmode": nzb["nzbmode"],
                "Status": "Skipped",
            }

            score = (nzbBook_match + nzbAuthor_match) / 2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(formatter.getList(nzb_Title))
            words -= len(formatter.getList(author))
            words -= len(formatter.getList(title))
            score -= abs(words)
            matches.append([score, nzb_Title, newValueDict, controlValueDict])

    if matches:
        score, nzb_Title, newValueDict, controlValueDict = max(matches, key=lambda x: x[0])
        logger.info(u"Best match NZB (%s%%): %s using %s search" % (score, nzb_Title, searchtype))
        myDB.upsert("wanted", newValueDict, controlValueDict)

        # check if one of the other downloaders got there first
        snatchedbooks = myDB.action('SELECT * from books WHERE BookID=? and Status="Snatched"',
                                    (newValueDict["BookID"],)).fetchone()
        if not snatchedbooks:
            # use the mode of the best match, not of whichever result happened to be examined last
            if newValueDict["NZBmode"] == "torznab":
                download = TORDownloadMethod
            else:
                download = NZBDownloadMethod
            snatch = download(newValueDict["BookID"], newValueDict["NZBprov"],
                              newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            if snatch:
                notifiers.notify_snatch(newValueDict["NZBtitle"] + " at " + formatter.now())
                common.schedule_job(action="Start", target="processDir")
                return True

    logger.debug("No nzb's found for " + (book["authorName"] + " " + book["bookName"]).strip() +
                 " using searchtype " + searchtype)
    return False
def processResultList(resultlist, author, title, book):
    """Send the first acceptable rss result for book to a downloader.

    resultlist is a list of dicts with tor_title, tor_url, tor_prov, tor_feed
    and tor_size keys. author and title are the strings to match against,
    book is a dict with bookid, authorName and bookName keys.
    A result is acceptable if author and title both reach MATCH_RATIO and it
    contains no reject word (other than words in the author or title).
    Each acceptable result is stored in the wanted table and, unless the book
    is already Snatched, passed to the nzb or torrent downloader.
    Returns True as soon as a download is started, otherwise False.
    """
    myDB = database.DBConnection()
    # characters to lose from the result title before matching
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '',
                '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '',
                ':': '', '!': '', '-': ' ',
                '\\s\\s': ' '}  # same key as before, written with valid escapes
    # ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ',
    # ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)
    author_lower = author.lower()
    title_lower = title.lower()

    # bit of a misnomer now, rss can search both tor and nzb rss feeds
    for tor in resultlist:
        tor_Title = formatter.latinToAscii(formatter.replace_all(tor['tor_title'], dictrepl)).strip()
        tor_Title = re.sub(r"\s\s+", " ", tor_Title)  # remove extra whitespace

        tor_Author_match = fuzz.token_set_ratio(author, tor_Title)
        tor_Title_match = fuzz.token_set_ratio(title, tor_Title)
        logger.debug("RSS Author/Title Match: %s/%s for %s" % (tor_Author_match, tor_Title_match, tor_Title))

        rejected = False
        tor_lower = tor_Title.lower()
        for word in reject_list:
            # book is a dict, the reject word has to be compared with the title string
            if word in tor_lower and word not in author_lower and word not in title_lower:
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (tor_Title, word))
                break

        if tor_Title_match < match_ratio or tor_Author_match < match_ratio or rejected:
            continue

        logger.debug(u'Found RSS: %s' % tor['tor_title'])
        bookid = book['bookid']
        tor_Title = (book["authorName"] + ' - ' + book['bookName'] + ' LL.(' + book['bookid'] + ')').strip()
        tor_url = tor['tor_url']
        tor_prov = tor['tor_prov']
        tor_feed = tor['tor_feed']

        tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
        if tor_size_temp is None:
            tor_size_temp = 1000
        tor_size = str(round(float(tor_size_temp) / 1048576, 2)) + ' MB'

        controlValueDict = {"NZBurl": tor_url}
        newValueDict = {
            "NZBprov": tor_prov,
            "BookID": bookid,
            "NZBdate": formatter.now(),  # when we asked for it
            "NZBsize": tor_size,
            "NZBtitle": tor_Title,
            "NZBmode": "torrent",
            "Status": "Skipped"
        }
        myDB.upsert("wanted", newValueDict, controlValueDict)

        # check if one of the other downloaders got there first
        snatchedbooks = myDB.action('SELECT * from books WHERE BookID=? and Status="Snatched"',
                                    (bookid,)).fetchone()
        if snatchedbooks:
            continue

        if '.nzb' in tor_url:
            snatch = NZBDownloadMethod(bookid, tor_prov, tor_Title, tor_url)
        else:
            # http://baconbits.org/torrents.php?action=download&authkey=<authkey>&torrent_pass=<password.hashed>&id=185398
            if not tor_url.startswith('magnet'):  # magnets don't use auth
                pwd = lazylibrarian.RSS_PROV[tor_feed]['PASS']
                auth = lazylibrarian.RSS_PROV[tor_feed]['AUTH']
                # don't know what form of password hash is required, try sha1
                tor_url = tor_url.replace('<authkey>', auth).replace('<password.hashed>', sha1(pwd))
            snatch = TORDownloadMethod(bookid, tor_prov, tor_Title, tor_url)

        if snatch:
            notifiers.notify_snatch(formatter.latinToAscii(tor_Title) + ' at ' + formatter.now())
            common.schedule_job(action='Start', target='processDir')
            return True

    logger.debug("No RSS found for " + (book["authorName"] + ' ' + book['bookName']).strip())
    return False
def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped", entrystatus='Active',
                     refresh=False):
    """Load/update the books for an author from the Google Books API.

    authorid/authorname identify the author, bookstatus is the status given
    to newly added books, entrystatus is the status the author is set to once
    loading is complete, refresh bypasses the request cache.
    Results are fetched 40 at a time. Each result may be skipped for language
    or rejected, using these codes in 'rejected':
      1 no bookname, 2 title starts with a bad character, 3 future
      publication date, 4 no publication date, 5 no isbn, 6 we already have
      this author/title under another bookid, 7 we already have this bookid.
    Codes 3, 4, 5 are still imported (as Ignored) if IMP_IGNORE is set, and a
    code 7 for the same author/title updates the existing entry.
    Nothing is returned, any unhandled exception is logged.
    """
    # noinspection PyBroadException
    try:
        logger.debug('[%s] Now processing books with Google Books API' % authorname)
        # google doesnt like accents in author names
        set_url = self.url + quote('inauthor:"%s"' % unaccented_str(authorname))

        api_hits = 0
        gr_lang_hits = 0
        lt_lang_hits = 0
        gb_lang_change = 0
        cache_hits = 0
        not_cached = 0
        startindex = 0
        resultcount = 0
        removedResults = 0
        duplicates = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0
        number_results = 1
        ignorable = [3, 4, 5]  # rejected for dates or isbn, can be imported as Ignored

        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        # Artist is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)

        try:
            while startindex < number_results:
                self.params['startIndex'] = startindex
                URL = set_url + '&' + urlencode(self.params)

                try:
                    jsonresults, in_cache = gb_json_request(URL, useCache=not refresh)
                    if jsonresults is None:
                        number_results = 0
                    else:
                        if not in_cache:
                            api_hits += 1
                        number_results = jsonresults['totalItems']
                except Exception as err:
                    if hasattr(err, 'reason'):
                        errmsg = err.reason
                    else:
                        errmsg = str(err)
                    logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                    break

                if number_results == 0:
                    logger.warn('Found no results for %s' % authorname)
                    break
                else:
                    logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname))

                startindex += 40

                for item in jsonresults['items']:
                    total_count += 1
                    book = bookdict(item)
                    # skip if no author, no author is no book.
                    if not book['author']:
                        logger.debug('Skipped a result without authorfield.')
                        continue

                    # the part of the isbn used to look up the language
                    isbnhead = ""
                    if len(book['isbn']) == 10:
                        isbnhead = book['isbn'][0:3]
                    elif len(book['isbn']) == 13:
                        isbnhead = book['isbn'][3:6]

                    booklang = book['lang']
                    # do we care about language?
                    if "All" not in valid_langs:
                        if book['isbn']:
                            # seems google lies to us, sometimes tells us books are in english when they are not
                            if booklang == "Unknown" or booklang == "en":
                                googlelang = booklang
                                match = False
                                lang = myDB.match('SELECT lang FROM languages where isbn=?', (isbnhead,))
                                if lang:
                                    booklang = lang['lang']
                                    cache_hits += 1
                                    logger.debug("Found cached language [%s] for [%s]" % (booklang, isbnhead))
                                    match = True
                                if not match:  # no match in cache, try lookup dict
                                    if isbnhead:
                                        if len(book['isbn']) == 13 and book['isbn'].startswith('979'):
                                            for lang in lazylibrarian.isbn_979_dict:
                                                if isbnhead.startswith(lang):
                                                    booklang = lazylibrarian.isbn_979_dict[lang]
                                                    logger.debug("ISBN979 returned %s for %s" %
                                                                 (booklang, isbnhead))
                                                    match = True
                                                    break
                                        elif (len(book['isbn']) == 10) or \
                                                (len(book['isbn']) == 13 and book['isbn'].startswith('978')):
                                            for lang in lazylibrarian.isbn_978_dict:
                                                if isbnhead.startswith(lang):
                                                    booklang = lazylibrarian.isbn_978_dict[lang]
                                                    logger.debug("ISBN978 returned %s for %s" %
                                                                 (booklang, isbnhead))
                                                    match = True
                                                    break
                                        if match:
                                            myDB.action('insert into languages values (?, ?)',
                                                        (isbnhead, booklang))
                                if not match:
                                    booklang = thingLang(book['isbn'])
                                    lt_lang_hits += 1
                                    if booklang:
                                        match = True
                                        myDB.action('insert into languages values (?, ?)', (isbnhead, booklang))

                                if match:
                                    # We found a better language match
                                    if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]:
                                        # these are all english, may need to expand this list
                                        logger.debug("%s Google thinks [%s], we think [%s]" %
                                                     (book['name'], googlelang, booklang))
                                        gb_lang_change += 1
                                else:  # No match anywhere, accept google language
                                    booklang = googlelang

                        # skip if language is in ignore list
                        if booklang not in valid_langs:
                            logger.debug('Skipped [%s] with language %s' % (book['name'], booklang))
                            ignored += 1
                            continue

                    rejected = 0
                    check_status = False
                    book_status = bookstatus  # new_book status, or new_author status
                    audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
                    added = today()
                    locked = False
                    existing_book = None
                    bookname = book['name']
                    bookid = item['id']
                    if not bookname:
                        logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorname))
                        removedResults += 1
                        rejected = 1
                    else:
                        bookname = replace_all(unaccented(bookname), {':': '.', '"': '', '\'': ''}).strip()
                        # GoodReads sometimes has multiple bookids for the same book (same author/title, different
                        # editions) and sometimes uses the same bookid if the book is the same but the title is
                        # slightly different. Not sure if googlebooks does too, but we only want one...
                        cmd = 'SELECT Status,AudioStatus,Manual,BookAdded FROM books WHERE BookID=?'
                        existing_book = myDB.match(cmd, (bookid,))
                        if existing_book:
                            # keep the status/date of a book we already have
                            book_status = existing_book['Status']
                            audio_status = existing_book['AudioStatus']
                            locked = existing_book['Manual']
                            added = existing_book['BookAdded']
                            if locked is None:
                                locked = False
                            elif locked.isdigit():
                                locked = bool(int(locked))

                    # re.match only tests the start of the string, so this looks at the first character
                    if not rejected and re.match(r'[^\w-]', bookname):  # remove books with bad characters in title
                        logger.debug("[%s] removed book for bad characters" % bookname)
                        removedResults += 1
                        rejected = 2

                    if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                        # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                        if book['date'] > today()[:len(book['date'])]:
                            logger.debug('Rejecting %s, future publication date %s' % (bookname, book['date']))
                            removedResults += 1
                            rejected = 3

                    if not rejected and lazylibrarian.CONFIG['NO_PUBDATE']:
                        if not book['date']:
                            logger.debug('Rejecting %s, no publication date' % bookname)
                            removedResults += 1
                            rejected = 4

                    if not rejected and lazylibrarian.CONFIG['NO_ISBN']:
                        if not isbnhead:
                            logger.debug('Rejecting %s, no isbn' % bookname)
                            removedResults += 1
                            rejected = 5

                    if not rejected:
                        cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                        cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                        match = myDB.match(cmd, (bookname.replace('"', '""'), authorname.replace('"', '""')))
                        if match:
                            if match['BookID'] != bookid:
                                # we have a different book with this author/title already
                                logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                             (bookid, authorname, bookname, match['BookID']))
                                rejected = 6
                                duplicates += 1

                    if not rejected:
                        cmd = 'SELECT AuthorName,BookName FROM books,authors'
                        cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                        match = myDB.match(cmd, (bookid,))
                        if match:  # we have a book with this bookid already
                            if bookname != match['BookName'] or authorname != match['AuthorName']:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                             (bookid, authorname, bookname, match['AuthorName'],
                                              match['BookName']))
                            else:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                             (bookid, authorname, bookname))
                                check_status = True
                            duplicates += 1
                            rejected = 7

                    if check_status or not rejected or (
                            lazylibrarian.CONFIG['IMP_IGNORE'] and rejected in ignorable):  # dates, isbn
                        if not locked:
                            if not existing_book and rejected in ignorable:
                                # new book only imported because of IMP_IGNORE. This has to be decided
                                # here, the rejection codes are not known when existing_book is looked up
                                book_status = 'Ignored'
                                audio_status = 'Ignored'

                            controlValueDict = {"BookID": bookid}
                            newValueDict = {
                                "AuthorID": authorid,
                                "BookName": bookname,
                                "BookSub": book['sub'],
                                "BookDesc": book['desc'],
                                "BookIsbn": book['isbn'],
                                "BookPub": book['pub'],
                                "BookGenre": book['genre'],
                                "BookImg": book['img'],
                                "BookLink": book['link'],
                                "BookRate": float(book['rate']),
                                "BookPages": book['pages'],
                                "BookDate": book['date'],
                                "BookLang": booklang,
                                "Status": book_status,
                                "AudioStatus": audio_status,
                                "BookAdded": added
                            }
                            resultcount += 1

                            myDB.upsert("books", newValueDict, controlValueDict)
                            logger.debug("Book found: " + bookname + " " + book['date'])
                            updated = False
                            if 'nocover' in book['img'] or 'nophoto' in book['img']:
                                # try to get a cover from another source
                                workcover, source = getBookCover(bookid)
                                if workcover:
                                    logger.debug('Updated cover for %s using %s' % (bookname, source))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": workcover}
                                    myDB.upsert("books", newValueDict, controlValueDict)
                                    updated = True

                            elif book['img'] and book['img'].startswith('http'):
                                link, success, _ = cache_img("book", bookid, book['img'], refresh=refresh)
                                if success:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": link}
                                    myDB.upsert("books", newValueDict, controlValueDict)
                                    updated = True
                                else:
                                    logger.debug('Failed to cache image for %s' % book['img'])

                            serieslist = []
                            if book['series']:
                                serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))]
                            if lazylibrarian.CONFIG['ADD_SERIES']:
                                newserieslist = getWorkSeries(bookid)
                                if newserieslist:
                                    serieslist = newserieslist
                                    logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
                                    updated = True
                            setSeries(serieslist, bookid)

                            new_status = setStatus(bookid, serieslist, bookstatus)

                            if new_status != book_status:
                                book_status = new_status
                                updated = True

                            worklink = getWorkPage(bookid)
                            if worklink:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"WorkPage": worklink}
                                myDB.upsert("books", newValueDict, controlValueDict)

                            if not existing_book:
                                logger.debug("[%s] Added book: %s [%s] status %s" %
                                             (authorname, bookname, booklang, book_status))
                                added_count += 1
                            elif updated:
                                logger.debug("[%s] Updated book: %s [%s] status %s" %
                                             (authorname, bookname, booklang, book_status))
                                updated_count += 1
                        else:
                            # NOTE(review): counted here for locked books - confirm this is the intended branch
                            book_ignore_count += 1
        except KeyError:
            # a result page without the expected keys ends the import
            pass

        deleteEmptySeries()
        logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' %
                     (authorname, api_hits, plural(api_hits)))

        cmd = 'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID=?'
        cmd += ' AND Status != "Ignored" order by BookDate DESC'
        lastbook = myDB.match(cmd, (authorid,))

        if lastbook:  # maybe there are no books [remaining] for this author
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
            lastbookimg = lastbook['BookImg']
        else:
            lastbookname = ""
            lastbooklink = ""
            lastbookdate = ""
            lastbookimg = ""

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": entrystatus,
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate,
            "LastBookImg": lastbookimg
        }
        myDB.upsert("authors", newValueDict, controlValueDict)

        logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
        logger.debug("Removed %s unwanted language result%s for author" % (ignored, plural(ignored)))
        logger.debug("Removed %s bad character or no-name result%s for author" %
                     (removedResults, plural(removedResults)))
        logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates)))
        logger.debug("Found %s book%s by author marked as Ignored" %
                     (book_ignore_count, plural(book_ignore_count)))
        logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount)))

        myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                    (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                     cache_hits, ignored, removedResults, not_cached, duplicates))

        if refresh:
            logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                        (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
        else:
            logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                        (authorname, added_count, plural(added_count)))

    except Exception:
        logger.error('Unhandled exception in GB.get_author_books: %s' % traceback.format_exc())
def processResultList(resultlist, author, title, book):
    """Pick the best RSS result for a book and pass it to a download method.

    Every entry in ``resultlist`` has its title cleaned up and fuzzy-matched
    against ``author`` and ``title``.  Entries that contain a reject word,
    exceed the configured maximum size, or score below the configured match
    ratio are ignored.  The highest scoring survivor is written to the
    ``wanted`` table and, unless the book is already marked "Snatched",
    handed to the NZB or torrent download method.

    Args:
        resultlist: iterable of result mappings; the keys read here are
            'tor_title', 'tor_size', 'tor_url' and 'tor_prov'.
        author: author name to match against each result title.
        title: book title to match against each result title.
        book: mapping describing the wanted book; the keys read here are
            'bookid', 'authorName' and 'bookName'.

    Returns:
        True if the best match was handed to a download method and that
        method returned a truthy value, otherwise False.
    """
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '',
                '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '',
                '-': ' ', '\\s\\s': ' '}

    # These do not change between results, so look them up once.
    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)
    maxsize = formatter.check_int(lazylibrarian.REJECT_MAXSIZE, 0)
    author_lower = author.lower()
    title_lower = title.lower()

    matches = []

    # bit of a misnomer now, rss can search both tor and nzb rss feeds
    for tor in resultlist:
        torTitle = formatter.latinToAscii(formatter.replace_all(tor['tor_title'], dictrepl)).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        tor_Author_match = fuzz.token_set_ratio(author, torTitle)
        tor_Title_match = fuzz.token_set_ratio(title, torTitle)
        logger.debug("RSS Author/Title Match: %s/%s for %s" % (tor_Author_match, tor_Title_match, torTitle))

        rejected = False
        torTitle_lower = torTitle.lower()
        for word in reject_list:
            # A reject word only counts if it is not part of the author or
            # title we are looking for.  `book` is a mapping, so the wanted
            # title string is what has to be checked here.
            if word in torTitle_lower and word not in author_lower and word not in title_lower:
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (torTitle, word))
                break

        tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
        if tor_size_temp is None:
            tor_size_temp = 1000
        tor_size = round(float(tor_size_temp) / 1048576, 2)

        if maxsize and tor_size > maxsize:
            rejected = True
            logger.debug("Rejecting %s, too large" % torTitle)

        if tor_Title_match >= match_ratio and tor_Author_match >= match_ratio and not rejected:
            bookid = book['bookid']
            tor_Title = (book["authorName"] + ' - ' + book['bookName'] +
                         ' LL.(' + book['bookid'] + ')').strip()

            controlValueDict = {"NZBurl": tor['tor_url']}
            newValueDict = {
                "NZBprov": tor['tor_prov'],
                "BookID": bookid,
                "NZBdate": formatter.now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }

            score = (tor_Title_match + tor_Author_match) / 2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(formatter.getList(torTitle))
            words -= len(formatter.getList(author))
            words -= len(formatter.getList(title))
            score -= abs(words)
            matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]
        # NOTE(review): `searchtype` is not defined inside this function;
        # presumably it is a module-level name - confirm.
        logger.info(u'Best match RSS (%s%%): %s using %s search' % (score, nzb_Title, searchtype))

        myDB.upsert("wanted", newValueDict, controlValueDict)

        snatchedbooks = myDB.action('SELECT * from books WHERE BookID=? and Status="Snatched"',
                                    (newValueDict["BookID"],)).fetchone()

        if not snatchedbooks:  # check if one of the other downloaders got there first
            tor_url = controlValueDict["NZBurl"]
            if '.nzb' in tor_url:
                snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], tor_url)
            else:
                snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], tor_url)

            if snatch:
                notifiers.notify_snatch(newValueDict["NZBtitle"] + ' at ' + formatter.now())
                common.schedule_job(action='Start', target='processDir')
                return True

    logger.debug("No RSS found for " + (book["authorName"] + ' ' + book['bookName']).strip())
    return False
def DirectDownloadMethod(bookid=None, dl_title=None, dl_url=None, library='eBook'):
    """Fetch a book file directly from ``dl_url`` and save it for processing.

    The response body is written to ``<Download dir>/<basename>/<basename><ext>``.
    The extension comes from the last word of ``dl_title`` when that word is
    one of the configured ebook types, otherwise from ``magic`` content
    sniffing when available; if neither identifies the type the file is
    written without an extension.

    On success the book (or audiobook) and the matching ``wanted`` row are
    marked "Snatched"; on failure the ``wanted`` row is marked "Failed".

    Args:
        bookid: id of the book row to update.
        dl_title: display title of the download; also used to build the
            destination directory and file name.
        dl_url: URL to fetch; also the key of the ``wanted`` row.
        library: 'eBook' or 'AudioBook', selects which status column of the
            book row is updated.

    Returns:
        True if the file was written and the database updated, else False.
    """
    myDB = database.DBConnection()
    downloadID = False
    Source = "DIRECT"

    logger.debug("Starting Direct Download for [%s]" % dl_title)
    proxies = proxyList()
    headers = {'Accept-encoding': 'gzip', 'User-Agent': USER_AGENT}
    try:
        r = requests.get(dl_url, headers=headers, timeout=90, proxies=proxies)
    except requests.exceptions.Timeout:
        logger.warn('Timeout fetching file from url: %s' % dl_url)
        return False
    except Exception as e:
        if hasattr(e, 'reason'):
            logger.warn('%s fetching file from url: %s, %s' % (type(e).__name__, dl_url, e.reason))
        else:
            logger.warn('%s fetching file from url: %s, %s' % (type(e).__name__, dl_url, str(e)))
        return False

    if not str(r.status_code).startswith('2'):
        logger.debug("Got a %s response for %s" % (r.status_code, dl_url))
    elif len(r.content) < 1000:
        logger.debug("Only got %s bytes for %s, rejecting" % (len(r.content), dl_title))
    else:
        extn = ''
        basename = ''
        if ' ' in dl_title:
            # last word is often the extension - but not always...
            basename, extn = dl_title.rsplit(' ', 1)

        if extn and extn in getList(lazylibrarian.CONFIG['EBOOK_TYPE']):
            dl_title = '.'.join(dl_title.rsplit(' ', 1))
            # The destination file is built as basename + extn, so the
            # extension needs its leading dot here.
            extn = '.' + extn
        else:
            # The last word was not an extension, so it belongs to the name
            # and must not be appended to the file name a second time.
            basename = dl_title
            extn = ''
            if magic:
                mtype = magic.from_buffer(r.content)
                if 'EPUB' in mtype:
                    extn = '.epub'
                elif 'Mobipocket' in mtype:  # also true for azw and azw3, does it matter?
                    extn = '.mobi'
                elif 'PDF' in mtype:
                    extn = '.pdf'
                else:
                    logger.debug("magic reports %s" % mtype)
            else:
                logger.warn("Don't know the filetype for %s" % dl_title)

        logger.debug("File download got %s bytes for %s" % (len(r.content), dl_title))
        destdir = os.path.join(lazylibrarian.DIRECTORY('Download'), basename)
        if not os.path.isdir(destdir):
            _ = mymakedirs(destdir)

        # Use the md5 from the url as the download id when there is one,
        # otherwise fall back to a hash of the url itself.
        try:
            hashid = dl_url.split("md5=")[1].split("&")[0]
        except IndexError:
            hashid = sha1(encode(dl_url)).hexdigest()

        destfile = os.path.join(destdir, basename + extn)
        try:
            with open(destfile, 'wb') as bookfile:
                bookfile.write(r.content)
            setperm(destfile)
            downloadID = hashid
        except Exception as e:
            logger.error("%s writing book to %s, %s" % (type(e).__name__, destfile, e))

    if downloadID:
        logger.debug('File %s has been downloaded from %s' % (dl_title, dl_url))
        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?', (bookid,))
        elif library == 'AudioBook':
            myDB.action('UPDATE books SET audiostatus="Snatched" WHERE BookID=?', (bookid,))
        myDB.action('UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
                    (Source, downloadID, dl_url))
        return True

    logger.error('Failed to download file @ <a href="%s">%s</a>' % (dl_url, dl_url))
    myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (dl_url,))
    return False
def search_magazines(mags=None, reset=False):
    """Search the enabled providers for new issues of active magazines.

    For every magazine to search, results from the nzb, torrent and rss
    providers are merged into one list.  Each result is checked against the
    magazine title, the size limit, the blacklist of failed urls and the
    magazine's reject words, then its trailing words are parsed as an issue
    date or issue number.  Results newer than the last acquired issue are
    stored in the ``wanted`` table and sent to a download method; the rest
    are stored in ``pastissues`` as "Skipped".

    Args:
        mags: optional list of mappings whose 'bookid' is a magazine title;
            when None every magazine with Status "Active" is searched.
        reset: when true, the 'search_magazines' job is restarted at the end.

    Returns:
        None.  Any exception is logged with its traceback, not propagated.
    """
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if mags is None:  # backlog search
            searchmags = myDB.select(
                'SELECT Title, Regex, LastAcquired, IssueDate from magazines WHERE Status="Active"')
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select(
                    'SELECT Title, Regex, LastAcquired, IssueDate from magazines '
                    'WHERE Title=? AND Status="Active"', (magazine['bookid'],))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        logger.debug(u"Removing old magazine search results")
        myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']

            if not searchterm:
                searchterm = searchmag['Title']

            dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}

            searchterm = unaccented_str(replace_all(searchterm, dic))
            searchterm = re.sub(r'[\.\-\/]', ' ', searchterm).encode(lazylibrarian.SYS_ENCODING)
            searchlist.append({"bookid": bookid, "searchterm": searchterm})

        if not searchlist:
            logger.warn('There is nothing to search for. Mark some magazines as active.')

        for book in searchlist:

            resultlist = []
            tor_resultlist = []
            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    logger.warn('No nzb providers are set. Check config for NEWZNAB or TORZNAB providers')

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag')
                if not nproviders:
                    logger.warn('No torrent providers are set. Check config for TORRENT providers')

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders = IterateOverRSSSites(book, 'mag')
                if not nproviders:
                    logger.warn('No rss providers are set. Check config for RSS providers')

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        resultlist.append({
                            'bookid': book['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': item['tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                            'nzbsize': item['tor_size'],
                            'nzbmode': item['tor_type']
                        })

            if not resultlist:
                logger.debug("Adding magazine %s to queue." % book['searchterm'])

            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                for nzb in resultlist:
                    total_nzbs = total_nzbs + 1
                    bookid = nzb['bookid']
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(nzbsize_temp, 1000)  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    results = myDB.match('SELECT * from magazines WHERE Title=?', (bookid,))
                    if not results:
                        logger.debug('Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid))
                        bad_name = bad_name + 1
                    else:
                        rejected = False
                        maxsize = check_int(lazylibrarian.REJECT_MAGSIZE, 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            control_date = results['IssueDate']
                            reject_list = getList(results['Reject'])
                            dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': ''}
                            nzbtitle_formatted = replace_all(nzbtitle, dic).strip()
                            # Need to make sure that substrings of magazine titles don't get found
                            # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this
                            # remove extra spaces if they're in a row
                            nzbtitle_exploded_temp = " ".join(nzbtitle_formatted.split())
                            nzbtitle_exploded = nzbtitle_exploded_temp.split(' ')

                            if ' ' in bookid:
                                bookid_exploded = bookid.split(' ')
                            else:
                                bookid_exploded = [bookid]

                            # check nzb starts with magazine title followed by a date
                            # eg The MagPI Issue 22 - July 2015

                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz
                                mag_title_match = fuzz.token_set_ratio(
                                    unaccented(bookid),
                                    unaccented(nzbtitle_formatted))

                                if mag_title_match < lazylibrarian.MATCH_RATIO:
                                    logger.debug(
                                        u"Magazine token set Match failed: " + str(
                                            mag_title_match) + "% for " + nzbtitle_formatted)
                                    rejected = True
                                else:
                                    logger.debug(
                                        u"Magazine matched: " + str(
                                            mag_title_match) + "% " + bookid + " for " + nzbtitle_formatted)
                            else:
                                rejected = True

                        if not rejected:
                            # the url comes from the provider, so never paste it into the sql text
                            already_failed = myDB.match(
                                'SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (nzburl,))
                            if already_failed:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, already_failed['NZBprov']))
                                rejected = True

                        if not rejected:
                            lower_title = unaccented(nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                                    break

                        if not rejected:
                            # some magazine torrent uploaders add their sig in [] or {}
                            # Fortunately for us, they always seem to add it at the end
                            # also some magazine torrent titles are "magazine_name some_form_of_date pdf"
                            # or other words we don't want. Should make the word list configurable.
                            # so strip all the trailing junk...
                            strip_list = ['pdf', 'true', 'truepdf', 'german', 'ebooks']
                            # stop when the list runs out, a title made up only of
                            # junk words must not raise IndexError here
                            while nzbtitle_exploded and (
                                    nzbtitle_exploded[-1][0] in '[{' or
                                    nzbtitle_exploded[-1].lower() in strip_list):
                                nzbtitle_exploded.pop()  # gotta love the function names

                            # need at least one word magazine title and two date components
                            if len(nzbtitle_exploded) > 2:
                                # regexA = DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                                regexA_year = nzbtitle_exploded[-1]
                                regexA_month_temp = nzbtitle_exploded[-2]
                                regexA_month = month2num(unaccented(regexA_month_temp))
                                if not regexA_year.isdigit() or int(regexA_year) < 1900 or int(regexA_year) > 2100:
                                    regexA_year = 'fail'  # force date failure

                                regexA_day = nzbtitle_exploded[-3].rstrip(',').zfill(2)
                                if regexA_day.isdigit():
                                    if int(regexA_day) > 31:  # probably issue number nn
                                        regexA_day = '01'
                                else:
                                    regexA_day = '01'  # just MonthName YYYY

                                try:
                                    newdatish = regexA_year + '-' + regexA_month + '-' + regexA_day
                                    # try to make sure the year/month/day are valid, exception if not
                                    # ie don't accept day > 31, or 30 in some months, or month <1 or >12
                                    # also handles multiple date format named issues eg Jan 2014, 01 2014
                                    # datetime will give a ValueError if not a good date or a param is not int
                                    datetime.date(int(regexA_year), int(regexA_month), int(regexA_day))

                                except ValueError:
                                    # regexB = MonthName DD YYYY or MonthName DD, YYYY
                                    regexB_year = nzbtitle_exploded[-1]
                                    regexB_month_temp = nzbtitle_exploded[-3]
                                    regexB_month = month2num(unaccented(regexB_month_temp))
                                    regexB_day = nzbtitle_exploded[-2].rstrip(',').zfill(2)
                                    if not regexB_year.isdigit() or int(regexB_year) < 1900 or int(regexB_year) > 2100:
                                        regexB_year = 'fail'

                                    try:
                                        newdatish = regexB_year + '-' + regexB_month + '-' + regexB_day
                                        # datetime will give a ValueError if not a good date or a param is not int
                                        datetime.date(int(regexB_year), int(regexB_month), int(regexB_day))

                                    except ValueError:
                                        # regexC = YYYY MM or YYYY MM DD
                                        # (can't get MM/DD if named YYYY Issue nn)
                                        # First try YYYY MM
                                        regexC_year = nzbtitle_exploded[-2]
                                        if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100:
                                            regexC_month = nzbtitle_exploded[-1].zfill(2)
                                            regexC_day = '01'
                                        else:  # try YYYY MM DD
                                            regexC_year = nzbtitle_exploded[-3]
                                            regexC_month = 0
                                            regexC_day = 0
                                            if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100:
                                                regexC_month = nzbtitle_exploded[-2].zfill(2)
                                                regexC_day = nzbtitle_exploded[-1].zfill(2)
                                            else:
                                                regexC_year = 'fail'

                                        try:
                                            newdatish = regexC_year + '-' + regexC_month + '-' + regexC_day
                                            # datetime will give a ValueError if not a good date or a param is not int
                                            datetime.date(int(regexC_year), int(regexC_month), int(regexC_day))

                                        except Exception:
                                            # regexD Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                                            try:
                                                IssueLabel = nzbtitle_exploded[-2]
                                                if IssueLabel.lower() in ["issue", "no", "nr", "vol"]:
                                                    # issue nn
                                                    regexD_issue = nzbtitle_exploded[-1]
                                                    if regexD_issue.isdigit():
                                                        newdatish = str(int(regexD_issue))  # 4 == 04 == 004
                                                    else:
                                                        # don't carry on with the leftover value of a
                                                        # failed date attempt, try the next format
                                                        raise ValueError
                                                else:
                                                    IssueLabel = nzbtitle_exploded[-3]
                                                    if IssueLabel.lower() in ["issue", "no", "nr", "vol"]:
                                                        # issue nn, YYYY
                                                        regexD_issue = nzbtitle_exploded[-2]
                                                        regexD_issue = regexD_issue.strip(',')
                                                        if regexD_issue.isdigit():
                                                            newdatish = str(int(regexD_issue))  # 4 == 04 == 004
                                                        else:
                                                            raise ValueError
                                                        regexD_year = nzbtitle_exploded[-1]
                                                        if regexD_year.isdigit():
                                                            if int(regexD_year) < int(datetime.date.today().year):
                                                                newdatish = 0  # it's old
                                                    else:
                                                        raise ValueError
                                            except Exception:
                                                # regexE nn YYYY issue number without "Nr" before it
                                                # nn is assumed not to be a month as they are normally names not digits
                                                try:
                                                    regexE_year = nzbtitle_exploded[-1]
                                                    regexE_issue = nzbtitle_exploded[-2]
                                                    logger.debug("[%s][%s]" % (regexE_year, regexE_issue))
                                                    if regexE_issue.isdigit():
                                                        newdatish = int(regexE_issue)
                                                        if int(regexE_year) < int(datetime.date.today().year):
                                                            newdatish = 0  # it's old
                                                    else:
                                                        raise ValueError
                                                except Exception:
                                                    # regexF issue and year as a single 6 digit string eg 222015
                                                    try:
                                                        regexF = nzbtitle_exploded[-1]
                                                        if regexF.isdigit() and len(regexF) == 6:
                                                            regexF_issue = regexF[:2]
                                                            regexF_year = regexF[2:]
                                                            newdatish = str(int(regexF_issue))  # 4 == 04 == 004
                                                            if int(regexF_year) < int(datetime.date.today().year):
                                                                newdatish = 0  # it's old
                                                        else:
                                                            raise ValueError

                                                    except Exception:
                                                        logger.debug('Magazine %s not in a recognised date format.' %
                                                                     nzbtitle_formatted)
                                                        bad_date = bad_date + 1
                                                        # allow issues with good name but bad date to be included
                                                        # so user can manually select them, incl those with issue numbers
                                                        newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                            else:
                                logger.debug('Magazine [%s] does not match the search term [%s].' % (
                                    nzbtitle_formatted, bookid))
                                bad_name = bad_name + 1
                                continue

                            # wanted issues go into wanted table marked "Wanted"
                            # the rest into pastissues table marked "Skipped"
                            insert_table = "pastissues"
                            insert_status = "Skipped"

                            if control_date is None:  # we haven't got any copies of this magazine yet
                                # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                                # could perhaps calc differently for weekly, biweekly etc
                                # or for magazines with only an issue number, use zero
                                if '-' in str(newdatish):
                                    start_time = time.time()
                                    start_time -= int(lazylibrarian.MAG_AGE) * 24 * 60 * 60  # number of seconds in days
                                    if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                        start_time = 0
                                    control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))
                                    logger.debug('Magazine date comparing to %s' % control_date)
                                else:
                                    control_date = 0

                            if '-' in str(control_date) and '-' in str(newdatish):
                                # only grab a copy if it's newer than the most recent we have,
                                # or newer than a month ago if we have none
                                comp_date = datecompare(newdatish, control_date)
                            elif '-' not in str(control_date) and '-' not in str(newdatish):
                                # for issue numbers, check if later than last one we have
                                comp_date = int(newdatish) - int(control_date)
                                newdatish = "%s" % newdatish
                                newdatish = newdatish.zfill(4)  # pad so we sort correctly
                            else:
                                # invalid comparison of date and issue number
                                logger.debug('Magazine %s incorrect date or issue format.' % nzbtitle_formatted)
                                bad_date = bad_date + 1
                                newdatish = "1970-01-01"  # this is our fake date for ones we can't decipher
                                comp_date = 0

                            if comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date = new_date + 1
                                issue = bookid + ',' + newdatish
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                                    issues.append(issue)
                                    insert_table = "wanted"
                                    insert_status = "Wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug('This issue of %s is already flagged for download' % issue)
                            else:
                                if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                    logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                                    old_date = old_date + 1

                            # store only the _new_ matching results
                            # Don't add a new entry if this issue has been found on an earlier search
                            # and status has been user-set ( we only delete the "Skipped" ones )
                            # In "wanted" table it might be already snatched/downloading/processing

                            # insert_table is one of the two literals set above, the values are bound
                            mag_entry = myDB.select(
                                'SELECT * from %s WHERE NZBtitle=? and NZBprov=?' % insert_table,
                                (nzbtitle, nzbprov))
                            if not mag_entry:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": newdatish,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict, controlValueDict)

                        else:
                            # logger.debug('Magazine [%s] was rejected.' % nzbtitle_formatted)
                            bad_name = bad_name + 1

                logger.info('Found %i result%s for %s. %i new, %i old, %i fail date, %i fail name: %i to download' % (
                    total_nzbs, plural(total_nzbs), bookid, new_date, old_date, bad_date, bad_name, len(maglist)))

                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch = TORDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbprov'],
                            magazine['nzbtitle'],
                            magazine['nzburl'])
                    else:
                        snatch = NZBDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbprov'],
                            magazine['nzbtitle'],
                            magazine['nzburl'])
                    if snatch:
                        logger.info('Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"]))
                        notify_snatch("%s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now()))
                        scheduleJob(action='Start', target='processDir')
                maglist = []

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc())
def find_book_in_db(myDB, author, book):
    """Find a book by this author in the database, allowing fuzzy titles.

    An exact match on author name and book name is preferred.  Failing that,
    every book stored for the author is compared with ``book`` using
    ``fuzz.ratio`` and ``fuzz.partial_ratio`` on lowercased, unaccented
    titles, losing one point per word of difference in length.  The best
    ratio is accepted above 90, otherwise the best partial ratio above 75.

    Args:
        myDB: database connection providing ``action`` and ``select``.
        author: author name, compared against the AuthorName column.
        book: book title to look for.

    Returns:
        The BookID of the matching book, or 0 if nothing matched.
    """
    # Names come from file metadata, so bind them as parameters instead of
    # pasting them into the sql text (a quote in a name broke the query).
    match = myDB.action(
        'SELECT BookID FROM books where AuthorName=? and BookName=?', (author, book)).fetchone()
    if match:
        logger.debug('Exact match [%s]' % book)
        return match['BookID']

    # No exact match
    # Try a more complex fuzzy match against each book in the db by this author
    # Using hard-coded ratios for now, ratio high (>90), partial_ratio lower (>75)
    # These are results that work well on my library, minimal false matches and no misses
    # on books that should be matched
    # Maybe make ratios configurable in config.ini later
    books = myDB.select('SELECT BookID,BookName FROM books where AuthorName=?', (author,))

    best_ratio = 0
    best_partial = 0
    ratio_name = ""
    partial_name = ""
    ratio_id = 0
    partial_id = 0

    # tidy up everything to raise fuzziness scores
    # still need to lowercase for matching against partial_name later on
    # (the wanted title is the same for every candidate, so prepare it once)
    book_lower = unaccented(book.lower())
    book_words = len(getList(book_lower))

    for a_book in books:
        a_book_lower = unaccented(a_book['BookName'].lower())
        a_book_words = len(getList(a_book_lower))

        ratio = fuzz.ratio(book_lower, a_book_lower)
        partial = fuzz.partial_ratio(book_lower, a_book_lower)

        # lose a point for each extra word in the fuzzy matches so we get the closest match
        extra_words = abs(book_words - a_book_words)
        ratio -= extra_words
        partial -= extra_words

        if ratio > best_ratio:
            best_ratio = ratio
            ratio_name = a_book['BookName']
            ratio_id = a_book['BookID']
        if partial > best_partial:
            best_partial = partial
            partial_name = a_book['BookName']
            partial_id = a_book['BookID']

        if partial == best_partial:
            # prefer the match closest to the left, ie prefer starting with a match and ignoring the rest
            # this eliminates most false matches against omnibuses
            # find the position of the shortest string in the longest
            # NOTE(review): str.find returns -1 when there is no substring
            # match, which compares as "further left" than any real
            # position - confirm that is the intended tie-break.
            if book_words >= a_book_words:
                match1 = book_lower.find(a_book_lower)
            else:
                match1 = a_book_lower.find(book_lower)

            partial_lower = partial_name.lower()
            if book_words >= len(getList(partial_lower)):
                match2 = book_lower.find(partial_lower)
            else:
                match2 = partial_lower.find(book_lower)

            if match1 < match2:
                logger.debug(
                    "Fuzz left change, prefer [%s] over [%s] for [%s]" %
                    (a_book['BookName'], partial_name, book))
                best_partial = partial
                partial_name = a_book['BookName']
                partial_id = a_book['BookID']

    if best_ratio > 90:
        logger.debug(
            "Fuzz match ratio [%d] [%s] [%s]" %
            (best_ratio, book, ratio_name))
        return ratio_id
    if best_partial > 75:
        logger.debug(
            "Fuzz match partial [%d] [%s] [%s]" %
            (best_partial, book, partial_name))
        return partial_id

    logger.debug(
        'Fuzz failed [%s - %s] ratio [%d,%s], partial [%d,%s]' %
        (author, book, best_ratio, ratio_name, best_partial, partial_name))
    return 0
def LibraryScan(startdir=None): """ Scan a directory tree adding new books into database Return how many books you added """ if not startdir: if not lazylibrarian.DESTINATION_DIR: return 0 else: startdir = lazylibrarian.DESTINATION_DIR if not os.path.isdir(startdir): logger.warn( 'Cannot find directory: %s. Not scanning' % startdir) return 0 myDB = database.DBConnection() # keep statistics of full library scans if startdir == lazylibrarian.DESTINATION_DIR: myDB.action('DELETE from stats') logger.info('Scanning ebook directory: %s' % startdir) new_book_count = 0 file_count = 0 author = "" if lazylibrarian.FULL_SCAN and startdir == lazylibrarian.DESTINATION_DIR: books = myDB.select( 'select AuthorName, BookName, BookFile, BookID from books where Status="Open"') status = lazylibrarian.NOTFOUND_STATUS logger.info('Missing books will be marked as %s' % status) for book in books: bookName = book['BookName'] bookAuthor = book['AuthorName'] bookID = book['BookID'] bookfile = book['BookFile'] if not(bookfile and os.path.isfile(bookfile)): myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID)) myDB.action('update books set BookFile="" where BookID="%s"' % bookID) logger.warn('Book %s - %s updated as not found on disk' % (bookAuthor, bookName)) # to save repeat-scans of the same directory if it contains multiple formats of the same book, # keep track of which directories we've already looked at processed_subdirectories = [] matchString = '' for char in lazylibrarian.EBOOK_DEST_FILE: matchString = matchString + '\\' + char # massage the EBOOK_DEST_FILE config parameter into something we can use # with regular expression matching booktypes = '' count = -1 booktype_list = getList(lazylibrarian.EBOOK_TYPE) for book_type in booktype_list: count += 1 if count == 0: booktypes = book_type else: booktypes = booktypes + '|' + book_type matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace( "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") 
+ '\.[' + booktypes + ']' pattern = re.compile(matchString, re.VERBOSE) for r, d, f in os.walk(startdir): for directory in d[:]: # prevent magazine being scanned if directory.startswith("_") or directory.startswith("."): d.remove(directory) for files in f: file_count += 1 if isinstance(r, str): r = r.decode(lazylibrarian.SYS_ENCODING) subdirectory = r.replace(startdir, '') # Added new code to skip if we've done this directory before. # Made this conditional with a switch in config.ini # in case user keeps multiple different books in the same subdirectory if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories): logger.debug("[%s] already scanned" % subdirectory) else: # If this is a book, try to get author/title/isbn/language # if epub or mobi, read metadata from the book # If metadata.opf exists, use that allowing it to override # embedded metadata. User may have edited metadata.opf # to merge author aliases together # If all else fails, try pattern match for author/title # and look up isbn/lang from LT or GR later match = 0 if is_valid_booktype(files): logger.debug("[%s] Now scanning subdirectory %s" % (startdir, subdirectory)) language = "Unknown" isbn = "" book = "" author = "" extn = os.path.splitext(files)[1] # if it's an epub or a mobi we can try to read metadata from it if (extn == ".epub") or (extn == ".mobi"): book_filename = os.path.join( r.encode(lazylibrarian.SYS_ENCODING), files.encode(lazylibrarian.SYS_ENCODING)) try: res = get_book_info(book_filename) except: res = {} if 'title' in res and 'creator' in res: # this is the minimum we need match = 1 book = res['title'] author = res['creator'] if 'language' in res: language = res['language'] if 'identifier' in res: isbn = res['identifier'] if 'type' in res: extn = res['type'] logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, extn)) else: logger.debug("Book meta incomplete in %s" % book_filename) # calibre uses "metadata.opf", LL uses "bookname - 
authorname.opf" # just look for any .opf file in the current directory since we don't know # LL preferred authorname/bookname at this point. # Allow metadata in file to override book contents as may be users pref metafile = opf_file(r) try: res = get_book_info(metafile) except: res = {} if 'title' in res and 'creator' in res: # this is the minimum we need match = 1 book = res['title'] author = res['creator'] if 'language' in res: language = res['language'] if 'identifier' in res: isbn = res['identifier'] logger.debug( "file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book)) else: logger.debug("File meta incomplete in %s" % metafile) if not match: # no author/book from metadata file, and not embedded either match = pattern.match(files) if match: author = match.group("author") book = match.group("book") else: logger.debug("Pattern match failed [%s]" % files) if match: # flag that we found a book in this subdirectory processed_subdirectories.append(subdirectory) # If we have a valid looking isbn, and language != "Unknown", add it to cache if language != "Unknown" and is_valid_isbn(isbn): logger.debug( "Found Language [%s] ISBN [%s]" % (language, isbn)) # we need to add it to language cache if not already # there, is_valid_isbn has checked length is 10 or 13 if len(isbn) == 10: isbnhead = isbn[0:3] else: isbnhead = isbn[3:6] match = myDB.action( 'SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone() if not match: myDB.action( 'insert into languages values ("%s", "%s")' % (isbnhead, language)) logger.debug( "Cached Lang [%s] ISBN [%s]" % (language, isbnhead)) else: logger.debug( "Already cached Lang [%s] ISBN [%s]" % (language, isbnhead)) # get authors name in a consistent format if "," in author: # "surname, forename" words = author.split(',') author = words[1].strip() + ' ' + words[0].strip() # "forename surname" if author[1] == ' ': author = author.replace(' ', '.') author = author.replace('..', '.') # Check if the author exists, and import 
the author if not, # before starting any complicated book-name matching to save repeating the search # check_exist_author = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % author).fetchone() if not check_exist_author and lazylibrarian.ADD_AUTHOR: # no match for supplied author, but we're allowed to # add new ones GR = GoodReads(author) try: author_gr = GR.find_author_id() except: logger.warn( "Error finding author id for [%s]" % author) continue # only try to add if GR data matches found author data if author_gr: authorname = author_gr['authorname'] # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien" match_auth = author.replace('.', '_') match_auth = match_auth.replace(' ', '_') match_auth = match_auth.replace('__', '_') match_name = authorname.replace('.', '_') match_name = match_name.replace(' ', '_') match_name = match_name.replace('__', '_') match_name = unaccented(match_name) match_auth = unaccented(match_auth) # allow a degree of fuzziness to cater for different accented character handling. # some author names have accents, # filename may have the accented or un-accented version of the character # The currently non-configurable value of fuzziness might need to go in config # We stored GoodReads unmodified author name in # author_gr, so store in LL db under that # fuzz.ratio doesn't lowercase for us match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower()) if match_fuzz < 90: logger.debug( "Failed to match author [%s] fuzz [%d]" % (author, match_fuzz)) logger.debug( "Failed to match author [%s] to authorname [%s]" % (match_auth, match_name)) # To save loading hundreds of books by unknown # authors at GR or GB, ignore if author "Unknown" if (author != "Unknown") and (match_fuzz >= 90): # use "intact" name for author that we stored in # GR author_dict, not one of the various mangled versions # otherwise the books appear to be by a different author! 
author = author_gr['authorname'] # this new authorname may already be in the # database, so check again check_exist_author = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % author).fetchone() if not check_exist_author: logger.info( "Adding new author [%s]" % author) try: addAuthorToDB(author) check_exist_author = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % author).fetchone() except: continue # check author exists in db, either newly loaded or already there if not check_exist_author: logger.debug( "Failed to match author [%s] in database" % author) else: # author exists, check if this book by this author is in our database # metadata might have quotes in book name book = book.replace('"', '').replace("'", "") bookid = find_book_in_db(myDB, author, book) if bookid: # check if book is already marked as "Open" (if so, # we already had it) check_status = myDB.action( 'SELECT Status from books where BookID="%s"' % bookid).fetchone() if check_status['Status'] != 'Open': # update status as we've got this book myDB.action( 'UPDATE books set Status="Open" where BookID="%s"' % bookid) book_filename = os.path.join(r, files) # update book location so we can check if it # gets removed, or allow click-to-open myDB.action( 'UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid)) # update cover file to cover.jpg in book folder (if exists) bookdir = book_filename.rsplit(os.sep, 1)[0] coverimg = os.path.join(bookdir, 'cover.jpg') cachedir = os.path.join(str(lazylibrarian.PROG_DIR), 'data' + os.sep + 'images' + os.sep + 'cache') cacheimg = os.path.join(cachedir, bookid + '.jpg') if os.path.isfile(coverimg): copyfile(coverimg, cacheimg) new_book_count += 1 else: logger.debug( "Failed to match book [%s] by [%s] in database" % (book, author)) logger.info("%s new/modified book%s found and added to the database" % (new_book_count, plural(new_book_count))) logger.info("%s file%s processed" % (file_count, plural(file_count))) # show 
statistics of full library scans if startdir == lazylibrarian.DESTINATION_DIR: stats = myDB.action( "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \ sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats").fetchone() if stats['sum(GR_book_hits)'] is not None: # only show stats if new books added if lazylibrarian.BOOK_API == "GoogleBooks": logger.debug("GoogleBooks was hit %s time%s for books" % (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)']))) logger.debug("GoogleBooks language was changed %s time%s" % (stats['sum(GB_lang_change)'], plural(stats['sum(GB_lang_change)']))) if lazylibrarian.BOOK_API == "GoodReads": logger.debug("GoodReads was hit %s time%s for books" % (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)']))) logger.debug("GoodReads was hit %s time%s for languages" % (stats['sum(GR_lang_hits)'], plural(stats['sum(GR_lang_hits)']))) logger.debug("LibraryThing was hit %s time%s for languages" % (stats['sum(LT_lang_hits)'], plural (stats['sum(LT_lang_hits)']))) logger.debug("Language cache was hit %s time%s" % (stats['sum(cache_hits)'], plural(stats['sum(cache_hits)']))) logger.debug("Unwanted language removed %s book%s" % (stats['sum(bad_lang)'], plural (stats['sum(bad_lang)']))) logger.debug("Unwanted characters removed %s book%s" % (stats['sum(bad_char)'], plural(stats['sum(bad_char)']))) logger.debug("Unable to cache %s book%s with missing ISBN" % (stats['sum(uncached)'], plural(stats['sum(uncached)']))) logger.debug("Found %s duplicate book%s" % (stats['sum(duplicates)'], plural(stats['sum(duplicates)']))) logger.debug("Cache %s hit%s, %s miss" % (lazylibrarian.CACHE_HIT, plural(lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS)) cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone() logger.debug("ISBN Language cache holds %s entries" % cachesize['counter']) nolang = len(myDB.select('select BookID from Books where 
status="Open" and BookLang="Unknown"')) if nolang: logger.warn("Found %s book%s in your library with unknown language" % (nolang, plural(nolang))) authors = myDB.select('select AuthorID from authors') # Update bookcounts for all authors, not just new ones - refresh may have located # new books for existing authors especially if switched provider gb/gr else: # single author/book import authors = myDB.select('select AuthorID from authors where AuthorName = "%s"' % author) logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors)))) for author in authors: update_totals(author['AuthorID']) images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"') if len(images): logger.info("Caching cover%s for %i book%s" % (plural(len(images)), len(images), plural(len(images)))) for item in images: bookid = item['bookid'] bookimg = item['bookimg'] bookname = item['bookname'] newimg = cache_cover(bookid, bookimg) if newimg is not None: myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid)) images = myDB.select('select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"') if len(images): logger.info("Caching image%s for %i author%s" % (plural(len(images)), len(images), plural(len(images)))) for item in images: authorid = item['authorid'] authorimg = item['authorimg'] authorname = item['authorname'] newimg = cache_cover(authorid, authorimg) if newimg is not None: myDB.action('update authors set AuthorImg="%s" where AuthorID="%s"' % (newimg, authorid)) setWorkPages() logger.info('Library scan complete') return new_book_count
def grsync(status, shelf):
    """
    Two-way sync between lazylibrarian books of a given status and a goodreads shelf.

    The bookid list saved by the previous sync (table "sync", UserID "goodreads",
    Label = shelf name) is compared with the current lazylibrarian list and the
    current shelf contents to work out what was added or removed on each side:

      * in last sync but no longer in lazylibrarian  -> remove from the shelf
      * in last sync but no longer on the shelf      -> mark Skipped (only if Have/Wanted)
      * in lazylibrarian but not in last sync        -> add to the shelf
      * on the shelf but not in last sync            -> mark Have/Wanted in lazylibrarian

    The shelf is created at goodreads if it does not exist yet.

    :param status: lazylibrarian book status to sync; "Open" also includes "Have"
    :param shelf: goodreads shelf name (compared in lower case)
    :return: tuple (shelf_changed, ll_changed) counting the changes made on each side;
             (0, 0) if the shelf could not be created or on any unhandled exception
    """
    # noinspection PyBroadException
    try:
        shelf = shelf.lower()
        logger.info('Syncing %s to %s shelf' % (status, shelf))
        myDB = database.DBConnection()

        def current_ll_list():
            # bookids lazylibrarian currently holds at the requested status
            # NOTE(review): status is supplied by our own callers, not by goodreads,
            # so this query is left string-built - confirm before passing user input
            cmd = 'select bookid from books where status="%s"' % status
            if status == 'Open':
                cmd += ' or status="Have"'
            return [terms['bookid'] for terms in myDB.select(cmd)]

        ll_list = current_ll_list()

        GA = grauth()
        GR = None  # GoodReads helper, only created if we need to import an unknown book
        shelves = GA.get_shelf_list()
        if not any(item['name'] == shelf for item in shelves):
            res, msg = GA.create_shelf(shelf=shelf)
            if not res:
                logger.debug("Unable to create shelf %s: %s" % (shelf, msg))
                return 0, 0
            else:
                logger.debug("Created new goodreads shelf: %s" % shelf)

        gr_shelf = GA.get_gr_shelf_contents(shelf=shelf)
        dstatus = status
        if dstatus == "Open":
            dstatus += "/Have"

        logger.info("There are %s %s books, %s books on goodreads %s shelf" %
                    (len(ll_list), dstatus, len(gr_shelf), shelf))

        # Sync method for WANTED:
        # Get results of last_sync (if any)
        # For each book in last_sync
        #    if not in ll_list, new deletion, remove from gr_shelf
        #    if not in gr_shelf, new deletion, remove from ll_list, mark Skipped
        # For each book in ll_list
        #    if not in last_sync, new addition, add to gr_shelf
        # For each book in gr_shelf
        #    if not in last sync, new addition, add to ll_list, mark Wanted
        #
        # save ll WANTED as last_sync

        # For HAVE/OPEN method is the same, but only change status if HAVE, not OPEN

        # shelf names and book ids come from goodreads, so bind them as parameters
        # rather than formatting them into the sql text
        res = myDB.match('select SyncList from sync where UserID=? and Label=?', ("goodreads", shelf))
        last_sync = []
        shelf_changed = 0
        ll_changed = 0
        if res:
            last_sync = getList(res['SyncList'])

        added_to_shelf = list(set(gr_shelf) - set(last_sync))
        removed_from_shelf = list(set(last_sync) - set(gr_shelf))
        added_to_ll = list(set(ll_list) - set(last_sync))
        removed_from_ll = list(set(last_sync) - set(ll_list))

        status_cmd = 'select Status from books where BookID=?'

        logger.info("%s missing from lazylibrarian %s" % (len(removed_from_ll), shelf))
        for book in removed_from_ll:
            # first the deletions since last sync...
            try:
                res, content = GA.BookToList(book, shelf, action='remove')
            except Exception as e:
                logger.error("Error removing %s from %s: %s %s" % (book, shelf, type(e).__name__, str(e)))
                res = None
                content = ''
            if res:
                logger.debug("%10s removed from %s shelf" % (book, shelf))
                shelf_changed += 1
            else:
                logger.warn("Failed to remove %s from %s shelf: %s" % (book, shelf, content))

        logger.info("%s missing from goodreads %s" % (len(removed_from_shelf), shelf))
        for book in removed_from_shelf:
            # deleted from goodreads
            res = myDB.match(status_cmd, (book,))
            if not res:
                # book is unknown to us, try to import it and look again
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                res = myDB.match(status_cmd, (book,))
            if not res:
                logger.warn('Book %s not found in database' % book)
            else:
                if res['Status'] in ['Have', 'Wanted']:
                    myDB.action('UPDATE books SET Status="Skipped" WHERE BookID=?', (book,))
                    ll_changed += 1
                    logger.debug("%10s set to Skipped" % book)
                else:
                    logger.warn("Not removing %s, book is marked %s" % (book, res['Status']))

        # new additions to lazylibrarian
        logger.info("%s new in lazylibrarian %s" % (len(added_to_ll), shelf))
        for book in added_to_ll:
            try:
                res, content = GA.BookToList(book, shelf, action='add')
            except Exception as e:
                logger.error("Error adding %s to %s: %s %s" % (book, shelf, type(e).__name__, str(e)))
                res = None
                content = ''
            if res:
                logger.debug("%10s added to %s shelf" % (book, shelf))
                shelf_changed += 1
            else:
                logger.warn("Failed to add %s to %s shelf: %s" % (book, shelf, content))

        # new additions to goodreads shelf
        logger.info("%s new in goodreads %s" % (len(added_to_shelf), shelf))
        for book in added_to_shelf:
            res = myDB.match(status_cmd, (book,))
            if not res:
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                res = myDB.match(status_cmd, (book,))
            if not res:
                logger.warn('Book %s not found in database' % book)
            else:
                if status == 'Open':
                    if res['Status'] == 'Open':
                        logger.warn("Book %s is already marked Open" % book)
                    else:
                        myDB.action('UPDATE books SET Status="Have" WHERE BookID=?', (book,))
                        ll_changed += 1
                        logger.debug("%10s set to Have" % book)
                elif status == 'Wanted':
                    # if in "wanted" and already marked "Open/Have", optionally delete from "wanted"
                    # (depending on user prefs, to-read and wanted might not be the same thing)
                    if lazylibrarian.CONFIG['GR_UNIQUE'] and res['Status'] in ['Open', 'Have']:
                        try:
                            removed, content = GA.BookToList(book, shelf, action='remove')
                        except Exception as e:
                            logger.error("Error removing %s from %s: %s %s" %
                                         (book, shelf, type(e).__name__, str(e)))
                            removed = None
                            content = ''
                        if removed:
                            logger.debug("%10s removed from %s shelf" % (book, shelf))
                            shelf_changed += 1
                        else:
                            logger.warn("Failed to remove %s from %s shelf: %s" % (book, shelf, content))
                    elif res['Status'] != 'Open':
                        myDB.action('UPDATE books SET Status="Wanted" WHERE BookID=?', (book,))
                        ll_changed += 1
                        logger.debug("%10s set to Wanted" % book)
                    else:
                        logger.warn("Not setting %s as Wanted, already marked Open" % book)

        # get new definitive list from ll
        ll_list = current_ll_list()

        # store as comparison for next sync
        controlValueDict = {"UserID": "goodreads", "Label": shelf}
        newValueDict = {"Date": str(time.time()), "Synclist": ', '.join(ll_list)}
        myDB.upsert("sync", newValueDict, controlValueDict)

        logger.debug('Sync %s to %s shelf complete' % (status, shelf))
        return shelf_changed, ll_changed

    except Exception:
        # top-level boundary: log the traceback and report "nothing changed"
        logger.error('Unhandled exception in grsync: %s' % traceback.format_exc())
        return 0, 0
def _magazine_issue_date(words):
    """
    Work out an issue date from the trailing words of a magazine result title.

    Formats are tried in this order, first valid calendar date wins:
      A: [DD] MonthName YYYY  (also "Issue nn MonthName YYYY", day defaults to 01)
      B: MonthName DD YYYY
      C: YYYY MM  or  YYYY MM DD

    :param words: list of title words with trailing junk removed, at least 3 entries
    :return: date string "year-month-day" built from the matched words, or None
             if no format gives a valid date
    """
    # regexA = DD MonthName YYYY OR MonthName YYYY or Issue nn MonthName YYYY
    year = words[-1]
    month = formatter.month2num(common.remove_accents(words[-2]))
    if not year.isdigit() or int(year) < 1900 or int(year) > 2100:
        year = 'fail'  # force date failure
    day = words[-3].zfill(2)
    if day.isdigit():
        if int(day) > 31:  # probably issue number nn
            day = '01'
    else:
        day = '01'  # just MonthName YYYY
    try:
        newdatish = year + '-' + month + '-' + day
        # try to make sure the year/month/day are valid, exception if not
        # ie don't accept day > 31, or 30 in some months
        # also handles multiple date format named issues eg Jan 2014, 01 2014
        # datetime will give a ValueError if not a good date or a param is not int
        datetime.date(int(year), int(month), int(day))
        return newdatish
    except ValueError:
        pass

    # regexB = MonthName DD YYYY
    year = words[-1]
    month = formatter.month2num(common.remove_accents(words[-3]))
    day = words[-2].zfill(2)
    if not year.isdigit() or int(year) < 1900 or int(year) > 2100:
        year = 'fail'
    try:
        newdatish = year + '-' + month + '-' + day
        datetime.date(int(year), int(month), int(day))
        return newdatish
    except ValueError:
        pass

    # regexC = YYYY MM or YYYY MM DD
    # (can't get MM/DD if named YYYY Issue nn)
    # First try YYYY MM
    year = words[-2]
    if year.isdigit() and int(year) > 1900 and int(year) < 2100:
        month = words[-1].zfill(2)
        day = '01'
    else:
        # try YYYY MM DD
        year = words[-3]
        if year.isdigit() and int(year) > 1900 and int(year) < 2100:
            month = words[-2].zfill(2)
            day = words[-1].zfill(2)
        else:
            return None  # no usable year anywhere
    try:
        newdatish = year + '-' + month + '-' + day
        datetime.date(int(year), int(month), int(day))
        return newdatish
    except Exception:
        return None


def search_magazines(mags=None, reset=False):
    """
    Search the configured nzb/torznab/torrent providers for new issues of magazines.

    For each active magazine, every provider result whose title matches the magazine
    name and carries a date newer than the last issue we hold (or newer than about a
    month ago if we hold none) is stored in the "wanted" table and sent to the
    downloader.

    :param mags: optional list of dicts with key 'bookid' (the magazine title) to
                 restrict the search; None searches all active magazines
    :param reset: if True, restart the scheduled 'search_magazines' job afterwards
    """
    # produce a list of magazines to search for, tor, nzb, torznab
    myDB = database.DBConnection()
    searchlist = []

    threading.currentThread().name = "SEARCHMAGS"

    if mags is None:  # backlog search
        searchmags = myDB.select(
            'SELECT Title, LastAcquired, IssueDate from magazines WHERE Status="Active"')
    else:
        searchmags = []
        for magazine in mags:
            searchmags_temp = myDB.select(
                'SELECT Title, LastAcquired, IssueDate from magazines WHERE Title="%s" AND Status="Active"'
                % (magazine['bookid']))
            for terms in searchmags_temp:
                searchmags.append(terms)

    if len(searchmags) == 1:
        logger.info('Searching for one magazine')
    else:
        logger.info('Searching for %i magazines' % len(searchmags))

    for searchmag in searchmags:
        bookid = searchmag[0]
        searchterm = searchmag[0]
        # frequency = searchmag[1]
        # last_acquired = searchmag[2]
        # issue_date = searchmag[3]

        dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}

        searchterm = formatter.latinToAscii(formatter.replace_all(searchterm, dic))
        # raw string so the backslashes reach the regex engine unchanged
        searchterm = re.sub(r'[\.\-\/]', ' ', searchterm).encode('utf-8')
        searchlist.append({"bookid": bookid, "searchterm": searchterm})

    if not searchlist:
        logger.warn('There is nothing to search for. Mark some magazines as active.')

    for book in searchlist:
        resultlist = []
        tor_resultlist = []
        if lazylibrarian.USE_NZB():
            resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'mag')
            if not nproviders:
                logger.warn('No nzb providers are set. Check config for NEWZNAB or TORZNAB providers')

        if lazylibrarian.USE_TOR():
            tor_resultlist, nproviders = providers.IterateOverTorrentSites(book, 'mag')
            if not nproviders:
                logger.warn('No torrent providers are set. Check config for TORRENT providers')

        for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
            resultlist.append({
                'bookid': item['bookid'],
                'nzbprov': item['tor_prov'],
                'nzbtitle': item['tor_title'],
                'nzburl': item['tor_url'],
                'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                'nzbsize': item['tor_size'],
                'nzbmode': 'torrent'
            })

        if not resultlist:
            logger.debug("Adding magazine %s to queue." % book['searchterm'])
            continue

        bad_regex = 0
        bad_date = 0
        old_date = 0
        total_nzbs = 0
        new_date = 0
        to_snatch = 0
        maglist = []
        issues = []
        reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)
        for nzb in resultlist:
            total_nzbs = total_nzbs + 1
            bookid = nzb['bookid']
            nzbtitle = (u'%s' % nzb['nzbtitle'])
            nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
            nzburl = nzb['nzburl']
            nzbprov = nzb['nzbprov']
            nzbdate_temp = nzb['nzbdate']
            nzbsize_temp = nzb['nzbsize']
            if nzbsize_temp is None:  # not all torrents returned by torznab have a size
                nzbsize_temp = 1000
            nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + ' MB'
            # NOTE(review): result is not used below; call kept in case it validates the date
            nzbdate = formatter.nzbdate2format(nzbdate_temp)
            nzbmode = nzb['nzbmode']

            checkifmag = myDB.select('SELECT * from magazines WHERE Title="%s"' % bookid)
            if not checkifmag:
                continue

            for results in checkifmag:
                control_date = results['IssueDate']
                # frequency = results['Frequency']
                # regex = results['Regex']

            nzbtitle_formatted = nzbtitle.replace('.', ' ').replace('-', ' ').replace('/', ' ').replace(
                '+', ' ').replace('_', ' ').replace('(', '').replace(')', '').strip()
            # Need to make sure that substrings of magazine titles don't get found
            # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this
            # keyword_check = nzbtitle_formatted.replace(bookid, '')
            # remove extra spaces if they're in a row
            nzbtitle_exploded_temp = " ".join(nzbtitle_formatted.split())
            nzbtitle_exploded = nzbtitle_exploded_temp.split(' ')

            if ' ' in bookid:
                bookid_exploded = bookid.split(' ')
            else:
                bookid_exploded = [bookid]

            # check nzb starts with magazine title, and ends with a date
            # eg The MagPI Issue 22 - July 2015
            # do something like check left n words match title
            # then check last n words are a date

            name_match = 1  # assume name matches for now
            if len(nzbtitle_exploded) > len(bookid_exploded):
                # needs to be longer as it has to include a date
                # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz
                mag_title_match = fuzz.token_set_ratio(common.remove_accents(bookid),
                                                       common.remove_accents(nzbtitle_formatted))
                if mag_title_match < lazylibrarian.MATCH_RATIO:
                    logger.debug(u"Magazine token set Match failed: " + str(mag_title_match) +
                                 "% for " + nzbtitle_formatted)
                    name_match = 0

            lower_title = common.remove_accents(nzbtitle_formatted).lower()
            lower_bookid = common.remove_accents(bookid).lower()
            for word in reject_list:
                if word in lower_title and word not in lower_bookid:
                    name_match = 0
                    logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                    break

            if not name_match:
                logger.debug('Magazine [%s] does not completely match search term [%s].' % (
                    nzbtitle_formatted, bookid))
                bad_regex = bad_regex + 1
                continue

            # some magazine torrent uploaders add their sig in [] or {}
            # Fortunately for us, they always seem to add it at the end
            # also some magazine torrent titles are "magazine_name some_form_of_date pdf"
            # so strip all the trailing junk...
            # Stop when the list is empty (title was nothing but junk) instead of
            # raising IndexError; startswith() is also safe on an empty word
            while nzbtitle_exploded and (nzbtitle_exploded[-1].startswith(('[', '{')) or
                                         nzbtitle_exploded[-1].lower() == 'pdf'):
                nzbtitle_exploded.pop()  # gotta love the function names

            # need at least one word magazine title and two date components
            if len(nzbtitle_exploded) <= 2:
                continue

            newdatish = _magazine_issue_date(nzbtitle_exploded)
            if newdatish is None:
                logger.debug('Magazine %s not in proper date format.' % nzbtitle_formatted)
                bad_date = bad_date + 1
                # allow issues with good name but bad date to be included
                # so user can manually select them, incl those with issue numbers
                newdatish = "1970-01-01"  # provide a fake date for bad-date issues

            if control_date is None:  # we haven't got any copies of this magazine yet
                # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                # could perhaps calc differently for weekly, biweekly etc
                start_time = time.time()
                start_time -= 31 * 24 * 60 * 60  # number of seconds in 31 days
                control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))

            # only grab a copy if it's newer than the most recent we have,
            # or newer than a month ago if we have none
            comp_date = formatter.datecompare(newdatish, control_date)
            if comp_date > 0:
                # Should probably only upsert when downloaded and processed in case snatch fails
                # keep track of what we're going to download so we don't download dupes
                new_date = new_date + 1
                issue = bookid + ',' + newdatish
                if issue not in issues:
                    maglist.append({
                        'bookid': bookid,
                        'nzbprov': nzbprov,
                        'nzbtitle': nzbtitle,
                        'nzburl': nzburl,
                        'nzbmode': nzbmode
                    })
                    logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                    to_snatch = to_snatch + 1
                    issues.append(issue)

                    controlValueDict = {"NZBurl": nzburl}
                    newValueDict = {
                        "NZBprov": nzbprov,
                        "BookID": bookid,
                        "NZBdate": formatter.now(),  # when we asked for it
                        "NZBtitle": nzbtitle,
                        "AuxInfo": newdatish,
                        "Status": "Wanted",
                        "NZBsize": nzbsize,
                        "NZBmode": nzbmode
                    }
                    myDB.upsert("wanted", newValueDict, controlValueDict)
                else:
                    logger.debug('This issue of %s is already flagged for download' % issue)
            else:
                if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                    logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                    old_date = old_date + 1

        logger.info('Found %s results for %s. %s are new, %s are old, %s fail date, %s fail name matching' % (
            total_nzbs, bookid, new_date, old_date, bad_date, bad_regex))
        logger.info("%s, %s issues to download" % (bookid, to_snatch))

        for items in maglist:
            # torznab and plain torrent results both go through the torrent downloader
            if items['nzbmode'] in ("torznab", "torrent"):
                snatch = TORDownloadMethod(items['bookid'], items['nzbprov'],
                                           items['nzbtitle'], items['nzburl'])
            else:
                snatch = NZBDownloadMethod(items['bookid'], items['nzbprov'],
                                           items['nzbtitle'], items['nzburl'])
            if snatch:
                notifiers.notify_snatch(formatter.latinToAscii(items['nzbtitle']) + ' at ' + formatter.now())
                common.schedule_job(action='Start', target='processDir')

    # strict comparison kept on purpose so that truthy non-boolean values passed by
    # existing callers do not start triggering a restart
    if reset == True:  # noqa: E712
        common.schedule_job(action='Restart', target='search_magazines')

    logger.info("Search for magazines complete")
def processResultList(resultlist, book, searchtype):
    """
    Pick the best torrent result for a book and send it to the downloader.

    Each result title is fuzzy-matched against the wanted author and title.
    Results are discarded if their url is already marked Failed in the "wanted"
    table, if they contain a reject word that is not part of the author/title,
    or if they are larger than the configured maximum size. The remaining result
    with the highest score is snatched if it reaches the configured match ratio.

    :param resultlist: list of dicts with keys tor_title, tor_url, tor_size, tor_prov
    :param book: dict with keys authorName, bookName, bookid (and searchterm, used
                 only in the "nothing found" log message)
    :param searchtype: description of the search that produced the results, for logging
    :return: False if nothing suitable was found or the snatch failed,
             True if the book is already marked Snatched (someone else found it),
             True + True (i.e. 2) if this call snatched it
    """
    myDB = database.DBConnection()
    # raw string for the last key: same value as before, without relying on an
    # unrecognised backslash escape in a normal string literal
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '',
                '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '',
                ':': '', '!': '', '-': ' ', r'\s\s': ' '}

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
           ',': '', '*': '', ':': '', ';': ''}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = getList(lazylibrarian.REJECT_WORDS)
    maxsize = check_int(lazylibrarian.REJECT_MAXSIZE, 0)  # same for every result
    author = unaccented_str(replace_all(book['authorName'], dic))
    title = unaccented_str(replace_all(book['bookName'], dic))

    matches = []
    for tor in resultlist:
        torTitle = unaccented_str(tor['tor_title'])
        torTitle = replace_all(torTitle, dictrepl).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        torAuthor_match = fuzz.token_set_ratio(author, torTitle)
        torBook_match = fuzz.token_set_ratio(title, torTitle)
        logger.debug(u"TOR author/book Match: %s/%s for %s" % (torAuthor_match, torBook_match, torTitle))
        tor_url = tor['tor_url']

        rejected = False

        # the url comes from the provider, so bind it as a parameter rather than
        # formatting it into the sql text (a quote in the url would break the query)
        already_failed = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (tor_url,))
        if already_failed:
            logger.debug("Rejecting %s, blacklisted at %s" % (torTitle, already_failed['NZBprov']))
            rejected = True

        if not rejected:
            for word in reject_list:
                if word in torTitle.lower() and word not in author.lower() and word not in title.lower():
                    rejected = True
                    logger.debug("Rejecting %s, contains %s" % (torTitle, word))
                    break

        tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
        tor_size_temp = check_int(tor_size_temp, 1000)
        tor_size = round(float(tor_size_temp) / 1048576, 2)

        if not rejected:
            if maxsize and tor_size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % torTitle)

        if not rejected:
            bookid = book['bookid']
            tor_Title = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()

            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor['tor_prov'],
                "BookID": bookid,
                "NZBdate": now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }

            score = (torBook_match + torAuthor_match) / 2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(torTitle))
            words -= len(getList(author))
            words -= len(getList(title))
            score -= abs(words)
            matches.append([score, torTitle, newValueDict, controlValueDict])

    if not matches:
        logger.debug("No torrent's found for [%s] using searchtype %s" % (book["searchterm"], searchtype))
        return False

    highest = max(matches, key=lambda x: x[0])
    score = highest[0]
    nzb_Title = highest[1]
    newValueDict = highest[2]
    controlValueDict = highest[3]

    if score < match_ratio:
        logger.info(u'Nearest TOR match (%s%%): %s using %s search for %s %s' %
                    (score, nzb_Title, searchtype, author, title))
        return False

    logger.info(u'Best TOR match (%s%%): %s using %s search' % (score, nzb_Title, searchtype))

    snatchedbooks = myDB.match('SELECT * from books WHERE BookID=? and Status="Snatched"',
                               (newValueDict["BookID"],))
    if snatchedbooks:
        logger.debug('%s already marked snatched' % nzb_Title)
        return True  # someone else found it, not us

    myDB.upsert("wanted", newValueDict, controlValueDict)
    if newValueDict["NZBprov"] == 'libgen':
        # for libgen we use direct download links
        snatch = DirectDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                      newValueDict["NZBtitle"], controlValueDict["NZBurl"], nzb_Title)
    else:
        snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                   newValueDict["NZBtitle"], controlValueDict["NZBurl"])
    if snatch:
        logger.info('Downloading %s from %s' % (newValueDict["NZBtitle"], newValueDict["NZBprov"]))
        notify_snatch("%s from %s at %s" % (newValueDict["NZBtitle"], newValueDict["NZBprov"], now()))
        scheduleJob(action='Start', target='processDir')
        # deliberately 2, not True, so it differs from the "already snatched" result above
        return True + True  # we found it
    return False
def _calibre_set_custom(column, bookids, map_ltoc, value):
    """
    Set or clear a custom calibre column for a list of lazylibrarian bookids.

    :param column: name of the calibre custom column; nothing is done if it is unset
    :param bookids: lazylibrarian bookids to update
    :param map_ltoc: dict mapping lazylibrarian bookid -> calibre id (both strings)
    :param value: 'true' to set the column, '' to clear it
    :return: number of books successfully updated in calibre
    """
    if not column:
        return 0
    changed = 0
    for bookid in bookids:
        if bookid in map_ltoc:
            res, err, rc = calibredb('set_custom', [column, map_ltoc[bookid], value], [])
            if rc:
                msg = "calibredb set_custom error: "
                # report the most informative of stderr, stdout, return code
                logger.error(msg + (err or res or str(rc)))
            else:
                changed += 1
        elif value:
            logger.warn("Unable to set calibre %s true for %s" % (column, bookid))
        else:
            logger.warn("Unable to clear calibre %s for %s" % (column, bookid))
    return changed


def syncCalibreList(col_read=None, col_toread=None, userid=None):
    """
    Get the lazylibrarian bookid for each read/toread calibre book so we can map our id to theirs,
    and sync current/supplied user's read/toread or supplied read/toread columns to calibre database.

    Only one of the two columns needs to be configured; every step for a column
    that is not set (creating it, reading/writing its values, storing its sync
    list) is skipped.

    :param col_read: calibre custom column holding "read"; defaults to the user's CalibreRead
    :param col_toread: calibre custom column holding "to read"; defaults to the user's CalibreToRead
    :param userid: lazylibrarian user id; defaults to the 'll_uid' cookie of the current request
    :return: message giving totals, or an error message. If calibredb fails to
             create a missing column its raw result is returned unchanged.
    """
    myDB = database.DBConnection()
    username = ''
    readlist = []
    toreadlist = []
    if not userid:
        cookie = cherrypy.request.cookie
        if cookie and 'll_uid' in cookie:
            userid = cookie['ll_uid'].value
    if not userid:
        return "Error: Unable to find current userid"

    res = myDB.match('SELECT UserName,ToRead,HaveRead,CalibreRead,CalibreToRead,Perms from users where UserID=?',
                     (userid,))
    if not res:
        return "Error: Unable to get user column settings for %s" % userid

    username = res['UserName']
    if not col_read:
        col_read = res['CalibreRead']
    if not col_toread:
        col_toread = res['CalibreToRead']
    # suppress duplicates (just in case)
    toreadlist = list(set(getList(res['ToRead'])))
    readlist = list(set(getList(res['HaveRead'])))

    if not col_read and not col_toread:
        return "User %s has no calibre columns set" % username

    # check user columns exist in calibre and create if not
    res = calibredb('custom_columns')
    custom_columns = [column.split(' (')[0] for column in res[0].split('\n') if column]

    # only create columns the user actually configured, otherwise we would ask
    # calibre to add a column with an empty name
    for col in (col_read, col_toread):
        if col and col not in custom_columns:
            added = calibredb('add_custom_column', [col, col, 'bool'])
            if "column created" not in added[0]:
                return added

    nomatch = 0
    map_ctol = {}
    map_ltoc = {}
    # calibre returns custom columns with a leading '*'
    readcol = '*' + col_read if col_read else ''
    toreadcol = '*' + col_toread if col_toread else ''

    calibre_list = calibreList(col_read, col_toread)
    if not isinstance(calibre_list, list):
        # got an error message from calibredb
        return '"%s"' % calibre_list

    for item in calibre_list:
        if toreadcol and toreadcol in item or readcol and readcol in item:
            authorname, _, _ = addAuthorNameToDB(item['authors'], refresh=False, addbooks=False)
            if not authorname:
                logger.warn('Calibre Author [%s] not matched in lazylibrarian database' % (item['authors']))
                nomatch += 1
                continue

            if authorname != item['authors']:
                logger.debug("Changed authorname for [%s] from [%s] to [%s]" %
                             (item['title'], item['authors'], authorname))
                item['authors'] = authorname
            bookid, mtype = find_book_in_db(authorname, item['title'], ignored=False, library='eBook')
            if bookid and mtype == "Ignored":
                logger.warn("Book %s by %s is marked Ignored in database, importing anyway" %
                            (item['title'], authorname))
            if not bookid:
                searchterm = "%s <ll> %s" % (item['title'], authorname)
                results = search_for(unaccented(searchterm))
                if results:
                    result = results[0]
                    if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                            and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                        logger.debug("Found (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                   result['authorname'], result['bookname']))
                        bookid = result['bookid']
                        import_book(bookid)
            if bookid:
                # NOTE: calibre bookid is always an integer, lazylibrarian bookid is a string
                # (goodreads could be used as an int, but googlebooks can't as it's alphanumeric)
                # so convert all dict items to strings for ease of matching.
                map_ctol[str(item['id'])] = str(bookid)
                map_ltoc[str(bookid)] = str(item['id'])
            else:
                logger.warn('Calibre Book [%s] by [%s] is not in lazylibrarian database' %
                            (item['title'], authorname))
                nomatch += 1

    # Now check current users lazylibrarian read/toread against the calibre library, warn about missing ones
    # which might be books calibre doesn't have, or might be minor differences in author or title
    for label, booklist in [("Read", readlist), ("To_Read", toreadlist)]:
        for bookid in booklist:
            cmd = "SELECT AuthorID,BookName from books where BookID=?"
            book = myDB.match(cmd, (bookid,))
            if not book:
                logger.error('Error finding bookid %s' % bookid)
                continue
            cmd = "SELECT AuthorName from authors where AuthorID=?"
            author = myDB.match(cmd, (book['AuthorID'],))
            if not author:
                logger.error('Error finding authorid %s' % book['AuthorID'])
                continue

            match = False
            high = 0
            highname = ''
            for item in calibre_list:
                if item['authors'] == author['AuthorName'] and item['title'] == book['BookName']:
                    logger.debug("Exact match for %s [%s]" % (label, book['BookName']))
                    map_ctol[str(item['id'])] = str(bookid)
                    map_ltoc[str(bookid)] = str(item['id'])
                    match = True
                    break
            if not match:
                # no exact title, take the closest title by the same author
                highid = ''
                for item in calibre_list:
                    if item['authors'] == author['AuthorName']:
                        n = fuzz.token_sort_ratio(item['title'], book['BookName'])
                        if n > high:
                            high = n
                            highname = item['title']
                            highid = item['id']

                if high > 95:
                    logger.debug("Found ratio match %s%% [%s] for %s [%s]" %
                                 (high, highname, label, book['BookName']))
                    map_ctol[str(highid)] = str(bookid)
                    map_ltoc[str(bookid)] = str(highid)
                    match = True

            if not match:
                logger.warn("No match for %s %s by %s in calibre database, closest match %s%% [%s]" %
                            (label, book['BookName'], author['AuthorName'], high, highname))
                nomatch += 1

    logger.debug("BookID mapping complete, %s match %s, nomatch %s" % (username, len(map_ctol), nomatch))

    # now sync the lists
    last_read = []
    last_toread = []
    calibre_read = []
    calibre_toread = []

    cmd = 'select SyncList from sync where UserID=? and Label=?'
    if col_read:
        res = myDB.match(cmd, (userid, col_read))
        if res:
            last_read = getList(res['SyncList'])
    if col_toread:
        res = myDB.match(cmd, (userid, col_toread))
        if res:
            last_toread = getList(res['SyncList'])

    for item in calibre_list:
        if toreadcol and toreadcol in item and item[toreadcol]:  # only if True
            if str(item['id']) in map_ctol:
                calibre_toread.append(map_ctol[str(item['id'])])
            else:
                logger.warn("Calibre to_read book %s:%s has no lazylibrarian bookid" %
                            (item['authors'], item['title']))
        if readcol and readcol in item and item[readcol]:  # only if True
            if str(item['id']) in map_ctol:
                calibre_read.append(map_ctol[str(item['id'])])
            else:
                logger.warn("Calibre read book %s:%s has no lazylibrarian bookid" %
                            (item['authors'], item['title']))

    logger.debug("Found %s calibre read, %s calibre toread" % (len(calibre_read), len(calibre_toread)))
    logger.debug("Found %s lazylib read, %s lazylib toread" % (len(readlist), len(toreadlist)))

    added_to_ll_toread = list(set(toreadlist) - set(last_toread))
    removed_from_ll_toread = list(set(last_toread) - set(toreadlist))
    added_to_ll_read = list(set(readlist) - set(last_read))
    removed_from_ll_read = list(set(last_read) - set(readlist))
    logger.debug("lazylibrarian changes to copy to calibre: %s %s %s %s" %
                 (len(added_to_ll_toread), len(removed_from_ll_toread),
                  len(added_to_ll_read), len(removed_from_ll_read)))

    added_to_calibre_toread = list(set(calibre_toread) - set(last_toread))
    removed_from_calibre_toread = list(set(last_toread) - set(calibre_toread))
    added_to_calibre_read = list(set(calibre_read) - set(last_read))
    removed_from_calibre_read = list(set(last_read) - set(calibre_read))
    logger.debug("calibre changes to copy to lazylibrarian: %s %s %s %s" %
                 (len(added_to_calibre_toread), len(removed_from_calibre_toread),
                  len(added_to_calibre_read), len(removed_from_calibre_read)))

    # first copy the calibre changes into the lazylibrarian lists
    calibre_changes = 0
    for item in added_to_calibre_read:
        if item not in readlist:
            readlist.append(item)
            logger.debug("Lazylibrarian marked %s as read" % item)
            calibre_changes += 1
    for item in added_to_calibre_toread:
        if item not in toreadlist:
            toreadlist.append(item)
            logger.debug("Lazylibrarian marked %s as to_read" % item)
            calibre_changes += 1
    for item in removed_from_calibre_read:
        if item in readlist:
            readlist.remove(item)
            logger.debug("Lazylibrarian removed %s from read" % item)
            calibre_changes += 1
    for item in removed_from_calibre_toread:
        if item in toreadlist:
            toreadlist.remove(item)
            logger.debug("Lazylibrarian removed %s from to_read" % item)
            calibre_changes += 1
    if calibre_changes:
        myDB.action('UPDATE users SET ToRead=?,HaveRead=? WHERE UserID=?',
                    (', '.join(toreadlist), ', '.join(readlist), userid))

    # then copy the lazylibrarian changes into calibre
    ll_changes = 0
    ll_changes += _calibre_set_custom(col_toread, added_to_ll_toread, map_ltoc, 'true')
    ll_changes += _calibre_set_custom(col_toread, removed_from_ll_toread, map_ltoc, '')
    ll_changes += _calibre_set_custom(col_read, added_to_ll_read, map_ltoc, 'true')
    ll_changes += _calibre_set_custom(col_read, removed_from_ll_read, map_ltoc, '')

    # store current sync list as comparison for next sync
    if col_read:
        controlValueDict = {"UserID": userid, "Label": col_read}
        newValueDict = {"Date": str(time.time()), "Synclist": ', '.join(readlist)}
        myDB.upsert("sync", newValueDict, controlValueDict)
    if col_toread:
        controlValueDict = {"UserID": userid, "Label": col_toread}
        newValueDict = {"Date": str(time.time()), "Synclist": ', '.join(toreadlist)}
        myDB.upsert("sync", newValueDict, controlValueDict)

    msg = "%s sync updated: %s calibre, %s lazylibrarian" % (username, ll_changes, calibre_changes)
    return msg
def searchItem(item=None, bookid=None, cat=None):
    """
    Query every enabled search provider for item and normalise the hits.

    item = search term used for a general search
    bookid = key of the book record, used for book/audio searches
    cat = category to search [general, book, audio]

    Returns a list of dicts with the keys score, title, provider, size,
    date, url (passed through quote_plus) and mode. An empty item gives
    an empty list without contacting any provider.
    """
    if not item:
        return []

    searchterm = unaccented_str(item)
    book = {'searchterm': searchterm, 'bookid': bookid if bookid else searchterm}

    if cat in ['book', 'audio']:
        myDB = database.DBConnection()
        cmd = ('SELECT authorName,bookName,bookSub from books,authors WHERE books.AuthorID=authors.AuthorID'
               ' and bookID=?')
        match = myDB.match(cmd, (bookid,))
        if not match:
            # no book record to build a targeted query from
            logger.debug('Forcing general search')
            cat = 'general'
        else:
            for column in ('authorName', 'bookName', 'bookSub'):
                book[column] = match[column]

    provider_count = sum((lazylibrarian.USE_NZB(), lazylibrarian.USE_TOR(),
                          lazylibrarian.USE_RSS(), lazylibrarian.USE_DIRECT()))
    logger.debug('Searching %s provider%s (%s) for %s' %
                 (provider_count, plural(provider_count), cat, searchterm))

    # (is this provider family enabled?, how to run its search), in search order
    searches = (
        (lazylibrarian.USE_NZB, lambda: IterateOverNewzNabSites(book, cat)),
        (lazylibrarian.USE_TOR, lambda: IterateOverTorrentSites(book, cat)),
        (lazylibrarian.USE_DIRECT, lambda: IterateOverDirectSites(book, cat)),
        (lazylibrarian.USE_RSS, lambda: IterateOverRSSSites()),
    )
    raw_results = []
    for enabled, run_search in searches:
        if enabled():
            hits, active_providers = run_search()
            if active_providers:
                raw_results += hits

    # reprocess to get consistent results
    # (output field, nzb-style key, torrent-style key); the torrent key wins if both exist
    field_sources = (
        ('title', 'nzbtitle', 'tor_title'),
        ('url', 'nzburl', 'tor_url'),
        ('provider', 'nzbprov', 'tor_prov'),
        ('size', 'nzbsize', 'tor_size'),
        ('date', 'nzbdate', 'tor_date'),
        ('mode', 'nzbmode', 'tor_type'),
    )
    searchresults = []
    for entry in raw_results:
        fields = {}
        for name, nzb_key, tor_key in field_sources:
            value = ''
            if nzb_key in entry:
                value = entry[nzb_key]
            if tor_key in entry:
                value = entry[tor_key]
            fields[name] = value

        if not (fields['title'] and fields['provider'] and fields['mode'] and fields['url']):
            continue

        # Not all results have a date or a size
        if not fields['date']:
            fields['date'] = 'Fri, 01 Jan 1970 00:00:00 +0100'
        if not fields['size']:
            fields['size'] = '1000'

        # calculate match percentage - torrents might have words_with_underscore_separator
        score = fuzz.token_set_ratio(searchterm, fields['title'].replace('_', ' '))
        # lose a point for each extra word in the title so we get the closest match
        word_difference = len(getList(searchterm)) - len(getList(fields['title']))
        score -= abs(word_difference)
        if score < 40:  # ignore wildly wrong results?
            continue

        searchresults.append({
            'score': score,
            'title': fields['title'],
            'provider': fields['provider'],
            'size': fields['size'],
            'date': fields['date'],
            'url': quote_plus(fields['url']),
            'mode': fields['mode'],
        })

    logger.debug('Found %s %s results for %s' % (len(searchresults), cat, searchterm))
    return searchresults
def TORDownloadMethod(bookid=None, tor_title=None, tor_url=None, library='eBook'):
    """
    Hand a torrent url or magnet link to every enabled torrent downloader.

    bookid = BookID of the books row to mark "Snatched" (eBook/AudioBook only)
    tor_title = display name of the torrent; replaced by the name the
                downloader reports when one is available
    tor_url = http(s) url of a .torrent file, or a magnet link
    library = 'eBook', 'AudioBook' or 'magazine'; selects the reject-word
              list and which books column is updated

    Returns True when a downloader accepted the torrent and the wanted row
    was marked "Snatched". Returns False when no url was given, the torrent
    file could not be fetched or written, no downloader is enabled, the
    resolved name hits a reject word, or every downloader refused it.
    """
    myDB = database.DBConnection()
    downloadID = False
    Source = ''
    full_url = tor_url  # keep the url as stored in "wanted" table

    if not tor_url:
        # without this guard the non-magnet branch below raised on None
        logger.warn('No torrent url supplied for %s' % tor_title)
        return False

    if tor_url.startswith('magnet'):
        torrent = tor_url  # allow magnet link to write to blackhole and hash to utorrent/rtorrent
    else:
        # HTMLParser is probably overkill, we only seem to get &amp;
        tor_url = tor_url.replace('&amp;', '&')

        if '&file=' in tor_url:
            # torznab results need to be re-encoded
            # had a problem with torznab utf-8 encoded strings not matching
            # our utf-8 strings because of long/short form differences
            url, value = tor_url.split('&file=', 1)
            value = makeUnicode(value)  # ensure unicode
            value = unicodedata.normalize('NFC', value)  # normalize to short form
            value = value.encode('unicode-escape')  # then escape the result
            if not isinstance(value, str):
                # python 3 gives bytes here; the escaped form is pure ascii
                value = value.decode('ascii')
            value = value.replace(' ', '%20')  # and encode any spaces
            tor_url = url + '&file=' + value

        # strip url back to the .torrent as some sites add parameters
        if not tor_url.endswith('.torrent'):
            if '.torrent' in tor_url:
                tor_url = tor_url.split('.torrent')[0] + '.torrent'

        headers = {'Accept-encoding': 'gzip', 'User-Agent': USER_AGENT}
        proxies = proxyList()
        try:
            r = requests.get(tor_url, headers=headers, timeout=90, proxies=proxies)
        except requests.exceptions.Timeout:
            logger.warn('Timeout fetching file from url: %s' % tor_url)
            return False
        except Exception as e:
            if hasattr(e, 'reason'):
                logger.warn('%s fetching file from url: %s, %s' % (type(e).__name__, tor_url, e.reason))
            else:
                logger.warn('%s fetching file from url: %s, %s' % (type(e).__name__, tor_url, str(e)))
            return False

        torrent = r.content

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_BLACKHOLE']:
        Source = "BLACKHOLE"
        logger.debug("Sending %s to blackhole" % tor_title)
        tor_name = cleanName(tor_title).replace(' ', '_')
        if tor_url.startswith('magnet'):
            if lazylibrarian.CONFIG['TOR_CONVERT_MAGNET']:
                hashid = CalcTorrentHash(tor_url)
                tor_name = 'meta-' + hashid + '.torrent'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                result = magnet2torrent(tor_url, tor_path)
                if result is not False:
                    logger.debug('Magnet file saved as: %s' % tor_path)
                    downloadID = Source
            else:
                tor_name += '.magnet'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                msg = ''  # progress trail, only logged if the write fails
                try:
                    msg = 'Opening '
                    with open(tor_path, 'wb') as torrent_file:
                        msg += 'Writing '
                        if not isinstance(torrent, bytes):
                            # file is opened in binary mode, so text must be encoded first
                            torrent = torrent.encode('iso-8859-1')
                        torrent_file.write(torrent)
                    msg += 'SettingPerm'
                    setperm(tor_path)
                    msg += 'Saved'
                    logger.debug('Magnet file saved: %s' % tor_path)
                    downloadID = Source
                except Exception as e:
                    logger.debug("Failed to write magnet to file: %s %s" % (type(e).__name__, str(e)))
                    logger.debug("Progress: %s" % msg)
                    logger.debug("Filename [%s]" % (repr(tor_path)))
                    return False
        else:
            tor_name += '.torrent'
            tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
            msg = ''  # progress trail, only logged if the write fails
            try:
                msg = 'Opening '
                with open(tor_path, 'wb') as torrent_file:
                    msg += 'Writing '
                    if not isinstance(torrent, bytes):
                        # file is opened in binary mode, so text must be encoded first
                        torrent = torrent.encode('iso-8859-1')
                    torrent_file.write(torrent)
                msg += 'SettingPerm '
                setperm(tor_path)
                msg += 'Saved'
                logger.debug('Torrent file saved: %s' % tor_name)
                downloadID = Source
            except Exception as e:
                logger.debug("Failed to write torrent to file: %s %s" % (type(e).__name__, str(e)))
                logger.debug("Progress: %s" % msg)
                logger.debug("Filename [%s]" % (repr(tor_path)))
                return False

    # Each enabled client below overwrites Source/downloadID, so when several
    # are enabled the values from the last one that ran are what get stored.
    if lazylibrarian.CONFIG['TOR_DOWNLOADER_UTORRENT'] and lazylibrarian.CONFIG['UTORRENT_HOST']:
        logger.debug("Sending %s to Utorrent" % tor_title)
        Source = "UTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = utorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = utorrent.nameTorrent(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_RTORRENT'] and lazylibrarian.CONFIG['RTORRENT_HOST']:
        logger.debug("Sending %s to rTorrent" % tor_title)
        Source = "RTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = rtorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = rtorrent.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_QBITTORRENT'] and lazylibrarian.CONFIG['QBITTORRENT_HOST']:
        logger.debug("Sending %s to qbittorrent" % tor_title)
        Source = "QBITTORRENT"
        hashid = CalcTorrentHash(torrent)
        status = qbittorrent.addTorrent(tor_url, hashid)  # returns True or False
        if status:
            downloadID = hashid
            tor_title = qbittorrent.getName(hashid)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_TRANSMISSION'] and lazylibrarian.CONFIG['TRANSMISSION_HOST']:
        logger.debug("Sending %s to Transmission" % tor_title)
        Source = "TRANSMISSION"
        downloadID = transmission.addTorrent(tor_url)  # returns id or False
        if downloadID:
            # transmission returns it's own int, but we store hashid instead
            downloadID = CalcTorrentHash(torrent)
            tor_title = transmission.getTorrentFolder(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_SYNOLOGY'] and lazylibrarian.CONFIG['USE_SYNOLOGY'] and \
            lazylibrarian.CONFIG['SYNOLOGY_HOST']:
        logger.debug("Sending %s to Synology" % tor_title)
        Source = "SYNOLOGY_TOR"
        downloadID = synology.addTorrent(tor_url)  # returns id or False
        if downloadID:
            tor_title = synology.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_DELUGE'] and lazylibrarian.CONFIG['DELUGE_HOST']:
        logger.debug("Sending %s to Deluge" % tor_title)
        if not lazylibrarian.CONFIG['DELUGE_USER']:
            # no username, talk to the webui
            Source = "DELUGEWEBUI"
            downloadID = deluge.addTorrent(tor_url)  # returns hash or False
            if downloadID:
                tor_title = deluge.getTorrentFolder(downloadID)
        else:
            # have username, talk to the daemon
            Source = "DELUGERPC"
            client = DelugeRPCClient(lazylibrarian.CONFIG['DELUGE_HOST'],
                                     lazylibrarian.CONFIG['DELUGE_URL_BASE'],
                                     int(lazylibrarian.CONFIG['DELUGE_PORT']),
                                     lazylibrarian.CONFIG['DELUGE_USER'],
                                     lazylibrarian.CONFIG['DELUGE_PASS'])
            try:
                client.connect()
                args = {"name": tor_title}
                if tor_url.startswith('magnet'):
                    downloadID = client.call('core.add_torrent_magnet', tor_url, args)
                else:
                    downloadID = client.call('core.add_torrent_url', tor_url, args)
                if downloadID:
                    if lazylibrarian.CONFIG['DELUGE_LABEL']:
                        _ = client.call('label.set_torrent', downloadID, lazylibrarian.CONFIG['DELUGE_LABEL'])
                    result = client.call('core.get_torrent_status', downloadID, {})
                    if 'name' in result:
                        tor_title = result['name']
            except Exception as e:
                logger.debug('DelugeRPC failed %s %s' % (type(e).__name__, str(e)))
                return False

    if not Source:
        logger.warn('No torrent download method is enabled, check config.')
        return False

    if downloadID:
        if tor_title:
            # str() because some clients hand back a non-string id
            if str(downloadID).upper() in tor_title.upper():
                logger.warn('%s: name contains hash, probably unresolved magnet' % Source)
            else:
                tor_title = unaccented_str(tor_title)
                # need to check against reject words list again as the name may have changed
                # library = magazine eBook AudioBook to determine which reject list
                # but we can't easily do the per-magazine rejects
                if library == 'magazine':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_MAGS'])
                elif library == 'eBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
                elif library == 'AudioBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
                else:
                    logger.debug("Invalid library [%s] in TORDownloadMethod" % library)
                    reject_list = []

                rejected = False
                lower_title = tor_title.lower()
                for word in reject_list:
                    if word in lower_title:
                        rejected = True
                        logger.debug("Rejecting torrent name %s, contains %s" % (tor_title, word))
                        break
                if rejected:
                    myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (full_url, ))
                    # the client already accepted it, so remove it there too
                    delete_task(Source, downloadID, True)
                    return False
                else:
                    logger.debug('%s setting torrent name to [%s]' % (Source, tor_title))
                    myDB.action('UPDATE wanted SET NZBtitle=? WHERE NZBurl=?', (tor_title, full_url))

        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?', (bookid, ))
        elif library == 'AudioBook':
            myDB.action('UPDATE books SET audiostatus="Snatched" WHERE BookID=?', (bookid, ))
        myDB.action('UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
                    (Source, downloadID, full_url))
        return True

    logger.error('Failed to download torrent from %s, %s' % (Source, tor_url))
    myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (full_url, ))
    return False
def search_magazines(mags=None, reset=False):
    """
    Search the enabled providers for new issues of active magazines and
    queue the new ones for download.

    mags = optional list of dicts; each dict's 'bookid' value is matched
           against magazines.Title. None searches every magazine whose
           Status is "Active".
    reset = when true, the 'search_magazines' job is restarted through
            scheduleJob once the search has finished.

    Returns None. Any exception is logged with its traceback rather than
    raised, and the thread name is set back to "WEBSERVER" on exit.
    """
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    # noinspection PyBroadException
    try:
        # only rename auto-named ("Thread-N") threads
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if mags is None:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if mags is None:  # backlog search
            searchmags = myDB.select('SELECT Title, Regex, DateType, LastAcquired, \
                                       IssueDate from magazines WHERE Status="Active"')
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select('SELECT Title,Regex,DateType,LastAcquired,IssueDate from magazines \
                                          WHERE Title=? AND Status="Active"', (magazine['bookid'],))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        # logger.debug("Removing old magazine search results")
        # myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags))))

        # Build one search request per magazine: the stored Regex is used as
        # the search term if present, otherwise one is derived from the Title
        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']
            datetype = searchmag['DateType']
            if not datetype:
                datetype = ''

            if not searchterm:
                dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)

                searchterm = re.sub('[.\-/]', ' ', searchterm)
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm, "datetype": datetype})

        if not searchlist:
            logger.warn('There is nothing to search for. Mark some magazines as active.')

        for book in searchlist:

            # Collect results from every enabled provider family, converting
            # them all to the nzb-style key names used further down
            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn('No nzb providers are available. Check config and blocklist')
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow:
                        logger.warn('No direct providers are available. Check config and blocklist')
                        lazylibrarian.NO_DIRECT_MSG = timenow

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn('No tor providers are available. Check config and blocklist')
                        lazylibrarian.NO_TOR_MSG = timenow

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders = IterateOverRSSSites()
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn('No rss providers are available. Check config and blocklist')
                        lazylibrarian.NO_RSS_MSG = timenow

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        resultlist.append({
                            'bookid': book['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': item['tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                            'nzbsize': item['tor_size'],
                            'nzbmode': item['tor_type']
                        })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                # per-magazine counters, reported in the summary log line below
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []  # results chosen for download
                issues = []  # "title,issuedate" keys already chosen, to avoid duplicates
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(nzbsize_temp, 1000)  # not all torrents returned by torznab have a size
                    # 1048576 = 1024 * 1024, so nzbsize is the size divided into MiB-sized units
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    # Need to make sure that substrings of magazine titles don't get found
                    # (e.g. Maxim USA will find Maximum PC USA) so split into "words"
                    dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '', '[': ' ', ']': ' ', '#': '# '}
                    nzbtitle_formatted = replace_all(nzbtitle, dic).strip()
                    # remove extra spaces if they're in a row
                    nzbtitle_formatted = " ".join(nzbtitle_formatted.split())
                    nzbtitle_exploded = nzbtitle_formatted.split(' ')

                    results = myDB.match('SELECT * from magazines WHERE Title=?', (bookid,))
                    if not results:
                        logger.debug('Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        # Run the rejection checks in order; each later check
                        # is skipped once an earlier one has rejected the result
                        rejected = False
                        maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" % nzbtitle)
                                rejected = True

                        if not rejected:
                            if ' ' in bookid:
                                bookid_exploded = bookid.split(' ')
                            else:
                                bookid_exploded = [bookid]

                            # Check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015

                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = []
                                for word in nzbtitle_exploded:
                                    word = unaccented(word).lower()
                                    if word:
                                        wlist.append(word)
                                for word in bookid_exploded:
                                    word = unaccented(word).lower()
                                    if word and word not in wlist:
                                        logger.debug("Rejecting %s, missing %s" % (nzbtitle, word))
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug(
                                        "Magazine title match failed " + bookid + " for " + nzbtitle_formatted)
                                else:
                                    logger.debug(
                                        "Magazine title matched " + bookid + " for " + nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" % len(nzbtitle_exploded))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']:
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']:
                            # any existing wanted row for this url counts, whatever its status
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=?', (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            # per-magazine reject words plus the global REJECT_MAGS list
                            reject_list = getList(str(results['Reject']).lower())
                            reject_list += getList(lazylibrarian.CONFIG['REJECT_MAGS'], ',')
                            lower_title = unaccented(nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Reject: %s' % str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                # a reject word that is part of the magazine's own title is allowed
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                                    break

                        if rejected:
                            rejects += 1
                        else:
                            # regex_pass is used below as the number of the date pattern that matched
                            regex_pass, issuedate, year = get_issue_date(nzbtitle_exploded)
                            if regex_pass:
                                logger.debug('Issue %s (regex %s) for %s ' %
                                             (issuedate, regex_pass, nzbtitle_formatted))
                                datetype_ok = True
                                datetype = book['datetype']

                                if datetype:
                                    # check all wanted parts are in the regex result
                                    # Day Month Year Vol Iss (MM needs two months)

                                    if 'M' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 12]:
                                        datetype_ok = False
                                    elif 'D' in datetype and regex_pass not in [3, 5, 6]:
                                        datetype_ok = False
                                    elif 'MM' in datetype and regex_pass not in [1]:  # bi monthly
                                        datetype_ok = False
                                    elif 'V' in datetype and 'I' in datetype and regex_pass not in [8, 9, 17, 18]:
                                        datetype_ok = False
                                    elif 'V' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 17, 18]:
                                        datetype_ok = False
                                    elif 'I' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 16, 17, 18]:
                                        datetype_ok = False
                                    elif 'Y' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 8, 10,
                                                                                12, 13, 15, 16, 18]:
                                        datetype_ok = False
                            else:
                                datetype_ok = False
                                logger.debug('Magazine %s not in a recognised date format.' % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                issuedate = "1970-01-01"  # provide a fake date for bad-date issues

                            # wanted issues go into wanted table marked "Wanted"
                            # the rest into pastissues table marked "Skipped" or "Have"
                            insert_table = "pastissues"
                            comp_date = 0  # > 0 means this issue is newer than what we hold
                            if datetype_ok:
                                control_date = results['IssueDate']
                                logger.debug("Control date: [%s]" % control_date)
                                if not control_date:  # we haven't got any copies of this magazine yet
                                    # get a rough time just over MAX_AGE days ago to compare to, in format yyyy-mm-dd
                                    # could perhaps calc differently for weekly, biweekly etc
                                    # For magazines with only an issue number use zero as we can't tell age

                                    if str(issuedate).isdigit():
                                        logger.debug('Magazine comparing issue numbers (%s)' % issuedate)
                                        control_date = 0
                                    elif re.match('\d+-\d\d-\d\d', str(issuedate)):
                                        start_time = time.time()
                                        start_time -= int(
                                            lazylibrarian.CONFIG['MAG_AGE']) * 24 * 60 * 60  # number of seconds in days
                                        if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                            start_time = 0
                                        control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))
                                        logger.debug('Magazine date comparing to %s' % control_date)
                                    else:
                                        logger.debug('Magazine unable to find comparison type [%s]' % issuedate)
                                        control_date = 0

                                if str(control_date).isdigit() and str(issuedate).isdigit():
                                    # for issue numbers, check if later than last one we have
                                    if regex_pass in [10, 12, 13] and year:
                                        issuedate = "%s%04d" % (year, int(issuedate))
                                    else:
                                        issuedate = str(issuedate).zfill(4)
                                    if not control_date:
                                        comp_date = 1
                                    else:
                                        comp_date = int(issuedate) - int(control_date)
                                elif re.match('\d+-\d\d-\d\d', str(control_date)) and \
                                        re.match('\d+-\d\d-\d\d', str(issuedate)):
                                    # only grab a copy if it's newer than the most recent we have,
                                    # or newer than a month ago if we have none
                                    comp_date = datecompare(issuedate, control_date)
                                else:
                                    # invalid comparison of date and issue number
                                    comp_date = 0
                                    if re.match('\d+-\d\d-\d\d', str(control_date)):
                                        if regex_pass > 9 and year:
                                            # we assumed it was an issue number, but it could be a date
                                            year = check_int(year, 0)
                                            if regex_pass in [10, 12, 13]:
                                                issuedate = int(issuedate[:4])
                                            issuenum = check_int(issuedate, 0)
                                            # an issue number of 1-12 is treated as a month of that year
                                            if year and 1 <= issuenum <= 12:
                                                issuedate = "%04d-%02d-01" % (year, issuenum)
                                                comp_date = datecompare(issuedate, control_date)
                                        if not comp_date:
                                            logger.debug('Magazine %s failed: Expecting a date' % nzbtitle_formatted)
                                    else:
                                        logger.debug('Magazine %s failed: Expecting issue number' % nzbtitle_formatted)
                                    if not comp_date:
                                        bad_date += 1
                                        issuedate = "1970-01-01"

                            if issuedate == "1970-01-01":
                                logger.debug('This issue of %s is unknown age; skipping.' % nzbtitle_formatted)
                            elif not datetype_ok:
                                logger.debug('This issue of %s not in a wanted date format.' % nzbtitle_formatted)
                            elif comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + issuedate
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' % len(issues))
                                    if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug('This issue of %s is already flagged for download' % issue)
                            else:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                                old_date += 1

                            # store only the _new_ matching results
                            # Don't add a new entry if this issue has been found on an earlier search
                            # and status has been user-set ( we only delete the "Skipped" ones )
                            # In "wanted" table it might be already snatched/downloading/processing

                            mag_entry = myDB.match('SELECT Status from %s WHERE NZBtitle=? and NZBprov=?' %
                                                   insert_table, (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('%s is already in %s marked %s' %
                                                 (nzbtitle, insert_table, mag_entry['Status']))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                if insert_table == 'pastissues':
                                    # try to mark ones we've already got
                                    match = myDB.match("SELECT * from issues WHERE Title=? AND IssueDate=?",
                                                       (bookid, issuedate))
                                    if match:
                                        insert_status = "Have"
                                    else:
                                        insert_status = "Skipped"
                                else:
                                    insert_status = "Wanted"
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": issuedate,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict, controlValueDict)
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Added %s to %s marked %s' % (nzbtitle, insert_table, insert_status))

                msg = 'Found %i result%s for %s. %i new,' % (total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                # NOTE(review): every download helper below is unpacked as a
                # (snatch, res) pair - confirm each one returns a 2-tuple
                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch, res = TORDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'Magazine')
                    elif magazine['nzbmode'] == 'direct':
                        snatch, res = DirectDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'Magazine')
                    elif magazine['nzbmode'] == 'nzb':
                        snatch, res = NZBDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'Magazine')
                    else:
                        res = 'Unhandled NZBmode [%s] for %s' % (magazine['nzbmode'], magazine["nzburl"])
                        logger.error(res)
                        snatch = 0

                    if snatch:
                        logger.info('Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"]))
                        custom_notify_snatch("%s %s" % (magazine['bookid'], magazine['nzburl']))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now()))
                        scheduleJob(action='Start', target='PostProcessor')
                    else:
                        # record why the download failed against the wanted row
                        myDB.action('UPDATE wanted SET status="Failed",DLResult=? WHERE NZBurl=?',
                                    (res, magazine["nzburl"]))

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
def LibraryScan(dir=None):
    """Scan a directory tree for ebooks and mark the ones found as "Open".

    Each file accepted by ``formatter.is_valid_booktype`` is identified, in
    order of preference, from an ``.opf`` file in the same directory, from
    metadata embedded in an epub/mobi, or from its file name matched against
    the ``lazylibrarian.EBOOK_DEST_FILE`` template.  Books located by
    ``find_book_in_db`` get ``Status="Open"`` and their ``BookFile`` path
    stored.  Authors not yet in the database are looked up at GoodReads and
    imported when ``lazylibrarian.ADD_AUTHOR`` is set.

    When ``lazylibrarian.FULL_SCAN`` is set, books currently marked "Open"
    whose file is missing are first set to ``lazylibrarian.NOTFOUND_STATUS``.

    :param dir: directory to scan; ``lazylibrarian.DOWNLOAD_DIR`` is used
        when it is empty.  Nothing is scanned if neither is set or the
        directory does not exist.
    :return: None
    """
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn(
            'Cannot find directory: %s. Not scanning' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
        return

    myDB = database.DBConnection()

    myDB.action('drop table if exists stats')
    myDB.action(
        'create table stats ( authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, '
        'GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )')

    # authors whose book counts must be refreshed at the end of the scan
    new_authors = []

    logger.info(
        'Scanning ebook directory: %s' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))

    new_book_count = 0
    file_count = 0

    if lazylibrarian.FULL_SCAN:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            bookID = book['BookID']
            bookfile = book['BookFile']

            if not (bookfile and os.path.isfile(bookfile)):
                # values are bound as parameters so quotes in them cannot break the SQL
                myDB.action('update books set Status=? where BookID=?', (status, bookID))
                myDB.action('update books set BookFile="" where BookID=?', (bookID,))
                logger.warn('Book %s - %s updated as not found on disk' % (bookAuthor, bookName))
                if bookAuthor not in new_authors:
                    new_authors.append(bookAuthor)

    # Subdirectories in which a book has already been found; consulted when
    # IMP_SINGLEBOOK is set to skip further formats of the same book.
    processed_subdirectories = []

    # Turn the EBOOK_DEST_FILE template into a regular expression.
    # re.escape() quotes only what needs quoting; prefixing every character
    # with a backslash would turn template letters/digits into regex escapes
    # such as \b, \d or \1.
    booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
    booktypes = '|'.join(re.escape(book_type) for book_type in booktype_list)
    matchString = re.escape(lazylibrarian.EBOOK_DEST_FILE)
    matchString = matchString.replace(r'\$Author', "(?P<author>.*?)").replace(
        r'\$Title', "(?P<book>.*?)")
    # "(?:epub|mobi)" matches a whole extension; "[epub|mobi]" would be a
    # character class matching any single one of those letters.
    matchString = matchString + r'\.(?:' + booktypes + ')'
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(dir):
        # prune hidden directories and "_" directories (prevents magazines
        # being scanned); d must be modified in place for os.walk to notice
        for directory in d[:]:
            if directory.startswith((".", "_")):
                d.remove(directory)

        for files in f:
            file_count += 1
            subdirectory = r.replace(dir, '')

            # Optionally skip directories we have already found a book in;
            # conditional in case the user keeps several different books in
            # the same subdirectory.
            if lazylibrarian.IMP_SINGLEBOOK and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
                continue

            if not formatter.is_valid_booktype(files):
                continue

            match = 0
            # Start every file with empty values so a book identified only by
            # its file name cannot hit unbound names or reuse the isbn/language
            # of the previous file.
            language = ""
            isbn = ""
            extn = files.rsplit('.', 1)[1] if '.' in files else ""

            logger.debug(
                "[%s] Now scanning subdirectory %s" %
                (dir.decode(lazylibrarian.SYS_ENCODING, 'replace'),
                 subdirectory.decode(lazylibrarian.SYS_ENCODING, 'replace')))

            # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
            # just look for any .opf file in the current directory since we
            # don't know LL preferred authorname/bookname at this point
            metafile = opf_file(r)
            try:
                res = get_book_info(metafile)
            except Exception:
                res = {}
            if 'title' in res and 'creator' in res:  # this is the minimum we need
                book = res['title']
                author = res['creator']
                language = res['language'] if 'language' in res else ""
                isbn = res['identifier'] if 'identifier' in res else ""
                match = 1
                logger.debug("file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))
            else:
                logger.debug("File meta incomplete in %s" % metafile)

            if not match and extn in ("epub", "mobi"):
                # no external metadata, but epub and mobi carry their own
                book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                try:
                    res = get_book_info(book_filename)
                except Exception:
                    res = {}
                if 'title' in res and 'creator' in res:  # this is the minimum we need
                    book = res['title']
                    author = res['creator']
                    language = res['language'] if 'language' in res else ""
                    isbn = res['identifier'] if 'identifier' in res else ""
                    logger.debug("book meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))
                    match = 1
                else:
                    logger.debug("Book meta incomplete in %s" % book_filename)

            if not match:
                # last resort: author/title from the file name
                match = pattern.match(files)
                if match:
                    author = match.group("author")
                    book = match.group("book")
                else:
                    logger.debug("Pattern match failed [%s]" % files)

            if not match:
                continue

            # flag that we found a book in this subdirectory
            processed_subdirectories.append(subdirectory)

            # If we have a valid looking isbn, and language != "Unknown",
            # add it to the language cache
            if not language:
                language = "Unknown"
            if not formatter.is_valid_isbn(isbn):
                isbn = ""
            if isbn != "" and language != "Unknown":
                logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                # is_valid_isbn has checked length is 10 or 13
                if len(isbn) == 10:
                    isbnhead = isbn[0:3]
                else:
                    isbnhead = isbn[3:6]
                cached = myDB.action(
                    'SELECT lang FROM languages where isbn = ?', (isbnhead,)).fetchone()
                if not cached:
                    myDB.action('insert into languages values (?, ?)', (isbnhead, language))
                    logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                else:
                    logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

            # get authors name in a consistent format
            if "," in author:  # "surname, forename"
                words = author.split(',')
                author = words[1].strip() + ' ' + words[0].strip()  # "forename surname"
            # length check: a pattern match can yield an empty or 1-char author
            if len(author) > 1 and author[1] == ' ':
                author = author.replace(' ', '.')
                author = author.replace('..', '.')

            # Check if the author exists, and import the author if not,
            # before starting any complicated book-name matching to save
            # repeating the search
            check_exist_author = myDB.action(
                'SELECT * FROM authors where AuthorName=?', (author,)).fetchone()
            if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                # no match for supplied author, but we're allowed to add new ones
                GR = GoodReads(author)
                try:
                    author_gr = GR.find_author_id()
                except Exception:
                    logger.warn("Error finding author id for [%s]" % author)
                    continue

                # only try to add if GR data matches found author data
                if author_gr:
                    authorname = author_gr['authorname']

                    # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien"
                    # and "J R R Tolkien"
                    match_auth = author.replace('.', '_')
                    match_auth = match_auth.replace(' ', '_')
                    match_auth = match_auth.replace('__', '_')
                    match_name = authorname.replace('.', '_')
                    match_name = match_name.replace(' ', '_')
                    match_name = match_name.replace('__', '_')
                    match_name = common.remove_accents(match_name)
                    match_auth = common.remove_accents(match_auth)

                    # allow a degree of fuzziness to cater for different
                    # accented character handling: the filename may have the
                    # accented or un-accented version of a character.
                    # The threshold is currently not configurable.
                    match_fuzz = fuzz.ratio(match_auth, match_name)
                    if match_fuzz < 90:
                        logger.debug(
                            "Failed to match author [%s] fuzz [%d]" % (author, match_fuzz))
                        logger.debug(
                            "Failed to match author [%s] to authorname [%s]" % (match_auth, match_name))

                    # To save loading hundreds of books by unknown authors
                    # at GR or GB, ignore if author "Unknown"
                    if (author != "Unknown") and (match_fuzz >= 90):
                        # use the "intact" name GoodReads gave us, not one of
                        # the mangled versions, otherwise the books appear to
                        # be by a different author!
                        author = author_gr['authorname']
                        # this new authorname may already be in the
                        # database, so check again
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName=?', (author,)).fetchone()
                        if not check_exist_author:
                            logger.debug("Adding new author [%s]" % author)
                            if author not in new_authors:
                                new_authors.append(author)
                            try:
                                importer.addAuthorToDB(author)
                                check_exist_author = myDB.action(
                                    'SELECT * FROM authors where AuthorName=?', (author,)).fetchone()
                            except Exception:
                                logger.debug("Failed to add author [%s] to database" % author)
                                continue

            # check author exists in db, either newly loaded or already there
            if not check_exist_author:
                logger.debug("Failed to match author [%s] in database" % author)
                continue

            # author exists, check if this book by this author is in our
            # database; metadata might have quotes in book name
            book = book.replace('"', '').replace("'", "")
            bookid = find_book_in_db(myDB, author, book)
            if bookid:
                # if the book is already marked as "Open" we already had it
                check_status = myDB.action(
                    'SELECT Status from books where BookID=?', (bookid,)).fetchone()
                if check_status['Status'] != 'Open':
                    # update status as we've got this book
                    myDB.action('UPDATE books set Status="Open" where BookID=?', (bookid,))
                    book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                    # update book location so we can check if it gets
                    # removed, or allow click-to-open
                    myDB.action(
                        'UPDATE books set BookFile=? where BookID=?', (book_filename, bookid))
                    new_book_count += 1

    cachesize = myDB.action("select count(*) from languages").fetchone()

    logger.info("%s new/modified books found and added to the database" % new_book_count)
    logger.info("%s files processed" % file_count)

    if new_book_count:
        stats = myDB.action(
            "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), "
            "sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats").fetchone()
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug("GoogleBooks was hit %s times for books" % stats['sum(GR_book_hits)'])
            logger.debug("GoogleBooks language was changed %s times" % stats['sum(GB_lang_change)'])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug("GoodReads was hit %s times for books" % stats['sum(GR_book_hits)'])
            logger.debug("GoodReads was hit %s times for languages" % stats['sum(GR_lang_hits)'])
        logger.debug("LibraryThing was hit %s times for languages" % stats['sum(LT_lang_hits)'])
        logger.debug("Language cache was hit %s times" % stats['sum(cache_hits)'])
        logger.debug("Unwanted language removed %s books" % stats['sum(bad_lang)'])
        logger.debug("Unwanted characters removed %s books" % stats['sum(bad_char)'])
        logger.debug("Unable to cache %s books with missing ISBN" % stats['sum(uncached)'])
    logger.debug("ISBN Language cache holds %s entries" % cachesize['count(*)'])

    stats = len(
        myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
    if stats:
        logger.warn("There are %s books in your library with unknown language" % stats)

    logger.debug('Updating %i authors' % len(new_authors))
    for auth in new_authors:
        havebooks = len(myDB.select(
            'select BookName from Books where status=? and AuthorName=?', ('Open', auth)))
        myDB.action('UPDATE authors set HaveBooks=? where AuthorName=?', (havebooks, auth))
        totalbooks = len(myDB.select(
            'select BookName from Books where status!=? and AuthorName=?', ('Ignored', auth)))
        myDB.action('UPDATE authors set UnignoredBooks=? where AuthorName=?', (totalbooks, auth))

    logger.info('Library scan complete')
def magazineScan():
    """Rebuild the ``magazines`` and ``issues`` tables from files on disk.

    The folder scanned is the part of ``CONFIG['MAG_DEST_FOLDER']`` before
    the first ``$``.  When ``CONFIG['MAG_RELATIVE']`` is set it is prefixed
    with ``_`` if needed and placed under ``lazylibrarian.DIRECTORY('eBook')``.
    With ``CONFIG['FULL_SCAN']`` set, issues whose file has gone are deleted
    first, their magazine's details are reset, and magazines left with no
    issues are removed.

    Each file accepted by ``is_valid_booktype(fname, booktype='mag')`` is
    matched against the ``CONFIG['MAG_DEST_FILE']`` template to obtain title
    and issue date; the title falls back to the containing directory name.
    Acquired dates come from the file modification time.

    ``lazylibrarian.MAG_UPDATE`` is 1 while the scan runs and is always reset
    to 0 afterwards.  Any exception is logged with its traceback and not
    re-raised.

    :return: None
    """
    lazylibrarian.MAG_UPDATE = 1

    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()

        mag_path = lazylibrarian.CONFIG['MAG_DEST_FOLDER']
        mag_path = mag_path.split('$')[0]

        if lazylibrarian.CONFIG['MAG_RELATIVE']:
            if mag_path[0] not in '._':
                mag_path = '_' + mag_path
            mag_path = os.path.join(lazylibrarian.DIRECTORY('eBook'), mag_path)
        mag_path = mag_path.encode(lazylibrarian.SYS_ENCODING)

        if lazylibrarian.CONFIG['FULL_SCAN']:
            mags = myDB.select('select * from Issues')
            # check all the issues are still there, delete entry if not
            for mag in mags:
                title = mag['Title']
                issuedate = mag['IssueDate']
                issuefile = mag['IssueFile']

                if issuefile and not os.path.isfile(issuefile):
                    myDB.action('DELETE from Issues where issuefile=?', (issuefile, ))
                    logger.info('Issue %s - %s deleted as not found on disk' % (title, issuedate))
                    controlValueDict = {"Title": title}
                    newValueDict = {
                        "LastAcquired": None,  # clear magazine dates
                        "IssueDate": None,  # we will fill them in again later
                        "LatestCover": None,
                        "IssueStatus": "Skipped"  # assume there are no issues now
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    logger.debug('Magazine %s details reset' % title)

            mags = myDB.select('SELECT * from magazines')
            # now check the magazine titles and delete any with no issues
            for mag in mags:
                title = mag['Title']
                count = myDB.select(
                    'SELECT COUNT(Title) as counter FROM issues WHERE Title=?', (title, ))
                issues = count[0]['counter']
                if not issues:
                    logger.debug('Magazine %s deleted as no issues found' % title)
                    myDB.action('DELETE from magazines WHERE Title=?', (title, ))

        logger.info(' Checking [%s] for magazines' % mag_path)

        # Turn the MAG_DEST_FILE template into regular expressions.
        # re.escape() quotes only what needs quoting; prefixing every
        # character with a backslash creates escapes such as \b or \d from
        # template letters, and unknown letter escapes are an error on
        # newer Python versions.
        booktype_list = getList(lazylibrarian.CONFIG['MAG_TYPE'])
        booktypes = '|'.join(re.escape(book_type) for book_type in booktype_list)
        # "(?:pdf|epub)" matches a whole extension; "[pdf|epub]" would be a
        # character class matching any single one of those letters.
        extension = r'\.(?:' + booktypes + ')'
        matchString = re.escape(lazylibrarian.CONFIG['MAG_DEST_FILE'])
        matchString = matchString.replace(r'\$IssueDate', "(?P<issuedate>.*?)")

        # first choice: both title and issue date taken from the file name
        title_pattern = re.compile(
            matchString.replace(r'\$Title', "(?P<title>.*?)") + extension, re.VERBOSE)
        # fallback: issue date only, the title token is dropped from the template
        date_pattern = re.compile(
            matchString.replace(r'\$Title', "") + extension, re.VERBOSE)

        # try to ensure startdir is str as os.walk can fail if it tries to convert a subdir or file
        # to utf-8 and fails (eg scandinavian characters in ascii 8bit)
        for rootdir, dirnames, filenames in os.walk(makeBytestr(mag_path)):
            rootdir = makeUnicode(rootdir)
            filenames = [makeUnicode(item) for item in filenames]
            for fname in filenames:
                # maybe not all magazines will be pdf?
                if not is_valid_booktype(fname, booktype='mag'):
                    continue

                issuedate = ''
                # noinspection PyBroadException
                try:
                    match = title_pattern.match(fname)
                    if match:
                        issuedate = match.group("issuedate")
                        # raises if the template has no $Title; handled below
                        title = match.group("title")
                        match = True
                    else:
                        match = False
                except Exception:
                    match = False

                if not match:
                    try:
                        match = date_pattern.match(fname)
                        if match:
                            issuedate = match.group("issuedate")
                            title = os.path.basename(rootdir)
                        else:
                            logger.debug("Pattern match failed for [%s]" % fname)
                            continue
                    except Exception as e:
                        logger.debug("Invalid name format for [%s] %s %s" % (fname, type(e).__name__, str(e)))
                        continue

                logger.debug("Found %s Issue %s" % (title, fname))

                issuefile = os.path.join(rootdir, fname)  # full path to issue.pdf
                mtime = os.path.getmtime(issuefile)
                iss_acquired = datetime.date.isoformat(datetime.date.fromtimestamp(mtime))

                controlValueDict = {"Title": title}

                # is this magazine already in the database?
                mag_entry = myDB.match(
                    'SELECT LastAcquired, IssueDate, MagazineAdded from magazines WHERE Title=?', (title, ))
                if not mag_entry:
                    # need to add a new magazine to the database
                    newValueDict = {
                        "Reject": None,
                        "Status": "Active",
                        "MagazineAdded": None,
                        "LastAcquired": None,
                        "LatestCover": None,
                        "IssueDate": None,
                        "IssueStatus": "Skipped",
                        "Regex": None
                    }
                    logger.debug("Adding magazine %s" % title)
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    maglastacquired = None
                    magissuedate = None
                    magazineadded = None
                else:
                    maglastacquired = mag_entry['LastAcquired']
                    magissuedate = mag_entry['IssueDate']
                    magazineadded = mag_entry['MagazineAdded']
                    # Only pad a real value.  str(None) is the truthy text
                    # 'None', which sorts after any digit, so a magazine whose
                    # IssueDate had been cleared would never get its
                    # IssueDate/LatestCover filled in again below.
                    if magissuedate:
                        magissuedate = str(magissuedate).zfill(4)

                issuedate = str(issuedate).zfill(4)  # for sorting issue numbers

                # is this issue already in the database?
                controlValueDict = {"Title": title, "IssueDate": issuedate}
                issue_id = create_id("%s %s" % (title, issuedate))
                iss_entry = myDB.match(
                    'SELECT Title from issues WHERE Title=? and IssueDate=?', (title, issuedate))
                if not iss_entry:
                    newValueDict = {
                        "IssueAcquired": iss_acquired,
                        "IssueID": issue_id,
                        "IssueFile": issuefile
                    }
                    myDB.upsert("Issues", newValueDict, controlValueDict)
                    logger.debug("Adding issue %s %s" % (title, issuedate))

                create_cover(issuefile)
                lazylibrarian.postprocess.processMAGOPF(issuefile, title, issuedate, issue_id)

                # see if this issues date values are useful
                controlValueDict = {"Title": title}
                if not mag_entry:  # new magazine, this is the only issue
                    newValueDict = {
                        "MagazineAdded": iss_acquired,
                        "LastAcquired": iss_acquired,
                        "LatestCover": os.path.splitext(issuefile)[0] + '.jpg',
                        "IssueDate": issuedate,
                        "IssueStatus": "Open"
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                else:
                    # Set magazine_issuedate to issuedate of most recent issue we have
                    # Set latestcover to most recent issue cover
                    # Set magazine_added to acquired date of earliest issue we have
                    # Set magazine_lastacquired to acquired date of most recent issue we have
                    # acquired dates are read from magazine file timestamps
                    newValueDict = {"IssueStatus": "Open"}
                    if not magazineadded or iss_acquired < magazineadded:
                        newValueDict["MagazineAdded"] = iss_acquired
                    if not maglastacquired or iss_acquired > maglastacquired:
                        newValueDict["LastAcquired"] = iss_acquired
                    if not magissuedate or issuedate >= magissuedate:
                        newValueDict["IssueDate"] = issuedate
                        newValueDict["LatestCover"] = os.path.splitext(issuefile)[0] + '.jpg'
                    myDB.upsert("magazines", newValueDict, controlValueDict)

        magcount = myDB.match("select count(*) from magazines")
        isscount = myDB.match("select count(*) from issues")

        logger.info("Magazine scan complete, found %s magazine%s, %s issue%s" %
                    (magcount['count(*)'], plural(magcount['count(*)']),
                     isscount['count(*)'], plural(isscount['count(*)'])))

    except Exception:
        logger.error('Unhandled exception in magazineScan: %s' % traceback.format_exc())
    finally:
        # always clear the "scan running" flag, however the scan ended
        lazylibrarian.MAG_UPDATE = 0
def find_book(self, bookid=None, bookstatus="None"):
    """Fetch one volume from the GoogleBooks API and store it in the database.

    The volume is requested by ``bookid`` and converted with ``bookdict``.
    Its author is resolved through ``GoodReads(...).find_author_id()``; an
    author not yet in the ``authors`` table is added, and its books are
    loaded when ``CONFIG['NEWAUTHOR_BOOKS']`` is set.  The book row is then
    upserted, followed by cover, series and work-page updates.

    Language and publication-date preferences only produce log messages
    here; they do not stop the book being added.

    :param bookid: GoogleBooks volume id, also used as ``BookID``.
    :param bookstatus: status to store for the book.  An empty value, or
        the default text ``"None"``, means "use ``CONFIG['NEWBOOK_STATUS']``".
    :return: None.  Returns early, adding nothing, when the API gives no
        result, the volume has no author, or no author id can be found.
    """
    myDB = database.DBConnection()
    if not lazylibrarian.CONFIG['GB_API']:
        logger.warn('No GoogleBooks API key, check config')
    URL = 'https://www.googleapis.com/books/v1/volumes/' + \
        str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
    jsonresults, in_cache = gb_json_request(URL)

    if jsonresults is None:
        logger.debug('No results found for %s' % bookid)
        return

    # The signature default is the *string* "None", which is truthy; treat it
    # like an unset value so the configured default status is really applied.
    if not bookstatus or bookstatus == "None":
        bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

    book = bookdict(jsonresults)
    dic = {':': '.', '"': '', '\'': ''}
    bookname = replace_all(book['name'], dic)

    bookname = unaccented(bookname)
    bookname = bookname.strip()  # strip whitespace

    if not book['author']:
        logger.debug('Book %s does not contain author field, skipping' % bookname)
        return

    # warn if language is in ignore list, but user said they wanted this book
    valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
    if book['lang'] not in valid_langs and 'All' not in valid_langs:
        logger.debug('Book %s googlebooks language does not match preference, %s' %
                     (bookname, book['lang']))

    if lazylibrarian.CONFIG['NO_PUBDATE']:
        if not book['date'] or book['date'] == '0000':
            logger.warn('Book %s Publication date does not match preference, %s' %
                        (bookname, book['date']))

    if lazylibrarian.CONFIG['NO_FUTURE']:
        # a missing date cannot be in the future (and None cannot be
        # compared with a string on Python 3)
        if book['date'] and book['date'] > today()[:4]:
            logger.warn('Book %s Future publication date does not match preference, %s' %
                        (bookname, book['date']))

    authorname = book['author']
    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if not author:
        logger.warn("No AuthorID for %s, unable to add book %s" % (book['author'], bookname))
        return

    AuthorID = author['authorid']
    match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
    if not match:
        match = myDB.match('SELECT AuthorID from authors WHERE AuthorName=?', (author['authorname'],))
        if match:
            logger.debug('%s: Changing authorid from %s to %s' %
                         (author['authorname'], AuthorID, match['AuthorID']))
            AuthorID = match['AuthorID']  # we have a different authorid for that authorname
        else:
            # no author but request to add book, add author with newauthor status
            # User hit "add book" button from a search or a wishlist import
            newauthor_status = 'Active'
            if lazylibrarian.CONFIG['NEWAUTHOR_STATUS'] in ['Skipped', 'Ignored']:
                newauthor_status = 'Paused'
            controlValueDict = {"AuthorID": AuthorID}
            newValueDict = {
                "AuthorName": author['authorname'],
                "AuthorImg": author['authorimg'],
                "AuthorLink": author['authorlink'],
                "AuthorBorn": author['authorborn'],
                "AuthorDeath": author['authordeath'],
                "DateAdded": today(),
                "Status": newauthor_status
            }
            authorname = author['authorname']
            myDB.upsert("authors", newValueDict, controlValueDict)
            if lazylibrarian.CONFIG['NEWAUTHOR_BOOKS']:
                self.get_author_books(AuthorID, entrystatus=lazylibrarian.CONFIG['NEWAUTHOR_STATUS'])

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorID": AuthorID,
        "BookName": bookname,
        "BookSub": book['sub'],
        "BookDesc": book['desc'],
        "BookIsbn": book['isbn'],
        "BookPub": book['pub'],
        "BookGenre": book['genre'],
        "BookImg": book['img'],
        "BookLink": book['link'],
        "BookRate": float(book['rate']),
        "BookPages": book['pages'],
        "BookDate": book['date'],
        "BookLang": book['lang'],
        "Status": bookstatus,
        "AudioStatus": lazylibrarian.CONFIG['NEWAUDIO_STATUS'],
        "BookAdded": today()
    }

    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s by %s added to the books database" % (bookname, authorname))

    bookimg = book['img']
    # Guard against a missing image before the substring tests; the http
    # branch below already allowed for an empty value.
    if bookimg and ('nocover' in bookimg or 'nophoto' in bookimg):
        # try to get a cover from another source
        workcover, source = getBookCover(bookid)
        if workcover:
            logger.debug('Updated cover for %s using %s' % (bookname, source))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)

    elif bookimg and bookimg.startswith('http'):
        link, success, _ = cache_img("book", bookid, bookimg)
        if success:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)
        else:
            logger.debug('Failed to cache image for %s' % bookimg)

    serieslist = []
    if book['series']:
        serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))]
    if lazylibrarian.CONFIG['ADD_SERIES']:
        newserieslist = getWorkSeries(bookid)
        if newserieslist:
            serieslist = newserieslist
            logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
    setSeries(serieslist, bookid)

    worklink = getWorkPage(bookid)
    if worklink:
        controlValueDict = {"BookID": bookid}
        newValueDict = {"WorkPage": worklink}
        myDB.upsert("books", newValueDict, controlValueDict)
def LibraryScan(dir=None):
    """Scan a directory tree for ebooks and mark the ones found as "Open".

    Each file accepted by ``formatter.is_valid_booktype`` is identified from
    metadata embedded in an epub/mobi, then from an ``.opf`` file in the same
    directory (which overrides the embedded values, as the user may have
    edited it), and finally from its file name matched against the
    ``lazylibrarian.EBOOK_DEST_FILE`` template.  Books located by
    ``find_book_in_db`` get ``Status="Open"`` and their ``BookFile`` path
    stored.  Authors not yet in the database are looked up at GoodReads and
    imported when ``lazylibrarian.ADD_AUTHOR`` is set.

    When ``lazylibrarian.FULL_SCAN`` is set, books currently marked "Open"
    whose file is missing are first set to ``lazylibrarian.NOTFOUND_STATUS``.
    After the scan the book counts of every author are refreshed and covers
    still referenced by an http link are passed to ``bookwork.cache_cover``.

    :param dir: directory to scan; ``lazylibrarian.DOWNLOAD_DIR`` is used
        when it is empty.  Nothing is scanned if neither is set or the
        directory does not exist.
    :return: None
    """
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn(
            'Cannot find directory: %s. Not scanning' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
        return

    myDB = database.DBConnection()

    myDB.action('drop table if exists stats')
    myDB.action(
        'create table stats (authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, '
        'GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )')

    logger.info(
        'Scanning ebook directory: %s' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))

    new_book_count = 0
    file_count = 0

    if lazylibrarian.FULL_SCAN:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            bookID = book['BookID']
            bookfile = book['BookFile']

            if not (bookfile and os.path.isfile(bookfile)):
                # values are bound as parameters so quotes in them cannot break the SQL
                myDB.action('update books set Status=? where BookID=?', (status, bookID))
                myDB.action('update books set BookFile="" where BookID=?', (bookID,))
                logger.warn('Book %s - %s updated as not found on disk' % (bookAuthor, bookName))

    # to save repeat-scans of the same directory if it contains multiple
    # formats of the same book, keep track of which directories we've
    # already looked at
    processed_subdirectories = []

    # Turn the EBOOK_DEST_FILE template into a regular expression.
    # re.escape() quotes only what needs quoting; prefixing every character
    # with a backslash would turn template letters/digits into regex escapes
    # such as \b, \d or \1.
    booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
    booktypes = '|'.join(re.escape(book_type) for book_type in booktype_list)
    matchString = re.escape(lazylibrarian.EBOOK_DEST_FILE)
    matchString = matchString.replace(r'\$Author', "(?P<author>.*?)").replace(
        r'\$Title', "(?P<book>.*?)")
    # "(?:epub|mobi)" matches a whole extension; "[epub|mobi]" would be a
    # character class matching any single one of those letters.
    matchString = matchString + r'\.(?:' + booktypes + ')'
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(dir):
        # prune hidden directories and "_" directories (prevents magazines
        # being scanned); d must be modified in place for os.walk to notice
        for directory in d[:]:
            if directory.startswith((".", "_")):
                d.remove(directory)

        # the directory name is the same for every file in it: decode once
        if f and isinstance(r, str):
            r = r.decode('utf-8')

        for files in f:
            file_count += 1
            subdirectory = r.replace(dir, '')

            # Skip if we've done this directory before.  Conditional on a
            # config switch in case the user keeps multiple different books
            # in the same subdirectory.
            if lazylibrarian.IMP_SINGLEBOOK and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
                continue

            if not formatter.is_valid_booktype(files):
                continue

            match = 0
            logger.debug("[%s] Now scanning subdirectory %s" % (dir, subdirectory))

            language = "Unknown"
            isbn = ""
            book = ""
            author = ""
            extn = files.split('.')[-1]

            # if it's an epub or a mobi we can try to read metadata from it
            if extn in ("epub", "mobi"):
                book_filename = os.path.join(
                    r.encode(lazylibrarian.SYS_ENCODING), files.encode(lazylibrarian.SYS_ENCODING))
                try:
                    res = get_book_info(book_filename)
                except Exception:
                    res = {}
                if 'title' in res and 'creator' in res:  # this is the minimum we need
                    match = 1
                    book = res['title']
                    author = res['creator']
                    if 'language' in res:
                        language = res['language']
                    if 'identifier' in res:
                        isbn = res['identifier']
                    if 'type' in res:
                        extn = res['type']
                    logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                 (isbn, language, author, book, extn))
                else:
                    logger.debug("Book meta incomplete in %s" % book_filename)

            # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
            # just look for any .opf file in the current directory since we
            # don't know LL preferred authorname/bookname at this point.
            # Allow metadata in file to override book contents as may be
            # users pref
            metafile = opf_file(r)
            try:
                res = get_book_info(metafile)
            except Exception:
                res = {}
            if 'title' in res and 'creator' in res:  # this is the minimum we need
                match = 1
                book = res['title']
                author = res['creator']
                if 'language' in res:
                    language = res['language']
                if 'identifier' in res:
                    isbn = res['identifier']
                logger.debug("file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))
            else:
                logger.debug("File meta incomplete in %s" % metafile)

            if not match:
                # no author/book from metadata file, and not embedded either
                match = pattern.match(files)
                if match:
                    author = match.group("author")
                    book = match.group("book")
                else:
                    logger.debug("Pattern match failed [%s]" % files)

            if not match:
                continue

            # flag that we found a book in this subdirectory
            processed_subdirectories.append(subdirectory)

            # If we have a valid looking isbn, and language != "Unknown",
            # add it to the language cache
            if language != "Unknown" and formatter.is_valid_isbn(isbn):
                logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                # is_valid_isbn has checked length is 10 or 13
                if len(isbn) == 10:
                    isbnhead = isbn[0:3]
                else:
                    isbnhead = isbn[3:6]
                cached = myDB.action(
                    'SELECT lang FROM languages where isbn = ?', (isbnhead,)).fetchone()
                if not cached:
                    myDB.action('insert into languages values (?, ?)', (isbnhead, language))
                    logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                else:
                    logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

            # get authors name in a consistent format
            if "," in author:  # "surname, forename"
                words = author.split(',')
                author = words[1].strip() + ' ' + words[0].strip()  # "forename surname"
            # length check: a pattern match can yield an empty or 1-char author
            if len(author) > 1 and author[1] == ' ':
                author = author.replace(' ', '.')
                author = author.replace('..', '.')

            # Check if the author exists, and import the author if not,
            # before starting any complicated book-name matching to save
            # repeating the search
            check_exist_author = myDB.action(
                'SELECT * FROM authors where AuthorName=?', (author,)).fetchone()
            if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                # no match for supplied author, but we're allowed to add new ones
                GR = GoodReads(author)
                try:
                    author_gr = GR.find_author_id()
                except Exception:
                    logger.warn("Error finding author id for [%s]" % author)
                    continue

                # only try to add if GR data matches found author data
                if author_gr:
                    authorname = author_gr['authorname']

                    # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien"
                    # and "J R R Tolkien"
                    match_auth = author.replace('.', '_')
                    match_auth = match_auth.replace(' ', '_')
                    match_auth = match_auth.replace('__', '_')
                    match_name = authorname.replace('.', '_')
                    match_name = match_name.replace(' ', '_')
                    match_name = match_name.replace('__', '_')
                    match_name = common.remove_accents(match_name)
                    match_auth = common.remove_accents(match_auth)

                    # allow a degree of fuzziness to cater for different
                    # accented character handling: the filename may have the
                    # accented or un-accented version of a character.
                    # The threshold is currently not configurable.
                    # fuzz.ratio doesn't lowercase for us
                    match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                    if match_fuzz < 90:
                        logger.debug(
                            "Failed to match author [%s] fuzz [%d]" % (author, match_fuzz))
                        logger.debug(
                            "Failed to match author [%s] to authorname [%s]" % (match_auth, match_name))

                    # To save loading hundreds of books by unknown authors
                    # at GR or GB, ignore if author "Unknown"
                    if (author != "Unknown") and (match_fuzz >= 90):
                        # use the "intact" name GoodReads gave us, not one of
                        # the mangled versions, otherwise the books appear to
                        # be by a different author!
                        author = author_gr['authorname']
                        # this new authorname may already be in the
                        # database, so check again
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName=?', (author,)).fetchone()
                        if not check_exist_author:
                            logger.debug("Adding new author [%s]" % author)
                            try:
                                importer.addAuthorToDB(author)
                                check_exist_author = myDB.action(
                                    'SELECT * FROM authors where AuthorName=?', (author,)).fetchone()
                            except Exception:
                                logger.debug("Failed to add author [%s] to database" % author)
                                continue

            # check author exists in db, either newly loaded or already there
            if not check_exist_author:
                logger.debug("Failed to match author [%s] in database" % author)
                continue

            # author exists, check if this book by this author is in our
            # database; metadata might have quotes in book name
            book = book.replace('"', '').replace("'", "")
            bookid = find_book_in_db(myDB, author, book)
            if bookid:
                # if the book is already marked as "Open" we already had it
                check_status = myDB.action(
                    'SELECT Status from books where BookID=?', (bookid,)).fetchone()
                if check_status['Status'] != 'Open':
                    # update status as we've got this book
                    myDB.action('UPDATE books set Status="Open" where BookID=?', (bookid,))
                    book_filename = os.path.join(r, files)
                    # update book location so we can check if it gets
                    # removed, or allow click-to-open
                    myDB.action(
                        'UPDATE books set BookFile=? where BookID=?', (book_filename, bookid))
                    new_book_count += 1

    cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone()
    logger.info("%s new/modified books found and added to the database" % new_book_count)
    logger.info("%s files processed" % file_count)

    stats = myDB.action(
        "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), "
        "sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats").fetchone()
    if stats['sum(GR_book_hits)'] is not None:
        # only show stats if new books added
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug("GoogleBooks was hit %s times for books" % stats['sum(GR_book_hits)'])
            logger.debug("GoogleBooks language was changed %s times" % stats['sum(GB_lang_change)'])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug("GoodReads was hit %s times for books" % stats['sum(GR_book_hits)'])
            logger.debug("GoodReads was hit %s times for languages" % stats['sum(GR_lang_hits)'])
        logger.debug("LibraryThing was hit %s times for languages" % stats['sum(LT_lang_hits)'])
        logger.debug("Language cache was hit %s times" % stats['sum(cache_hits)'])
        logger.debug("Unwanted language removed %s books" % stats['sum(bad_lang)'])
        logger.debug("Unwanted characters removed %s books" % stats['sum(bad_char)'])
        logger.debug("Unable to cache %s books with missing ISBN" % stats['sum(uncached)'])
    logger.debug("Cache %s hits, %s miss" % (lazylibrarian.CACHE_HIT, lazylibrarian.CACHE_MISS))
    logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])

    stats = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
    if stats:
        logger.warn("There are %s books in your library with unknown language" % stats)

    authors = myDB.select('select AuthorName from authors')
    # Update bookcounts for all authors, not just new ones - refresh may have
    # located new books for existing authors especially if switched provider gb/gr
    logger.debug('Updating bookcounts for %i authors' % len(authors))
    for row in authors:
        name = row['AuthorName']
        havebooks = myDB.action(
            'SELECT count("BookID") as counter from books WHERE AuthorName=? '
            'AND (Status="Have" OR Status="Open")', (name,)).fetchone()
        myDB.action('UPDATE authors set HaveBooks=? where AuthorName=?', (havebooks['counter'], name))
        totalbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName=?', (name,)).fetchone()
        myDB.action('UPDATE authors set TotalBooks=? where AuthorName=?', (totalbooks['counter'], name))
        unignoredbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName=? AND Status!="Ignored"',
            (name,)).fetchone()
        myDB.action(
            'UPDATE authors set UnignoredBooks=? where AuthorName=?', (unignoredbooks['counter'], name))

    covers = myDB.action("select count('bookimg') as counter from books where bookimg like 'http%'").fetchone()
    logger.info("Caching covers for %s books" % covers['counter'])

    # fetch the rows up front so the UPDATEs below do not run while a SELECT
    # cursor on the same table is still being iterated
    images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"')
    for item in images:
        bookid = item['bookid']
        bookimg = item['bookimg']
        newimg = bookwork.cache_cover(bookid, bookimg)
        if newimg != bookimg:
            myDB.action('update books set BookImg=? where BookID=?', (newimg, bookid))

    logger.info('Library scan complete')
try: os.makedirs(dest_path) except Exception, e: logger.debug(str(e)) if lazylibrarian.DESTINATION_COPY == 1: shutil.copyfile(os.path.join(pp_path, file3), os.path.join(dest_path, file3)) else: shutil.move(os.path.join(pp_path, file3), os.path.join(dest_path, file3)) else: shutil.move(pp_path, dest_path) logger.debug('Successfully moved %s to %s.' % (pp_path, dest_path)) pp = True # try and rename the actual book file & remove non-book files booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE) for file2 in os.listdir(dest_path): #logger.debug('file extension: ' + str(file2).split('.')[-1]) if ((file2.lower().find(".jpg") <= 0) & (file2.lower().find(".opf") <= 0)): if ((str(file2).split('.')[-1]) not in booktype_list): logger.debug('Removing unwanted file: %s' % str(file2)) os.remove(os.path.join(dest_path, file2)) else: logger.debug('Moving %s to directory %s' % (file2, dest_path)) os.rename(os.path.join(dest_path, file2), os.path.join(dest_path, global_name + '.' + str(file2).split('.')[-1])) try: os.chmod(dest_path, 0777) except Exception, e: logger.debug("Could not chmod path: " + str(dest_path)) except OSError, e: logger.error('Could not create destination folder or rename the downloaded ebook. Check permissions of: ' + lazylibrarian.DESTINATION_DIR)
def _purge_missing_issues(myDB):
    """Remove issue rows whose file is gone, then magazines left without issues.

    myDB: open connection as returned by database.DBConnection().
    """
    # check all the issues are still there, delete entry if not
    for mag in myDB.select('select * from Issues'):
        title = mag['Title']
        issuedate = mag['IssueDate']
        issuefile = mag['IssueFile']
        if issuefile and not os.path.isfile(issuefile):
            # parameterised so that quotes in the path cannot break the statement
            myDB.action('DELETE from Issues where issuefile=?', (issuefile,))
            logger.info('Issue %s - %s deleted as not found on disk' % (title, issuedate))
            controlValueDict = {"Title": title}
            newValueDict = {
                "LastAcquired": None,  # clear magazine dates
                "IssueDate": None,  # we will fill them in again later
                "IssueStatus": "Skipped"  # assume there are no issues now
            }
            myDB.upsert("magazines", newValueDict, controlValueDict)
            logger.debug('Magazine %s details reset' % title)

    # now check the magazine titles and delete any with no issues
    for mag in myDB.select('SELECT * from magazines'):
        title = mag['Title']
        count = myDB.select('SELECT COUNT(Title) as counter FROM issues WHERE Title=?', (title,))
        if not count[0]['counter']:
            logger.debug('Magazine %s deleted as no issues found' % title)
            myDB.action('DELETE from magazines WHERE Title=?', (title,))


def _build_issue_pattern(template, booktype_list):
    """Compile a MAG_DEST_FILE style template into a filename regex.

    template:      naming template containing $IssueDate and/or $Title
    booktype_list: accepted file extensions, without the leading dot
    return:        compiled pattern with named groups "issuedate" / "title"
    """
    # re.escape keeps literal template text literal. Prefixing every character
    # with a backslash (the previous approach) turned letters such as s, d or w
    # into regex classes (\s, \d, \w).
    escaped = re.escape(template)
    escaped = escaped.replace(re.escape('$IssueDate'), '(?P<issuedate>.*?)')
    escaped = escaped.replace(re.escape('$Title'), '(?P<title>.*?)')
    # The extension has to be an alternation: "[pdf|epub]" is a character
    # class that matches one single character. Anchoring at the end makes the
    # lazy groups run up to the real extension rather than the first ".p".
    extensions = '|'.join(re.escape(book_type) for book_type in booktype_list)
    return re.compile(escaped + r'\.(?:' + extensions + r')$')


def _record_issue(myDB, title, issuedate, issuefile):
    """Insert/refresh one issue and keep its magazine's summary dates current.

    myDB:      open database connection
    title:     magazine title parsed from the file name
    issuedate: issue date (or number) parsed from the file name
    issuefile: full path of the issue file; its mtime is used as acquired date
    """
    mtime = os.path.getmtime(issuefile)
    iss_acquired = datetime.date.isoformat(datetime.date.fromtimestamp(mtime))

    # magazines : Title, Frequency, Regex, Status, MagazineAdded, LastAcquired, IssueDate, IssueStatus
    # issues    : Title, IssueAcquired, IssueDate, IssueFile

    # is this magazine already in the database?
    mag_entry = myDB.select('SELECT * from magazines WHERE Title=?', (title,))
    if not mag_entry:
        # need to add a new magazine to the database
        newValueDict = {
            "Frequency": None,  # unused currently
            "Regex": None,
            "Status": "Active",
            "MagazineAdded": None,
            "LastAcquired": None,
            "IssueDate": None,
            "IssueStatus": "Skipped"
        }
        logger.debug("Adding magazine %s" % title)
        myDB.upsert("magazines", newValueDict, {"Title": title})
        maglastacquired = None
        magissuedate = None
        magazineadded = None
    else:
        maglastacquired = mag_entry[0]['LastAcquired']
        magissuedate = mag_entry[0]['IssueDate']
        magazineadded = mag_entry[0]['MagazineAdded']

    # is this issue already in the database?
    issue_id = create_id("%s %s" % (title, issuedate))
    iss_entry = myDB.select('SELECT * from issues WHERE Title=? and IssueDate=?', (title, issuedate))
    if not iss_entry:
        newValueDict = {
            "IssueAcquired": iss_acquired,
            "IssueID": issue_id,
            "IssueFile": issuefile
        }
        logger.debug("Adding issue %s %s" % (title, issuedate))
    else:
        # don't really need to do this each time
        newValueDict = {"IssueID": issue_id}
    myDB.upsert("Issues", newValueDict, {"Title": title, "IssueDate": issuedate})

    create_cover(issuefile)

    # see if this issues date values are useful
    # if its a new magazine, magazineadded,magissuedate,lastacquired are all None
    # if magazineadded is NOT None, but the others are, we've deleted one or more issues
    # so the most recent dates may be wrong and need to be updated.
    # Set magazine_issuedate to issuedate of most recent issue we have
    # Set magazine_added to acquired date of earliest issue we have
    # Set magazine_lastacquired to acquired date of most recent issue we have
    # acquired dates are read from magazine file timestamps
    if magazineadded is None:  # new magazine, this might be the only issue
        changes = {
            "MagazineAdded": iss_acquired,
            "LastAcquired": iss_acquired,
            "IssueDate": issuedate,
            "IssueStatus": "Open"
        }
    else:
        # dates are ISO strings, so plain string comparison orders them
        changes = {}
        if iss_acquired < magazineadded:
            changes["MagazineAdded"] = iss_acquired
        if maglastacquired is None or iss_acquired > maglastacquired:
            changes["LastAcquired"] = iss_acquired
        if magissuedate is None or issuedate > magissuedate:
            changes["IssueDate"] = issuedate
    if changes:
        # one write per issue instead of up to three
        myDB.upsert("magazines", changes, {"Title": title})


def magazineScan(thread=None):
    """Scan the magazine folder and bring the magazines/issues tables up to date.

    thread: when None the current thread is renamed to "MAGAZINESCAN"

    With lazylibrarian.FULL_SCAN set, issues whose file no longer exists and
    magazines left without issues are removed from the database first. Then
    every file under the magazine folder that passes
    formatter.is_valid_booktype(..., booktype='mag') and matches the
    MAG_DEST_FILE naming template is recorded. Returns nothing.
    """
    # rename this thread
    if thread is None:
        threading.currentThread().name = "MAGAZINESCAN"

    myDB = database.DBConnection()

    mag_path = lazylibrarian.MAG_DEST_FOLDER
    if '$' in mag_path:
        # only the fixed part in front of the first $placeholder is walked
        mag_path = mag_path.split('$')[0]

    if lazylibrarian.MAG_RELATIVE:
        if mag_path[0] not in '._':
            mag_path = '_' + mag_path
        mag_path = os.path.join(lazylibrarian.DESTINATION_DIR, mag_path).encode(lazylibrarian.SYS_ENCODING)
    else:
        mag_path = mag_path.encode(lazylibrarian.SYS_ENCODING)

    if lazylibrarian.FULL_SCAN:
        _purge_missing_issues(myDB)

    logger.info(' Checking [%s] for magazines' % mag_path)

    # massage the MAG_DEST_FILE config parameter into something we can use
    # with regular expression matching
    pattern = _build_issue_pattern(lazylibrarian.MAG_DEST_FILE,
                                   formatter.getList(lazylibrarian.MAG_TYPE))

    for dirname, _dirnames, filenames in os.walk(mag_path):
        for fname in filenames:
            # maybe not all magazines will be pdf?
            if not formatter.is_valid_booktype(fname, booktype='mag'):
                continue
            try:
                match = pattern.match(fname)
                if not match:
                    logger.debug("Pattern match failed for [%s]" % fname)
                    continue
                # group() raises if the template lacks that placeholder
                issuedate = match.group("issuedate")
                title = match.group("title")
            except Exception:
                logger.debug("Invalid name format for [%s]" % fname)
                continue

            logger.debug("Found Issue %s" % fname)
            _record_issue(myDB, title, issuedate, os.path.join(dirname, fname))

    magcount = myDB.action("select count(*) from magazines").fetchone()
    isscount = myDB.action("select count(*) from issues").fetchone()

    logger.info("Magazine scan complete, found %s magazines, %s issues" %
                (magcount['count(*)'], isscount['count(*)']))
def findBestResult(resultlist, book, searchtype, source):
    """
    resultlist: collated results from search providers
    book:       the book we want to find
    searchtype: book, magazine, shortbook, audiobook etc.
    source:     nzb, tor, rss, direct
    return:     highest scoring match as a list
                [score, newValueDict, controlValueDict, priority],
                or None if no match (or if an unexpected error was logged)
    """
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        # raw string for the last key: same value as before, but no
        # invalid-escape warning on current Python versions
        dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                    ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '',
                    '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '',
                    ':': '', '!': '', '-': ' ', r'\s\s': ' '}

        dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
               ',': '', '*': '', ':': '.', ';': '', '\'': ''}

        if source == 'rss':
            author, title = get_searchterm(book, searchtype)
        else:
            author = unaccented_str(replace_all(book['authorName'], dic))
            title = unaccented_str(replace_all(book['bookName'], dic))

        if book['library'] == 'AudioBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXAUDIO'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINAUDIO'], 0)
            auxinfo = 'AudioBook'
            type_key = 'AUDIOBOOK_TYPE'
        else:  # elif book['library'] == 'eBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXSIZE'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINSIZE'], 0)
            auxinfo = 'eBook'
            type_key = 'EBOOK_TYPE'

        if source == 'nzb':
            prefix = 'nzb'
        else:  # rss and libgen return same names as torrents
            prefix = 'tor_'

        logger.debug('Searching %s %s results for best %s match' % (len(resultlist), source, auxinfo))

        # the words of the wanted author/title do not change per result,
        # so split them once instead of once per result and reject word
        author_words = getList(author.lower())
        title_words = getList(title.lower())

        matches = []
        for res in resultlist:
            resultTitle = unaccented_str(replace_all(res[prefix + 'title'], dictrepl)).strip()
            resultTitle = re.sub(r"\s\s+", " ", resultTitle)  # remove extra whitespace
            Author_match = fuzz.token_set_ratio(author, resultTitle)
            Book_match = fuzz.token_set_ratio(title, resultTitle)
            if lazylibrarian.LOGLEVEL & lazylibrarian.log_fuzz:
                logger.debug("%s author/book Match: %s/%s %s at %s" %
                             (source.upper(), Author_match, Book_match, resultTitle, res[prefix + 'prov']))

            rejected = False

            url = res[prefix + 'url']
            if url is None:
                rejected = True
                logger.debug("Rejecting %s, no URL found" % resultTitle)

            if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']:
                already_failed = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (url,))
                if already_failed:
                    logger.debug("Rejecting %s, blacklisted at %s" % (resultTitle, already_failed['NZBprov']))
                    rejected = True

            if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']:
                already_failed = myDB.match('SELECT * from wanted WHERE NZBurl=?', (url,))
                if already_failed:
                    logger.debug("Rejecting %s, blacklisted at %s" % (resultTitle, already_failed['NZBprov']))
                    rejected = True

            if not rejected and not url.startswith('http') and not url.startswith('magnet'):
                rejected = True
                logger.debug("Rejecting %s, invalid URL [%s]" % (resultTitle, url))

            if not rejected:
                result_words = getList(resultTitle.lower())
                for word in reject_list:
                    # a reject word that is part of the wanted author/title is allowed
                    if word in result_words and word not in author_words and word not in title_words:
                        rejected = True
                        logger.debug("Rejecting %s, contains %s" % (resultTitle, word))
                        break

            size_temp = check_int(res[prefix + 'size'], 1000)  # Need to cater for when this is NONE (Issue 35)
            size = round(float(size_temp) / 1048576, 2)  # bytes -> MB

            if not rejected and maxsize and size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % resultTitle)

            if not rejected and minsize and size < minsize:
                rejected = True
                logger.debug("Rejecting %s, too small" % resultTitle)

            if not rejected:
                bookid = book['bookid']

                if source == 'nzb':
                    mode = res['nzbmode']  # nzb, torznab
                else:
                    mode = res['tor_type']  # torrent, magnet, nzb(from rss), direct

                controlValueDict = {"NZBurl": url}
                newValueDict = {
                    "NZBprov": res[prefix + 'prov'],
                    "BookID": bookid,
                    "NZBdate": now(),  # when we asked for it
                    "NZBsize": size,
                    "NZBtitle": resultTitle,
                    "NZBmode": mode,
                    "AuxInfo": auxinfo,
                    "Status": "Skipped"
                }

                score = (Book_match + Author_match) / 2  # as a percentage
                # lose a point for each unwanted word in the title so we get the closest match
                # but for RSS ignore anything at the end in square braces [keywords, genres etc]
                # NOTE(review): dictrepl above already maps '[' to '', so this
                # rsplit looks like a no-op - confirm whether that is intended
                if source == 'rss':
                    wordlist = getList(resultTitle.rsplit('[', 1)[0].lower())
                else:
                    wordlist = getList(resultTitle.lower())

                # fetched once per result (was read twice)
                typelist = getList(lazylibrarian.CONFIG[type_key])
                words = [x for x in wordlist
                         if x not in author_words and x not in title_words and x not in typelist]
                score -= len(words)

                # prioritise titles that include the ebook types we want
                # add more points for booktypes nearer the left in the list
                # eg if epub, mobi, pdf add 3 points if epub found, 2 for mobi, 1 for pdf
                booktypes = [x for x in wordlist if x in typelist]
                if booktypes:
                    ranked = list(reversed(typelist))
                    for item in booktypes:
                        for i, x in enumerate(ranked):
                            if x == item:
                                score += i + 1

                matches.append([score, newValueDict, controlValueDict, res['priority']])

        if matches:
            # best score wins, provider priority breaks ties
            highest = max(matches, key=lambda s: (s[0], s[3]))
            score = highest[0]
            newValueDict = highest[1]
            dlpriority = highest[3]

            if score < int(lazylibrarian.CONFIG['MATCH_RATIO']):
                logger.info('Nearest match (%s%%): %s using %s search for %s %s' %
                            (score, newValueDict['NZBtitle'], searchtype, book['authorName'], book['bookName']))
            else:
                logger.info('Best match (%s%%): %s using %s search, %s priority %s' %
                            (score, newValueDict['NZBtitle'], searchtype, newValueDict['NZBprov'], dlpriority))
            return highest
        else:
            logger.debug("No %s found for [%s] using searchtype %s" % (source, book["searchterm"], searchtype))
        return None
    except Exception:
        logger.error('Unhandled exception in findBestResult: %s' % traceback.format_exc())
        return None
def _ll_bookids(myDB, status):
    """Return the BookIDs held locally with *status* ("Open" also includes "Have")."""
    cmd = 'select bookid from books where status=?'
    if status == 'Open':
        cmd += ' or status="Have"'
    return [terms['bookid'] for terms in myDB.select(cmd, (status,))]


def _change_shelf(GA, book, shelf, action):
    """Add *book* to, or remove it from, a goodreads shelf.

    action: 'add' or 'remove' (passed through to GA.BookToList)
    return: True when BookToList reported success, else False. Failures and
            exceptions are logged here, not raised.
    """
    if action == 'remove':
        err_msg = "Error removing %s from %s: %s %s"
        ok_msg = "%10s removed from %s shelf"
        fail_msg = "Failed to remove %s from %s shelf: %s"
    else:
        err_msg = "Error adding %s to %s: %s %s"
        ok_msg = "%10s added to %s shelf"
        fail_msg = "Failed to add %s to %s shelf: %s"

    try:
        res, content = GA.BookToList(book, shelf, action=action)
    except Exception as e:
        logger.debug(err_msg % (book, shelf, type(e).__name__, str(e)))
        res = None
        content = ''
    if res:
        logger.debug(ok_msg % (book, shelf))
        return True
    logger.warn(fail_msg % (book, shelf, content))
    return False


def _book_status(myDB, GR, book):
    """Look up the Status row for *book*, asking GoodReads to import it if unknown.

    GR:     GoodReads instance from an earlier call, or None to create one lazily
    return: (row or None, GR) - GR is handed back so the caller can reuse it
    """
    cmd = 'select Status from books where BookID=?'
    res = myDB.match(cmd, (book,))
    if not res:
        logger.debug('Adding new book %s to database' % book)
        if not GR:
            GR = GoodReads(book)
        GR.find_book(book)
        res = myDB.match(cmd, (book,))
    if not res:
        logger.warn('Book %s not found in database' % book)
    return res, GR


def grsync(status, shelf):
    """Two-way sync between books with *status* and a goodreads shelf.

    status: book status to sync ("Open" also covers "Have")
    shelf:  goodreads shelf name (lower-cased here, created if missing)
    return: (shelf_changed, ll_changed) - number of changes made on the
            goodreads shelf and in the local database; (0, 0) if the shelf
            could not be created or an unexpected error was logged
    """
    # noinspection PyBroadException
    try:
        shelf = shelf.lower()
        logger.info('Syncing %s to %s shelf' % (status, shelf))
        myDB = database.DBConnection()
        ll_list = _ll_bookids(myDB, status)

        GA = grauth()
        GR = None
        shelves = GA.get_shelf_list()
        found = any(item['name'] == shelf for item in shelves)
        if not found:
            res, msg = GA.create_shelf(shelf=shelf)
            if not res:
                logger.debug("Unable to create shelf %s: %s" % (shelf, msg))
                return 0, 0
            else:
                logger.debug("Created new goodreads shelf: %s" % shelf)

        gr_shelf = GA.get_gr_shelf_contents(shelf=shelf)
        dstatus = status
        if dstatus == "Open":
            dstatus += "/Have"

        logger.info("There are %s %s books, %s books on goodreads %s shelf" %
                    (len(ll_list), dstatus, len(gr_shelf), shelf))

        # Sync method for WANTED:
        # Get results of last_sync (if any)
        # For each book in last_sync
        #    if not in ll_list, new deletion, remove from gr_shelf
        #    if not in gr_shelf, new deletion, remove from ll_list, mark Skipped
        # For each book in ll_list
        #    if not in last_sync, new addition, add to gr_shelf
        # For each book in gr_shelf
        #    if not in last sync, new addition, add to ll_list, mark Wanted
        #
        # save ll WANTED as last_sync

        # For HAVE/OPEN method is the same, but only change status if HAVE, not OPEN

        res = myDB.match('select SyncList from sync where UserID=? and Label=?', ("goodreads", shelf))
        last_sync = []
        shelf_changed = 0
        ll_changed = 0
        if res:
            last_sync = getList(res['SyncList'])

        added_to_shelf = list(set(gr_shelf) - set(last_sync))
        removed_from_shelf = list(set(last_sync) - set(gr_shelf))
        added_to_ll = list(set(ll_list) - set(last_sync))
        removed_from_ll = list(set(last_sync) - set(ll_list))

        logger.info("%s missing from lazylibrarian %s" % (len(removed_from_ll), shelf))
        for book in removed_from_ll:
            # first the deletions since last sync...
            if _change_shelf(GA, book, shelf, 'remove'):
                shelf_changed += 1

        logger.info("%s missing from goodreads %s" % (len(removed_from_shelf), shelf))
        for book in removed_from_shelf:
            # deleted from goodreads
            res, GR = _book_status(myDB, GR, book)
            if not res:
                continue
            if res['Status'] in ['Have', 'Wanted']:
                myDB.action('UPDATE books SET Status="Skipped" WHERE BookID=?', (book,))
                ll_changed += 1
                logger.debug("%10s set to Skipped" % book)
            else:
                logger.warn("Not removing %s, book is marked %s" % (book, res['Status']))

        # new additions to lazylibrarian
        logger.info("%s new in lazylibrarian %s" % (len(added_to_ll), shelf))
        for book in added_to_ll:
            if _change_shelf(GA, book, shelf, 'add'):
                shelf_changed += 1

        # new additions to goodreads shelf
        logger.info("%s new in goodreads %s" % (len(added_to_shelf), shelf))
        for book in added_to_shelf:
            res, GR = _book_status(myDB, GR, book)
            if not res:
                continue
            if status == 'Open':
                if res['Status'] == 'Open':
                    logger.warn("Book %s is already marked Open" % book)
                else:
                    myDB.action('UPDATE books SET Status="Have" WHERE BookID=?', (book,))
                    ll_changed += 1
                    logger.debug("%10s set to Have" % book)
            elif status == 'Wanted':
                # if in "wanted" and already marked "Open/Have", optionally delete from "wanted"
                # (depending on user prefs, to-read and wanted might not be the same thing)
                if lazylibrarian.CONFIG['GR_UNIQUE'] and res['Status'] in ['Open', 'Have']:
                    if _change_shelf(GA, book, shelf, 'remove'):
                        shelf_changed += 1
                elif res['Status'] != 'Open':
                    myDB.action('UPDATE books SET Status="Wanted" WHERE BookID=?', (book,))
                    ll_changed += 1
                    logger.debug("%10s set to Wanted" % book)
                else:
                    logger.warn("Not setting %s as Wanted, already marked Open" % book)

        # get new definitive list from ll
        ll_list = _ll_bookids(myDB, status)

        # store as comparison for next sync
        controlValueDict = {"UserID": "goodreads", "Label": shelf}
        newValueDict = {"Date": str(time.time()), "Synclist": ', '.join(ll_list)}
        myDB.upsert("sync", newValueDict, controlValueDict)

        logger.debug('Sync %s to %s shelf complete' % (status, shelf))
        return shelf_changed, ll_changed

    except Exception:
        logger.error('Unhandled exception in grsync: %s' % traceback.format_exc())
        return 0, 0
def search_magazines(mags=None, reset=False): # produce a list of magazines to search for, tor, nzb, torznab, rss # noinspection PyBroadException try: threadname = threading.currentThread().name if "Thread-" in threadname: if mags is None: threading.currentThread().name = "SEARCHALLMAG" else: threading.currentThread().name = "SEARCHMAG" myDB = database.DBConnection() searchlist = [] if mags is None: # backlog search searchmags = myDB.select('SELECT Title, Regex, LastAcquired, \ IssueDate from magazines WHERE Status="Active"' ) else: searchmags = [] for magazine in mags: searchmags_temp = myDB.select( 'SELECT Title, Regex, LastAcquired, IssueDate from magazines \ WHERE Title=? AND Status="Active"', (magazine['bookid'], )) for terms in searchmags_temp: searchmags.append(terms) if len(searchmags) == 0: threading.currentThread().name = "WEBSERVER" return # should clear old search results as might not be available any more # ie torrent not available, changed providers, out of news server retention etc. # Only delete the "skipped" ones, not wanted/snatched/processed/ignored logger.debug("Removing old magazine search results") myDB.action('DELETE from pastissues WHERE Status="Skipped"') logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags)))) for searchmag in searchmags: bookid = searchmag['Title'] searchterm = searchmag['Regex'] if not searchterm: dic = { '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '' } # strip accents from the magazine title for easier name-matching searchterm = unaccented_str(searchmag['Title']) if not searchterm: # unless there are no ascii characters left searchterm = searchmag['Title'] searchterm = replace_all(searchterm, dic) searchterm = re.sub('[.\-/]', ' ', searchterm) searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING) searchlist.append({"bookid": bookid, "searchterm": searchterm}) if not searchlist: logger.warn( 'There is nothing to search for. 
Mark some magazines as active.' ) for book in searchlist: resultlist = [] if lazylibrarian.USE_NZB(): resultlist, nproviders = IterateOverNewzNabSites(book, 'mag') if not nproviders: # don't nag. Show warning message no more than every 20 mins timenow = int(time.time()) if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow: logger.warn( 'No nzb providers are available. Check config and blocklist' ) lazylibrarian.NO_NZB_MSG = timenow if lazylibrarian.USE_DIRECT(): dir_resultlist, nproviders = IterateOverDirectSites( book, 'mag') if not nproviders: # don't nag. Show warning message no more than every 20 mins timenow = int(time.time()) if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow: logger.warn( 'No direct providers are available. Check config and blocklist' ) lazylibrarian.NO_DIRECT_MSG = timenow if dir_resultlist: for item in dir_resultlist: # reformat the results so they look like nzbs resultlist.append({ 'bookid': item['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100', # fake date as none returned 'nzbsize': item['tor_size'], 'nzbmode': 'torrent' }) if lazylibrarian.USE_TOR(): tor_resultlist, nproviders = IterateOverTorrentSites( book, 'mag') if not nproviders: # don't nag. Show warning message no more than every 20 mins timenow = int(time.time()) if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow: logger.warn( 'No tor providers are available. 
Check config and blocklist' ) lazylibrarian.NO_TOR_MSG = timenow if tor_resultlist: for item in tor_resultlist: # reformat the torrent results so they look like nzbs resultlist.append({ 'bookid': item['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100', # fake date as none returned from torrents 'nzbsize': item['tor_size'], 'nzbmode': 'torrent' }) if lazylibrarian.USE_RSS(): rss_resultlist, nproviders = IterateOverRSSSites() if not nproviders: # don't nag. Show warning message no more than every 20 mins timenow = int(time.time()) if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow: logger.warn( 'No rss providers are available. Check config and blocklist' ) lazylibrarian.NO_RSS_MSG = timenow if rss_resultlist: for item in rss_resultlist: # reformat the rss results so they look like nzbs resultlist.append({ 'bookid': book['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': item[ 'tor_date'], # may be fake date as none returned from rss torrents, only rss nzb 'nzbsize': item['tor_size'], 'nzbmode': item['tor_type'] }) if not resultlist: logger.debug("No results for magazine %s" % book['searchterm']) else: bad_name = 0 bad_date = 0 old_date = 0 rejects = 0 total_nzbs = 0 new_date = 0 maglist = [] issues = [] bookid = '' for nzb in resultlist: total_nzbs += 1 bookid = nzb['bookid'] # strip accents from the magazine title for easier name-matching nzbtitle = unaccented_str(nzb['nzbtitle']) if not nzbtitle: # unless it's not a latin-1 encodable name nzbtitle = nzb['nzbtitle'] nzbtitle = nzbtitle.replace('"', '').replace( "'", "") # suppress " in titles nzburl = nzb['nzburl'] nzbprov = nzb['nzbprov'] nzbdate_temp = nzb['nzbdate'] nzbsize_temp = nzb['nzbsize'] nzbsize_temp = check_int( nzbsize_temp, 1000 ) # not all torrents returned by torznab have a size nzbsize = round(float(nzbsize_temp) / 1048576, 2) nzbdate = 
nzbdate2format(nzbdate_temp) nzbmode = nzb['nzbmode'] results = myDB.match( 'SELECT * from magazines WHERE Title=?', (bookid, )) if not results: logger.debug( 'Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid)) bad_name += 1 else: rejected = False maxsize = check_int( lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0) if maxsize and nzbsize > maxsize: logger.debug("Rejecting %s, too large" % nzbtitle) rejected = True if not rejected: minsize = check_int( lazylibrarian.CONFIG['REJECT_MAGMIN'], 0) if minsize and nzbsize < minsize: logger.debug("Rejecting %s, too small" % nzbtitle) rejected = True if not rejected: dic = { '.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '' } nzbtitle_formatted = replace_all(nzbtitle, dic).strip() # Need to make sure that substrings of magazine titles don't get found # (e.g. Maxim USA will find Maximum PC USA) # remove extra spaces if they're in a row if nzbtitle_formatted and nzbtitle_formatted[ 0] == '[' and nzbtitle_formatted[-1] == ']': nzbtitle_formatted = nzbtitle_formatted[1:-1] nzbtitle_exploded_temp = " ".join( nzbtitle_formatted.split()) nzbtitle_exploded = nzbtitle_exploded_temp.split( ' ') if ' ' in bookid: bookid_exploded = bookid.split(' ') else: bookid_exploded = [bookid] # check nzb has magazine title and a date/issue nr # eg The MagPI July 2015 if len(nzbtitle_exploded) > len(bookid_exploded): # needs to be longer as it has to include a date # check all the words in the mag title are in the nzbtitle rejected = False wlist = [] for word in nzbtitle_exploded: wlist.append(unaccented(word).lower()) for word in bookid_exploded: if unaccented(word).lower() not in wlist: rejected = True break if rejected: logger.debug( u"Magazine title match failed " + bookid + " for " + nzbtitle_formatted) else: logger.debug(u"Magazine matched " + bookid + " for " + nzbtitle_formatted) else: logger.debug("Magazine name too short (%s)" % len(nzbtitle_exploded)) rejected = True if not rejected: blocked = myDB.match( 
'SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (nzburl, )) if blocked: logger.debug( "Rejecting %s, blacklisted at %s" % (nzbtitle_formatted, blocked['NZBprov'])) rejected = True if not rejected: reject_list = getList( str(results['Reject']).lower()) reject_list += getList( lazylibrarian.CONFIG['REJECT_MAGS']) lower_title = unaccented( nzbtitle_formatted).lower() lower_bookid = unaccented(bookid).lower() if reject_list: if lazylibrarian.LOGLEVEL > 2: logger.debug('Reject: %s' % str(reject_list)) logger.debug('Title: %s' % lower_title) logger.debug('Bookid: %s' % lower_bookid) for word in reject_list: if word in lower_title and word not in lower_bookid: rejected = True logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word)) break regex_pass = 0 if not rejected: # Magazine names have many different styles of date # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY # MonthName DD YYYY or MonthName DD, YYYY # YYYY MM or YYYY MM DD # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn # nn YYYY issue number without "Nr" before it # issue and year as a single 6 digit string eg 222015 newdatish = "none" # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY pos = 0 while pos < len(nzbtitle_exploded): year = check_year(nzbtitle_exploded[pos]) if year and pos: month = month2num(nzbtitle_exploded[pos - 1]) if month: if pos - 1: day = check_int( nzbtitle_exploded[pos - 2], 1) if day > 31: # probably issue number nn day = 1 else: day = 1 newdatish = "%04d-%02d-%02d" % ( year, month, day) try: _ = datetime.date(year, month, day) regex_pass = 1 break except ValueError: regex_pass = 0 pos += 1 # MonthName DD YYYY or MonthName DD, YYYY if not regex_pass: pos = 0 while pos < len(nzbtitle_exploded): year = check_year(nzbtitle_exploded[pos]) if year and (pos - 1): month = month2num( nzbtitle_exploded[pos - 2]) if month: day = check_int( nzbtitle_exploded[ pos - 1].rstrip(','), 1) try: _ = datetime.date( year, month, day) newdatish = 
"%04d-%02d-%02d" % ( year, month, day) regex_pass = 2 break except ValueError: regex_pass = 0 pos += 1 # YYYY MM or YYYY MM DD if not regex_pass: pos = 0 while pos < len(nzbtitle_exploded): year = check_year(nzbtitle_exploded[pos]) if year and pos + 1 < len( nzbtitle_exploded): month = check_int( nzbtitle_exploded[pos + 1], 0) if month: if pos + 2 < len( nzbtitle_exploded): day = check_int( nzbtitle_exploded[pos + 2], 1) else: day = 1 try: _ = datetime.date( year, month, day) newdatish = "%04d-%02d-%02d" % ( year, month, day) regex_pass = 3 break except ValueError: regex_pass = 0 pos += 1 # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn if not regex_pass: pos = 0 while pos < len(nzbtitle_exploded): if nzbtitle_exploded[pos].lower() in [ "issue", "no", "nr", "vol" ]: if pos + 1 < len(nzbtitle_exploded): issue = check_int( nzbtitle_exploded[pos + 1], 0) if issue: newdatish = str( issue) # 4 == 04 == 004 if pos + 2 < len( nzbtitle_exploded): year = check_year( nzbtitle_exploded[pos + 2]) if year and year < int( datetime.date. today().year): newdatish = '0' # it's old regex_pass = 4 # Issue/No/Nr/Vol nn, YYYY else: regex_pass = 5 # Issue/No/Nr/Vol nn break pos += 1 # nn YYYY issue number without "Nr" before it if not regex_pass: pos = 1 while pos < len(nzbtitle_exploded): year = check_year(nzbtitle_exploded[pos]) if year: issue = check_int( nzbtitle_exploded[pos - 1], 0) if issue: newdatish = str( issue) # 4 == 04 == 004 regex_pass = 6 if year < int(datetime.date.today( ).year): newdatish = '0' # it's old break pos += 1 # issue and year as a single 6 digit string eg 222015 if not regex_pass: pos = 0 while pos < len(nzbtitle_exploded): issue = nzbtitle_exploded[pos] if issue.isdigit() and len(issue) == 6: year = int(issue[2:]) issue = int(issue[:2]) newdatish = str( issue) # 4 == 04 == 004 regex_pass = 7 if year < int( datetime.date.today().year): newdatish = '0' # it's old break pos += 1 if not regex_pass: logger.debug( 'Magazine %s not in a recognised date format.' 
% nzbtitle_formatted) bad_date += 1 # allow issues with good name but bad date to be included # so user can manually select them, incl those with issue numbers newdatish = "1970-01-01" # provide a fake date for bad-date issues regex_pass = 99 if rejected: rejects += 1 else: if lazylibrarian.LOGLEVEL > 2: logger.debug("regex %s [%s] %s" % (regex_pass, nzbtitle_formatted, newdatish)) # wanted issues go into wanted table marked "Wanted" # the rest into pastissues table marked "Skipped" insert_table = "pastissues" insert_status = "Skipped" control_date = results['IssueDate'] if control_date is None: # we haven't got any copies of this magazine yet # get a rough time just over a month ago to compare to, in format yyyy-mm-dd # could perhaps calc differently for weekly, biweekly etc # or for magazines with only an issue number, use zero if str(newdatish).isdigit(): logger.debug( 'Magazine comparing issue numbers (%s)' % newdatish) control_date = 0 elif re.match('\d+-\d\d-\d\d', str(newdatish)): start_time = time.time() start_time -= int( lazylibrarian.CONFIG['MAG_AGE'] ) * 24 * 60 * 60 # number of seconds in days if start_time < 0: # limit of unixtime (1st Jan 1970) start_time = 0 control_date = time.strftime( "%Y-%m-%d", time.localtime(start_time)) logger.debug( 'Magazine date comparing to %s' % control_date) else: logger.debug( 'Magazine unable to find comparison type [%s]' % newdatish) control_date = 0 if str(control_date).isdigit() and str( newdatish).isdigit(): # for issue numbers, check if later than last one we have comp_date = int(newdatish) - int(control_date) newdatish = "%s" % newdatish newdatish = newdatish.zfill( 4) # pad so we sort correctly elif re.match('\d+-\d\d-\d\d', str(control_date)) and \ re.match('\d+-\d\d-\d\d', str(newdatish)): # only grab a copy if it's newer than the most recent we have, # or newer than a month ago if we have none comp_date = datecompare( newdatish, control_date) else: # invalid comparison of date and issue number if 
re.match('\d+-\d\d-\d\d', str(control_date)): logger.debug( 'Magazine %s failed: Expecting a date' % nzbtitle_formatted) else: logger.debug( 'Magazine %s failed: Expecting issue number' % nzbtitle_formatted) bad_date += 1 newdatish = "1970-01-01" # this is our fake date for ones we can't decipher comp_date = 0 if comp_date > 0: # keep track of what we're going to download so we don't download dupes new_date += 1 issue = bookid + ',' + newdatish if issue not in issues: maglist.append({ 'bookid': bookid, 'nzbprov': nzbprov, 'nzbtitle': nzbtitle, 'nzburl': nzburl, 'nzbmode': nzbmode }) logger.debug( 'This issue of %s is new, downloading' % nzbtitle_formatted) issues.append(issue) logger.debug('Magazine request number %s' % len(issues)) if lazylibrarian.LOGLEVEL > 2: logger.debug(str(issues)) insert_table = "wanted" insert_status = "Wanted" nzbdate = now() # when we asked for it else: logger.debug( 'This issue of %s is already flagged for download' % issue) else: if newdatish != "1970-01-01": # this is our fake date for ones we can't decipher logger.debug( 'This issue of %s is old; skipping.' % nzbtitle_formatted) old_date += 1 # store only the _new_ matching results # Don't add a new entry if this issue has been found on an earlier search # and status has been user-set ( we only delete the "Skipped" ones ) # In "wanted" table it might be already snatched/downloading/processing mag_entry = myDB.match( 'SELECT * from %s WHERE NZBtitle=? and NZBprov=?' 
% insert_table, (nzbtitle, nzbprov)) if mag_entry: if lazylibrarian.LOGLEVEL > 2: logger.debug( '%s is already in %s marked %s' % (nzbtitle, insert_table, insert_status)) else: controlValueDict = { "NZBtitle": nzbtitle, "NZBprov": nzbprov } newValueDict = { "NZBurl": nzburl, "BookID": bookid, "NZBdate": nzbdate, "AuxInfo": newdatish, "Status": insert_status, "NZBsize": nzbsize, "NZBmode": nzbmode } myDB.upsert(insert_table, newValueDict, controlValueDict) if lazylibrarian.LOGLEVEL > 2: logger.debug('Added %s to %s marked %s' % (nzbtitle, insert_table, insert_status)) msg = 'Found %i result%s for %s. %i new,' % ( total_nzbs, plural(total_nzbs), bookid, new_date) msg += ' %i old, %i fail date, %i fail name,' % ( old_date, bad_date, bad_name) msg += ' %i rejected: %i to download' % (rejects, len(maglist)) logger.info(msg) for magazine in maglist: if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]: snatch = TORDownloadMethod(magazine['bookid'], magazine['nzbtitle'], magazine['nzburl'], 'magazine') else: snatch = NZBDownloadMethod(magazine['bookid'], magazine['nzbtitle'], magazine['nzburl'], 'magazine') if snatch: logger.info( 'Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"])) notify_snatch("Magazine %s from %s at %s" % (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now())) custom_notify_snatch(magazine['bookid']) scheduleJob(action='Start', target='processDir') if reset: scheduleJob(action='Restart', target='search_magazines') logger.info("Search for magazines complete") except Exception: logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc()) finally: threading.currentThread().name = "WEBSERVER"
def TORDownloadMethod(bookid=None, tor_title=None, tor_url=None, library='eBook'):
    """Hand a torrent or magnet link to every enabled torrent downloader.

    The url is either used directly (magnet link) or fetched with ``requests``
    to obtain the .torrent payload. The payload/url is then offered to each
    downloader enabled in ``lazylibrarian.CONFIG`` (blackhole, uTorrent,
    rTorrent, qBittorrent, Transmission, Synology, Deluge web UI / RPC).
    When several downloaders are enabled the last one that ran determines
    ``Source`` and ``downloadID``.

    :param bookid: BookID used to mark the entry in the ``books`` table as Snatched
    :param tor_title: display name of the torrent; may be replaced by the name
                      the downloader reports back
    :param tor_url: http(s) url of a .torrent file, or a magnet link
    :param library: 'eBook', 'AudioBook' or 'magazine'; selects the reject-word
                    list and which ``books`` status column is updated
    :return: True if a downloader accepted the torrent and the ``wanted`` row was
             marked Snatched, otherwise False
    """
    myDB = database.DBConnection()
    downloadID = False
    Source = ''
    full_url = tor_url  # keep the url as stored in "wanted" table

    if not tor_url:
        # tor_url defaults to None; the substring tests below would raise
        # TypeError on None, so fail cleanly instead
        logger.warn('No url supplied for torrent %s' % tor_title)
        return False

    if tor_url.startswith('magnet:?'):
        torrent = tor_url  # allow magnet link to write to blackhole and hash to utorrent/rtorrent
    elif 'magnet:?' in tor_url:
        # discard any other parameters and just use the magnet link
        torrent = 'magnet:?' + tor_url.split('magnet:?')[1]
    else:
        # A full HTML unescape is probably overkill, we only seem to get &amp;
        tor_url = tor_url.replace('&amp;', '&')

        if '&file=' in tor_url:
            # torznab results need to be re-encoded
            # had a problem with torznab utf-8 encoded strings not matching
            # our utf-8 strings because of long/short form differences
            url, value = tor_url.split('&file=', 1)
            value = makeUnicode(value)  # ensure unicode
            value = unicodedata.normalize('NFC', value)  # normalize to short form
            value = value.encode('unicode-escape')  # then escape the result
            value = makeUnicode(value)  # ensure unicode
            value = value.replace(' ', '%20')  # and encode any spaces
            tor_url = url + '&file=' + value

        # strip url back to the .torrent as some sites add extra parameters
        if not tor_url.endswith('.torrent'):
            if '.torrent' in tor_url:
                tor_url = tor_url.split('.torrent')[0] + '.torrent'

        headers = {'Accept-encoding': 'gzip', 'User-Agent': USER_AGENT}
        proxies = proxyList()
        try:
            r = requests.get(tor_url, headers=headers, timeout=90, proxies=proxies)
            torrent = r.content
        except requests.exceptions.Timeout:
            logger.warn('Timeout fetching file from url: %s' % tor_url)
            return False
        except Exception as e:
            # some jackett providers redirect internally using http 301 to a magnet link
            # which requests can't handle, so throws an exception
            if "magnet:?" in str(e):
                torrent = 'magnet:?' + str(e).split('magnet:?')[1].strip("'")
            else:
                if hasattr(e, 'reason'):
                    logger.warn('%s fetching file from url: %s, %s' % (type(e).__name__, tor_url, e.reason))
                else:
                    logger.warn('%s fetching file from url: %s, %s' % (type(e).__name__, tor_url, str(e)))
                return False

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_BLACKHOLE']:
        Source = "BLACKHOLE"
        logger.debug("Sending %s to blackhole" % tor_title)
        tor_name = cleanName(tor_title).replace(' ', '_')
        if tor_url and tor_url.startswith('magnet'):
            if lazylibrarian.CONFIG['TOR_CONVERT_MAGNET']:
                hashid = CalcTorrentHash(tor_url)
                tor_name = 'meta-' + hashid + '.torrent'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                result = magnet2torrent(tor_url, tor_path)
                if result is not False:
                    logger.debug('Magnet file saved as: %s' % tor_path)
                    downloadID = Source
            else:
                tor_name += '.magnet'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                msg = ''  # records how far the write got, for the failure log
                try:
                    msg = 'Opening '
                    with open(tor_path, 'wb') as torrent_file:
                        msg += 'Writing '
                        if isinstance(torrent, text_type):
                            torrent = torrent.encode('iso-8859-1')
                        torrent_file.write(torrent)
                    msg += 'SettingPerm '
                    setperm(tor_path)
                    msg += 'Saved '
                    logger.debug('Magnet file saved: %s' % tor_path)
                    downloadID = Source
                except Exception as e:
                    logger.warn("Failed to write magnet to file: %s %s" % (type(e).__name__, str(e)))
                    logger.debug("Progress: %s" % msg)
                    logger.debug("Filename [%s]" % (repr(tor_path)))
                    return False
        else:
            tor_name += '.torrent'
            tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
            msg = ''  # records how far the write got, for the failure log
            try:
                msg = 'Opening '
                with open(tor_path, 'wb') as torrent_file:
                    msg += 'Writing '
                    if isinstance(torrent, text_type):
                        torrent = torrent.encode('iso-8859-1')
                    torrent_file.write(torrent)
                msg += 'SettingPerm '
                setperm(tor_path)
                msg += 'Saved '
                logger.debug('Torrent file saved: %s' % tor_name)
                downloadID = Source
            except Exception as e:
                logger.warn("Failed to write torrent to file: %s %s" % (type(e).__name__, str(e)))
                logger.debug("Progress: %s" % msg)
                logger.debug("Filename [%s]" % (repr(tor_path)))
                return False

    hashid = CalcTorrentHash(torrent)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_UTORRENT'] and lazylibrarian.CONFIG['UTORRENT_HOST']:
        logger.debug("Sending %s to Utorrent" % tor_title)
        Source = "UTORRENT"
        downloadID = utorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = utorrent.nameTorrent(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_RTORRENT'] and lazylibrarian.CONFIG['RTORRENT_HOST']:
        logger.debug("Sending %s to rTorrent" % tor_title)
        Source = "RTORRENT"
        downloadID = rtorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = rtorrent.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_QBITTORRENT'] and lazylibrarian.CONFIG['QBITTORRENT_HOST']:
        logger.debug("Sending %s to qbittorrent" % tor_title)
        Source = "QBITTORRENT"
        if isinstance(torrent, binary_type) and torrent.startswith(b'magnet'):
            status = qbittorrent.addTorrent(torrent, hashid)
        elif isinstance(torrent, text_type) and torrent.startswith('magnet'):
            status = qbittorrent.addTorrent(torrent, hashid)
        else:
            status = qbittorrent.addTorrent(tor_url, hashid)  # returns True or False
        if status:
            downloadID = hashid
            tor_title = qbittorrent.getName(hashid)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_TRANSMISSION'] and lazylibrarian.CONFIG['TRANSMISSION_HOST']:
        logger.debug("Sending %s to Transmission" % tor_title)
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            logger.debug("TORRENT %s [%s] [%s]" % (len(torrent), torrent[:20], torrent[-20:]))
        Source = "TRANSMISSION"
        if isinstance(torrent, binary_type) and torrent.startswith(b'magnet'):
            downloadID = transmission.addTorrent(torrent)  # returns id or False
        elif isinstance(torrent, text_type) and torrent.startswith('magnet'):
            downloadID = transmission.addTorrent(torrent)
        elif torrent:
            downloadID = transmission.addTorrent(None, metainfo=b64encode(torrent))
        else:
            downloadID = transmission.addTorrent(tor_url)  # returns id or False
        if downloadID:
            # transmission returns its own int, but we store hashid instead
            downloadID = hashid
            tor_title = transmission.getTorrentFolder(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_SYNOLOGY'] and lazylibrarian.CONFIG['USE_SYNOLOGY'] and \
            lazylibrarian.CONFIG['SYNOLOGY_HOST']:
        logger.debug("Sending %s to Synology" % tor_title)
        Source = "SYNOLOGY_TOR"
        downloadID = synology.addTorrent(tor_url)  # returns id or False
        if downloadID:
            tor_title = synology.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_DELUGE'] and lazylibrarian.CONFIG['DELUGE_HOST']:
        logger.debug("Sending %s to Deluge" % tor_title)
        if not lazylibrarian.CONFIG['DELUGE_USER']:
            # no username, talk to the webui
            Source = "DELUGEWEBUI"
            if isinstance(torrent, binary_type) and torrent.startswith(b'magnet'):
                downloadID = deluge.addTorrent(torrent)
            elif isinstance(torrent, text_type) and torrent.startswith('magnet'):
                downloadID = deluge.addTorrent(torrent)
            elif torrent:
                downloadID = deluge.addTorrent(tor_title, data=b64encode(torrent))
            else:
                downloadID = deluge.addTorrent(tor_url)  # can be link or magnet, returns hash or False
            if downloadID:
                tor_title = deluge.getTorrentFolder(downloadID)
        else:
            # have username, talk to the daemon
            Source = "DELUGERPC"
            client = DelugeRPCClient(lazylibrarian.CONFIG['DELUGE_HOST'],
                                     int(lazylibrarian.CONFIG['DELUGE_PORT']),
                                     lazylibrarian.CONFIG['DELUGE_USER'],
                                     lazylibrarian.CONFIG['DELUGE_PASS'])
            try:
                client.connect()
                args = {"name": tor_title}
                if tor_url.startswith('magnet'):
                    downloadID = client.call('core.add_torrent_magnet', tor_url, args)
                elif isinstance(torrent, binary_type) and torrent.startswith(b'magnet'):
                    downloadID = client.call('core.add_torrent_magnet', torrent, args)
                elif isinstance(torrent, text_type) and torrent.startswith('magnet'):
                    downloadID = client.call('core.add_torrent_magnet', torrent, args)
                elif torrent:
                    downloadID = client.call('core.add_torrent_file', tor_title, b64encode(torrent), args)
                else:
                    downloadID = client.call('core.add_torrent_url', tor_url, args)
                if downloadID:
                    if lazylibrarian.CONFIG['DELUGE_LABEL']:
                        _ = client.call('label.set_torrent', downloadID,
                                        lazylibrarian.CONFIG['DELUGE_LABEL'].lower())
                    result = client.call('core.get_torrent_status', downloadID, {})
                    if 'name' in result:
                        tor_title = result['name']
            except Exception as e:
                logger.error('DelugeRPC failed %s %s' % (type(e).__name__, str(e)))
                return False

    if not Source:
        logger.warn('No torrent download method is enabled, check config.')
        return False

    if downloadID:
        if tor_title:
            # str(): not every downloader is guaranteed to hand back a string id
            if str(downloadID).upper() in tor_title.upper():
                logger.warn('%s: name contains hash, probably unresolved magnet' % Source)
            else:
                tor_title = unaccented_str(tor_title)
                # need to check against reject words list again as the name may have changed
                # library = magazine eBook AudioBook to determine which reject list
                # but we can't easily do the per-magazine rejects
                if library == 'magazine':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_MAGS'])
                elif library == 'eBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
                elif library == 'AudioBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
                else:
                    logger.debug("Invalid library [%s] in TORDownloadMethod" % library)
                    reject_list = []

                rejected = False
                lower_title = tor_title.lower()
                for word in reject_list:
                    if word in lower_title:
                        rejected = True
                        logger.debug("Rejecting torrent name %s, contains %s" % (tor_title, word))
                        break
                if rejected:
                    myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (full_url,))
                    delete_task(Source, downloadID, True)
                    return False
                else:
                    logger.debug('%s setting torrent name to [%s]' % (Source, tor_title))
                    myDB.action('UPDATE wanted SET NZBtitle=? WHERE NZBurl=?', (tor_title, full_url))

        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?', (bookid,))
        elif library == 'AudioBook':
            myDB.action('UPDATE books SET audiostatus="Snatched" WHERE BookID=?', (bookid,))
        myDB.action('UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
                    (Source, downloadID, full_url))
        return True

    logger.error('Failed to download torrent from %s, %s' % (Source, tor_url))
    myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (full_url,))
    return False
def magazineScan(title=None):
    """Walk the magazine folder and synchronise the issues/magazines tables.

    For every file accepted by ``is_valid_booktype(..., booktype='mag')`` the
    title and issue date are taken from the filename (using a pattern derived
    from the MAG_DEST_FILE config value, falling back to the folder name and
    ``get_issue_date``). The file is optionally renamed (MAG_RENAME), the issue
    row is upserted, a cover and OPF are (re)generated, and the owning
    magazine's date columns are refreshed.

    ``lazylibrarian.MAG_UPDATE`` is set to 1 for the duration of the scan and
    reset to 0 on exit. All exceptions are logged rather than raised.

    :param title: when given, only that magazine's folder is scanned and the
                  FULL_SCAN clean-up of missing issues is skipped
    """
    lazylibrarian.MAG_UPDATE = 1

    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        onetitle = title
        if onetitle:
            mag_path = lazylibrarian.CONFIG['MAG_DEST_FOLDER'].replace('$Title', onetitle)
        else:
            mag_path = os.path.dirname(lazylibrarian.CONFIG['MAG_DEST_FOLDER'])

        if lazylibrarian.CONFIG['MAG_RELATIVE']:
            mag_path = os.path.join(lazylibrarian.DIRECTORY('eBook'), mag_path)
        if PY2:
            mag_path = mag_path.encode(lazylibrarian.SYS_ENCODING)

        if lazylibrarian.CONFIG['FULL_SCAN'] and not onetitle:
            mags = myDB.select('select * from Issues')
            # check all the issues are still there, delete entry if not
            for mag in mags:
                title = mag['Title']
                issuedate = mag['IssueDate']
                issuefile = mag['IssueFile']

                if issuefile and not os.path.isfile(issuefile):
                    myDB.action('DELETE from Issues where issuefile=?', (issuefile,))
                    logger.info('Issue %s - %s deleted as not found on disk' % (title, issuedate))
                    controlValueDict = {"Title": title}
                    newValueDict = {
                        "LastAcquired": None,  # clear magazine dates
                        "IssueDate": None,  # we will fill them in again later
                        "LatestCover": None,
                        "IssueStatus": "Skipped"  # assume there are no issues now
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    logger.debug('Magazine %s details reset' % title)

            # now check the magazine titles and delete any with no issues
            # NOTE(review): grouping the issues table by Title only yields titles
            # that still have at least one issue, so the zero-count branch below
            # looks unreachable - confirm the intended query before relying on it
            if lazylibrarian.CONFIG['MAG_DELFOLDER']:
                mags = myDB.select('SELECT Title,count(Title) as counter from issues group by Title')
                for mag in mags:
                    title = mag['Title']
                    issues = mag['counter']
                    if not issues:
                        logger.debug('Magazine %s deleted as no issues found' % title)
                        myDB.action('DELETE from magazines WHERE Title=?', (title,))

        logger.info(' Checking [%s] for magazines' % mag_path)

        # massage the MAG_DEST_FILE config parameter into something we can use
        # with regular expression matching. re.escape leaves letters alone, so a
        # literal word in the template cannot turn into a regex escape such as
        # \d, or an unknown escape that python 3.7+ rejects
        matchString = re.escape(lazylibrarian.CONFIG['MAG_DEST_FILE'])
        booktype_list = getList(lazylibrarian.CONFIG['MAG_TYPE'])
        # alternation of whole extensions, not a character class of their letters
        booktypes = '|'.join(re.escape(book_type) for book_type in booktype_list)
        extn_pattern = r'\.(?:' + booktypes + ')'
        date_group = "(?P<issuedate>.*?)"

        match = matchString.replace(r"\$IssueDate", date_group).replace(
            r"\$Title", "(?P<title>.*?)") + extn_pattern
        title_pattern = re.compile(match, re.VERBOSE)
        match = matchString.replace(r"\$IssueDate", date_group).replace(
            r"\$Title", "") + extn_pattern
        date_pattern = re.compile(match, re.VERBOSE)

        # try to ensure startdir is str as os.walk can fail if it tries to convert a subdir or file
        # to utf-8 and fails (eg scandinavian characters in ascii 8bit)
        for rootdir, dirnames, filenames in os.walk(makeBytestr(mag_path)):
            rootdir = makeUnicode(rootdir)
            filenames = [makeUnicode(item) for item in filenames]
            for fname in filenames:
                # maybe not all magazines will be pdf?
                if not is_valid_booktype(fname, booktype='mag'):
                    continue

                issuedate = ''
                # first choice: filename carries both title and issue date
                # noinspection PyBroadException
                try:
                    match = title_pattern.match(fname)
                    if match:
                        title = match.group("title")
                        issuedate = match.group("issuedate")
                        if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                            logger.debug("Title pattern [%s][%s]" % (title, issuedate))
                        match = True
                    else:
                        logger.debug("Title pattern match failed for [%s]" % fname)
                except Exception:
                    match = False

                if not match:
                    # second choice: date from the filename, title from the folder
                    # noinspection PyBroadException
                    try:
                        match = date_pattern.match(fname)
                        if match:
                            issuedate = match.group("issuedate")
                            title = os.path.basename(rootdir)
                            if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                                logger.debug("Date pattern [%s][%s]" % (title, issuedate))
                            match = True
                        else:
                            logger.debug("Date pattern match failed for [%s]" % fname)
                    except Exception:
                        match = False

                if not match:
                    title = os.path.basename(rootdir)
                    issuedate = ''

                dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '',
                       '[': ' ', ']': ' ', '#': '# '}
                if issuedate:
                    exploded = replace_all(issuedate, dic).split()
                    regex_pass, issuedate, year = lazylibrarian.searchmag.get_issue_date(exploded)
                    if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                        logger.debug("Date regex [%s][%s][%s]" % (regex_pass, issuedate, year))
                    if not regex_pass:
                        issuedate = ''

                if not issuedate:
                    # last resort: look for a date anywhere in the filename
                    exploded = replace_all(fname, dic).split()
                    regex_pass, issuedate, year = lazylibrarian.searchmag.get_issue_date(exploded)
                    if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                        logger.debug("File regex [%s][%s][%s]" % (regex_pass, issuedate, year))
                    if not regex_pass:
                        issuedate = ''

                if not issuedate:
                    logger.warn("Invalid name format for [%s]" % fname)
                    continue

                issuefile = os.path.join(rootdir, fname)  # full path to issue.pdf
                mtime = os.path.getmtime(issuefile)
                iss_acquired = datetime.date.isoformat(datetime.date.fromtimestamp(mtime))

                if lazylibrarian.CONFIG['MAG_RENAME']:
                    filedate = issuedate
                    if issuedate and issuedate.isdigit():
                        if len(issuedate) == 8:
                            if check_year(issuedate[:4]):
                                filedate = 'Issue %d %s' % (int(issuedate[4:]), issuedate[:4])
                            else:
                                filedate = 'Vol %d Iss %d' % (int(issuedate[:4]), int(issuedate[4:]))
                        elif len(issuedate) == 12:
                            filedate = 'Vol %d Iss %d %s' % (int(issuedate[4:8]), int(issuedate[8:]),
                                                             issuedate[:4])
                        else:
                            filedate = str(issuedate).zfill(4)

                    extn = os.path.splitext(fname)[1]
                    newfname = lazylibrarian.CONFIG['MAG_DEST_FILE'].replace('$Title', title).replace(
                        '$IssueDate', filedate)
                    newfname = newfname + extn
                    if newfname and newfname != fname:
                        logger.debug("Rename %s -> %s" % (fname, newfname))
                        newissuefile = os.path.join(rootdir, newfname)
                        newissuefile = safe_move(issuefile, newissuefile)
                        # move the cover/opf that sit next to the issue. Only swap the
                        # final extension: str.replace would also rewrite any earlier
                        # occurrence of the extension text in the path
                        old_base = os.path.splitext(issuefile)[0]
                        new_base = os.path.splitext(newissuefile)[0]
                        for sidecar in ('.jpg', '.opf'):
                            if os.path.exists(old_base + sidecar):
                                safe_move(old_base + sidecar, new_base + sidecar)
                        issuefile = newissuefile

                logger.debug("Found %s Issue %s" % (title, issuedate))
                controlValueDict = {"Title": title}

                # is this magazine already in the database?
                mag_entry = myDB.match(
                    'SELECT LastAcquired,IssueDate,MagazineAdded,CoverPage from magazines WHERE Title=?',
                    (title,))
                if not mag_entry:
                    # need to add a new magazine to the database
                    newValueDict = {
                        "Reject": None,
                        "Status": "Active",
                        "MagazineAdded": None,
                        "LastAcquired": None,
                        "LatestCover": None,
                        "IssueDate": None,
                        "IssueStatus": "Skipped",
                        "Regex": None,
                        "CoverPage": 1
                    }
                    logger.debug("Adding magazine %s" % title)
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    magissuedate = None
                    magazineadded = None
                    maglastacquired = None
                    magcoverpage = 1
                else:
                    maglastacquired = mag_entry['LastAcquired']
                    magissuedate = mag_entry['IssueDate']
                    magazineadded = mag_entry['MagazineAdded']
                    # only pad a real value: str(None) is the truthy string 'None',
                    # which would stop a reset IssueDate from ever being refilled
                    if magissuedate:
                        magissuedate = str(magissuedate).zfill(4)
                    magcoverpage = mag_entry['CoverPage']

                issuedate = str(issuedate).zfill(4)  # for sorting issue numbers

                # is this issue already in the database?
                issue_id = create_id("%s %s" % (title, issuedate))
                iss_entry = myDB.match('SELECT Title,IssueFile from issues WHERE Title=? and IssueDate=?',
                                       (title, issuedate))
                new_entry = False
                if not iss_entry or iss_entry['IssueFile'] != issuefile:
                    new_entry = True  # new entry or name changed
                    if not iss_entry:
                        logger.debug("Adding issue %s %s" % (title, issuedate))
                    else:
                        logger.debug("Updating issue %s %s" % (title, issuedate))
                    controlValueDict = {"Title": title, "IssueDate": issuedate}
                    newValueDict = {
                        "IssueAcquired": iss_acquired,
                        "IssueID": issue_id,
                        "IssueFile": issuefile
                    }
                    myDB.upsert("Issues", newValueDict, controlValueDict)

                # create/touch the .ll_ignore marker alongside the issue
                ignorefile = os.path.join(os.path.dirname(issuefile), '.ll_ignore')
                with open(ignorefile, 'a'):
                    os.utime(ignorefile, None)

                createMagCover(issuefile, pagenum=magcoverpage, refresh=new_entry)
                lazylibrarian.postprocess.processMAGOPF(issuefile, title, issuedate, issue_id,
                                                        overwrite=new_entry)

                # see if this issues date values are useful
                controlValueDict = {"Title": title}
                if not mag_entry:  # new magazine, this is the only issue
                    newValueDict = {
                        "MagazineAdded": iss_acquired,
                        "LastAcquired": iss_acquired,
                        "LatestCover": os.path.splitext(issuefile)[0] + '.jpg',
                        "IssueDate": issuedate,
                        "IssueStatus": "Open"
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                else:
                    # Set magazine_issuedate to issuedate of most recent issue we have
                    # Set latestcover to most recent issue cover
                    # Set magazine_added to acquired date of earliest issue we have
                    # Set magazine_lastacquired to acquired date of most recent issue we have
                    # acquired dates are read from magazine file timestamps
                    newValueDict = {"IssueStatus": "Open"}
                    if not magazineadded or iss_acquired < magazineadded:
                        newValueDict["MagazineAdded"] = iss_acquired
                    if not maglastacquired or iss_acquired > maglastacquired:
                        newValueDict["LastAcquired"] = iss_acquired
                    if not magissuedate or issuedate >= magissuedate:
                        newValueDict["IssueDate"] = issuedate
                        newValueDict["LatestCover"] = os.path.splitext(issuefile)[0] + '.jpg'
                    myDB.upsert("magazines", newValueDict, controlValueDict)

        if lazylibrarian.CONFIG['FULL_SCAN'] and not onetitle:
            magcount = myDB.match("select count(*) from magazines")
            isscount = myDB.match("select count(*) from issues")
            logger.info("Magazine scan complete, found %s magazine%s, %s issue%s" %
                        (magcount['count(*)'], plural(magcount['count(*)']),
                         isscount['count(*)'], plural(isscount['count(*)'])))
        else:
            logger.info("Magazine scan complete")

    except Exception:
        logger.error('Unhandled exception in magazineScan: %s' % traceback.format_exc())
    finally:
        # always clear the "scan in progress" flag, whatever happened above
        lazylibrarian.MAG_UPDATE = 0
def findBestResult(resultlist, book, searchtype, source):
    """Score provider results against the wanted book and return the best one.

    Each result title is cleaned, fuzzy-matched against the author and title,
    and rejected if it has no usable URL, is blacklisted in the ``wanted``
    table, contains a reject word, or falls outside the configured size
    limits. Surviving results are scored as the mean of the author and title
    match, minus one point per extra word, plus a bonus for preferred file
    types.

    resultlist: collated results from search providers
    book: the book we want to find
    searchtype: book, magazine, shortbook, audiobook etc.
    source: nzb, tor, rss, direct
    return: the highest scoring match as
            [score, newValueDict, controlValueDict, priority] (returned even
            when the score is below MATCH_RATIO, which is only logged),
            or None if nothing survived or an exception was logged
    """
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        # r'\s\s' has the same value the original non-raw literal produced,
        # without the invalid-escape warning
        dictrepl = {
            '...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
            '"': '', ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '',
            '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '',
            '8': '', '9': '', '\'': '', ':': '', '!': '', '-': ' ', r'\s\s': ' '
        }
        dic = {
            '...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
            '"': '', ',': '', '*': '', ':': '.', ';': '', '\'': ''
        }

        if source == 'rss':
            author, title = get_searchterm(book, searchtype)
        else:
            author = unaccented_str(replace_all(book['authorName'], dic))
            title = unaccented_str(replace_all(book['bookName'], dic))

        if book['library'] == 'AudioBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXAUDIO'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINAUDIO'], 0)
            auxinfo = 'AudioBook'
            type_key = 'AUDIOBOOK_TYPE'
        else:  # elif book['library'] == 'eBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXSIZE'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINSIZE'], 0)
            auxinfo = 'eBook'
            type_key = 'EBOOK_TYPE'

        if source == 'nzb':
            prefix = 'nzb'
        else:  # rss and libgen return same names as torrents
            prefix = 'tor_'

        # These word lists do not depend on the individual result, so build them
        # once instead of once per word per result (getList is treated as a
        # plain string-to-list splitter here)
        author_words = getList(author.lower())
        title_words = getList(title.lower())
        typelist = getList(lazylibrarian.CONFIG[type_key])
        # booktypes nearer the left of the config list score higher
        reversed_types = list(reversed(typelist))

        logger.debug('Searching %s %s results for best %s match' % (len(resultlist), source, auxinfo))

        matches = []
        for res in resultlist:
            resultTitle = unaccented_str(replace_all(res[prefix + 'title'], dictrepl)).strip()
            resultTitle = re.sub(r"\s\s+", " ", resultTitle)  # remove extra whitespace
            Author_match = fuzz.token_set_ratio(author, resultTitle)
            Book_match = fuzz.token_set_ratio(title, resultTitle)
            if lazylibrarian.LOGLEVEL & lazylibrarian.log_fuzz:
                logger.debug("%s author/book Match: %s/%s %s at %s" %
                             (source.upper(), Author_match, Book_match, resultTitle, res[prefix + 'prov']))

            rejected = False

            url = res[prefix + 'url']
            if url is None:
                rejected = True
                logger.debug("Rejecting %s, no URL found" % resultTitle)

            if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']:
                already_failed = myDB.match(
                    'SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (url, ))
                if already_failed:
                    logger.debug("Rejecting %s, blacklisted at %s" %
                                 (resultTitle, already_failed['NZBprov']))
                    rejected = True

            if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']:
                already_failed = myDB.match(
                    'SELECT * from wanted WHERE NZBurl=?', (url, ))
                if already_failed:
                    logger.debug("Rejecting %s, blacklisted at %s" %
                                 (resultTitle, already_failed['NZBprov']))
                    rejected = True

            if not rejected and not url.startswith('http') and not url.startswith('magnet'):
                rejected = True
                logger.debug("Rejecting %s, invalid URL [%s]" % (resultTitle, url))

            if not rejected:
                # a reject word only counts if it is not part of the wanted author/title
                result_words = getList(resultTitle.lower())
                for word in reject_list:
                    if word in result_words and word not in author_words \
                            and word not in title_words:
                        rejected = True
                        logger.debug("Rejecting %s, contains %s" % (resultTitle, word))
                        break

            size_temp = check_int(res[prefix + 'size'], 1000)  # Need to cater for when this is NONE (Issue 35)
            size = round(float(size_temp) / 1048576, 2)

            if not rejected and maxsize and size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % resultTitle)

            if not rejected and minsize and size < minsize:
                rejected = True
                logger.debug("Rejecting %s, too small" % resultTitle)

            if rejected:
                continue

            bookid = book['bookid']

            if source == 'nzb':
                mode = res['nzbmode']  # nzb, torznab
            else:
                mode = res['tor_type']  # torrent, magnet, nzb(from rss), direct

            controlValueDict = {"NZBurl": url}
            newValueDict = {
                "NZBprov": res[prefix + 'prov'],
                "BookID": bookid,
                "NZBdate": now(),  # when we asked for it
                "NZBsize": size,
                "NZBtitle": resultTitle,
                "NZBmode": mode,
                "AuxInfo": auxinfo,
                "Status": "Skipped"
            }

            score = (Book_match + Author_match) / 2  # as a percentage
            # lose a point for each unwanted word in the title so we get the closest match
            # but for RSS ignore anything at the end in square braces [keywords, genres etc]
            if source == 'rss':
                wordlist = getList(resultTitle.rsplit('[', 1)[0].lower())
            else:
                wordlist = getList(resultTitle.lower())
            words = [x for x in wordlist
                     if x not in author_words and x not in title_words and x not in typelist]
            score -= len(words)

            # prioritise titles that include the ebook types we want
            # add more points for booktypes nearer the left in the list
            # eg if epub, mobi, pdf add 3 points if epub found, 2 for mobi, 1 for pdf
            for item in wordlist:
                if item not in typelist:
                    continue
                for idx, booktype in enumerate(reversed_types):
                    if booktype == item:
                        score += idx + 1

            matches.append([score, newValueDict, controlValueDict, res['priority']])

        if matches:
            # best score wins, download priority breaks ties
            highest = max(matches, key=lambda s: (s[0], s[3]))
            score = highest[0]
            newValueDict = highest[1]
            dlpriority = highest[3]

            if score < int(lazylibrarian.CONFIG['MATCH_RATIO']):
                logger.info('Nearest match (%s%%): %s using %s search for %s %s' %
                            (score, newValueDict['NZBtitle'], searchtype,
                             book['authorName'], book['bookName']))
            else:
                logger.info('Best match (%s%%): %s using %s search, %s priority %s' %
                            (score, newValueDict['NZBtitle'], searchtype,
                             newValueDict['NZBprov'], dlpriority))
            return highest
        else:
            logger.debug("No %s found for [%s] using searchtype %s" %
                         (source, book["searchterm"], searchtype))
        return None
    except Exception:
        logger.error('Unhandled exception in findBestResult: %s' % traceback.format_exc())
        return None
def find_results(self, searchterm=None, queue=None):
    """Query the Google Books API and put the list of matches on ``queue``.

    GoogleBooks performs much better if we search for author OR title,
    not both at once, so unless the searchterm is an isbn two searches
    are made (author first, then title).
    LazyLibrarian searches use ' <ll> ' to separate title from author.
    If this token isn't present, the searchterm is an isbn or free text
    as supplied by the user.

    Each entry appended to the result list is a dict describing one volume
    (author, title, isbn, cover, fuzzy-match scores ...).

    NOTE(review): when an unexpected exception is caught by the outer
    handler nothing is put on ``queue`` - confirm callers cope with that.
    """
    try:
        myDB = database.DBConnection()
        resultlist = []
        # isbn needs a single lookup, anything else is searched by author then title
        if is_valid_isbn(searchterm):
            api_strings = ['isbn:']
        else:
            api_strings = ['inauthor:', 'intitle:']

        api_hits = 0
        ignored = 0
        total_count = 0
        no_author_count = 0

        title = ''
        authorname = ''
        if ' <ll> ' in searchterm:  # special token separates title from author
            title, authorname = searchterm.split(' <ll> ')

        fullterm = searchterm.replace(' <ll> ', '')
        logger.debug('Now searching Google Books API with searchterm: %s' % fullterm)

        for api_value in api_strings:
            query_url = self.url
            if api_value == "isbn:":
                query_url = query_url + urllib.quote(
                    api_value + searchterm.encode(lazylibrarian.SYS_ENCODING))
            elif api_value == 'intitle:':
                searchterm = fullterm
                if title:  # just search for title
                    title = title.split(' (')[0]  # without any series info
                    searchterm = title
                # and no quotes or surrounding whitespace
                searchterm = searchterm.replace("'", "").replace('"', '').strip()
                query_url = query_url + urllib.quote(
                    api_value + '"' + searchterm.encode(lazylibrarian.SYS_ENCODING) + '"')
            elif api_value == 'inauthor:':
                # just search for author when we know it
                searchterm = authorname if authorname else fullterm
                query_url = query_url + urllib.quote(
                    api_value + '"' + searchterm.encode(lazylibrarian.SYS_ENCODING) + '"')
                searchterm = searchterm.strip()

            # counters are restarted for every api_value
            startindex = 0
            resultcount = 0
            ignored = 0
            number_results = 1
            total_count = 0
            no_author_count = 0
            try:
                while startindex < number_results:
                    self.params['startIndex'] = startindex
                    request_url = query_url + '&' + urllib.urlencode(self.params)

                    try:
                        jsonresults, in_cache = get_json_request(request_url)
                        if jsonresults:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                            logger.debug('Searching url: ' + request_url)
                        else:
                            number_results = 0
                        if number_results == 0:
                            logger.warn('Found no results for %s with value: %s' %
                                        (api_value, searchterm))
                            break
                    except HTTPError as err:
                        logger.warn(
                            'Google Books API Error [%s]: Check your API key or wait a while' %
                            err.reason)
                        break

                    # results are requested in pages of 40
                    startindex += 40

                    for item in jsonresults['items']:
                        total_count += 1

                        # skip if no author, no author is no book.
                        try:
                            volume = item['volumeInfo']
                            Author = volume['authors'][0]
                        except KeyError:
                            logger.debug('Skipped a result without authorfield.')
                            no_author_count += 1
                            continue

                        try:
                            bookname = volume['title']
                        except KeyError:
                            logger.debug('Skipped a result without title.')
                            continue

                        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
                        booklang = ''
                        if "All" not in valid_langs:  # "All" means accept any language
                            try:
                                # skip if language is not in valid list
                                booklang = volume['language']
                                if booklang not in valid_langs:
                                    logger.debug('Skipped %s with language %s' %
                                                 (bookname, booklang))
                                    ignored += 1
                                    continue
                            except KeyError:
                                ignored += 1
                                logger.debug('Skipped %s where no language is found' % bookname)
                                continue

                        # optional fields fall back to a default when missing
                        bookpub = volume.get('publisher', "")
                        booksub = volume.get('subtitle', "")
                        bookdate = volume.get('publishedDate', '0000-00-00')[:4]
                        try:
                            bookimg = volume['imageLinks']['thumbnail']
                        except KeyError:
                            bookimg = 'images/nocover.png'
                        bookrate = volume.get('averageRating', 0)
                        bookpages = volume.get('pageCount', '0')
                        try:
                            bookgenre = volume['categories'][0]
                        except KeyError:
                            bookgenre = ""
                        bookdesc = volume.get('description', 'Not available')
                        num_reviews = volume.get('ratingsCount', 0)

                        # only the first identifier is considered, and only if it is ISBN_10
                        try:
                            first_ident = volume['industryIdentifiers'][0]
                            if first_ident['type'] == 'ISBN_10':
                                bookisbn = first_ident['identifier']
                            else:
                                bookisbn = 0
                        except KeyError:
                            bookisbn = 0

                        author_fuzz = fuzz.ratio(Author, authorname if authorname else fullterm)

                        if title:
                            book_fuzz = fuzz.ratio(bookname, title)
                            # lose a point for each extra word in the fuzzy matches
                            # so we get the closest match
                            word_delta = len(getList(bookname)) - len(getList(title))
                            book_fuzz -= abs(word_delta)
                        else:
                            book_fuzz = fuzz.ratio(bookname, fullterm)

                        isbn_fuzz = 100 if is_valid_isbn(fullterm) else 0
                        highest_fuzz = max((author_fuzz + book_fuzz) / 2, isbn_fuzz)

                        replacements = {':': '.', '"': '', '\'': ''}
                        bookname = unaccented(replace_all(bookname, replacements))
                        bookname = bookname.strip()  # strip whitespace
                        bookid = item['id']

                        author_rows = myDB.select(
                            'SELECT AuthorID FROM authors WHERE AuthorName = "%s"' %
                            Author.replace('"', '""'))
                        AuthorID = author_rows[0]['authorid'] if author_rows else ''

                        resultlist.append({
                            'authorname': Author,
                            'authorid': AuthorID,
                            'bookid': bookid,
                            'bookname': bookname,
                            'booksub': booksub,
                            'bookisbn': bookisbn,
                            'bookpub': bookpub,
                            'bookdate': bookdate,
                            'booklang': booklang,
                            'booklink': volume['canonicalVolumeLink'],
                            'bookrate': float(bookrate),
                            'bookimg': bookimg,
                            'bookpages': bookpages,
                            'bookgenre': bookgenre,
                            'bookdesc': bookdesc,
                            'author_fuzz': author_fuzz,
                            'book_fuzz': book_fuzz,
                            'isbn_fuzz': isbn_fuzz,
                            'highest_fuzz': highest_fuzz,
                            'num_reviews': num_reviews
                        })

                        resultcount += 1

            except KeyError:
                # a mandatory key was missing from the response, give up searching
                break

            logger.debug("Returning %s result%s for (%s) with keyword: %s" %
                         (resultcount, plural(resultcount), api_value, searchterm))

        logger.debug("Found %s result%s" % (total_count, plural(total_count)))
        logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
        logger.debug("Removed %s book%s with no author" %
                     (no_author_count, plural(no_author_count)))
        logger.debug('The Google Books API was hit %s time%s for searchterm: %s' %
                     (api_hits, plural(api_hits), fullterm))
        queue.put(resultlist)

    except Exception:
        logger.error('Unhandled exception in GB.find_results: %s' % traceback.format_exc())
def TORDownloadMethod(bookid=None, tor_title=None, tor_url=None, library='eBook'):
    """Fetch a torrent (or use a magnet link) and hand it to every enabled downloader.

    :param bookid: BookID whose status is set to "Snatched" on success
    :param tor_title: title of the torrent, may be replaced by the name the client reports
    :param tor_url: url of a .torrent file, or a magnet link
    :param library: 'eBook', 'AudioBook' or 'Magazine', selects the reject word list
                    and which books column is updated
    :return: tuple (success, message) - (True, '') when the torrent was accepted,
             otherwise (False, reason)
    """
    myDB = database.DBConnection()
    downloadID = False
    Source = ''
    torrent = ''

    full_url = tor_url  # keep the url as stored in "wanted" table
    if 'magnet:?' in tor_url:
        # discard any other parameters and just use the magnet link
        tor_url = 'magnet:?' + tor_url.split('magnet:?')[1]
    else:
        # HTMLParser().unescape() is probably overkill, we only seem to get
        # escaped ampersands. The previous replace('&', '&') was a no-op.
        tor_url = tor_url.replace('&amp;', '&')

        if '&file=' in tor_url:
            # torznab results need to be re-encoded
            # had a problem with torznab utf-8 encoded strings not matching
            # our utf-8 strings because of long/short form differences
            url, value = tor_url.split('&file=', 1)
            value = makeUnicode(value)  # ensure unicode
            value = unicodedata.normalize('NFC', value)  # normalize to short form
            value = value.encode('unicode-escape')  # then escape the result
            value = makeUnicode(value)  # ensure unicode
            value = value.replace(' ', '%20')  # and encode any spaces
            tor_url = url + '&file=' + value

        # strip url back to the .torrent as some sites add extra parameters
        if not tor_url.endswith('.torrent') and '.torrent' in tor_url:
            tor_url = tor_url.split('.torrent')[0] + '.torrent'

        headers = {'Accept-encoding': 'gzip', 'User-Agent': getUserAgent()}
        proxies = proxyList()

        try:
            logger.debug("Fetching %s" % tor_url)
            r = requests.get(tor_url, headers=headers, timeout=90, proxies=proxies)
            if str(r.status_code).startswith('2'):
                torrent = r.content
                if not len(torrent):
                    res = "Got empty response for %s" % tor_url
                    logger.warn(res)
                    return False, res
                elif len(torrent) < 100:
                    res = "Only got %s bytes for %s" % (len(torrent), tor_url)
                    logger.warn(res)
                    return False, res
                else:
                    logger.debug("Got %s bytes for %s" % (len(torrent), tor_url))
            else:
                res = "Got a %s response for %s" % (r.status_code, tor_url)
                logger.warn(res)
                return False, res

        except requests.exceptions.Timeout:
            res = 'Timeout fetching file from url: %s' % tor_url
            logger.warn(res)
            return False, res
        except Exception as e:
            # some jackett providers redirect internally using http 301 to a magnet link
            # which requests can't handle, so throws an exception
            logger.debug("Requests exception: %s" % str(e))
            if "magnet:?" in str(e):
                tor_url = 'magnet:?' + str(e).split('magnet:?')[1].strip("'")
                logger.debug("Redirecting to %s" % tor_url)
            else:
                if hasattr(e, 'reason'):
                    res = '%s fetching file from url: %s, %s' % (
                        type(e).__name__, tor_url, e.reason)
                else:
                    res = '%s fetching file from url: %s, %s' % (
                        type(e).__name__, tor_url, str(e))
                logger.warn(res)
                return False, res

    if not torrent and not tor_url.startswith('magnet:?'):
        res = "No magnet or data, cannot continue"
        logger.warn(res)
        return False, res

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_BLACKHOLE']:
        Source = "BLACKHOLE"
        logger.debug("Sending %s to blackhole" % tor_title)
        tor_name = cleanName(tor_title).replace(' ', '_')
        if tor_url and tor_url.startswith('magnet'):
            if lazylibrarian.CONFIG['TOR_CONVERT_MAGNET']:
                hashid = calculate_torrent_hash(tor_url)
                if not hashid:
                    hashid = tor_name
                tor_name = 'meta-' + hashid + '.torrent'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                result = magnet2torrent(tor_url, tor_path)
                if result is not False:
                    logger.debug('Magnet file saved as: %s' % tor_path)
                    downloadID = Source
            else:
                tor_name += '.magnet'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                msg = ''  # records how far we got, for the failure log
                try:
                    msg = 'Opening '
                    with open(tor_path, 'wb') as torrent_file:
                        msg += 'Writing '
                        # NOTE(review): for a magnet link no data was fetched, so
                        # "torrent" looks to be empty here and the .magnet file is
                        # written empty - confirm whether tor_url should be written
                        if isinstance(torrent, text_type):
                            torrent = torrent.encode('iso-8859-1')
                        torrent_file.write(torrent)
                    msg += 'SettingPerm '
                    setperm(tor_path)
                    msg += 'Saved '
                    logger.debug('Magnet file saved: %s' % tor_path)
                    downloadID = Source
                except Exception as e:
                    res = "Failed to write magnet to file: %s %s" % (
                        type(e).__name__, str(e))
                    logger.warn(res)
                    logger.debug("Progress: %s Filename [%s]" % (msg, repr(tor_path)))
                    return False, res
        else:
            tor_name += '.torrent'
            tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
            msg = ''  # records how far we got, for the failure log
            try:
                msg = 'Opening '
                with open(tor_path, 'wb') as torrent_file:
                    msg += 'Writing '
                    if isinstance(torrent, text_type):
                        torrent = torrent.encode('iso-8859-1')
                    torrent_file.write(torrent)
                msg += 'SettingPerm '
                setperm(tor_path)
                msg += 'Saved '
                logger.debug('Torrent file saved: %s' % tor_name)
                downloadID = Source
            except Exception as e:
                res = "Failed to write torrent to file: %s %s" % (
                    type(e).__name__, str(e))
                logger.warn(res)
                logger.debug("Progress: %s Filename [%s]" % (msg, repr(tor_path)))
                return False, res

    hashid = calculate_torrent_hash(tor_url, torrent)
    if not hashid:
        res = "Unable to calculate torrent hash from url/data"
        logger.error(res)
        logger.debug("url: %s" % tor_url)
        logger.debug("data: %s" % makeUnicode(str(torrent[:50])))
        return False, res

    # The downloader checks below are independent "if"s, not an elif chain:
    # every enabled client is sent the torrent and the last one to run
    # decides Source/downloadID.
    if lazylibrarian.CONFIG['TOR_DOWNLOADER_UTORRENT'] and lazylibrarian.CONFIG['UTORRENT_HOST']:
        logger.debug("Sending %s to Utorrent" % tor_title)
        Source = "UTORRENT"
        downloadID, res = utorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = utorrent.nameTorrent(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_RTORRENT'] and lazylibrarian.CONFIG['RTORRENT_HOST']:
        logger.debug("Sending %s to rTorrent" % tor_title)
        Source = "RTORRENT"
        if torrent:
            logger.debug("Sending %s data to rTorrent" % tor_title)
            downloadID, res = rtorrent.addTorrent(tor_title, hashid, data=torrent)
        else:
            logger.debug("Sending %s url to rTorrent" % tor_title)
            downloadID, res = rtorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = rtorrent.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_QBITTORRENT'] and \
            lazylibrarian.CONFIG['QBITTORRENT_HOST']:
        Source = "QBITTORRENT"
        if torrent:
            logger.debug("Sending %s data to qBittorrent" % tor_title)
            status, res = qbittorrent.addFile(torrent, hashid, tor_title)
        else:
            logger.debug("Sending %s url to qBittorrent" % tor_title)
            status, res = qbittorrent.addTorrent(tor_url, hashid)  # returns True or False
        if status:
            downloadID = hashid
            tor_title = qbittorrent.getName(hashid)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_TRANSMISSION'] and \
            lazylibrarian.CONFIG['TRANSMISSION_HOST']:
        Source = "TRANSMISSION"
        if torrent:
            logger.debug("Sending %s data to Transmission" % tor_title)
            # transmission needs b64encoded metainfo to be unicode, not bytes
            downloadID, res = transmission.addTorrent(
                None, metainfo=makeUnicode(b64encode(torrent)))
        else:
            logger.debug("Sending %s url to Transmission" % tor_title)
            downloadID, res = transmission.addTorrent(tor_url)  # returns id or False
        if downloadID:
            # transmission returns it's own int, but we store hashid instead
            downloadID = hashid
            tor_title = transmission.getTorrentFolder(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_SYNOLOGY'] and lazylibrarian.CONFIG['USE_SYNOLOGY'] and \
            lazylibrarian.CONFIG['SYNOLOGY_HOST']:
        logger.debug("Sending %s url to Synology" % tor_title)
        Source = "SYNOLOGY_TOR"
        downloadID, res = synology.addTorrent(tor_url)  # returns id or False
        if downloadID:
            tor_title = synology.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_DELUGE'] and lazylibrarian.CONFIG['DELUGE_HOST']:
        if not lazylibrarian.CONFIG['DELUGE_USER']:
            # no username, talk to the webui
            Source = "DELUGEWEBUI"
            if torrent:
                logger.debug("Sending %s data to Deluge" % tor_title)
                downloadID, res = deluge.addTorrent(tor_title, data=b64encode(torrent))
            else:
                logger.debug("Sending %s url to Deluge" % tor_title)
                # can be link or magnet, returns hash or False
                downloadID, res = deluge.addTorrent(tor_url)
            if downloadID:
                tor_title = deluge.getTorrentFolder(downloadID)
            else:
                return False, res
        else:
            # have username, talk to the daemon
            Source = "DELUGERPC"
            client = DelugeRPCClient(lazylibrarian.CONFIG['DELUGE_HOST'],
                                     int(lazylibrarian.CONFIG['DELUGE_PORT']),
                                     lazylibrarian.CONFIG['DELUGE_USER'],
                                     lazylibrarian.CONFIG['DELUGE_PASS'])
            try:
                client.connect()
                args = {"name": tor_title}
                if tor_url.startswith('magnet'):
                    res = "Sending %s magnet to DelugeRPC" % tor_title
                    logger.debug(res)
                    downloadID = client.call('core.add_torrent_magnet', tor_url, args)
                elif torrent:
                    res = "Sending %s data to DelugeRPC" % tor_title
                    logger.debug(res)
                    downloadID = client.call('core.add_torrent_file', tor_title,
                                             b64encode(torrent), args)
                else:
                    res = "Sending %s url to DelugeRPC" % tor_title
                    logger.debug(res)
                    downloadID = client.call('core.add_torrent_url', tor_url, args)
                if downloadID:
                    if lazylibrarian.CONFIG['DELUGE_LABEL']:
                        _ = client.call('label.set_torrent', downloadID,
                                        lazylibrarian.CONFIG['DELUGE_LABEL'].lower())
                    result = client.call('core.get_torrent_status', downloadID, {})
                    if 'name' in result:
                        tor_title = result['name']
                else:
                    res += ' failed'
                    logger.error(res)
                    return False, res

            except Exception as e:
                res = 'DelugeRPC failed %s %s' % (type(e).__name__, str(e))
                logger.error(res)
                return False, res

    if not Source:
        res = 'No torrent download method is enabled, check config.'
        logger.warn(res)
        return False, res

    if downloadID:
        if tor_title:
            if downloadID.upper() in tor_title.upper():
                logger.warn('%s: name contains hash, probably unresolved magnet' % Source)
            else:
                tor_title = unaccented_str(tor_title)
                # need to check against reject words list again as the name may have changed
                # library = magazine eBook AudioBook to determine which reject list
                # but we can't easily do the per-magazine rejects
                if library == 'Magazine':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_MAGS'], ',')
                elif library == 'eBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'], ',')
                elif library == 'AudioBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'], ',')
                else:
                    logger.debug("Invalid library [%s] in TORDownloadMethod" % library)
                    reject_list = []

                rejected = False
                lower_title = tor_title.lower()
                for word in reject_list:
                    if word in lower_title:
                        rejected = "Rejecting torrent name %s, contains %s" % (tor_title, word)
                        logger.debug(rejected)
                        break
                if not rejected:
                    rejected = check_contents(Source, downloadID, library, tor_title)
                if rejected:
                    myDB.action('UPDATE wanted SET status="Failed",DLResult=? WHERE NZBurl=?',
                                (rejected, full_url))
                    delete_task(Source, downloadID, True)
                    # every other exit returns a (success, message) pair; a bare
                    # False here broke callers that unpack the result
                    return False, rejected
                else:
                    logger.debug('%s setting torrent name to [%s]' % (Source, tor_title))
                    myDB.action('UPDATE wanted SET NZBtitle=? WHERE NZBurl=?',
                                (tor_title, full_url))

        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?', (bookid, ))
        elif library == 'AudioBook':
            myDB.action('UPDATE books SET audiostatus="Snatched" WHERE BookID=?', (bookid, ))
        myDB.action('UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
                    (Source, downloadID, full_url))
        return True, ''

    res = 'Failed to send torrent to %s' % Source
    logger.error(res)
    return False, res
def find_book(bookid=None, queue=None):
    """Look up one volume on Google Books by id and add it to the database as "Wanted".

    The author is resolved through GoodReads; if the author is not yet in the
    authors table a new row is added with status "Ignored". Cover, series and
    work page are then filled in from the helper lookups.

    :param bookid: Google Books volume id
    :param queue: not used by this function
    :return: None. Returns early (adding nothing) when the lookup yields no
             data, the volume has no author, or no author id can be found.
    """
    myDB = database.DBConnection()
    if not lazylibrarian.CONFIG['GB_API']:
        # only a warning, the request is still attempted without a key
        logger.warn('No GoogleBooks API key, check config')
    URL = 'https://www.googleapis.com/books/v1/volumes/' + \
          str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
    jsonresults, in_cache = get_json_request(URL)

    if not jsonresults:
        logger.debug('No results found for %s' % bookid)
        return

    volume = jsonresults['volumeInfo']
    bookname = volume['title']
    dic = {':': '.', '"': '', '\'': ''}
    bookname = replace_all(bookname, dic)
    bookname = unaccented(bookname)
    bookname = bookname.strip()  # strip whitespace

    try:
        authorname = volume['authors'][0]
    except (KeyError, IndexError):  # missing field, or present but empty
        logger.debug('Book %s does not contain author field, skipping' % bookname)
        return

    try:
        # warn if language is in ignore list, but user said they wanted this book
        booklang = volume['language']
        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        if booklang not in valid_langs and 'All' not in valid_langs:
            logger.debug('Book %s googlebooks language does not match preference, %s' %
                         (bookname, booklang))
    except KeyError:
        logger.debug('Book does not have language field')
        booklang = "Unknown"

    bookpub = volume.get('publisher', "")

    # series name and number are parsed from a subtitle shaped like
    # "... (<series> Series <number>)"
    series = ""
    seriesNum = ""
    try:
        booksub = volume['subtitle']
        try:
            series = booksub.split('(')[1].split(' Series ')[0]
        except IndexError:
            series = ""
        try:
            seriesNum = booksub.split('(')[1].split(' Series ')[1].split(')')[0]
            if seriesNum[0] == '#':
                seriesNum = seriesNum[1:]
        except IndexError:
            seriesNum = ""
    except KeyError:
        booksub = ""

    bookdate = volume.get('publishedDate', '0000-00-00')

    try:
        bookimg = volume['imageLinks']['thumbnail']
    except KeyError:
        bookimg = 'images/nocover.png'

    bookrate = volume.get('averageRating', 0)
    bookpages = volume.get('pageCount', 0)

    try:
        bookgenre = volume['categories'][0]
    except (KeyError, IndexError):
        bookgenre = ""

    bookdesc = volume.get('description', "")

    # only the first identifier is considered, and only if it is ISBN_10
    try:
        first_ident = volume['industryIdentifiers'][0]
        if first_ident['type'] == 'ISBN_10':
            bookisbn = first_ident['identifier']
        else:
            bookisbn = ""
    except (KeyError, IndexError):
        bookisbn = ""

    booklink = volume['canonicalVolumeLink']
    bookrate = float(bookrate)

    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if author:
        AuthorID = author['authorid']
        # values come from an external service, so bind them as parameters
        # instead of formatting them into the SQL text
        match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
        if not match:
            match = myDB.match('SELECT AuthorID from authors WHERE AuthorName=?',
                               (author['authorname'],))
            if match:
                logger.debug('%s: Changing authorid from %s to %s' %
                             (author['authorname'], AuthorID, match['AuthorID']))
                AuthorID = match['AuthorID']  # we have a different authorid for that authorname
            else:
                # no author but request to add book, add author as "ignored"
                # User hit "add book" button from a search
                controlValueDict = {"AuthorID": AuthorID}
                newValueDict = {
                    "AuthorName": author['authorname'],
                    "AuthorImg": author['authorimg'],
                    "AuthorLink": author['authorlink'],
                    "AuthorBorn": author['authorborn'],
                    "AuthorDeath": author['authordeath'],
                    "DateAdded": today(),
                    "Status": "Ignored"
                }
                myDB.upsert("authors", newValueDict, controlValueDict)
    else:
        logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
        return

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorID": AuthorID,
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": bookgenre,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": booklang,
        "Status": "Wanted",
        "BookAdded": today()
    }

    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s added to the books database" % bookname)

    if 'nocover' in bookimg or 'nophoto' in bookimg:
        # try to get a cover from librarything
        workcover = getBookCover(bookid)
        if workcover:
            logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)

    elif bookimg and bookimg.startswith('http'):
        link, success = cache_img("book", bookid, bookimg)
        if success:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)
        else:
            logger.debug('Failed to cache image for %s' % bookimg)

    if lazylibrarian.CONFIG['ADD_SERIES']:
        # prefer series info from librarything
        seriesdict = getWorkSeries(bookid)
        if seriesdict:
            logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
        else:
            if series:
                seriesdict = {cleanName(unaccented(series)): seriesNum}
        setSeries(seriesdict, bookid)

    worklink = getWorkPage(bookid)
    if worklink:
        controlValueDict = {"BookID": bookid}
        newValueDict = {"WorkPage": worklink}
        myDB.upsert("books", newValueDict, controlValueDict)
def DirectDownloadMethod(bookid=None, dl_title=None, dl_url=None, library='eBook'):
    """Download a book file straight from ``dl_url`` into the download directory.

    The file is written to <Download dir>/<basename>/<basename><extension>.
    The extension is taken from the last word of the title when that word is
    one of the configured ebook types, otherwise from the "magic" content
    sniffer when it is available.

    :param bookid: BookID whose status is set to "Snatched" on success
    :param dl_title: title of the download, used to build the file name
    :param dl_url: url to fetch, also the key into the "wanted" table
    :param library: 'eBook' or 'AudioBook', selects which books column is updated
    :return: True if the file was saved, False otherwise
    """
    myDB = database.DBConnection()
    downloadID = False
    Source = "DIRECT"

    logger.debug("Starting Direct Download for [%s]" % dl_title)
    proxies = proxyList()
    headers = {'Accept-encoding': 'gzip', 'User-Agent': USER_AGENT}
    try:
        r = requests.get(dl_url, headers=headers, timeout=90, proxies=proxies)
    except requests.exceptions.Timeout:
        logger.warn('Timeout fetching file from url: %s' % dl_url)
        return False
    except Exception as e:
        if hasattr(e, 'reason'):
            logger.warn('%s fetching file from url: %s, %s' %
                        (type(e).__name__, dl_url, e.reason))
        else:
            logger.warn('%s fetching file from url: %s, %s' %
                        (type(e).__name__, dl_url, str(e)))
        return False

    if not str(r.status_code).startswith('2'):
        logger.debug("Got a %s response for %s" % (r.status_code, dl_url))
    elif len(r.content) < 1000:
        logger.debug("Only got %s bytes for %s, rejecting" % (len(r.content), dl_title))
    else:
        # extn always includes its leading dot (or is empty) so that
        # basename + extn is a proper filename whichever branch set it
        extn = ''
        basename = dl_title
        stem = ''
        last_word = ''
        if ' ' in dl_title:
            # last word is often the extension - but not always...
            stem, last_word = dl_title.rsplit(' ', 1)

        if last_word and last_word in getList(lazylibrarian.CONFIG['EBOOK_TYPE']):
            basename = stem
            extn = '.' + last_word
            dl_title = basename + extn
        elif magic:
            mtype = magic.from_buffer(r.content)
            if 'EPUB' in mtype:
                extn = '.epub'
            elif 'Mobipocket' in mtype:  # also true for azw and azw3, does it matter?
                extn = '.mobi'
            elif 'PDF' in mtype:
                extn = '.pdf'
            else:
                # unknown type: save without an extension rather than
                # appending the last word of the title to the filename
                logger.debug("magic reports %s" % mtype)
        else:
            logger.warn("Don't know the filetype for %s" % dl_title)

        logger.debug("File download got %s bytes for %s" % (len(r.content), dl_title))
        destdir = os.path.join(lazylibrarian.DIRECTORY('Download'), basename)
        # destdir = os.path.join(lazylibrarian.DIRECTORY('Download'), '%s LL.(%s)' % (basename, bookid))
        if not os.path.isdir(destdir):
            _ = mymakedirs(destdir)

        # use the md5 from the url as download id if there is one, else hash the url
        try:
            hashid = dl_url.split("md5=")[1].split("&")[0]
        except IndexError:
            hashid = sha1(encode(dl_url)).hexdigest()

        destfile = os.path.join(destdir, basename + extn)
        try:
            with open(destfile, 'wb') as bookfile:
                bookfile.write(r.content)
            setperm(destfile)
            downloadID = hashid
        except Exception as e:
            logger.error("%s writing book to %s, %s" % (type(e).__name__, destfile, e))

    if downloadID:
        logger.debug('File %s has been downloaded from %s' % (dl_title, dl_url))
        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?', (bookid, ))
        elif library == 'AudioBook':
            myDB.action('UPDATE books SET audiostatus="Snatched" WHERE BookID=?', (bookid, ))
        myDB.action('UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
                    (Source, downloadID, dl_url))
        return True
    else:
        logger.error('Failed to download file @ <a href="%s">%s</a>' % (dl_url, dl_url))
        myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (dl_url, ))
        return False
def notify_download(self, title, bookid=None, force=False):
    """Send the "download" email notification, optionally attaching the file.

    :param title: message text; replaced by the book name or issue title when
                  ``bookid`` resolves to a book or magazine issue
    :param bookid: BookID (or IssueID) of the item, needed to attach the file
    :param force: send even if email notifications are disabled
    :return: result of ``self._notify``, or False when nothing was sent
    """
    # suppress notifications if the notifier is disabled but the notify options are checked
    if not lazylibrarian.CONFIG['USE_EMAIL'] and not force:
        return False

    if not lazylibrarian.CONFIG['EMAIL_NOTIFY_ONDOWNLOAD']:
        return False

    files = None
    event = notifyStrings[NOTIFY_DOWNLOAD]
    logger.debug('Email send attachment is %s' % lazylibrarian.CONFIG['EMAIL_SENDFILE_ONDOWNLOAD'])
    if lazylibrarian.CONFIG['EMAIL_SENDFILE_ONDOWNLOAD']:
        if not bookid:
            logger.debug('Email request to attach book, but no bookid')
        else:
            filename = None
            preftype = None
            myDB = database.DBConnection()
            typelist = getList(lazylibrarian.CONFIG['EBOOK_TYPE'])

            if lazylibrarian.CONFIG['HTTP_LOOK'] == 'legacy' or not lazylibrarian.CONFIG['USER_ACCOUNTS']:
                preftype = typelist[0]
                logger.debug('Preferred filetype = %s' % preftype)
            else:
                # per-user preference, identified by the ll_uid cookie
                cookie = cherrypy.request.cookie
                if cookie and 'll_uid' in list(cookie.keys()):
                    res = myDB.match('SELECT BookType from users where UserID=?',
                                     (cookie['ll_uid'].value,))
                    if res and res['BookType']:
                        preftype = res['BookType']
                        logger.debug('User preferred filetype = %s' % preftype)
                if not preftype:
                    # assign before logging, otherwise the message always shows None
                    preftype = typelist[0]
                    logger.debug('Default preferred filetype = %s' % preftype)

            data = myDB.match('SELECT BookFile,BookName from books where BookID=?', (bookid,))
            if data:
                bookfile = data['BookFile']
                types = []
                if bookfile and os.path.isfile(bookfile):
                    basename, extn = os.path.splitext(bookfile)
                    # which of the configured types exist alongside the book file
                    for item in typelist:
                        target = basename + '.' + item
                        if os.path.isfile(target):
                            types.append(item)

                    logger.debug('Available filetypes: %s' % str(types))

                    if preftype in types:
                        filename = basename + '.' + preftype
                    elif types:
                        filename = basename + '.' + types[0]
                    # else: nothing of a configured type on disk, send without
                    # an attachment instead of failing on types[0]

                title = data['BookName']
                logger.debug('Found %s for bookid %s' % (filename, bookid))
            else:
                # not a book, see if the id belongs to a magazine issue
                logger.debug('[%s] is not a valid bookid' % bookid)
                data = myDB.match('SELECT IssueFile,Title,IssueDate from issues where IssueID=?',
                                  (bookid,))
                if data:
                    filename = data['IssueFile']
                    title = "%s - %s" % (data['Title'], data['IssueDate'])
                    logger.debug('Found %s for issueid %s' % (filename, bookid))
                else:
                    logger.debug('[%s] is not a valid issueid' % bookid)
                    filename = ''

            if filename:
                files = [filename]  # could add cover_image, opf
                event = "LazyLibrarian Download"

    return self._notify(message=title, event=event, force=force, files=files)
def processResultList(resultlist, authorname, bookname, book, searchtype):
    """Pick the best RSS result for a book and send it to a downloader.

    Each result is scored on fuzzy author/title match, losing a point for each
    word more or fewer than expected. Results that previously failed, contain
    a reject word, or exceed the maximum size are discarded.

    :param resultlist: list of dicts with tor_title, tor_url, tor_size, tor_prov
    :param authorname: author name to match against the result title
    :param bookname: book name to match against the result title
    :param book: dict with 'bookid', 'authorName' and 'bookName'
    :param searchtype: name of the search type, only used in log messages
    :return: False if nothing suitable was found or the download failed,
             True if the book was already marked snatched,
             True + True (2) if this call snatched it
    """
    myDB = database.DBConnection()
    dictrepl = {
        '...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
        ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '',
        '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '',
        ':': '', '!': '', '-': ' ',
        # raw string: same characters as before, without the invalid "\s" escape
        r'\s\s': ' '
    }

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = getList(lazylibrarian.REJECT_WORDS)
    maxsize = check_int(lazylibrarian.REJECT_MAXSIZE, 0)  # 0 means no size limit

    matches = []

    # bit of a misnomer now, rss can search both tor and nzb rss feeds
    for tor in resultlist:
        torTitle = unaccented_str(replace_all(tor['tor_title'], dictrepl)).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        tor_Author_match = fuzz.token_set_ratio(authorname, torTitle)
        tor_Title_match = fuzz.token_set_ratio(bookname, torTitle)
        logger.debug("RSS Author/Title Match: %s/%s for %s" %
                     (tor_Author_match, tor_Title_match, torTitle))
        tor_url = tor['tor_url']

        rejected = False

        # the url comes from a feed, so bind it rather than formatting it into the SQL
        already_failed = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"',
                                    (tor_url,))
        if already_failed:
            logger.debug("Rejecting %s, blacklisted at %s" %
                         (torTitle, already_failed['NZBprov']))
            rejected = True

        if not rejected:
            lower_title = torTitle.lower()
            for word in reject_list:
                # a reject word is allowed if it is part of the author or book name
                if word in lower_title and word not in authorname.lower() \
                        and word not in bookname.lower():
                    rejected = True
                    logger.debug("Rejecting %s, contains %s" % (torTitle, word))
                    break

        tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
        if tor_size_temp is None:
            tor_size_temp = 1000
        tor_size = round(float(tor_size_temp) / 1048576, 2)

        if not rejected:
            if maxsize and tor_size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % torTitle)

        if not rejected:
            bookid = book['bookid']
            tor_Title = (book["authorName"] + ' - ' + book['bookName'] +
                         ' LL.(' + book['bookid'] + ')').strip()
            tor_prov = tor['tor_prov']

            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor_prov,
                "BookID": bookid,
                "NZBdate": now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }

            score = (tor_Title_match + tor_Author_match) / 2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(torTitle))
            words -= len(getList(authorname))
            words -= len(getList(bookname))
            score -= abs(words)
            matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]

        if score < match_ratio:
            logger.debug(u'Nearest RSS match (%s%%): %s using %s search for %s %s' %
                         (score, nzb_Title, searchtype, authorname, bookname))
            return False

        logger.info(u'Best RSS match (%s%%): %s using %s search' %
                    (score, nzb_Title, searchtype))

        snatchedbooks = myDB.match('SELECT * from books WHERE BookID=? and Status="Snatched"',
                                   (newValueDict["BookID"],))
        if snatchedbooks:  # check if one of the other downloaders got there first
            logger.info('%s already marked snatched' % nzb_Title)
            return True
        else:
            myDB.upsert("wanted", newValueDict, controlValueDict)
            tor_url = controlValueDict["NZBurl"]
            if '.nzb' in tor_url:
                snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            else:
                # Substituting <authkey> / <password.hashed> placeholders in
                # private tracker urls is not implemented.
                # NOTE(review): arguments are passed as (bookid, provider, title, url);
                # confirm this matches the TORDownloadMethod signature in scope,
                # and whether it returns a bool or a (bool, message) pair.
                snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], tor_url)

            if snatch:
                logger.info('Downloading %s from %s' %
                            (newValueDict["NZBtitle"], newValueDict["NZBprov"]))
                notify_snatch("%s from %s at %s" %
                              (newValueDict["NZBtitle"], newValueDict["NZBprov"], now()))
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
    else:
        logger.debug("No RSS found for " + (book["authorName"] + ' ' +
                                            book['bookName']).strip())
    return False
def searchItem(item=None, bookid=None):
    """
    Ask every enabled provider type (NZB, torrent, RSS) for a "general"
    search on item and merge the answers into one normalised list.
    Returns a list of dicts with the keys score, title, provider, size,
    date, url and mode. The list is empty when item is empty or when
    there is no internet connection.
    """
    if not item:
        return []

    if not internet():
        logger.debug('Search Item: No internet connection')
        return []

    searchterm = unaccented_str(item)
    # with no bookid supplied the search term doubles as the id
    book = {'searchterm': searchterm, 'bookid': bookid if bookid else searchterm}

    nproviders = lazylibrarian.USE_NZB() + lazylibrarian.USE_TOR() + lazylibrarian.USE_RSS()
    logger.debug('Searching %s providers for %s' % (nproviders, searchterm))

    found = []
    if lazylibrarian.USE_NZB():
        hits, active = IterateOverNewzNabSites(book, 'general')
        if active:
            found.extend(hits)
    if lazylibrarian.USE_TOR():
        hits, active = IterateOverTorrentSites(book, 'general')
        if active:
            found.extend(hits)
    if lazylibrarian.USE_RSS():
        hits, active = IterateOverRSSSites()
        if active:
            found.extend(hits)

    def pick(entry, nzb_key, tor_key):
        # the torrent-style key wins if an entry carries both spellings
        value = ''
        for key in (nzb_key, tor_key):
            if key in entry:
                value = entry[key]
        return value

    # reprocess to get consistent results
    searchresults = []
    for entry in found:
        title = pick(entry, 'nzbtitle', 'tor_title')
        url = pick(entry, 'nzburl', 'tor_url')
        provider = pick(entry, 'nzbprov', 'tor_prov')
        size = pick(entry, 'nzbsize', 'tor_size')
        date = pick(entry, 'nzbdate', 'tor_date')
        mode = pick(entry, 'nzbmode', 'tor_type')

        if not (title and provider and mode and url):
            continue

        # Not all results have a date or a size
        date = date or 'Fri, 01 Jan 1970 00:00:00 +0100'
        size = size or '1000'

        # match percentage, less one point for every word of difference in
        # length between the search term and the title, so the closest wins
        score = fuzz.token_set_ratio(searchterm, title)
        score -= abs(len(getList(searchterm)) - len(getList(title)))
        if score < 40:  # ignore wildly wrong results?
            continue

        # keep only the part of the url before the first '?'
        # NOTE(review): a 'magnet:?...' link would be cut down to 'magnet:'
        # here - confirm whether magnet links can reach this point
        searchresults.append({
            'score': score,
            'title': title,
            'provider': provider,
            'size': size,
            'date': date,
            'url': urllib.quote_plus(url.split('?')[0]),
            'mode': mode
        })

    logger.debug('Found %s results for %s' % (len(searchresults), searchterm))
    return searchresults
def processResultList(resultlist, book, searchtype):
    """
    Walk through resultlist looking for an entry whose cleaned-up title
    matches both the author and the name of book, and try to download it.
    Each acceptable entry is written to the "wanted" table; unless the book
    is already marked Snatched it is then handed to a downloader, and the
    first successful hand-off ends the search.
    Returns True if a download was started, otherwise False.
    """
    myDB = database.DBConnection()

    # replacements applied to the provider's title before matching
    # (punctuation and digits go; common short words are left alone)
    title_cleanup = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                     ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '',
                     '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '',
                     ':': '', '!': '', '-': ' ', '\s\s': ' '}
    # lighter set of replacements applied to our own author and book names
    name_cleanup = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                    ',': '', '*': '', ':': '', ';': ''}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)

    for nzb in resultlist:
        cleaned = formatter.replace_all(nzb['nzbtitle'], title_cleanup)
        cleaned = formatter.latinToAscii(cleaned).strip()
        cleaned = re.sub(r"\s\s+", " ", cleaned)  # remove extra whitespace
        author = formatter.latinToAscii(formatter.replace_all(book['authorName'], name_cleanup))
        title = formatter.latinToAscii(formatter.replace_all(book['bookName'], name_cleanup))

        author_pct = fuzz.token_set_ratio(author, cleaned)
        book_pct = fuzz.token_set_ratio(title, cleaned)
        logger.debug(u"NZB author/book Match: %s/%s for %s" % (author_pct, book_pct, cleaned))

        # a reject word only counts if it is not part of the author or title we want
        rejected = False
        for word in reject_list:
            if word not in cleaned.lower():
                continue
            if word in author.lower() or word in title.lower():
                continue
            rejected = True
            logger.debug("Rejecting %s, contains %s" % (cleaned, word))
            break

        if rejected or not (author_pct >= match_ratio and book_pct >= match_ratio):
            continue

        logger.debug(u'Found NZB: %s using %s search' % (nzb['nzbtitle'], searchtype))
        bookid = book['bookid']
        wanted_title = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()
        nzburl = nzb['nzburl']
        nzbprov = nzb['nzbprov']
        raw_date = nzb['nzbdate']
        raw_size = nzb['nzbsize']
        # Need to cater for when this is NONE (Issue 35)
        if raw_size is None:
            raw_size = 1000
        nzbsize = str(round(float(raw_size) / 1048576, 2)) + ' MB'
        # the converted date is not used below; the call is kept as it was
        nzbdate = formatter.nzbdate2format(raw_date)
        nzbmode = nzb['nzbmode']

        controlValueDict = {"NZBurl": nzburl}
        newValueDict = {
            "NZBprov": nzbprov,
            "BookID": bookid,
            "NZBdate": formatter.now(),  # when we asked for it
            "NZBsize": nzbsize,
            "NZBtitle": wanted_title,
            "NZBmode": nzbmode,
            "Status": "Skipped"
        }
        myDB.upsert("wanted", newValueDict, controlValueDict)

        already_snatched = myDB.action(
            'SELECT * from books WHERE BookID="%s" and Status="Snatched"' % bookid).fetchone()
        if already_snatched:
            continue

        downloader = TORDownloadMethod if nzbmode == "torznab" else NZBDownloadMethod
        if downloader(bookid, nzbprov, wanted_title, nzburl):
            notifiers.notify_snatch(wanted_title + ' at ' + formatter.now())
            common.schedule_job(action='Start', target='processDir')
            return True

    logger.debug("No nzb's found for " + (book["authorName"] + ' ' + book['bookName']).strip() +
                 " using searchtype " + searchtype)
    return False
def notify_download(self, title, bookid=None, force=False):
    """
    Send the email notification for a completed download, attaching the
    downloaded file when EMAIL_SENDFILE_ONDOWNLOAD is set and bookid
    identifies a book or a magazine issue.

    title:  message text; replaced by the BookName (or "Title - IssueDate"
            for an issue) when bookid is found in the database
    bookid: looked up as a BookID in the books table, then as an IssueID
            in the issues table if no book matches
    force:  notify even when USE_EMAIL / EMAIL_NOTIFY_ONDOWNLOAD are off
    Returns False when the config suppresses the notification, otherwise
    the result of self._notify()
    """
    # suppress notifications if the notifier is disabled but the notify options are checked
    if not lazylibrarian.CONFIG['USE_EMAIL'] and not force:
        return False
    if not (lazylibrarian.CONFIG['EMAIL_NOTIFY_ONDOWNLOAD'] or force):
        return False

    files = None
    event = notifyStrings[NOTIFY_DOWNLOAD]
    logger.debug('Email send attachment is %s' % lazylibrarian.CONFIG['EMAIL_SENDFILE_ONDOWNLOAD'])
    if lazylibrarian.CONFIG['EMAIL_SENDFILE_ONDOWNLOAD']:
        if not bookid:
            logger.debug('Email request to attach book, but no bookid')
        else:
            filename = None
            preftype = None
            typelist = getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
            # an empty EBOOK_TYPE setting must not crash the notifier
            default_type = typelist[0] if typelist else None
            myDB = database.DBConnection()

            if lazylibrarian.CONFIG['HTTP_LOOK'] == 'legacy' or not lazylibrarian.CONFIG['USER_ACCOUNTS']:
                preftype = default_type
                logger.debug('Preferred filetype = %s' % preftype)
            else:
                # use the BookType stored for the user named in the ll_uid cookie, if any
                cookie = cherrypy.request.cookie
                if cookie and 'll_uid' in cookie:
                    res = myDB.match('SELECT BookType from users where UserID=?',
                                     (cookie['ll_uid'].value,))
                    if res and res['BookType']:
                        preftype = res['BookType']
                        logger.debug('User preferred filetype = %s' % preftype)
                if not preftype:
                    # assign first so the log line shows the value actually used
                    preftype = default_type
                    logger.debug('Default preferred filetype = %s' % preftype)

            data = myDB.match('SELECT BookFile,BookName from books where BookID=?', (bookid,))
            if data:
                bookfile = data['BookFile']
                if bookfile and os.path.isfile(bookfile):
                    basename = os.path.splitext(bookfile)[0]
                    # which of the configured types exist on disk next to the book file
                    types = [ext for ext in typelist if os.path.isfile(basename + '.' + ext)]
                    logger.debug('Available filetypes: %s' % str(types))

                    if preftype in types:
                        filename = basename + '.' + preftype
                    elif types:
                        filename = basename + '.' + types[0]
                    else:
                        # nothing to attach; send the notification without a file
                        logger.debug('No configured filetype found for %s' % bookfile)

                title = data['BookName']
                logger.debug('Found %s for bookid %s' % (filename, bookid))
            else:
                logger.debug('[%s] is not a valid bookid' % bookid)
                # not a book, so try the same id as a magazine issue
                data = myDB.match('SELECT IssueFile,Title,IssueDate from issues where IssueID=?',
                                  (bookid,))
                if data:
                    filename = data['IssueFile']
                    title = "%s - %s" % (data['Title'], data['IssueDate'])
                    logger.debug('Found %s for issueid %s' % (filename, bookid))
                else:
                    logger.debug('[%s] is not a valid issueid' % bookid)
                    filename = ''

            if filename:
                files = [filename]  # could add cover_image, opf
                event = "LazyLibrarian Download"

    return self._notify(message=title, event=event, force=force, files=files)