def get_searchterm(book, searchType):
    """Build (authorname, bookname) search terms for the given book dict.

    For 'book', 'audio' and any 'short*' search type the title is
    de-duplicated against the author name and the author name is reduced
    to words that are not initials or configured name postfixes
    (Jr, Phd, ...).  Short searches also drop any bracketed suffix from
    the title.
    """
    authorname = cleanName(book['authorName'], "'")
    bookname = cleanName(book['bookName'], "'")
    if searchType in ['book', 'audio'] or 'short' in searchType:
        if bookname == authorname and book['bookSub']:
            # books like "Spike Milligan: Man of Letters"
            # where we split the title/subtitle on ':'
            bookname = cleanName(book['bookSub'])
        if bookname.startswith(authorname) and len(bookname) > len(authorname):
            # books like "Spike Milligan In his own words"
            # where we don't want to look for "Spike Milligan Spike Milligan In his own words"
            bookname = bookname[len(authorname) + 1:]
        bookname = bookname.strip()
        # no initials or extensions after surname eg L. E. Modesitt Jr. -> Modesitt
        # and Charles H. Elliott, Phd -> Charles Elliott
        # but Tom Holt -> Tom Holt
        # Calibre directories may have trailing '.' replaced by '_' eg Jr_
        if ' ' in authorname:
            postfix = getList(lazylibrarian.CONFIG['NAME_POSTFIX'])
            kept_words = []
            for part in authorname.split(' '):
                part = part.strip('.').strip('_')
                # keep only multi-letter words that aren't known postfixes
                if len(part) > 1 and part.lower() not in postfix:
                    kept_words.append(part)
            authorname = ' '.join(kept_words)
        if 'short' in searchType and '(' in bookname:
            bookname = bookname.split('(')[0].strip()
    return authorname, bookname
def get_searchterm(book, searchType):
    """Build (authorname, bookname) search terms for the given book dict.

    For 'book' and 'shortbook' searches the author's leading initials are
    stripped, both fields are run through cleanName(), and the title is
    de-duplicated against the author name; 'shortbook' additionally drops
    any bracketed suffix from the title.  Any other search type returns
    the raw names unchanged.
    """
    authorname = book['authorName']
    bookname = book['bookName']
    if searchType == "book" or searchType == "shortbook":
        # strip any leading initials
        # BUGFIX: guard the length so a name made entirely of initials
        # (e.g. "A. B.") can't raise IndexError once the string is consumed
        while len(authorname) > 1 and authorname[1] in '. ':
            authorname = authorname[2:].strip()  # and leading whitespace
        # middle initials can't have a dot
        authorname = authorname.replace('. ', ' ')
        authorname = cleanName(authorname)
        bookname = cleanName(bookname)
        if bookname == authorname and book['bookSub']:
            # books like "Spike Milligan: Man of Letters"
            # where we split the title/subtitle on ':'
            bookname = cleanName(book['bookSub'])
        if bookname.startswith(authorname) and len(bookname) > len(authorname):
            # books like "Spike Milligan In his own words"
            # where we don't want to look for "Spike Milligan Spike Milligan In his own words"
            bookname = bookname[len(authorname) + 1:]
        bookname = bookname.strip()
        if searchType == "book":
            return authorname, bookname
        if searchType == "shortbook" and '(' in bookname:
            bookname = bookname.split('(')[0].strip()
            return authorname, bookname
    # any other searchType
    return authorname, bookname
def getWorkSeries(bookID=None):
    """ Return the series names and numbers in series for the given bookid as a dictionary """
    seriesdict = {}
    if not bookID:
        logger.error("getWorkSeries - No bookID")
        return seriesdict
    work = getBookWork(bookID, "Series")
    if not work:
        return seriesdict
    try:
        # isolate the Series section of the work page, then split on anchors
        anchors = work.split('<h3><b>Series:')[1].split('</h3>')[0].split('<a href="/series/')[1:]
    except IndexError:
        # page has no Series section
        return seriesdict
    for anchor in anchors:
        try:
            label = anchor.split('">')[1].split('</a>')[0]
        except IndexError:
            continue  # malformed anchor, skip it
        if label and '(' in label:
            # e.g. "Discworld (2)" -> name "Discworld", number "2"
            number = label.split('(')[1].split(')')[0].strip()
            name = label.split(' (')[0].strip()
        else:
            number = ''
            name = label.strip()
        name = cleanName(unaccented(name), '&/')
        number = cleanName(unaccented(number))
        seriesdict[name] = number
    return seriesdict
def getWorkSeries(bookID=None):
    """ Return the series names and numbers in series for the given id as a list of tuples
        For goodreads the id is a WorkID, for librarything it's a BookID """
    myDB = database.DBConnection()
    serieslist = []  # list of (seriesid, seriesnum, seriesname) tuples
    if not bookID:
        logger.error("getWorkSeries - No bookID")
        return serieslist
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        # GoodReads: fetch the work's series list as XML
        URL = "https://www.goodreads.com/work/"
        seriesurl = URL + bookID + "/series?format=xml&key=" + lazylibrarian.CONFIG['GR_API']
        rootxml, in_cache = gr_xml_request(seriesurl)
        if rootxml is None:
            logger.warn('Error getting XML for %s' % seriesurl)
        else:
            resultxml = rootxml.getiterator('series_work')
            for item in resultxml:
                try:
                    seriesname = item.find('./series/title').text
                    seriesname = seriesname.strip('\n').strip('\n').strip()
                    seriesid = item.find('./series/id').text
                    seriesnum = item.find('./user_position').text
                except (KeyError, AttributeError):
                    # missing/malformed entry, skip it
                    continue
                if seriesname and seriesid:
                    seriesname = cleanName(unaccented(seriesname), '&/')
                    seriesnum = cleanName(unaccented(seriesnum))
                    serieslist.append((seriesid, seriesnum, seriesname))
                    # keep the local series table in step with goodreads:
                    # insert unknown series, or repair a changed SeriesID
                    match = myDB.match('SELECT SeriesID from series WHERE SeriesName=?', (seriesname,))
                    if not match:
                        myDB.action('INSERT INTO series VALUES (?, ?, ?, ?, ?)',
                                    (seriesid, seriesname, "Active", 0, 0))
                    elif match['SeriesID'] != seriesid:
                        myDB.action('UPDATE series SET SeriesID=? WHERE SeriesName=?',
                                    (seriesid, seriesname))
    else:
        # non-GoodReads: scrape series info out of the librarything work page html
        work = getBookWork(bookID, "Series")
        if work:
            try:
                slist = work.split('<h3><b>Series:')[1].split('</h3>')[0].split('<a href="/series/')
                for item in slist[1:]:
                    try:
                        series = item.split('">')[1].split('</a>')[0]
                        if series and '(' in series:
                            # e.g. "Discworld (2)" -> name and number-in-series
                            seriesnum = series.split('(')[1].split(')')[0].strip()
                            series = series.split(' (')[0].strip()
                        else:
                            seriesnum = ''
                            series = series.strip()
                        seriesname = cleanName(unaccented(series), '&/')
                        seriesnum = cleanName(unaccented(seriesnum))
                        # no seriesid available from this source
                        serieslist.append(('', seriesnum, seriesname))
                    except IndexError:
                        pass
            except IndexError:
                # page has no Series section
                pass
    return serieslist
def get_book_desc(isbn=None, author=None, title=None):
    """ GoodReads does not always have a book description in its api results
        due to restrictive TOS from some of its providers.
        Try to get missing descriptions from googlebooks
        Return description, empty string if not found, None if error"""
    if not author or not title:
        return ''
    author = cleanName(author)
    title = cleanName(title)
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        baseurl = 'https://www.googleapis.com/books/v1/volumes?q='
        urls = [baseurl + quote_plus('inauthor:%s intitle:%s' % (author, title))]
        if isbn:
            # try the isbn lookup first, it's the most precise
            urls.insert(0, baseurl + quote_plus('isbn:' + isbn))
        for url in urls:
            if lazylibrarian.CONFIG['GB_API']:
                url += '&key=' + lazylibrarian.CONFIG['GB_API']
            # BUGFIX: was len(CONFIG['GB_COUNTRY'] == 2), i.e. len() of a bool,
            # which raises TypeError; country codes are two letters
            if lazylibrarian.CONFIG['GB_COUNTRY'] and len(lazylibrarian.CONFIG['GB_COUNTRY']) == 2:
                url += '&country=' + lazylibrarian.CONFIG['GB_COUNTRY']
            results, cached = gb_json_request(url)
            if results is None:  # there was an error
                return None
            if results and not cached:
                # rate-limit live googlebooks requests
                time.sleep(1)
            if results and 'items' in results:
                for item in results['items']:
                    # noinspection PyBroadException
                    try:
                        auth = item['volumeInfo']['authors'][0]
                        book = item['volumeInfo']['title']
                        desc = item['volumeInfo']['description']
                        # fuzzy match: title must match closely, author loosely
                        book_fuzz = fuzz.token_set_ratio(book, title)
                        auth_fuzz = fuzz.token_set_ratio(auth, author)
                        if book_fuzz > 98 and auth_fuzz > 80:
                            return desc
                    except Exception:
                        # item missing authors/title/description, skip it
                        pass
    return ''
def get_book_desc(isbn=None, author=None, title=None):
    """ GoodReads does not always have a book description in its api results
        due to restrictive TOS from some of its providers.
        Try to get missing descriptions from googlebooks
        Return description, empty string if not found, None if error"""
    if not author or not title:
        return ''
    author = cleanName(author)
    title = cleanName(title)
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        baseurl = 'https://www.googleapis.com/books/v1/volumes?q='
        urls = [baseurl + quote_plus('inauthor:%s intitle:%s' % (author, title))]
        if isbn:
            # isbn lookup is the most precise, try it first
            urls.insert(0, baseurl + quote_plus('isbn:' + isbn))
        for url in urls:
            if lazylibrarian.CONFIG['GB_API']:
                url += '&key=' + lazylibrarian.CONFIG['GB_API']
            # BUGFIX: was len(CONFIG['GB_COUNTRY'] == 2), i.e. len() of a bool,
            # which raises TypeError; country codes are two letters
            if lazylibrarian.CONFIG['GB_COUNTRY'] and len(lazylibrarian.CONFIG['GB_COUNTRY']) == 2:
                url += '&country=' + lazylibrarian.CONFIG['GB_COUNTRY']
            results, cached = gb_json_request(url)
            if results is None:  # there was an error
                return None
            if results and not cached:
                # rate-limit live googlebooks requests
                time.sleep(1)
            if results and 'items' in results:
                for item in results['items']:
                    # noinspection PyBroadException
                    try:
                        auth = item['volumeInfo']['authors'][0]
                        book = item['volumeInfo']['title']
                        desc = item['volumeInfo']['description']
                        # fuzzy match: title must match closely, author loosely
                        book_fuzz = fuzz.token_set_ratio(book, title)
                        auth_fuzz = fuzz.token_set_ratio(auth, author)
                        if book_fuzz > 98 and auth_fuzz > 80:
                            return desc
                    except Exception:
                        # item missing authors/title/description, skip it
                        pass
    return ''
def ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode):
    """Build the newznab/torznab query-parameter dict for one provider.

    provider is the provider config dict (BOOKSEARCH/MAGSEARCH/
    GENERALSEARCH capability fields and category ids), book carries the
    search terms, searchType is 'book', 'shortbook', 'mag' or anything
    else for a general search.  Returns the params dict, or None if the
    provider supports no suitable search mode.
    """
    params = None
    if searchType in ["book", "shortbook"]:
        # "book" and "shortbook" built identical parameter dicts; they only
        # differ in the terms get_searchterm() returns, so share one path
        authorname, bookname = get_searchterm(book, searchType)
        if provider['BOOKSEARCH'] and provider['BOOKCAT']:  # if specific booksearch, use it
            params = {
                "t": provider['BOOKSEARCH'],
                "apikey": api_key,
                "title": bookname,
                "author": authorname,
                "cat": provider['BOOKCAT']
            }
        elif provider['GENERALSEARCH'] and provider['BOOKCAT']:  # if not, try general search
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "q": authorname + ' ' + bookname,
                "cat": provider['BOOKCAT']
            }
    elif searchType == "mag":
        if provider['MAGSEARCH'] and provider['MAGCAT']:  # if specific magsearch, use it
            params = {
                "t": provider['MAGSEARCH'],
                "apikey": api_key,
                "cat": provider['MAGCAT'],
                "q": cleanName(book['searchterm']),
                "extended": provider['EXTENDED'],
            }
        elif provider['GENERALSEARCH'] and provider['MAGCAT']:
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "cat": provider['MAGCAT'],
                "q": cleanName(book['searchterm']),
                "extended": provider['EXTENDED'],
            }
    else:
        if provider['GENERALSEARCH']:
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                # this is a general search
                "q": cleanName(book['searchterm']),
                "extended": provider['EXTENDED'],
            }
    if params:
        logger.debug('[NewzNabPlus] - %s Search parameters set to %s' % (searchMode, str(params)))
    else:
        logger.debug('[NewzNabPlus] - %s No matching search parameters' % searchMode)
    return params
def find_book(self, bookid=None, bookstatus="None"):
    """Look up a single googlebooks volume by id and add it to the database.

    Fetches the volume, resolves/creates its author via GoodReads, upserts
    the book row, then fills in cover image, series and work page.
    Logs and returns early (None) if the volume or its author can't be
    resolved.  bookstatus is the Status to store for the new book; if
    falsy the configured NEWBOOK_STATUS is used.
    """
    myDB = database.DBConnection()
    if not lazylibrarian.CONFIG['GB_API']:
        # not fatal here, but the request below will likely fail
        logger.warn('No GoogleBooks API key, check config')
    URL = 'https://www.googleapis.com/books/v1/volumes/' + \
        str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
    jsonresults, in_cache = gb_json_request(URL)
    if jsonresults is None:
        logger.debug('No results found for %s' % bookid)
        return
    if not bookstatus:
        bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']
    book = bookdict(jsonresults)
    # normalise the title: drop quotes, map ':' to '.', strip accents
    dic = {':': '.', '"': '', '\'': ''}
    bookname = replace_all(book['name'], dic)
    bookname = unaccented(bookname)
    bookname = bookname.strip()  # strip whitespace
    if not book['author']:
        logger.debug('Book %s does not contain author field, skipping' % bookname)
        return
    # warn if language is in ignore list, but user said they wanted this book
    valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
    if book['lang'] not in valid_langs and 'All' not in valid_langs:
        logger.debug('Book %s googlebooks language does not match preference, %s' %
                     (bookname, book['lang']))
    # the date checks below only warn; an explicit add overrides preferences
    if lazylibrarian.CONFIG['NO_PUBDATE']:
        if not book['date'] or book['date'] == '0000':
            logger.warn('Book %s Publication date does not match preference, %s' %
                        (bookname, book['date']))
    if lazylibrarian.CONFIG['NO_FUTURE']:
        # compare year part only
        if book['date'] > today()[:4]:
            logger.warn('Book %s Future publication date does not match preference, %s' %
                        (bookname, book['date']))
    authorname = book['author']
    # resolve the author through GoodReads to get a stable AuthorID
    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if author:
        AuthorID = author['authorid']
        match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
        if not match:
            # unknown id: maybe we already have this author under another id
            match = myDB.match('SELECT AuthorID from authors WHERE AuthorName=?',
                               (author['authorname'],))
            if match:
                logger.debug('%s: Changing authorid from %s to %s' %
                             (author['authorname'], AuthorID, match['AuthorID']))
                AuthorID = match['AuthorID']  # we have a different authorid for that authorname
            else:  # no author but request to add book, add author with newauthor status
                # User hit "add book" button from a search or a wishlist import
                newauthor_status = 'Active'
                if lazylibrarian.CONFIG['NEWAUTHOR_STATUS'] in ['Skipped', 'Ignored']:
                    newauthor_status = 'Paused'
                controlValueDict = {"AuthorID": AuthorID}
                newValueDict = {
                    "AuthorName": author['authorname'],
                    "AuthorImg": author['authorimg'],
                    "AuthorLink": author['authorlink'],
                    "AuthorBorn": author['authorborn'],
                    "AuthorDeath": author['authordeath'],
                    "DateAdded": today(),
                    "Status": newauthor_status
                }
                authorname = author['authorname']
                myDB.upsert("authors", newValueDict, controlValueDict)
                if lazylibrarian.CONFIG['NEWAUTHOR_BOOKS']:
                    # optionally pull in the rest of the new author's books
                    self.get_author_books(AuthorID,
                                          entrystatus=lazylibrarian.CONFIG['NEWAUTHOR_STATUS'])
    else:
        logger.warn("No AuthorID for %s, unable to add book %s" % (book['author'], bookname))
        return
    # upsert the book row itself
    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorID": AuthorID,
        "BookName": bookname,
        "BookSub": book['sub'],
        "BookDesc": book['desc'],
        "BookIsbn": book['isbn'],
        "BookPub": book['pub'],
        "BookGenre": book['genre'],
        "BookImg": book['img'],
        "BookLink": book['link'],
        "BookRate": float(book['rate']),
        "BookPages": book['pages'],
        "BookDate": book['date'],
        "BookLang": book['lang'],
        "Status": bookstatus,
        "AudioStatus": lazylibrarian.CONFIG['NEWAUDIO_STATUS'],
        "BookAdded": today()
    }
    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s by %s added to the books database" % (bookname, authorname))
    if 'nocover' in book['img'] or 'nophoto' in book['img']:
        # try to get a cover from another source
        workcover, source = getBookCover(bookid)
        if workcover:
            logger.debug('Updated cover for %s using %s' % (bookname, source))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)
    elif book['img'] and book['img'].startswith('http'):
        # cache the remote cover image locally
        link, success, _ = cache_img("book", bookid, book['img'])
        if success:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)
        else:
            logger.debug('Failed to cache image for %s' % book['img'])
    # series info: start from googlebooks data, prefer getWorkSeries() if enabled
    serieslist = []
    if book['series']:
        serieslist = [('', book['seriesNum'],
                       cleanName(unaccented(book['series']), '&/'))]
    if lazylibrarian.CONFIG['ADD_SERIES']:
        newserieslist = getWorkSeries(bookid)
        if newserieslist:
            serieslist = newserieslist
            logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
    setSeries(serieslist, bookid)
    worklink = getWorkPage(bookid)
    if worklink:
        controlValueDict = {"BookID": bookid}
        newValueDict = {"WorkPage": worklink}
        myDB.upsert("books", newValueDict, controlValueDict)
def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped",
                     entrystatus='Active', refresh=False):
    """Import/refresh all googlebooks results for one author.

    Pages through the googlebooks inauthor: search 40 results at a time,
    resolves each item's language, applies rejection rules (bad title
    characters, future/missing publication date, missing isbn, duplicate
    author/title or bookid), upserts accepted books with cover/series/
    workpage details, then updates the author row and the stats table.
    bookstatus/entrystatus are the Status values for new books and for
    the author when done; refresh bypasses the request cache.
    """
    # noinspection PyBroadException
    try:
        logger.debug('[%s] Now processing books with Google Books API' % authorname)
        # google doesnt like accents in author names
        set_url = self.url + quote('inauthor:"%s"' % unaccented_str(authorname))
        # counters for the stats table and the summary log lines
        api_hits = 0
        gr_lang_hits = 0
        lt_lang_hits = 0
        gb_lang_change = 0
        cache_hits = 0
        not_cached = 0  # NOTE(review): never incremented in this chunk, stored as 0 in stats
        startindex = 0
        resultcount = 0
        removedResults = 0
        duplicates = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0
        number_results = 1  # seed value so the first loop iteration runs
        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        # Artist is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)
        try:
            # page through results, 40 per request (googlebooks maximum)
            while startindex < number_results:
                self.params['startIndex'] = startindex
                URL = set_url + '&' + urlencode(self.params)
                try:
                    jsonresults, in_cache = gb_json_request(URL, useCache=not refresh)
                    if jsonresults is None:
                        number_results = 0
                    else:
                        if not in_cache:
                            api_hits += 1
                        number_results = jsonresults['totalItems']
                except Exception as err:
                    if hasattr(err, 'reason'):
                        errmsg = err.reason
                    else:
                        errmsg = str(err)
                    logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                    break
                if number_results == 0:
                    logger.warn('Found no results for %s' % authorname)
                    break
                else:
                    logger.debug('Found %s result%s for %s' %
                                 (number_results, plural(number_results), authorname))
                startindex += 40
                for item in jsonresults['items']:
                    total_count += 1
                    book = bookdict(item)
                    # skip if no author, no author is no book.
                    if not book['author']:
                        logger.debug('Skipped a result without authorfield.')
                        continue
                    # isbnhead is the language-group prefix of the isbn
                    isbnhead = ""
                    if len(book['isbn']) == 10:
                        isbnhead = book['isbn'][0:3]
                    elif len(book['isbn']) == 13:
                        isbnhead = book['isbn'][3:6]
                    booklang = book['lang']
                    # do we care about language?
                    if "All" not in valid_langs:
                        if book['isbn']:
                            # seems google lies to us, sometimes tells us books
                            # are in english when they are not
                            if booklang == "Unknown" or booklang == "en":
                                googlelang = booklang
                                match = False
                                # 1) our own isbn->language cache table
                                lang = myDB.match('SELECT lang FROM languages where isbn=?',
                                                  (isbnhead,))
                                if lang:
                                    booklang = lang['lang']
                                    cache_hits += 1
                                    logger.debug("Found cached language [%s] for [%s]" %
                                                 (booklang, isbnhead))
                                    match = True
                                if not match:
                                    # no match in cache, try lookup dict
                                    if isbnhead:
                                        # 2) static isbn prefix tables (979/978 groups)
                                        if len(book['isbn']) == 13 and book['isbn'].startswith('979'):
                                            for lang in lazylibrarian.isbn_979_dict:
                                                if isbnhead.startswith(lang):
                                                    booklang = lazylibrarian.isbn_979_dict[lang]
                                                    logger.debug("ISBN979 returned %s for %s" %
                                                                 (booklang, isbnhead))
                                                    match = True
                                                    break
                                        elif (len(book['isbn']) == 10) or \
                                                (len(book['isbn']) == 13 and book['isbn'].startswith('978')):
                                            for lang in lazylibrarian.isbn_978_dict:
                                                if isbnhead.startswith(lang):
                                                    booklang = lazylibrarian.isbn_978_dict[lang]
                                                    logger.debug("ISBN979 returned %s for %s" %
                                                                 (booklang, isbnhead))
                                                    match = True
                                                    break
                                        if match:
                                            myDB.action('insert into languages values (?, ?)',
                                                        (isbnhead, booklang))
                                if not match:
                                    # 3) fall back to librarything's thingLang lookup
                                    booklang = thingLang(book['isbn'])
                                    lt_lang_hits += 1
                                    if booklang:
                                        match = True
                                        myDB.action('insert into languages values (?, ?)',
                                                    (isbnhead, booklang))
                                if match:
                                    # We found a better language match
                                    if googlelang == "en" and booklang not in [
                                        "en-US", "en-GB", "eng"
                                    ]:  # these are all english, may need to expand this list
                                        logger.debug("%s Google thinks [%s], we think [%s]" %
                                                     (book['name'], googlelang, booklang))
                                        gb_lang_change += 1
                                else:  # No match anywhere, accept google language
                                    booklang = googlelang
                        # skip if language is in ignore list
                        if booklang not in valid_langs:
                            logger.debug('Skipped [%s] with language %s' %
                                         (book['name'], booklang))
                            ignored += 1
                            continue
                    # per-book state; rejected is a reason code, 0 = accepted
                    rejected = 0
                    check_status = False
                    book_status = bookstatus  # new_book status, or new_author status
                    audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
                    added = today()
                    locked = False
                    existing_book = None
                    bookname = book['name']
                    bookid = item['id']
                    if not bookname:
                        logger.debug('Rejecting bookid %s for %s, no bookname' %
                                     (bookid, authorname))
                        removedResults += 1
                        rejected = 1
                    else:
                        bookname = replace_all(unaccented(bookname),
                                               {':': '.', '"': '', '\'': ''}).strip()
                        # GoodReads sometimes has multiple bookids for the same book (same
                        # author/title, different editions) and sometimes uses the same
                        # bookid if the book is the same but the title is slightly
                        # different. Not sure if googlebooks does too, but we only want one...
                        cmd = 'SELECT Status,AudioStatus,Manual,BookAdded FROM books WHERE BookID=?'
                        existing_book = myDB.match(cmd, (bookid,))
                        if existing_book:
                            # keep the existing statuses/lock/date for a known book
                            book_status = existing_book['Status']
                            audio_status = existing_book['AudioStatus']
                            locked = existing_book['Manual']
                            added = existing_book['BookAdded']
                            if locked is None:
                                locked = False
                            elif locked.isdigit():
                                # Manual column is stored as text
                                locked = bool(int(locked))
                        else:
                            # NOTE(review): rejected is always 0 at this point, so this
                            # branch always takes the else path — looks like a leftover
                            # from a version where rejection ran earlier; confirm
                            if rejected in [3, 4, 5]:
                                book_status = 'Ignored'
                                audio_status = 'Ignored'
                            else:
                                book_status = bookstatus  # new_book status, or new_author status
                                audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
                            added = today()
                            locked = False
                    if not rejected and re.match(
                            '[^\w-]', bookname):  # remove books with bad characters in title
                        logger.debug("[%s] removed book for bad characters" % bookname)
                        removedResults += 1
                        rejected = 2
                    if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                        # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                        if book['date'] > today()[:len(book['date'])]:
                            logger.debug('Rejecting %s, future publication date %s' %
                                         (bookname, book['date']))
                            removedResults += 1
                            rejected = 3
                    if not rejected and lazylibrarian.CONFIG['NO_PUBDATE']:
                        if not book['date']:
                            logger.debug('Rejecting %s, no publication date' % bookname)
                            removedResults += 1
                            rejected = 4
                    if not rejected and lazylibrarian.CONFIG['NO_ISBN']:
                        if not isbnhead:
                            logger.debug('Rejecting %s, no isbn' % bookname)
                            removedResults += 1
                            rejected = 5
                    if not rejected:
                        # different bookid already stored for same author/title?
                        cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                        cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                        match = myDB.match(cmd, (bookname.replace('"', '""'),
                                                 authorname.replace('"', '""')))
                        if match:
                            if match['BookID'] != bookid:
                                # we have a different book with this author/title already
                                logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                             (match['BookID'], authorname, bookname, bookid))
                                rejected = 6
                                duplicates += 1
                    if not rejected:
                        # same bookid already stored (possibly different title/author)?
                        cmd = 'SELECT AuthorName,BookName FROM books,authors'
                        cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                        match = myDB.match(cmd, (bookid,))
                        if match:
                            # we have a book with this bookid already
                            if bookname != match['BookName'] or authorname != match['AuthorName']:
                                logger.debug(
                                    'Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                    (bookid, authorname, bookname,
                                     match['AuthorName'], match['BookName']))
                            else:
                                logger.debug(
                                    'Rejecting bookid %s for [%s][%s] already got this book in database' %
                                    (bookid, authorname, bookname))
                                check_status = True
                            duplicates += 1
                            rejected = 7
                    # store/refresh the book if accepted, or a known duplicate
                    # (check_status), or a date/isbn reject the user wants kept as Ignored
                    if check_status or not rejected or (
                            lazylibrarian.CONFIG['IMP_IGNORE'] and
                            rejected in [3, 4, 5]):  # dates, isbn
                        if not locked:
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {
                                "AuthorID": authorid,
                                "BookName": bookname,
                                "BookSub": book['sub'],
                                "BookDesc": book['desc'],
                                "BookIsbn": book['isbn'],
                                "BookPub": book['pub'],
                                "BookGenre": book['genre'],
                                "BookImg": book['img'],
                                "BookLink": book['link'],
                                "BookRate": float(book['rate']),
                                "BookPages": book['pages'],
                                "BookDate": book['date'],
                                "BookLang": booklang,
                                "Status": book_status,
                                "AudioStatus": audio_status,
                                "BookAdded": added
                            }
                            resultcount += 1
                            myDB.upsert("books", newValueDict, controlValueDict)
                            logger.debug("Book found: " + bookname + " " + book['date'])
                            updated = False
                            if 'nocover' in book['img'] or 'nophoto' in book['img']:
                                # try to get a cover from another source
                                workcover, source = getBookCover(bookid)
                                if workcover:
                                    logger.debug('Updated cover for %s using %s' %
                                                 (bookname, source))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": workcover}
                                    myDB.upsert("books", newValueDict, controlValueDict)
                                    updated = True
                            elif book['img'] and book['img'].startswith('http'):
                                # cache the remote cover image locally
                                link, success, _ = cache_img("book", bookid, book['img'],
                                                             refresh=refresh)
                                if success:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": link}
                                    myDB.upsert("books", newValueDict, controlValueDict)
                                    updated = True
                                else:
                                    logger.debug('Failed to cache image for %s' % book['img'])
                            # series info: googlebooks data, optionally replaced
                            # by getWorkSeries() results
                            serieslist = []
                            if book['series']:
                                serieslist = [('', book['seriesNum'],
                                               cleanName(unaccented(book['series']), '&/'))]
                            if lazylibrarian.CONFIG['ADD_SERIES']:
                                newserieslist = getWorkSeries(bookid)
                                if newserieslist:
                                    serieslist = newserieslist
                                    logger.debug('Updated series: %s [%s]' %
                                                 (bookid, serieslist))
                                    updated = True
                            setSeries(serieslist, bookid)
                            # series membership may change the wanted status
                            new_status = setStatus(bookid, serieslist, bookstatus)
                            if not new_status == book_status:
                                book_status = new_status
                                updated = True
                            worklink = getWorkPage(bookid)
                            if worklink:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"WorkPage": worklink}
                                myDB.upsert("books", newValueDict, controlValueDict)
                            if not existing_book:
                                logger.debug("[%s] Added book: %s [%s] status %s" %
                                             (authorname, bookname, booklang, book_status))
                                added_count += 1
                            elif updated:
                                logger.debug("[%s] Updated book: %s [%s] status %s" %
                                             (authorname, bookname, booklang, book_status))
                                updated_count += 1
                        else:
                            # Manual flag set: leave the user's edits alone
                            book_ignore_count += 1
        except KeyError:
            pass
        deleteEmptySeries()
        logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' %
                     (authorname, api_hits, plural(api_hits)))
        # refresh the author's "last book" summary fields
        cmd = 'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID=?'
        cmd += ' AND Status != "Ignored" order by BookDate DESC'
        lastbook = myDB.match(cmd, (authorid,))
        if lastbook:  # maybe there are no books [remaining] for this author
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
            lastbookimg = lastbook['BookImg']
        else:
            lastbookname = ""
            lastbooklink = ""
            lastbookdate = ""
            lastbookimg = ""
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": entrystatus,
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate,
            "LastBookImg": lastbookimg
        }
        myDB.upsert("authors", newValueDict, controlValueDict)
        logger.debug("Found %s total book%s for author" %
                     (total_count, plural(total_count)))
        logger.debug("Removed %s unwanted language result%s for author" %
                     (ignored, plural(ignored)))
        logger.debug("Removed %s bad character or no-name result%s for author" %
                     (removedResults, plural(removedResults)))
        logger.debug("Removed %s duplicate result%s for author" %
                     (duplicates, plural(duplicates)))
        logger.debug("Found %s book%s by author marked as Ignored" %
                     (book_ignore_count, plural(book_ignore_count)))
        logger.debug("Imported/Updated %s book%s for author" %
                     (resultcount, plural(resultcount)))
        myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                    (authorname.replace('"', '""'), api_hits, gr_lang_hits,
                     lt_lang_hits, gb_lang_change, cache_hits, ignored,
                     removedResults, not_cached, duplicates))
        if refresh:
            logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                        (authorname, added_count, plural(added_count),
                         updated_count, plural(updated_count)))
        else:
            logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                        (authorname, added_count, plural(added_count)))
    except Exception:
        logger.error('Unhandled exception in GB.get_author_books: %s' %
                     traceback.format_exc())
def getWorkSeries(bookID=None):
    """ Return the series names and numbers in series for the given id as a list of tuples
        For goodreads the id is a WorkID, for librarything it's a BookID """
    myDB = database.DBConnection()
    serieslist = []  # list of (seriesid, seriesnum, seriesname) tuples
    if not bookID:
        logger.error("getWorkSeries - No bookID")
        return serieslist
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        # GoodReads: fetch the work's series list as XML
        URL = "https://www.goodreads.com/work/"
        seriesurl = URL + bookID + "/series?format=xml&key=" + lazylibrarian.CONFIG['GR_API']
        rootxml, in_cache = gr_xml_request(seriesurl)
        if rootxml is None:
            logger.warn('Error getting XML for %s' % seriesurl)
        else:
            resultxml = rootxml.getiterator('series_work')
            for item in resultxml:
                try:
                    seriesname = item.find('./series/title').text
                    seriesname = seriesname.strip('\n').strip('\n').strip()
                    seriesid = item.find('./series/id').text
                    seriesnum = item.find('./user_position').text
                except (KeyError, AttributeError):
                    # missing/malformed entry, skip it
                    continue
                if seriesname and seriesid:
                    seriesname = cleanName(unaccented(seriesname), '&/')
                    # cleanName may strip the name to nothing; skip those
                    if seriesname:
                        seriesnum = cleanName(unaccented(seriesnum))
                        serieslist.append((seriesid, seriesnum, seriesname))
                        # keep the local series table in step with goodreads
                        match = myDB.match('SELECT SeriesID from series WHERE SeriesName=?',
                                           (seriesname,))
                        if not match:
                            # new name: only insert if the id isn't already
                            # in use under a different name
                            match = myDB.match('SELECT SeriesName from series WHERE SeriesID=?',
                                               (seriesid,))
                            if not match:
                                myDB.action('INSERT INTO series VALUES (?, ?, ?, ?, ?)',
                                            (seriesid, seriesname, "Active", 0, 0))
                            else:
                                logger.warn("Name mismatch for series %s, [%s][%s]" %
                                            (seriesid, seriesname, match['SeriesName']))
                        elif match['SeriesID'] != seriesid:
                            # known name but goodreads changed its id
                            myDB.action('UPDATE series SET SeriesID=? WHERE SeriesName=?',
                                        (seriesid, seriesname))
    else:
        # non-GoodReads: scrape series info out of the librarything work page html
        work = getBookWork(bookID, "Series")
        if work:
            try:
                slist = work.split('<h3><b>Series:')[1].split('</h3>')[0].split('<a href="/series/')
                for item in slist[1:]:
                    try:
                        series = item.split('">')[1].split('</a>')[0]
                        if series and '(' in series:
                            # e.g. "Discworld (2)" -> name and number-in-series
                            seriesnum = series.split('(')[1].split(')')[0].strip()
                            series = series.split(' (')[0].strip()
                        else:
                            seriesnum = ''
                            series = series.strip()
                        seriesname = cleanName(unaccented(series), '&/')
                        seriesnum = cleanName(unaccented(seriesnum))
                        if seriesname:
                            # no seriesid available from this source
                            serieslist.append(('', seriesnum, seriesname))
                    except IndexError:
                        pass
            except IndexError:
                # page has no Series section
                pass
    return serieslist
def TORDownloadMethod(bookid=None, tor_prov=None, tor_title=None, tor_url=None):
    """ Send a torrent/magnet to whichever torrent downloader(s) are enabled
        (blackhole, uTorrent, rTorrent, qBittorrent, Transmission, Synology, Deluge),
        then update the 'books' and 'wanted' tables to Snatched/Failed.
        Returns True on a successful send, False otherwise. """
    myDB = database.DBConnection()
    downloadID = False
    Source = ''
    full_url = tor_url  # keep the url as stored in "wanted" table
    if tor_url and tor_url.startswith('magnet'):
        torrent = tor_url  # allow magnet link to write to blackhole and hash to utorrent/rtorrent
    else:
        if '&file=' in tor_url:
            # torznab results need to be re-encoded
            # had a problem with torznab utf-8 encoded strings not matching
            # our utf-8 strings because of long/short form differences
            url = tor_url.split('&file=')[0]
            value = tor_url.split('&file=')[1]
            if isinstance(value, str):
                value = value.decode('utf-8')  # make unicode
            value = unicodedata.normalize('NFC', value)  # normalize to short form
            value = value.encode('unicode-escape')  # then escape the result
            value = value.replace(' ', '%20')  # and encode any spaces
            tor_url = url + '&file=' + value

        # strip url back to the .torrent as some sites add parameters
        if not tor_url.endswith('.torrent'):
            if '.torrent' in tor_url:
                tor_url = tor_url.split('.torrent')[0] + '.torrent'

        # fetch the .torrent file contents (gzip-aware), honouring any proxy
        request = urllib2.Request(ur'%s' % tor_url)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
        request.add_header('Accept-encoding', 'gzip')
        request.add_header('User-Agent', USER_AGENT)

        try:
            response = urllib2.urlopen(request, timeout=90)
            if response.info().get('Content-Encoding') == 'gzip':
                buf = StringIO(response.read())
                f = gzip.GzipFile(fileobj=buf)
                torrent = f.read()
            else:
                torrent = response.read()
        except (socket.timeout) as e:
            logger.warn('Timeout fetching torrent from url: %s' % tor_url)
            return False
        except (urllib2.URLError) as e:
            logger.warn('Error fetching torrent from url: %s, %s' % (tor_url, e.reason))
            return False

    if lazylibrarian.TOR_DOWNLOADER_BLACKHOLE:
        # blackhole: write the torrent/magnet to a watched directory
        Source = "BLACKHOLE"
        logger.debug("Sending %s to blackhole" % tor_title)
        tor_name = cleanName(tor_title).replace(' ', '_')
        tor_title = None
        if tor_url and tor_url.startswith('magnet'):
            if lazylibrarian.TOR_CONVERT_MAGNET:
                # convert the magnet to a real .torrent before dropping it
                hashid = CalcTorrentHash(tor_url)
                tor_name = 'meta-' + hashid + '.torrent'
                tor_path = os.path.join(lazylibrarian.TORRENT_DIR, tor_name)
                result = magnet2torrent(tor_url, tor_path)
                if result is not False:
                    logger.debug('Magnet file saved as: %s' % tor_path)
                    downloadID = Source
            else:
                tor_name = tor_name + '.magnet'
                tor_path = os.path.join(lazylibrarian.TORRENT_DIR, tor_name)
                with open(tor_path, 'wb') as torrent_file:
                    torrent_file.write(torrent)
                logger.debug('Magnet file saved: %s' % tor_path)
                setperm(tor_path)
                downloadID = Source
        else:
            tor_name = tor_name + '.torrent'
            tor_path = os.path.join(lazylibrarian.TORRENT_DIR, tor_name)
            with open(tor_path, 'wb') as torrent_file:
                torrent_file.write(torrent)
            setperm(tor_path)
            logger.debug('Torrent file saved: %s' % tor_name)
            downloadID = Source

    if (lazylibrarian.TOR_DOWNLOADER_UTORRENT and lazylibrarian.UTORRENT_HOST):
        logger.debug("Sending %s to Utorrent" % tor_title)
        Source = "UTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = utorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = utorrent.nameTorrent(downloadID)

    if (lazylibrarian.TOR_DOWNLOADER_RTORRENT and lazylibrarian.RTORRENT_HOST):
        logger.debug("Sending %s to rTorrent" % tor_title)
        Source = "RTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = rtorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = rtorrent.getName(downloadID)

    if (lazylibrarian.TOR_DOWNLOADER_QBITTORRENT and lazylibrarian.QBITTORRENT_HOST):
        logger.debug("Sending %s to qbittorrent" % tor_title)
        Source = "QBITTORRENT"
        hashid = CalcTorrentHash(torrent)
        status = qbittorrent.addTorrent(tor_url)  # returns hash or False
        if status:
            downloadID = hashid
            tor_title = qbittorrent.getName(hashid)
        else:
            # NOTE(review): 'response' is only bound on the non-magnet fetch path
            # above; this likely meant to log 'status' — confirm before changing
            logger.debug("qbittorrent returned: %s" % str(response))

    if (lazylibrarian.TOR_DOWNLOADER_TRANSMISSION and lazylibrarian.TRANSMISSION_HOST):
        logger.debug("Sending %s to Transmission" % tor_title)
        Source = "TRANSMISSION"
        downloadID = transmission.addTorrent(tor_url)  # returns id or False
        if downloadID:
            # transmission returns it's own int, but we store hashid instead
            downloadID = CalcTorrentHash(torrent)
            tor_title = transmission.getTorrentFolder(downloadID)

    if (lazylibrarian.TOR_DOWNLOADER_SYNOLOGY and lazylibrarian.USE_SYNOLOGY and
            lazylibrarian.SYNOLOGY_HOST):
        logger.debug("Sending %s to Synology" % tor_title)
        Source = "SYNOLOGY_TOR"
        downloadID = synology.addTorrent(tor_url)  # returns id or False
        if downloadID:
            tor_title = synology.getName(downloadID)

    if (lazylibrarian.TOR_DOWNLOADER_DELUGE and lazylibrarian.DELUGE_HOST):
        logger.debug("Sending %s to Deluge" % tor_title)
        if not lazylibrarian.DELUGE_USER:
            # no username, talk to the webui
            Source = "DELUGEWEBUI"
            downloadID = deluge.addTorrent(tor_url)  # returns hash or False
            if downloadID:
                tor_title = deluge.getTorrentFolder(downloadID)
        else:
            # have username, talk to the daemon
            Source = "DELUGERPC"
            client = DelugeRPCClient(lazylibrarian.DELUGE_HOST,
                                     int(lazylibrarian.DELUGE_PORT),
                                     lazylibrarian.DELUGE_USER,
                                     lazylibrarian.DELUGE_PASS)
            try:
                client.connect()
                args = {"name": tor_title}
                if tor_url.startswith('magnet'):
                    downloadID = client.call('core.add_torrent_magnet', tor_url, args)
                else:
                    downloadID = client.call('core.add_torrent_url', tor_url, args)
                if downloadID:
                    if lazylibrarian.DELUGE_LABEL:
                        labelled = client.call('label.set_torrent', downloadID,
                                               lazylibrarian.DELUGE_LABEL)
                    # ask the daemon for the resolved torrent name
                    result = client.call('core.get_torrent_status', downloadID, {})
                    #for item in result:
                    #    logger.debug ('Deluge RPC result %s: %s' % (item, result[item]))
                    if 'name' in result:
                        tor_title = result['name']
            except Exception as e:
                logger.debug('DelugeRPC failed %s' % str(e))
                return False

    if not Source:
        logger.warn('No torrent download method is enabled, check config.')
        return False

    if downloadID:
        # mark the book and the wanted entry as snatched
        myDB.action('UPDATE books SET status = "Snatched" WHERE BookID="%s"' % bookid)
        myDB.action('UPDATE wanted SET status = "Snatched", Source = "%s", DownloadID = "%s" WHERE NZBurl="%s"' %
                    (Source, downloadID, full_url))
        if tor_title:
            if downloadID.upper() in tor_title.upper():
                logger.warn('%s: name contains hash, probably unresolved magnet' % Source)
            else:
                tor_title = unaccented_str(tor_title)
                logger.debug('%s setting torrent name to [%s]' % (Source, tor_title))
                myDB.action('UPDATE wanted SET NZBtitle = "%s" WHERE NZBurl="%s"' % (tor_title, full_url))
        return True
    else:
        logger.error(u'Failed to download torrent from %s, %s' % (Source, tor_url))
        myDB.action('UPDATE wanted SET status = "Failed" WHERE NZBurl="%s"' % full_url)
        return False
def getSeriesAuthors(seriesid):
    """ Get a list of authors contributing to a series
        and import those authors (and their books) into the database
        Return how many authors you added.
        Members missing an authorid are resolved via GoodReads search:
        first author+title, then title alone, accepting a fuzzy title
        match of 98 or better. """
    myDB = database.DBConnection()
    result = myDB.match("select count(*) as counter from authors")
    start = int(result['counter'])
    result = myDB.match('select SeriesName from series where SeriesID=?',
                        (seriesid, ))
    seriesname = result['SeriesName']
    members, api_hits = getSeriesMembers(seriesid, seriesname)
    # strip smart quotes and apostrophes before fuzzy-matching titles
    dic = {
        u'\u2018': "",
        u'\u2019': "",
        u'\u201c': '',
        u'\u201d': '',
        "'": "",
        '"': ''
    }
    if members:
        myDB = database.DBConnection()
        for member in members:
            # order = member[0]
            bookname = member[1]
            authorname = member[2]
            # workid = member[3]
            authorid = member[4]
            # pubyear = member[5]
            bookname = replace_all(bookname, dic)
            if not authorid:
                # goodreads gives us all the info we need, librarything/google doesn't
                base_url = 'https://www.goodreads.com/search.xml?q='
                params = {"key": lazylibrarian.CONFIG['GR_API']}
                searchname = bookname + ' ' + authorname
                searchname = cleanName(unaccented(searchname))
                if PY2:
                    searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                searchterm = quote_plus(searchname)
                set_url = base_url + searchterm + '&' + urlencode(params)
                try:
                    rootxml, in_cache = gr_xml_request(set_url)
                    if not in_cache:
                        api_hits += 1
                    if rootxml is None:
                        logger.warn('Error getting XML for %s' % searchname)
                    else:
                        # first pass: author + title search
                        resultxml = rootxml.getiterator('work')
                        for item in resultxml:
                            try:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                            except (KeyError, AttributeError):
                                booktitle = ""
                            book_fuzz = fuzz.token_set_ratio(
                                booktitle, bookname)
                            if book_fuzz >= 98:
                                try:
                                    author = item.find(
                                        './best_book/author/name').text
                                except (KeyError, AttributeError):
                                    author = ""
                                # try:
                                #     workid = item.find('./work/id').text
                                # except (KeyError, AttributeError):
                                #     workid = ""
                                try:
                                    authorid = item.find(
                                        './best_book/author/id').text
                                except (KeyError, AttributeError):
                                    authorid = ""
                                logger.debug(
                                    "Author Search found %s %s, authorid %s" %
                                    (author, booktitle, authorid))
                                break
                    if not authorid:
                        # try again with title only
                        searchname = cleanName(unaccented(bookname))
                        if PY2:
                            searchname = searchname.encode(
                                lazylibrarian.SYS_ENCODING)
                        searchterm = quote_plus(searchname)
                        set_url = base_url + searchterm + '&' + urlencode(
                            params)
                        rootxml, in_cache = gr_xml_request(set_url)
                        if not in_cache:
                            api_hits += 1
                        if rootxml is None:
                            logger.warn('Error getting XML for %s' % searchname)
                        else:
                            resultxml = rootxml.getiterator('work')
                            for item in resultxml:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                                book_fuzz = fuzz.token_set_ratio(
                                    booktitle, bookname)
                                if book_fuzz >= 98:
                                    try:
                                        author = item.find(
                                            './best_book/author/name').text
                                    except (KeyError, AttributeError):
                                        author = ""
                                    # try:
                                    #     workid = item.find('./work/id').text
                                    # except (KeyError, AttributeError):
                                    #     workid = ""
                                    try:
                                        authorid = item.find(
                                            './best_book/author/id').text
                                    except (KeyError, AttributeError):
                                        authorid = ""
                                    logger.debug(
                                        "Title Search found %s %s, authorid %s"
                                        % (author, booktitle, authorid))
                                    break
                    if not authorid:
                        logger.warn("GoodReads doesn't know about %s %s" %
                                    (authorname, bookname))
                except Exception as e:
                    logger.error("Error finding goodreads results: %s %s" %
                                 (type(e).__name__, str(e)))

            if authorid:
                # import the author (and their books); skips if already present
                lazylibrarian.importer.addAuthorToDB(refresh=False,
                                                     authorid=authorid)

    # count again: the difference is how many authors we actually added
    result = myDB.match("select count(*) as counter from authors")
    finish = int(result['counter'])
    newauth = finish - start
    logger.info("Added %s new author%s for %s" %
                (newauth, plural(newauth), seriesname))
    return newauth
def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped",
                     refresh=False):
    """ Fetch all books for an author from the Google Books API (paged, 40 at a
        time), filter by language, reject bad/duplicate titles, and upsert the
        survivors into the 'books' table. Also updates the author row
        (Status/LastBook...) and records run statistics in the 'stats' table.
        bookstatus is the status given to newly added books. """
    try:
        logger.debug('[%s] Now processing books with Google Books API' % authorname)
        # google doesnt like accents in author names
        set_url = self.url + urllib.quote(
            'inauthor:"%s"' % unaccented_str(authorname))
        api_hits = 0
        gr_lang_hits = 0
        lt_lang_hits = 0
        gb_lang_change = 0
        cache_hits = 0
        not_cached = 0
        startindex = 0
        resultcount = 0
        removedResults = 0
        duplicates = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0
        number_results = 1

        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])

        # Artist is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)

        try:
            # page through the results, 40 items per request
            while startindex < number_results:
                self.params['startIndex'] = startindex
                URL = set_url + '&' + urllib.urlencode(self.params)

                try:
                    jsonresults, in_cache = get_json_request(
                        URL, useCache=not refresh)
                    if not jsonresults:
                        number_results = 0
                    else:
                        if not in_cache:
                            api_hits += 1
                        number_results = jsonresults['totalItems']
                except HTTPError as err:
                    logger.warn(
                        'Google Books API Error [%s]: Check your API key or wait a while'
                        % err.reason)
                    break

                if number_results == 0:
                    logger.warn('Found no results for %s' % authorname)
                    break
                else:
                    logger.debug('Found %s result%s for %s' %
                                 (number_results, plural(number_results),
                                  authorname))

                startindex += 40

                for item in jsonresults['items']:
                    total_count += 1

                    # skip if no author, no author is no book.
                    try:
                        _ = item['volumeInfo']['authors'][0]
                    except KeyError:
                        logger.debug(
                            'Skipped a result without authorfield.')
                        continue

                    # only keep an ISBN if the first identifier is ISBN_10
                    try:
                        if item['volumeInfo']['industryIdentifiers'][0][
                                'type'] == 'ISBN_10':
                            bookisbn = item['volumeInfo'][
                                'industryIdentifiers'][0]['identifier']
                        else:
                            bookisbn = ""
                    except KeyError:
                        bookisbn = ""

                    # isbnhead = the language/group prefix of the isbn
                    isbnhead = ""
                    if len(bookisbn) == 10:
                        isbnhead = bookisbn[0:3]
                    elif len(bookisbn) == 13:
                        isbnhead = bookisbn[3:6]

                    try:
                        booklang = item['volumeInfo']['language']
                    except KeyError:
                        booklang = "Unknown"

                    # do we care about language?
                    if "All" not in valid_langs:
                        if bookisbn != "":
                            # seems google lies to us, sometimes tells us books
                            # are in english when they are not
                            if booklang == "Unknown" or booklang == "en":
                                googlelang = booklang
                                match = False
                                # 1) local language cache table
                                lang = myDB.match(
                                    'SELECT lang FROM languages where isbn = "%s"'
                                    % isbnhead)
                                if lang:
                                    booklang = lang['lang']
                                    cache_hits += 1
                                    logger.debug(
                                        "Found cached language [%s] for [%s]"
                                        % (booklang, isbnhead))
                                    match = True
                                if not match:
                                    # no match in cache, try lookup dict
                                    if isbnhead:
                                        if len(
                                                bookisbn
                                        ) == 13 and bookisbn.startswith(
                                                '979'):
                                            for lang in lazylibrarian.isbn_979_dict:
                                                if isbnhead.startswith(
                                                        lang):
                                                    booklang = lazylibrarian.isbn_979_dict[
                                                        lang]
                                                    logger.debug(
                                                        "ISBN979 returned %s for %s"
                                                        % (booklang, isbnhead))
                                                    match = True
                                                    break
                                        elif (len(bookisbn) == 10) or \
                                                (len(bookisbn) == 13 and bookisbn.startswith('978')):
                                            # NOTE(review): log label below says ISBN979
                                            # but this is the 978 lookup table
                                            for lang in lazylibrarian.isbn_978_dict:
                                                if isbnhead.startswith(
                                                        lang):
                                                    booklang = lazylibrarian.isbn_978_dict[
                                                        lang]
                                                    logger.debug(
                                                        "ISBN979 returned %s for %s"
                                                        % (booklang, isbnhead))
                                                    match = True
                                                    break
                                        if match:
                                            # remember the result for next time
                                            myDB.action(
                                                'insert into languages values ("%s", "%s")'
                                                % (isbnhead, booklang))
                                            logger.debug(u"GB language: " +
                                                         booklang)
                                if not match:
                                    # try searching librarything for a language code using the isbn
                                    # if no language found, librarything return value is "invalid" or "unknown"
                                    # librarything returns plain text, not xml
                                    BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + bookisbn
                                    try:
                                        librarything_wait()
                                        resp = urllib2.urlopen(
                                            BOOK_URL, timeout=30).read()
                                        lt_lang_hits += 1
                                        logger.debug(
                                            "LibraryThing reports language [%s] for %s"
                                            % (resp, isbnhead))
                                        if resp != 'invalid' and resp != 'unknown':
                                            booklang = resp  # found a language code
                                            match = True
                                            myDB.action(
                                                'insert into languages values ("%s", "%s")'
                                                % (isbnhead, booklang))
                                            logger.debug(u"LT language: " +
                                                         booklang)
                                    except Exception as e:
                                        booklang = ""
                                        logger.error(
                                            "Error finding language: %s" %
                                            str(e))

                                if match:
                                    # We found a better language match
                                    if googlelang == "en" and booklang not in [
                                            "en-US", "en-GB", "eng"
                                    ]:
                                        # these are all english, may need to expand this list
                                        booknamealt = item['volumeInfo'][
                                            'title']
                                        logger.debug(
                                            "%s Google thinks [%s], we think [%s]"
                                            % (booknamealt, googlelang,
                                               booklang))
                                        gb_lang_change += 1
                                else:
                                    # No match anywhere, accept google language
                                    booklang = googlelang

                        # skip if language is in ignore list
                        if booklang not in valid_langs:
                            booknamealt = item['volumeInfo']['title']
                            logger.debug('Skipped [%s] with language %s' %
                                         (booknamealt, booklang))
                            ignored += 1
                            continue

                    # pull the remaining optional volume fields, defaulting blanks
                    try:
                        bookpub = item['volumeInfo']['publisher']
                    except KeyError:
                        bookpub = ""

                    try:
                        booksub = item['volumeInfo']['subtitle']
                    except KeyError:
                        booksub = ""

                    # try to extract series name/number from the subtitle,
                    # e.g. "(Discworld Series 5)" or "Name #5" or "Name 5"
                    if not booksub:
                        series = ""
                        seriesNum = ""
                    else:
                        try:
                            series = booksub.split('(')[1].split(
                                ' Series ')[0]
                        except IndexError:
                            series = ""
                        if series.endswith(')'):
                            series = series[:-1]
                        try:
                            seriesNum = booksub.split('(')[1].split(
                                ' Series ')[1].split(')')[0]
                            if seriesNum[0] == '#':
                                seriesNum = seriesNum[1:]
                        except IndexError:
                            seriesNum = ""
                        if not seriesNum and '#' in series:
                            words = series.rsplit('#', 1)
                            series = words[0].strip()
                            seriesNum = words[1].strip()
                        if not seriesNum and ' ' in series:
                            words = series.rsplit(' ', 1)
                            # has to be unicode for isnumeric()
                            if (u"%s" % words[1]).isnumeric():
                                series = words[0]
                                seriesNum = words[1]

                    try:
                        bookdate = item['volumeInfo']['publishedDate']
                    except KeyError:
                        bookdate = '0000-00-00'

                    try:
                        bookimg = item['volumeInfo']['imageLinks'][
                            'thumbnail']
                    except KeyError:
                        bookimg = 'images/nocover.png'

                    try:
                        bookrate = item['volumeInfo']['averageRating']
                    except KeyError:
                        bookrate = 0

                    try:
                        bookpages = item['volumeInfo']['pageCount']
                    except KeyError:
                        bookpages = 0

                    try:
                        bookgenre = item['volumeInfo']['categories'][0]
                    except KeyError:
                        bookgenre = ""

                    try:
                        bookdesc = item['volumeInfo']['description']
                    except KeyError:
                        bookdesc = ""

                    bookname = item['volumeInfo']['title']
                    bookname = unaccented(bookname)
                    dic = {':': '.', '"': '', '\'': ''}
                    bookname = replace_all(bookname, dic)
                    bookname = bookname.strip()  # strip whitespace

                    booklink = item['volumeInfo']['canonicalVolumeLink']
                    bookrate = float(bookrate)
                    bookid = item['id']

                    # GoodReads sometimes has multiple bookids for the same book (same author/title, different editions)
                    # and sometimes uses the same bookid if the book is the same but the title is slightly different
                    #
                    # Not sure if googlebooks does too, but we only want one...
                    existing_book = myDB.match(
                        'SELECT Status,Manual FROM books WHERE BookID = "%s"'
                        % bookid)
                    if existing_book:
                        book_status = existing_book['Status']
                        locked = existing_book['Manual']
                        if locked is None:
                            locked = False
                        elif locked.isdigit():
                            locked = bool(int(locked))
                    else:
                        book_status = bookstatus  # new_book status, or new_author status
                        locked = False

                    rejected = False
                    check_status = False
                    # NOTE(review): re.match only tests the start of the string,
                    # so this rejects titles whose FIRST character is non-word —
                    # a bad character later in the title is not caught; confirm intent
                    if re.match(
                            '[^\w-]', bookname
                    ):  # remove books with bad characters in title
                        logger.debug(
                            "[%s] removed book for bad characters" % bookname)
                        removedResults += 1
                        rejected = True

                    if not rejected and not bookname:
                        logger.debug(
                            'Rejecting bookid %s for %s, no bookname' %
                            (bookid, authorname))
                        removedResults += 1
                        rejected = True

                    if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                        # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                        if bookdate > today()[:len(bookdate)]:
                            logger.debug(
                                'Rejecting %s, future publication date %s' %
                                (bookname, bookdate))
                            removedResults += 1
                            rejected = True

                    if not rejected:
                        # same author/title but a different bookid already stored?
                        cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                        cmd += ' and BookName = "%s" COLLATE NOCASE and AuthorName = "%s" COLLATE NOCASE' % \
                               (bookname.replace('"', '""'), authorname.replace('"', '""'))
                        match = myDB.match(cmd)
                        if match:
                            if match['BookID'] != bookid:
                                # we have a different book with this author/title already
                                logger.debug(
                                    'Rejecting bookid %s for [%s][%s] already got %s'
                                    % (match['BookID'], authorname, bookname,
                                       bookid))
                                rejected = True
                                duplicates += 1

                    if not rejected:
                        # same bookid already stored (possibly different title)?
                        cmd = 'SELECT AuthorName,BookName FROM books,authors'
                        cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID="%s"' % bookid
                        match = myDB.match(cmd)
                        if match:
                            # we have a book with this bookid already
                            if bookname != match[
                                    'BookName'] or authorname != match[
                                    'AuthorName']:
                                logger.debug(
                                    'Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]'
                                    % (bookid, authorname, bookname,
                                       match['AuthorName'],
                                       match['BookName']))
                            else:
                                logger.debug(
                                    'Rejecting bookid %s for [%s][%s] already got this book in database'
                                    % (bookid, authorname, bookname))
                                check_status = True
                            duplicates += 1
                            rejected = True

                    if check_status or not rejected:
                        # upsert unless the book is Ignored or manually locked
                        if book_status != "Ignored" and not locked:
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {
                                "AuthorID": authorid,
                                "BookName": bookname,
                                "BookSub": booksub,
                                "BookDesc": bookdesc,
                                "BookIsbn": bookisbn,
                                "BookPub": bookpub,
                                "BookGenre": bookgenre,
                                "BookImg": bookimg,
                                "BookLink": booklink,
                                "BookRate": bookrate,
                                "BookPages": bookpages,
                                "BookDate": bookdate,
                                "BookLang": booklang,
                                "Status": book_status,
                                "BookAdded": today()
                            }
                            resultcount += 1

                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                            logger.debug(u"Book found: " + bookname + " " +
                                         bookdate)
                            updated = False

                            if 'nocover' in bookimg or 'nophoto' in bookimg:
                                # try to get a cover from librarything
                                workcover = getBookCover(bookid)
                                if workcover:
                                    logger.debug(
                                        u'Updated cover for %s to %s' %
                                        (bookname, workcover))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": workcover}
                                    myDB.upsert("books", newValueDict,
                                                controlValueDict)
                                    updated = True
                            elif bookimg and bookimg.startswith('http'):
                                # cache a local copy of the remote cover image
                                link, success = cache_img("book", bookid,
                                                          bookimg,
                                                          refresh=refresh)
                                if success:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": link}
                                    myDB.upsert("books", newValueDict,
                                                controlValueDict)
                                    updated = True
                                else:
                                    logger.debug(
                                        'Failed to cache image for %s' %
                                        bookimg)

                            seriesdict = {}
                            if lazylibrarian.CONFIG['ADD_SERIES']:
                                # prefer series info from librarything
                                seriesdict = getWorkSeries(bookid)
                                if seriesdict:
                                    logger.debug(
                                        u'Updated series: %s [%s]' %
                                        (bookid, seriesdict))
                                    updated = True
                                else:
                                    # librarything doesn't have series info. Any in the title?
                                    if series:
                                        seriesdict = {
                                            cleanName(unaccented(series)):
                                            seriesNum
                                        }
                                setSeries(seriesdict, bookid)

                            new_status = setStatus(bookid, seriesdict,
                                                   bookstatus)
                            if not new_status == book_status:
                                book_status = new_status
                                updated = True

                            worklink = getWorkPage(bookid)
                            if worklink:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"WorkPage": worklink}
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)

                            if not existing_book:
                                logger.debug(
                                    "[%s] Added book: %s [%s] status %s" %
                                    (authorname, bookname, booklang,
                                     book_status))
                                added_count += 1
                            elif updated:
                                logger.debug(
                                    "[%s] Updated book: %s [%s] status %s" %
                                    (authorname, bookname, booklang,
                                     book_status))
                                updated_count += 1
                        else:
                            book_ignore_count += 1
        except KeyError:
            pass

        deleteEmptySeries()
        logger.debug(
            '[%s] The Google Books API was hit %s time%s to populate book list'
            % (authorname, api_hits, plural(api_hits)))

        # update the author row with their most recent non-ignored book
        lastbook = myDB.match(
            'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID="%s" \
            AND Status != "Ignored" order by BookDate DESC' % authorid)

        if lastbook:  # maybe there are no books [remaining] for this author
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
            lastbookimg = lastbook['BookImg']
        else:
            lastbookname = ""
            lastbooklink = ""
            lastbookdate = ""
            lastbookimg = ""

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate,
            "LastBookImg": lastbookimg
        }
        myDB.upsert("authors", newValueDict, controlValueDict)

        logger.debug("Found %s total book%s for author" %
                     (total_count, plural(total_count)))
        logger.debug("Removed %s unwanted language result%s for author" %
                     (ignored, plural(ignored)))
        logger.debug(
            "Removed %s bad character or no-name result%s for author" %
            (removedResults, plural(removedResults)))
        logger.debug("Removed %s duplicate result%s for author" %
                     (duplicates, plural(duplicates)))
        logger.debug("Found %s book%s by author marked as Ignored" %
                     (book_ignore_count, plural(book_ignore_count)))
        logger.debug("Imported/Updated %s book%s for author" %
                     (resultcount, plural(resultcount)))

        # record this run in the stats table
        myDB.action(
            'insert into stats values ("%s", %i, %i, %i, %i, %i, %i, %i, %i, %i)'
            % (authorname.replace('"', '""'), api_hits, gr_lang_hits,
               lt_lang_hits, gb_lang_change, cache_hits, ignored,
               removedResults, not_cached, duplicates))

        if refresh:
            logger.info(
                "[%s] Book processing complete: Added %s book%s / Updated %s book%s"
                % (authorname, added_count, plural(added_count),
                   updated_count, plural(updated_count)))
        else:
            logger.info(
                "[%s] Book processing complete: Added %s book%s to the database"
                % (authorname, added_count, plural(added_count)))

    except Exception:
        logger.error('Unhandled exception in GB.get_author_books: %s' %
                     traceback.format_exc())
def ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode):
    """ Build the newznab/torznab query parameter dict for one provider.

        provider    -- provider config dict (BOOKSEARCH/GENERALSEARCH/MAGSEARCH,
                       BOOKCAT/MAGCAT, EXTENDED capability fields)
        api_key     -- provider api key
        book        -- dict with 'authorName'/'bookName'/'bookSub' for book
                       searches, or 'searchterm' for mag/general searches
        searchType  -- "book", "shortbook", "mag", or anything else for general
        searchMode  -- only used for logging

        Returns the params dict, or None if the provider supports no suitable
        search mode.

        Fixes over the previous version: the "book" and "shortbook" branches
        were duplicated and are now merged, and the leading-initials loop is
        guarded so author names shorter than two characters no longer raise
        IndexError on authorname[1]. """
    params = None
    if searchType in ["book", "shortbook"]:
        authorname = book['authorName']
        # strip any leading initials; guard len so 0/1-char names can't
        # IndexError on authorname[1]
        while len(authorname) > 1 and authorname[1] in '. ':
            authorname = authorname[2:].strip()  # and leading whitespace
        # middle initials can't have a dot
        authorname = authorname.replace('. ', ' ')
        authorname = cleanName(authorname)
        bookname = cleanName(book['bookName'])
        if bookname == authorname and book['bookSub']:
            # books like "Spike Milligan: Man of Letters"
            # where we split the title/subtitle on ':'
            bookname = cleanName(book['bookSub'])
        if bookname.startswith(authorname) and len(bookname) > len(authorname):
            # books like "Spike Milligan In his own words"
            # where we don't want to look for "Spike Milligan Spike Milligan In his own words"
            bookname = bookname[len(authorname) + 1:]
        if searchType == "shortbook" and '(' in bookname:
            # shortbook drops any bracketed subtitle/series suffix
            bookname = bookname.split('(')[0].strip()
        if provider['BOOKSEARCH'] and provider['BOOKCAT']:
            # if specific booksearch, use it
            params = {
                "t": provider['BOOKSEARCH'],
                "apikey": api_key,
                "title": bookname,
                "author": authorname,
                "cat": provider['BOOKCAT']
            }
        elif provider['GENERALSEARCH'] and provider['BOOKCAT']:
            # if not, try general search
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "q": authorname + ' ' + bookname,
                "cat": provider['BOOKCAT']
            }
    elif searchType == "mag":
        if provider['MAGSEARCH'] and provider['MAGCAT']:
            # if specific magsearch, use it
            params = {
                "t": provider['MAGSEARCH'],
                "apikey": api_key,
                "cat": provider['MAGCAT'],
                "q": book['searchterm'],
                "extended": provider['EXTENDED'],
            }
        elif provider['GENERALSEARCH'] and provider['MAGCAT']:
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "cat": provider['MAGCAT'],
                "q": book['searchterm'],
                "extended": provider['EXTENDED'],
            }
    else:
        if provider['GENERALSEARCH']:
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                # this is a general search
                "q": book['searchterm'],
                "extended": provider['EXTENDED'],
            }
    if params:
        logger.debug('[NewzNabPlus] - %s Search parameters set to %s' %
                     (searchMode, str(params)))
    else:
        logger.debug('[NewzNabPlus] - %s No matching search parameters' %
                     searchMode)
    return params
def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped",
                     entrystatus='Active', refresh=False):
    """Fetch all books for an author from the Google Books API and import them.

    Pages through results 40 at a time, filters on language / publication
    date / isbn according to config, upserts each surviving book into the
    'books' table, then updates the author row ("LastBook" etc.) and the
    'stats' table.

    authorid/authorname: the author to process
    bookstatus:   status given to newly added books
    entrystatus:  status written back to the author row when done
    refresh:      when True, bypass the json cache
    """
    # noinspection PyBroadException
    try:
        logger.debug('[%s] Now processing books with Google Books API' % authorname)
        # google doesnt like accents in author names
        set_url = self.url + quote('inauthor:"%s"' % unaccented_str(authorname))
        # counters for the summary logging / stats table at the end
        api_hits = 0
        gr_lang_hits = 0
        lt_lang_hits = 0
        gb_lang_change = 0
        cache_hits = 0
        not_cached = 0
        startindex = 0
        resultcount = 0
        removedResults = 0
        duplicates = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0
        number_results = 1
        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        # Artist is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)
        try:
            # page through the API results, 40 items per request
            while startindex < number_results:
                self.params['startIndex'] = startindex
                URL = set_url + '&' + urlencode(self.params)
                try:
                    jsonresults, in_cache = gb_json_request(URL, useCache=not refresh)
                    if jsonresults is None:
                        number_results = 0
                    else:
                        if not in_cache:
                            api_hits += 1
                        number_results = jsonresults['totalItems']
                except Exception as err:
                    if hasattr(err, 'reason'):
                        errmsg = err.reason
                    else:
                        errmsg = str(err)
                    logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                    break
                if number_results == 0:
                    logger.warn('Found no results for %s' % authorname)
                    break
                else:
                    logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname))
                startindex += 40
                for item in jsonresults['items']:
                    total_count += 1
                    book = bookdict(item)
                    # skip if no author, no author is no book.
                    if not book['author']:
                        logger.debug('Skipped a result without authorfield.')
                        continue
                    # isbnhead is the language-identifying prefix of the isbn
                    isbnhead = ""
                    if len(book['isbn']) == 10:
                        isbnhead = book['isbn'][0:3]
                    elif len(book['isbn']) == 13:
                        isbnhead = book['isbn'][3:6]
                    booklang = book['lang']
                    # do we care about language?
                    if "All" not in valid_langs:
                        if book['isbn']:
                            # seems google lies to us, sometimes tells us books
                            # are in english when they are not
                            if booklang == "Unknown" or booklang == "en":
                                googlelang = booklang
                                match = False
                                # 1) try the local language cache table
                                lang = myDB.match('SELECT lang FROM languages where isbn=?', (isbnhead,))
                                if lang:
                                    booklang = lang['lang']
                                    cache_hits += 1
                                    logger.debug("Found cached language [%s] for [%s]" % (booklang, isbnhead))
                                    match = True
                                if not match:
                                    # no match in cache, try lookup dict
                                    if isbnhead:
                                        if len(book['isbn']) == 13 and book['isbn'].startswith('979'):
                                            for lang in lazylibrarian.isbn_979_dict:
                                                if isbnhead.startswith(lang):
                                                    booklang = lazylibrarian.isbn_979_dict[lang]
                                                    logger.debug("ISBN979 returned %s for %s" % (booklang, isbnhead))
                                                    match = True
                                                    break
                                        elif (len(book['isbn']) == 10) or \
                                                (len(book['isbn']) == 13 and book['isbn'].startswith('978')):
                                            for lang in lazylibrarian.isbn_978_dict:
                                                if isbnhead.startswith(lang):
                                                    booklang = lazylibrarian.isbn_978_dict[lang]
                                                    # NOTE(review): message says ISBN979 but this is
                                                    # the 978 branch - looks like a copy-paste slip
                                                    logger.debug("ISBN979 returned %s for %s" % (booklang, isbnhead))
                                                    match = True
                                                    break
                                        if match:
                                            myDB.action('insert into languages values (?, ?)',
                                                        (isbnhead, booklang))
                                if not match:
                                    # 3) last resort: ask librarything
                                    booklang = thingLang(book['isbn'])
                                    lt_lang_hits += 1
                                    if booklang:
                                        match = True
                                        myDB.action('insert into languages values (?, ?)',
                                                    (isbnhead, booklang))
                                if match:
                                    # We found a better language match
                                    if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]:
                                        # these are all english, may need to expand this list
                                        logger.debug("%s Google thinks [%s], we think [%s]" %
                                                     (book['name'], googlelang, booklang))
                                        gb_lang_change += 1
                                else:
                                    # No match anywhere, accept google language
                                    booklang = googlelang
                        # skip if language is in ignore list
                        if booklang not in valid_langs:
                            logger.debug('Skipped [%s] with language %s' % (book['name'], booklang))
                            ignored += 1
                            continue
                    # rejected is a reason code: 0 = not rejected,
                    # 1 no name, 2 bad chars, 3 future date, 4 no date,
                    # 5 no isbn, 6/7 duplicates
                    rejected = 0
                    check_status = False
                    book_status = bookstatus  # new_book status, or new_author status
                    audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
                    added = today()
                    locked = False
                    existing_book = None
                    bookname = book['name']
                    bookid = item['id']
                    if not bookname:
                        logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorname))
                        removedResults += 1
                        rejected = 1
                    else:
                        bookname = replace_all(unaccented(bookname), {':': '.', '"': '', '\'': ''}).strip()
                        # GoodReads sometimes has multiple bookids for the same book (same author/title, different
                        # editions) and sometimes uses the same bookid if the book is the same but the title is
                        # slightly different. Not sure if googlebooks does too, but we only want one...
                        cmd = 'SELECT Status,AudioStatus,Manual,BookAdded FROM books WHERE BookID=?'
                        existing_book = myDB.match(cmd, (bookid,))
                        if existing_book:
                            # keep the statuses of a book we already have
                            book_status = existing_book['Status']
                            audio_status = existing_book['AudioStatus']
                            locked = existing_book['Manual']
                            added = existing_book['BookAdded']
                            if locked is None:
                                locked = False
                            elif locked.isdigit():
                                locked = bool(int(locked))
                        else:
                            # NOTE(review): rejected is always 0 at this point (the
                            # rejection checks run below), so this branch looks dead
                            if rejected in [3, 4, 5]:
                                book_status = 'Ignored'
                                audio_status = 'Ignored'
                            else:
                                book_status = bookstatus  # new_book status, or new_author status
                                audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
                            added = today()
                            locked = False
                    if not rejected and re.match('[^\w-]', bookname):
                        # remove books with bad characters in title
                        logger.debug("[%s] removed book for bad characters" % bookname)
                        removedResults += 1
                        rejected = 2
                    if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                        # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                        if book['date'] > today()[:len(book['date'])]:
                            logger.debug('Rejecting %s, future publication date %s' % (bookname, book['date']))
                            removedResults += 1
                            rejected = 3
                    if not rejected and lazylibrarian.CONFIG['NO_PUBDATE']:
                        if not book['date']:
                            logger.debug('Rejecting %s, no publication date' % bookname)
                            removedResults += 1
                            rejected = 4
                    if not rejected and lazylibrarian.CONFIG['NO_ISBN']:
                        if not isbnhead:
                            logger.debug('Rejecting %s, no isbn' % bookname)
                            removedResults += 1
                            rejected = 5
                    if not rejected:
                        # same author/title but a different bookid -> duplicate edition
                        cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                        cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                        match = myDB.match(cmd, (bookname.replace('"', '""'), authorname.replace('"', '""')))
                        if match:
                            if match['BookID'] != bookid:
                                # we have a different book with this author/title already
                                logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                             (match['BookID'], authorname, bookname, bookid))
                                rejected = 6
                                duplicates += 1
                    if not rejected:
                        # same bookid already in the database
                        cmd = 'SELECT AuthorName,BookName FROM books,authors'
                        cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                        match = myDB.match(cmd, (bookid,))
                        if match:
                            # we have a book with this bookid already
                            if bookname != match['BookName'] or authorname != match['AuthorName']:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                             (bookid, authorname, bookname, match['AuthorName'], match['BookName']))
                            else:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                             (bookid, authorname, bookname))
                                check_status = True
                            duplicates += 1
                            rejected = 7
                    if check_status or not rejected or (
                            lazylibrarian.CONFIG['IMP_IGNORE'] and rejected in [3, 4, 5]):  # dates, isbn
                        if not locked:
                            # not manually overridden, safe to (re)write the book row
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {
                                "AuthorID": authorid,
                                "BookName": bookname,
                                "BookSub": book['sub'],
                                "BookDesc": book['desc'],
                                "BookIsbn": book['isbn'],
                                "BookPub": book['pub'],
                                "BookGenre": book['genre'],
                                "BookImg": book['img'],
                                "BookLink": book['link'],
                                "BookRate": float(book['rate']),
                                "BookPages": book['pages'],
                                "BookDate": book['date'],
                                "BookLang": booklang,
                                "Status": book_status,
                                "AudioStatus": audio_status,
                                "BookAdded": added
                            }
                            resultcount += 1
                            myDB.upsert("books", newValueDict, controlValueDict)
                            logger.debug("Book found: " + bookname + " " + book['date'])
                        updated = False
                        if 'nocover' in book['img'] or 'nophoto' in book['img']:
                            # try to get a cover from another source
                            workcover, source = getBookCover(bookid)
                            if workcover:
                                logger.debug('Updated cover for %s using %s' % (bookname, source))
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"BookImg": workcover}
                                myDB.upsert("books", newValueDict, controlValueDict)
                                updated = True
                        elif book['img'] and book['img'].startswith('http'):
                            # cache the remote cover image locally
                            link, success, _ = cache_img("book", bookid, book['img'], refresh=refresh)
                            if success:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"BookImg": link}
                                myDB.upsert("books", newValueDict, controlValueDict)
                                updated = True
                            else:
                                logger.debug('Failed to cache image for %s' % book['img'])
                        serieslist = []
                        if book['series']:
                            serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))]
                        if lazylibrarian.CONFIG['ADD_SERIES']:
                            # a workpage series list overrides the googlebooks one
                            newserieslist = getWorkSeries(bookid)
                            if newserieslist:
                                serieslist = newserieslist
                                logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
                                updated = True
                        setSeries(serieslist, bookid)
                        new_status = setStatus(bookid, serieslist, bookstatus)
                        if not new_status == book_status:
                            book_status = new_status
                            updated = True
                        worklink = getWorkPage(bookid)
                        if worklink:
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"WorkPage": worklink}
                            myDB.upsert("books", newValueDict, controlValueDict)
                        if not existing_book:
                            logger.debug("[%s] Added book: %s [%s] status %s" %
                                         (authorname, bookname, booklang, book_status))
                            added_count += 1
                        elif updated:
                            logger.debug("[%s] Updated book: %s [%s] status %s" %
                                         (authorname, bookname, booklang, book_status))
                            updated_count += 1
                    else:
                        book_ignore_count += 1
        except KeyError:
            pass
        deleteEmptySeries()
        logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' %
                     (authorname, api_hits, plural(api_hits)))
        # refresh the author's "last book" summary fields
        cmd = 'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID=?'
        cmd += ' AND Status != "Ignored" order by BookDate DESC'
        lastbook = myDB.match(cmd, (authorid,))
        if lastbook:  # maybe there are no books [remaining] for this author
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
            lastbookimg = lastbook['BookImg']
        else:
            lastbookname = ""
            lastbooklink = ""
            lastbookdate = ""
            lastbookimg = ""
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": entrystatus,
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate,
            "LastBookImg": lastbookimg
        }
        myDB.upsert("authors", newValueDict, controlValueDict)
        logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
        logger.debug("Removed %s unwanted language result%s for author" % (ignored, plural(ignored)))
        logger.debug("Removed %s bad character or no-name result%s for author" %
                     (removedResults, plural(removedResults)))
        logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates)))
        logger.debug("Found %s book%s by author marked as Ignored" %
                     (book_ignore_count, plural(book_ignore_count)))
        logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount)))
        myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                    (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits,
                     gb_lang_change, cache_hits, ignored, removedResults, not_cached, duplicates))
        if refresh:
            logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                        (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
        else:
            logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                        (authorname, added_count, plural(added_count)))
    except Exception:
        logger.error('Unhandled exception in GB.get_author_books: %s' % traceback.format_exc())
def find_book(self, bookid=None, bookstatus="None"):
    """Look up a single Google Books volume by id and add it to the database.

    Resolves the author through GoodReads (adding the author if unknown),
    upserts the book row, then tries to cache a cover image, set series
    info and a work page link.

    bookid:     googlebooks volume id
    bookstatus: status for the new book; falsy -> CONFIG['NEWBOOK_STATUS']
                (note the default "None" is the *string* "None", which is
                truthy, so it is used as-is)
    """
    myDB = database.DBConnection()
    if not lazylibrarian.CONFIG['GB_API']:
        # warn but carry on; the request below will fail without a key
        logger.warn('No GoogleBooks API key, check config')
    URL = 'https://www.googleapis.com/books/v1/volumes/' + \
          str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
    jsonresults, in_cache = gb_json_request(URL)
    if jsonresults is None:
        logger.debug('No results found for %s' % bookid)
        return
    if not bookstatus:
        bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']
    book = bookdict(jsonresults)
    dic = {':': '.', '"': '', '\'': ''}
    bookname = replace_all(book['name'], dic)
    bookname = unaccented(bookname)
    bookname = bookname.strip()  # strip whitespace
    if not book['author']:
        logger.debug('Book %s does not contain author field, skipping' % bookname)
        return
    # warn if language is in ignore list, but user said they wanted this book
    valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
    if book['lang'] not in valid_langs and 'All' not in valid_langs:
        logger.debug('Book %s googlebooks language does not match preference, %s' %
                     (bookname, book['lang']))
    # preference mismatches below only warn - the user explicitly asked for this book
    if lazylibrarian.CONFIG['NO_PUBDATE']:
        if not book['date'] or book['date'] == '0000':
            logger.warn('Book %s Publication date does not match preference, %s' %
                        (bookname, book['date']))
    if lazylibrarian.CONFIG['NO_FUTURE']:
        if book['date'] > today()[:4]:
            logger.warn('Book %s Future publication date does not match preference, %s' %
                        (bookname, book['date']))
    authorname = book['author']
    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if author:
        AuthorID = author['authorid']
        match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
        if not match:
            # no row for that id; maybe we know the author under a different id
            match = myDB.match('SELECT AuthorID from authors WHERE AuthorName=?',
                               (author['authorname'],))
            if match:
                logger.debug('%s: Changing authorid from %s to %s' %
                             (author['authorname'], AuthorID, match['AuthorID']))
                AuthorID = match['AuthorID']  # we have a different authorid for that authorname
            else:  # no author but request to add book, add author with newauthor status
                # User hit "add book" button from a search or a wishlist import
                newauthor_status = 'Active'
                if lazylibrarian.CONFIG['NEWAUTHOR_STATUS'] in ['Skipped', 'Ignored']:
                    newauthor_status = 'Paused'
                controlValueDict = {"AuthorID": AuthorID}
                newValueDict = {
                    "AuthorName": author['authorname'],
                    "AuthorImg": author['authorimg'],
                    "AuthorLink": author['authorlink'],
                    "AuthorBorn": author['authorborn'],
                    "AuthorDeath": author['authordeath'],
                    "DateAdded": today(),
                    "Status": newauthor_status
                }
                authorname = author['authorname']
                myDB.upsert("authors", newValueDict, controlValueDict)
                if lazylibrarian.CONFIG['NEWAUTHOR_BOOKS']:
                    self.get_author_books(AuthorID,
                                          entrystatus=lazylibrarian.CONFIG['NEWAUTHOR_STATUS'])
    else:
        logger.warn("No AuthorID for %s, unable to add book %s" % (book['author'], bookname))
        return
    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorID": AuthorID,
        "BookName": bookname,
        "BookSub": book['sub'],
        "BookDesc": book['desc'],
        "BookIsbn": book['isbn'],
        "BookPub": book['pub'],
        "BookGenre": book['genre'],
        "BookImg": book['img'],
        "BookLink": book['link'],
        "BookRate": float(book['rate']),
        "BookPages": book['pages'],
        "BookDate": book['date'],
        "BookLang": book['lang'],
        "Status": bookstatus,
        "AudioStatus": lazylibrarian.CONFIG['NEWAUDIO_STATUS'],
        "BookAdded": today()
    }
    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s by %s added to the books database" % (bookname, authorname))
    if 'nocover' in book['img'] or 'nophoto' in book['img']:
        # try to get a cover from another source
        workcover, source = getBookCover(bookid)
        if workcover:
            logger.debug('Updated cover for %s using %s' % (bookname, source))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)
    elif book['img'] and book['img'].startswith('http'):
        # cache the remote cover image locally
        link, success, _ = cache_img("book", bookid, book['img'])
        if success:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)
        else:
            logger.debug('Failed to cache image for %s' % book['img'])
    serieslist = []
    if book['series']:
        serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))]
    if lazylibrarian.CONFIG['ADD_SERIES']:
        # a workpage series list overrides the googlebooks one
        newserieslist = getWorkSeries(bookid)
        if newserieslist:
            serieslist = newserieslist
            logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
    setSeries(serieslist, bookid)
    worklink = getWorkPage(bookid)
    if worklink:
        controlValueDict = {"BookID": bookid}
        newValueDict = {"WorkPage": worklink}
        myDB.upsert("books", newValueDict, controlValueDict)
def getSeriesAuthors(seriesid):
    """ Get a list of authors contributing to a series
        and import those authors (and their books) into the database
        Return how many authors you added

        For members with no authorid we ask the GoodReads search api,
        first with "title author", then with title only, and accept a
        result whose title fuzzy-matches at >= 98.
    """
    myDB = database.DBConnection()
    # count authors before/after so we can report how many were added
    result = myDB.match("select count(*) as counter from authors")
    start = int(result['counter'])
    result = myDB.match('select SeriesName from series where SeriesID=?', (seriesid,))
    seriesname = result['SeriesName']
    members = getSeriesMembers(seriesid)
    # strip curly quotes and quote chars before searching/matching
    dic = {u'\u2018': "", u'\u2019': "", u'\u201c': '', u'\u201d': '', "'": "", '"': ''}
    if members:
        myDB = database.DBConnection()
        for member in members:
            # order = member[0]
            bookname = member[1]
            authorname = member[2]
            # workid = member[3]
            authorid = member[4]
            bookname = replace_all(bookname, dic)
            if not authorid:
                # goodreads gives us all the info we need, librarything/google doesn't
                base_url = 'https://www.goodreads.com/search.xml?q='
                params = {"key": lazylibrarian.CONFIG['GR_API']}
                searchname = bookname + ' ' + authorname
                searchname = cleanName(unaccented(searchname))
                if PY2:
                    searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                searchterm = quote_plus(searchname)
                set_url = base_url + searchterm + '&' + urlencode(params)
                try:
                    rootxml, in_cache = gr_xml_request(set_url)
                    if rootxml is None:
                        logger.warn('Error getting XML for %s' % searchname)
                    else:
                        resultxml = rootxml.getiterator('work')
                        for item in resultxml:
                            try:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                            except (KeyError, AttributeError):
                                booktitle = ""
                            book_fuzz = fuzz.token_set_ratio(booktitle, bookname)
                            if book_fuzz >= 98:
                                try:
                                    author = item.find('./best_book/author/name').text
                                except (KeyError, AttributeError):
                                    author = ""
                                # try:
                                #     workid = item.find('./work/id').text
                                # except (KeyError, AttributeError):
                                #     workid = ""
                                try:
                                    authorid = item.find('./best_book/author/id').text
                                except (KeyError, AttributeError):
                                    authorid = ""
                                logger.debug("Author Search found %s %s, authorid %s" %
                                             (author, booktitle, authorid))
                                break
                    if not authorid:  # try again with title only
                        searchname = cleanName(unaccented(bookname))
                        if PY2:
                            searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                        searchterm = quote_plus(searchname)
                        set_url = base_url + searchterm + '&' + urlencode(params)
                        rootxml, in_cache = gr_xml_request(set_url)
                        if rootxml is None:
                            logger.warn('Error getting XML for %s' % searchname)
                        else:
                            resultxml = rootxml.getiterator('work')
                            for item in resultxml:
                                # NOTE(review): unlike the first pass, title access here is
                                # not wrapped in try/except; a missing title would be caught
                                # by the outer "except Exception" below
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                                book_fuzz = fuzz.token_set_ratio(booktitle, bookname)
                                if book_fuzz >= 98:
                                    try:
                                        author = item.find('./best_book/author/name').text
                                    except (KeyError, AttributeError):
                                        author = ""
                                    # try:
                                    #     workid = item.find('./work/id').text
                                    # except (KeyError, AttributeError):
                                    #     workid = ""
                                    try:
                                        authorid = item.find('./best_book/author/id').text
                                    except (KeyError, AttributeError):
                                        authorid = ""
                                    logger.debug("Title Search found %s %s, authorid %s" %
                                                 (author, booktitle, authorid))
                                    break
                    if not authorid:
                        logger.warn("GoodReads doesn't know about %s %s" % (authorname, bookname))
                except Exception as e:
                    logger.error("Error finding goodreads results: %s %s" % (type(e).__name__, str(e)))
            if authorid:
                # import the author (and their books) if we resolved an id
                lazylibrarian.importer.addAuthorToDB(refresh=False, authorid=authorid)
    result = myDB.match("select count(*) as counter from authors")
    finish = int(result['counter'])
    newauth = finish - start
    logger.info("Added %s new author%s for %s" % (newauth, plural(newauth), seriesname))
    return newauth
def TORDownloadMethod(bookid=None, tor_title=None, tor_url=None, library='eBook'):
    """Send a torrent/magnet to every enabled torrent downloader.

    Fetches the .torrent (or passes the magnet through), then hands it to
    blackhole/utorrent/rtorrent/qbittorrent/transmission/synology/deluge as
    configured, re-checks the (possibly renamed) title against the reject
    words list for the given library type, and updates the 'wanted' and
    'books' tables.

    Returns True when a downloader accepted it, False otherwise.
    """
    myDB = database.DBConnection()
    downloadID = False
    Source = ''
    full_url = tor_url  # keep the url as stored in "wanted" table
    if tor_url and tor_url.startswith('magnet:?'):
        torrent = tor_url  # allow magnet link to write to blackhole and hash to utorrent/rtorrent
    elif 'magnet:?' in tor_url:
        # discard any other parameters and just use the magnet link
        torrent = 'magnet:?' + tor_url.split('magnet:?')[1]
    else:
        # h = HTMLParser()
        # tor_url = h.unescape(tor_url)
        # HTMLParser is probably overkill, we only seem to get &amp;
        # tor_url = tor_url.replace('&amp;', '&')
        if '&file=' in tor_url:
            # torznab results need to be re-encoded
            # had a problem with torznab utf-8 encoded strings not matching
            # our utf-8 strings because of long/short form differences
            url, value = tor_url.split('&file=', 1)
            value = makeUnicode(value)  # ensure unicode
            value = unicodedata.normalize('NFC', value)  # normalize to short form
            value = value.encode('unicode-escape')  # then escape the result
            value = makeUnicode(value)  # ensure unicode
            value = value.replace(' ', '%20')  # and encode any spaces
            tor_url = url + '&file=' + value
        # strip url back to the .torrent as some sites add extra parameters
        if not tor_url.endswith('.torrent'):
            if '.torrent' in tor_url:
                tor_url = tor_url.split('.torrent')[0] + '.torrent'
        headers = {'Accept-encoding': 'gzip', 'User-Agent': USER_AGENT}
        proxies = proxyList()
        try:
            r = requests.get(tor_url, headers=headers, timeout=90, proxies=proxies)
            torrent = r.content
        except requests.exceptions.Timeout:
            logger.warn('Timeout fetching file from url: %s' % tor_url)
            return False
        except Exception as e:
            # some jackett providers redirect internally using http 301 to a magnet link
            # which requests can't handle, so throws an exception
            if "magnet:?" in str(e):
                torrent = 'magnet:?' + str(e).split('magnet:?')[1].strip("'")
            else:
                if hasattr(e, 'reason'):
                    logger.warn('%s fetching file from url: %s, %s' %
                                (type(e).__name__, tor_url, e.reason))
                else:
                    logger.warn('%s fetching file from url: %s, %s' %
                                (type(e).__name__, tor_url, str(e)))
                return False
    if lazylibrarian.CONFIG['TOR_DOWNLOADER_BLACKHOLE']:
        Source = "BLACKHOLE"
        logger.debug("Sending %s to blackhole" % tor_title)
        tor_name = cleanName(tor_title).replace(' ', '_')
        if tor_url and tor_url.startswith('magnet'):
            if lazylibrarian.CONFIG['TOR_CONVERT_MAGNET']:
                # convert the magnet to a metadata .torrent file
                hashid = CalcTorrentHash(tor_url)
                tor_name = 'meta-' + hashid + '.torrent'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                result = magnet2torrent(tor_url, tor_path)
                if result is not False:
                    logger.debug('Magnet file saved as: %s' % tor_path)
                    downloadID = Source
            else:
                tor_name += '.magnet'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                msg = ''  # msg tracks how far we got, for the failure log below
                try:
                    msg = 'Opening '
                    with open(tor_path, 'wb') as torrent_file:
                        msg += 'Writing '
                        if isinstance(torrent, text_type):
                            torrent = torrent.encode('iso-8859-1')
                        torrent_file.write(torrent)
                    msg += 'SettingPerm '
                    setperm(tor_path)
                    msg += 'Saved '
                    logger.debug('Magnet file saved: %s' % tor_path)
                    downloadID = Source
                except Exception as e:
                    logger.warn("Failed to write magnet to file: %s %s" % (type(e).__name__, str(e)))
                    logger.debug("Progress: %s" % msg)
                    logger.debug("Filename [%s]" % (repr(tor_path)))
                    return False
        else:
            tor_name += '.torrent'
            tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
            msg = ''  # msg tracks how far we got, for the failure log below
            try:
                msg = 'Opening '
                with open(tor_path, 'wb') as torrent_file:
                    msg += 'Writing '
                    if isinstance(torrent, text_type):
                        torrent = torrent.encode('iso-8859-1')
                    torrent_file.write(torrent)
                msg += 'SettingPerm '
                setperm(tor_path)
                msg += 'Saved '
                logger.debug('Torrent file saved: %s' % tor_name)
                downloadID = Source
            except Exception as e:
                logger.warn("Failed to write torrent to file: %s %s" % (type(e).__name__, str(e)))
                logger.debug("Progress: %s" % msg)
                logger.debug("Filename [%s]" % (repr(tor_path)))
                return False
    hashid = CalcTorrentHash(torrent)
    if lazylibrarian.CONFIG['TOR_DOWNLOADER_UTORRENT'] and lazylibrarian.CONFIG['UTORRENT_HOST']:
        logger.debug("Sending %s to Utorrent" % tor_title)
        Source = "UTORRENT"
        downloadID = utorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = utorrent.nameTorrent(downloadID)
    if lazylibrarian.CONFIG['TOR_DOWNLOADER_RTORRENT'] and lazylibrarian.CONFIG['RTORRENT_HOST']:
        logger.debug("Sending %s to rTorrent" % tor_title)
        Source = "RTORRENT"
        downloadID = rtorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = rtorrent.getName(downloadID)
    if lazylibrarian.CONFIG['TOR_DOWNLOADER_QBITTORRENT'] and lazylibrarian.CONFIG['QBITTORRENT_HOST']:
        logger.debug("Sending %s to qbittorrent" % tor_title)
        Source = "QBITTORRENT"
        # magnets go through as-is (bytes or str), torrent data goes by url
        if isinstance(torrent, binary_type) and torrent.startswith(b'magnet'):
            status = qbittorrent.addTorrent(torrent, hashid)
        elif isinstance(torrent, text_type) and torrent.startswith('magnet'):
            status = qbittorrent.addTorrent(torrent, hashid)
        else:
            status = qbittorrent.addTorrent(tor_url, hashid)  # returns True or False
        if status:
            downloadID = hashid
            tor_title = qbittorrent.getName(hashid)
    if lazylibrarian.CONFIG['TOR_DOWNLOADER_TRANSMISSION'] and lazylibrarian.CONFIG['TRANSMISSION_HOST']:
        logger.debug("Sending %s to Transmission" % tor_title)
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            logger.debug("TORRENT %s [%s] [%s]" % (len(torrent), torrent[:20], torrent[-20:]))
        Source = "TRANSMISSION"
        if isinstance(torrent, binary_type) and torrent.startswith(b'magnet'):
            downloadID = transmission.addTorrent(torrent)  # returns id or False
        elif isinstance(torrent, text_type) and torrent.startswith('magnet'):
            downloadID = transmission.addTorrent(torrent)
        elif torrent:
            downloadID = transmission.addTorrent(None, metainfo=b64encode(torrent))
        else:
            downloadID = transmission.addTorrent(tor_url)  # returns id or False
        if downloadID:
            # transmission returns it's own int, but we store hashid instead
            downloadID = hashid
            tor_title = transmission.getTorrentFolder(downloadID)
    if lazylibrarian.CONFIG['TOR_DOWNLOADER_SYNOLOGY'] and lazylibrarian.CONFIG['USE_SYNOLOGY'] and \
            lazylibrarian.CONFIG['SYNOLOGY_HOST']:
        logger.debug("Sending %s to Synology" % tor_title)
        Source = "SYNOLOGY_TOR"
        downloadID = synology.addTorrent(tor_url)  # returns id or False
        if downloadID:
            tor_title = synology.getName(downloadID)
    if lazylibrarian.CONFIG['TOR_DOWNLOADER_DELUGE'] and lazylibrarian.CONFIG['DELUGE_HOST']:
        logger.debug("Sending %s to Deluge" % tor_title)
        if not lazylibrarian.CONFIG['DELUGE_USER']:
            # no username, talk to the webui
            Source = "DELUGEWEBUI"
            if isinstance(torrent, binary_type) and torrent.startswith(b'magnet'):
                downloadID = deluge.addTorrent(torrent)
            elif isinstance(torrent, text_type) and torrent.startswith('magnet'):
                downloadID = deluge.addTorrent(torrent)
            elif torrent:
                downloadID = deluge.addTorrent(tor_title, data=b64encode(torrent))
            else:
                downloadID = deluge.addTorrent(tor_url)  # can be link or magnet, returns hash or False
            if downloadID:
                tor_title = deluge.getTorrentFolder(downloadID)
        else:
            # have username, talk to the daemon
            Source = "DELUGERPC"
            client = DelugeRPCClient(lazylibrarian.CONFIG['DELUGE_HOST'],
                                     int(lazylibrarian.CONFIG['DELUGE_PORT']),
                                     lazylibrarian.CONFIG['DELUGE_USER'],
                                     lazylibrarian.CONFIG['DELUGE_PASS'])
            try:
                client.connect()
                args = {"name": tor_title}
                if tor_url.startswith('magnet'):
                    downloadID = client.call('core.add_torrent_magnet', tor_url, args)
                elif isinstance(torrent, binary_type) and torrent.startswith(b'magnet'):
                    downloadID = client.call('core.add_torrent_magnet', torrent, args)
                elif isinstance(torrent, text_type) and torrent.startswith('magnet'):
                    downloadID = client.call('core.add_torrent_magnet', torrent, args)
                elif torrent:
                    downloadID = client.call('core.add_torrent_file', tor_title, b64encode(torrent), args)
                else:
                    downloadID = client.call('core.add_torrent_url', tor_url, args)
                if downloadID:
                    if lazylibrarian.CONFIG['DELUGE_LABEL']:
                        _ = client.call('label.set_torrent', downloadID,
                                        lazylibrarian.CONFIG['DELUGE_LABEL'].lower())
                    result = client.call('core.get_torrent_status', downloadID, {})
                    # for item in result:
                    #     logger.debug('Deluge RPC result %s: %s' % (item, result[item]))
                    if 'name' in result:
                        tor_title = result['name']
            except Exception as e:
                logger.error('DelugeRPC failed %s %s' % (type(e).__name__, str(e)))
                return False
    if not Source:
        logger.warn('No torrent download method is enabled, check config.')
        return False
    if downloadID:
        if tor_title:
            if downloadID.upper() in tor_title.upper():
                logger.warn('%s: name contains hash, probably unresolved magnet' % Source)
            else:
                tor_title = unaccented_str(tor_title)
                # need to check against reject words list again as the name may have changed
                # library = magazine eBook AudioBook to determine which reject list
                # but we can't easily do the per-magazine rejects
                if library == 'magazine':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_MAGS'])
                elif library == 'eBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
                elif library == 'AudioBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
                else:
                    logger.debug("Invalid library [%s] in TORDownloadMethod" % library)
                    reject_list = []
                rejected = False
                lower_title = tor_title.lower()
                for word in reject_list:
                    if word in lower_title:
                        rejected = True
                        logger.debug("Rejecting torrent name %s, contains %s" % (tor_title, word))
                        break
                if rejected:
                    # cancel the download at the client and mark as failed
                    myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (full_url,))
                    delete_task(Source, downloadID, True)
                    return False
                else:
                    logger.debug('%s setting torrent name to [%s]' % (Source, tor_title))
                    myDB.action('UPDATE wanted SET NZBtitle=? WHERE NZBurl=?', (tor_title, full_url))
        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?', (bookid,))
        elif library == 'AudioBook':
            myDB.action('UPDATE books SET audiostatus="Snatched" WHERE BookID=?', (bookid,))
        myDB.action('UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
                    (Source, downloadID, full_url))
        return True
    logger.error('Failed to download torrent from %s, %s' % (Source, tor_url))
    myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (full_url,))
    return False
def TORDownloadMethod(bookid=None, tor_prov=None, tor_title=None, tor_url=None): myDB = database.DBConnection() download = False full_url = tor_url # keep the url as stored in "wanted" table if (lazylibrarian.TOR_DOWNLOADER_DELUGE or lazylibrarian.TOR_DOWNLOADER_UTORRENT or lazylibrarian.TOR_DOWNLOADER_QBITTORRENT or lazylibrarian.TOR_DOWNLOADER_BLACKHOLE or lazylibrarian.TOR_DOWNLOADER_TRANSMISSION): if tor_url and tor_url.startswith('magnet'): torrent = tor_url # allow magnet link to write to blackhole and hash to utorrent else: if '&file=' in tor_url: # torznab results need to be re-encoded # had a problem with torznab utf-8 encoded strings not matching # our utf-8 strings because of long/short form differences url = tor_url.split('&file=')[0] value = tor_url.split('&file=')[1] if isinstance(value, str): value = value.decode('utf-8') # make unicode value = unicodedata.normalize('NFC', value) # normalize to short form value = value.encode( 'unicode-escape') # then escape the result value = value.replace(' ', '%20') # and encode any spaces tor_url = url + '&file=' + value # strip url back to the .torrent as some sites add parameters if not tor_url.endswith('.torrent'): if '.torrent' in tor_url: tor_url = tor_url.split('.torrent')[0] + '.torrent' request = urllib2.Request(ur'%s' % tor_url) if lazylibrarian.PROXY_HOST: request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE) request.add_header('Accept-encoding', 'gzip') request.add_header('User-Agent', USER_AGENT) # PAB removed this, KAT serves us html instead of torrent if this header is sent # if tor_prov == 'KAT': # host = lazylibrarian.KAT_HOST # if not str(host)[:4] == "http": # host = 'http://' + host # request.add_header('Referer', host) try: response = urllib2.urlopen(request, timeout=90) if response.info().get('Content-Encoding') == 'gzip': buf = StringIO(response.read()) f = gzip.GzipFile(fileobj=buf) torrent = f.read() else: torrent = response.read() except (urllib2.URLError, socket.timeout) 
as e: logger.warn('Error fetching torrent from url: %s, %s' % (tor_url, e)) return False if lazylibrarian.TOR_DOWNLOADER_BLACKHOLE: tor_title = cleanName(tor_title) logger.debug("Sending %s to blackhole" % tor_title) tor_name = str.replace(str(tor_title), ' ', '_') if tor_url and tor_url.startswith('magnet'): tor_name = tor_name + '.magnet' else: tor_name = tor_name + '.torrent' tor_path = os.path.join(lazylibrarian.TORRENT_DIR, tor_name) with open(tor_path, 'wb') as torrent_file: torrent_file.write(torrent) logger.debug('Torrent file saved: %s' % tor_title) download = True if (lazylibrarian.TOR_DOWNLOADER_UTORRENT and lazylibrarian.UTORRENT_HOST): logger.debug("Sending %s to Utorrent" % tor_title) hash = CalcTorrentHash(torrent) download = utorrent.addTorrent(tor_url, hash) if (lazylibrarian.TOR_DOWNLOADER_QBITTORRENT and lazylibrarian.QBITTORRENT_HOST): logger.debug("Sending %s to qbittorrent" % tor_title) download = qbittorrent.addTorrent(tor_url) if (lazylibrarian.TOR_DOWNLOADER_TRANSMISSION and lazylibrarian.TRANSMISSION_HOST): logger.debug("Sending %s to Transmission" % tor_title) download = transmission.addTorrent(tor_url) if (lazylibrarian.TOR_DOWNLOADER_DELUGE and lazylibrarian.DELUGE_HOST): logger.debug("Sending %s to Deluge" % tor_title) if not lazylibrarian.DELUGE_USER: # no username, talk to the webui download = deluge.addTorrent(tor_url) else: # have username, talk to the daemon client = DelugeRPCClient(lazylibrarian.DELUGE_HOST, int(lazylibrarian.DELUGE_PORT), lazylibrarian.DELUGE_USER, lazylibrarian.DELUGE_PASS) client.connect() args = {"name": tor_title} download = client.call('core.add_torrent_url', tor_url, args) logger.debug('Deluge torrent_id: %s' % download) if download and lazylibrarian.DELUGE_LABEL: labelled = client.call('label.set_torrent', download, lazylibrarian.DELUGE_LABEL) logger.debug('Deluge label returned: %s' % labelled) else: logger.warn('No torrent download method is enabled, check config.') return False if download: 
logger.debug(u'Torrent id %s has been downloaded from %s' % (download, tor_url)) myDB.action('UPDATE books SET status = "Snatched" WHERE BookID="%s"' % bookid) myDB.action('UPDATE wanted SET status = "Snatched" WHERE NZBurl="%s"' % full_url) return True else: logger.error(u'Failed to download torrent @ <a href="%s">%s</a>' % (full_url, tor_url)) myDB.action('UPDATE wanted SET status = "Failed" WHERE NZBurl="%s"' % full_url) return False
def find_book(bookid=None, queue=None):
    """Fetch one volume from the GoogleBooks API and add it to the database.

    Looks the author up on GoodReads to resolve an AuthorID (adding the
    author with status "Ignored" if not already present), upserts the book
    row with status "Wanted", then tries to improve the cover image, series
    info and work page.  Returns None; progress/problems go to the logger.

    bookid -- GoogleBooks volume id to fetch
    queue  -- accepted for interface compatibility; not referenced here
    """
    myDB = database.DBConnection()
    if not lazylibrarian.CONFIG['GB_API']:
        # NOTE(review): this only warns -- the request below is still sent
        # with an empty key; confirm whether it should return instead
        logger.warn('No GoogleBooks API key, check config')
    URL = 'https://www.googleapis.com/books/v1/volumes/' + \
        str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']

    jsonresults, in_cache = get_json_request(URL)

    if not jsonresults:
        logger.debug('No results found for %s' % bookid)
        return

    # Normalise the title: replace/strip characters that upset searching
    bookname = jsonresults['volumeInfo']['title']
    dic = {':': '.', '"': '', '\'': ''}
    bookname = replace_all(bookname, dic)

    bookname = unaccented(bookname)
    bookname = bookname.strip()  # strip whitespace

    try:
        authorname = jsonresults['volumeInfo']['authors'][0]
    except KeyError:
        logger.debug('Book %s does not contain author field, skipping' % bookname)
        return

    try:
        # warn if language is in ignore list, but user said they wanted this book
        booklang = jsonresults['volumeInfo']['language']
        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        if booklang not in valid_langs and 'All' not in valid_langs:
            logger.debug(
                'Book %s googlebooks language does not match preference, %s' %
                (bookname, booklang))
    except KeyError:
        logger.debug('Book does not have language field')
        booklang = "Unknown"

    try:
        bookpub = jsonresults['volumeInfo']['publisher']
    except KeyError:
        bookpub = ""

    series = ""
    seriesNum = ""
    try:
        # subtitles like "(... Series N)" carry the series name and number
        booksub = jsonresults['volumeInfo']['subtitle']
        try:
            series = booksub.split('(')[1].split(' Series ')[0]
        except IndexError:
            series = ""
        try:
            seriesNum = booksub.split('(')[1].split(' Series ')[1].split(')')[0]
            if seriesNum[0] == '#':
                seriesNum = seriesNum[1:]
        except IndexError:
            seriesNum = ""
    except KeyError:
        booksub = ""

    try:
        bookdate = jsonresults['volumeInfo']['publishedDate']
    except KeyError:
        bookdate = '0000-00-00'

    try:
        bookimg = jsonresults['volumeInfo']['imageLinks']['thumbnail']
    except KeyError:
        bookimg = 'images/nocover.png'

    try:
        bookrate = jsonresults['volumeInfo']['averageRating']
    except KeyError:
        bookrate = 0

    try:
        bookpages = jsonresults['volumeInfo']['pageCount']
    except KeyError:
        bookpages = 0

    try:
        bookgenre = jsonresults['volumeInfo']['categories'][0]
    except KeyError:
        bookgenre = ""

    try:
        bookdesc = jsonresults['volumeInfo']['description']
    except KeyError:
        bookdesc = ""

    try:
        # only keep an ISBN-10 identifier; anything else is treated as no isbn
        if jsonresults['volumeInfo']['industryIdentifiers'][0][
                'type'] == 'ISBN_10':
            bookisbn = jsonresults['volumeInfo']['industryIdentifiers'][0][
                'identifier']
        else:
            bookisbn = ""
    except KeyError:
        bookisbn = ""

    booklink = jsonresults['volumeInfo']['canonicalVolumeLink']
    bookrate = float(bookrate)

    # Resolve the author through GoodReads so books/authors share one ID scheme
    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if author:
        AuthorID = author['authorid']
        match = myDB.match(
            'SELECT AuthorID from authors WHERE AuthorID="%s"' % AuthorID)
        if not match:
            match = myDB.match(
                'SELECT AuthorID from authors WHERE AuthorName="%s"' % author['authorname'])
            if match:
                logger.debug(
                    '%s: Changing authorid from %s to %s' %
                    (author['authorname'], AuthorID, match['AuthorID']))
                AuthorID = match[
                    'AuthorID']  # we have a different authorid for that authorname
            else:  # no author but request to add book, add author as "ignored"
                # User hit "add book" button from a search
                controlValueDict = {"AuthorID": AuthorID}
                newValueDict = {
                    "AuthorName": author['authorname'],
                    "AuthorImg": author['authorimg'],
                    "AuthorLink": author['authorlink'],
                    "AuthorBorn": author['authorborn'],
                    "AuthorDeath": author['authordeath'],
                    "DateAdded": today(),
                    "Status": "Ignored"
                }
                myDB.upsert("authors", newValueDict, controlValueDict)
    else:
        logger.warn("No AuthorID for %s, unable to add book %s" %
                    (authorname, bookname))
        return

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorID": AuthorID,
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": bookgenre,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": booklang,
        "Status": "Wanted",
        "BookAdded": today()
    }

    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s added to the books database" % bookname)

    if 'nocover' in bookimg or 'nophoto' in bookimg:
        # try to get a cover from librarything
        workcover = getBookCover(bookid)
        if workcover:
            logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)

    elif bookimg and bookimg.startswith('http'):
        # cache the remote cover locally so the UI doesn't hotlink it
        link, success = cache_img("book", bookid, bookimg)
        if success:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)
        else:
            logger.debug('Failed to cache image for %s' % bookimg)

    if lazylibrarian.CONFIG['ADD_SERIES']:
        # prefer series info from librarything
        seriesdict = getWorkSeries(bookid)
        if seriesdict:
            logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
        else:
            if series:
                seriesdict = {cleanName(unaccented(series)): seriesNum}
        setSeries(seriesdict, bookid)

    worklink = getWorkPage(bookid)
    if worklink:
        controlValueDict = {"BookID": bookid}
        newValueDict = {"WorkPage": worklink}
        myDB.upsert("books", newValueDict, controlValueDict)
def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped", refresh=False):
    """Fetch every book for an author from the GoodReads API and upsert them.

    Walks the paged author/list XML feed.  For each book, determines the
    language (ISBN prefix tables, then the cached 'languages' table, then
    LibraryThing, then the GoodReads book page as a last resort), rejects
    unwanted languages, bad titles, future publications and duplicates,
    then adds/updates the surviving rows in 'books'.  Finally refreshes
    the author's "last book" details and writes a counters row to 'stats'.

    authorid/authorname -- GoodReads author to process
    bookstatus -- status to give newly added books
    refresh -- when True, bypass the XML cache and re-cache book covers
    """
    try:
        # counters reported in the 'stats' table at the end
        api_hits = 0
        gr_lang_hits = 0
        lt_lang_hits = 0
        gb_lang_change = 0
        cache_hits = 0
        not_cached = 0
        URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(self.params)

        # Artist is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)

        try:
            rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
        except Exception as e:
            logger.error("Error fetching author books: %s" % str(e))
            return
        if rootxml is None:
            logger.debug("Error requesting author books")
            return
        if not in_cache:
            api_hits += 1
        resultxml = rootxml.getiterator('book')

        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])

        resultsCount = 0
        removedResults = 0
        duplicates = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0

        if not len(resultxml):
            logger.warn('[%s] No books found for author with ID: %s' % (authorname, authorid))
        else:
            logger.debug("[%s] Now processing books with GoodReads API" % authorname)
            logger.debug(u"url " + URL)

            authorNameResult = rootxml.find('./author/name').text
            # Goodreads sometimes puts extra whitespace in the author names!
            authorNameResult = ' '.join(authorNameResult.split())
            logger.debug(u"GoodReads author name [%s]" % authorNameResult)
            loopCount = 1

            while resultxml:
                for book in resultxml:
                    total_count += 1

                    if book.find('publication_year').text is None:
                        pubyear = "0000"
                    else:
                        pubyear = book.find('publication_year').text

                    try:
                        bookimg = book.find('image_url').text
                        if 'nocover' in bookimg:
                            bookimg = 'images/nocover.png'
                    except (KeyError, AttributeError):
                        bookimg = 'images/nocover.png'

                    bookLanguage = "Unknown"
                    find_field = "id"
                    isbn = ""
                    isbnhead = ""
                    if "All" not in valid_langs:  # do we care about language
                        # prefer isbn10 (language group is chars 0-2),
                        # fall back to isbn13 (group is chars 3-5)
                        if book.find('isbn').text:
                            find_field = "isbn"
                            isbn = book.find('isbn').text
                            isbnhead = isbn[0:3]
                        else:
                            if book.find('isbn13').text:
                                find_field = "isbn13"
                                isbn = book.find('isbn13').text
                                isbnhead = isbn[3:6]
                        # Try to use shortcut of ISBN identifier codes described here...
                        # https://en.wikipedia.org/wiki/List_of_ISBN_identifier_groups
                        if isbnhead:
                            if find_field == "isbn13" and isbn.startswith('979'):
                                for item in lazylibrarian.isbn_979_dict:
                                    if isbnhead.startswith(item):
                                        bookLanguage = lazylibrarian.isbn_979_dict[item]
                                        break
                                if bookLanguage != "Unknown":
                                    logger.debug("ISBN979 returned %s for %s" % (bookLanguage, isbnhead))
                            elif (find_field == "isbn") or (find_field == "isbn13" and isbn.startswith('978')):
                                for item in lazylibrarian.isbn_978_dict:
                                    if isbnhead.startswith(item):
                                        bookLanguage = lazylibrarian.isbn_978_dict[item]
                                        break
                                if bookLanguage != "Unknown":
                                    logger.debug("ISBN978 returned %s for %s" % (bookLanguage, isbnhead))

                        if bookLanguage == "Unknown" and isbnhead:
                            # Nothing in the isbn dictionary, try any cached results
                            match = myDB.match('SELECT lang FROM languages where isbn = "%s"' % isbnhead)
                            if match:
                                bookLanguage = match['lang']
                                cache_hits += 1
                                logger.debug("Found cached language [%s] for %s [%s]" %
                                             (bookLanguage, find_field, isbnhead))
                            else:
                                # no match in cache, try searching librarything for a language code using the isbn
                                # if no language found, librarything return value is "invalid" or "unknown"
                                # returns plain text, not xml
                                BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + isbn
                                try:
                                    librarything_wait()
                                    resp = urllib2.urlopen(BOOK_URL, timeout=30).read()
                                    lt_lang_hits += 1
                                    logger.debug("LibraryThing reports language [%s] for %s" % (resp, isbnhead))
                                    if 'invalid' in resp or 'Unknown' in resp:
                                        bookLanguage = "Unknown"
                                    else:
                                        bookLanguage = resp  # found a language code
                                        myDB.action('insert into languages values ("%s", "%s")' %
                                                    (isbnhead, bookLanguage))
                                        logger.debug(u"LT language %s: %s" % (isbnhead, bookLanguage))
                                except Exception as e:
                                    logger.error("Error finding LT language result for [%s], %s" % (isbn, str(e)))

                        if bookLanguage == "Unknown":
                            # still no earlier match, we'll have to search the goodreads api
                            try:
                                if book.find(find_field).text:
                                    BOOK_URL = 'http://www.goodreads.com/book/show?id=' + \
                                        book.find(find_field).text + '&' + urllib.urlencode(self.params)
                                    logger.debug(u"Book URL: " + BOOK_URL)

                                    # rate-limit: at most one goodreads hit per second
                                    time_now = int(time.time())
                                    if time_now <= lazylibrarian.LAST_GOODREADS:
                                        time.sleep(1)

                                    bookLanguage = ""
                                    try:
                                        BOOK_rootxml, in_cache = get_xml_request(BOOK_URL)
                                        if BOOK_rootxml is None:
                                            logger.debug('Error requesting book language code')
                                        else:
                                            if not in_cache:
                                                # only update last_goodreads if the result wasn't found in the cache
                                                lazylibrarian.LAST_GOODREADS = time_now
                                            try:
                                                bookLanguage = BOOK_rootxml.find('./book/language_code').text
                                            except Exception as e:
                                                logger.debug("Error finding language_code in book xml: %s" % str(e))
                                    except Exception as e:
                                        logger.debug("Error getting book xml: %s" % str(e))

                                    # NOTE(review): if get_xml_request raised, in_cache here
                                    # still holds the value from the previous request
                                    if not in_cache:
                                        gr_lang_hits += 1
                                    if not bookLanguage:
                                        bookLanguage = "Unknown"

                                    # At this point, give up?
                                    # WhatWork on author/title doesn't give us a language.
                                    # It might give us the "original language" of the book (but not always)
                                    # and our copy might not be in the original language anyway
                                    # eg "The Girl With the Dragon Tattoo" original language Swedish
                                    # If we have an isbn, try WhatISBN to get alternatives
                                    # in case any of them give us a language, but it seems if thinglang doesn't
                                    # have a language for the first isbn code, it doesn't for any of the
                                    # alternatives either
                                    # Goodreads search results don't include the language. Although sometimes
                                    # it's in the html page, it's not in the xml results

                                    if isbnhead != "":
                                        # if GR didn't give an isbn we can't cache it, just use language for this book
                                        myDB.action('insert into languages values ("%s", "%s")' %
                                                    (isbnhead, bookLanguage))
                                        logger.debug("GoodReads reports language [%s] for %s" %
                                                     (bookLanguage, isbnhead))
                                    else:
                                        not_cached += 1

                                    logger.debug(u"GR language: " + bookLanguage)
                                else:
                                    logger.debug("No %s provided for [%s]" % (find_field, book.find('title').text))
                                    # continue

                            except Exception as e:
                                logger.debug(u"Goodreads language search failed: %s" % str(e))

                        if bookLanguage not in valid_langs:
                            logger.debug('Skipped %s with language %s' % (book.find('title').text, bookLanguage))
                            ignored += 1
                            continue

                    bookname = book.find('title').text
                    bookid = book.find('id').text
                    bookdesc = book.find('description').text
                    bookisbn = book.find('isbn').text
                    bookpub = book.find('publisher').text
                    booklink = book.find('link').text
                    bookrate = float(book.find('average_rating').text)
                    bookpages = book.find('num_pages').text
                    bookname = unaccented(bookname)

                    bookname, booksub = split_title(authorNameResult, bookname)

                    dic = {':': '.', '"': ''}  # do we need to strip apostrophes , '\'': ''}
                    bookname = replace_all(bookname, dic)
                    bookname = bookname.strip()  # strip whitespace
                    booksub = replace_all(booksub, dic)
                    booksub = booksub.strip()  # strip whitespace
                    if booksub:
                        series, seriesNum = bookSeries(booksub)
                    else:
                        series, seriesNum = bookSeries(bookname)

                    rejected = False
                    check_status = False

                    if re.match('[^\w-]', bookname):  # reject books with bad characters in title
                        logger.debug(u"removed result [" + bookname + "] for bad characters")
                        removedResults += 1
                        rejected = True

                    if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                        # compare year strings, eg "2021" > "2019"
                        if pubyear > today()[:4]:
                            logger.debug('Rejecting %s, future publication date %s' % (bookname, pubyear))
                            removedResults += 1
                            rejected = True

                    if not rejected and not bookname:
                        logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorNameResult))
                        removedResults += 1
                        rejected = True

                    if not rejected:
                        cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                        cmd += ' and BookName = "%s" COLLATE NOCASE and AuthorName = "%s" COLLATE NOCASE' % \
                               (bookname, authorNameResult.replace('"', '""'))
                        match = myDB.match(cmd)
                        if match:
                            if match['BookID'] != bookid:
                                # we have a different book with this author/title already
                                logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                             (match['BookID'], authorNameResult, bookname, bookid))
                                duplicates += 1
                                rejected = True

                    if not rejected:
                        cmd = 'SELECT AuthorName,BookName FROM books,authors'
                        cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=%s' % bookid
                        match = myDB.match(cmd)
                        if match:  # we have a book with this bookid already
                            if bookname != match['BookName'] or authorNameResult != match['AuthorName']:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                             (bookid, authorNameResult, bookname,
                                              match['AuthorName'], match['BookName']))
                            else:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                             (bookid, authorNameResult, bookname))
                                check_status = True
                            duplicates += 1
                            rejected = True

                    if check_status or not rejected:
                        existing_book = myDB.match('SELECT Status,Manual FROM books WHERE BookID = "%s"' % bookid)
                        if existing_book:
                            book_status = existing_book['Status']
                            locked = existing_book['Manual']
                            if locked is None:
                                locked = False
                            elif locked.isdigit():
                                locked = bool(int(locked))
                        else:
                            book_status = bookstatus  # new_book status, or new_author status
                            locked = False

                        # Is the book already in the database?
                        # Leave alone if locked or status "ignore"
                        if not locked and book_status != "Ignored":
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {
                                "AuthorID": authorid,
                                "BookName": bookname,
                                "BookSub": booksub,
                                "BookDesc": bookdesc,
                                "BookIsbn": bookisbn,
                                "BookPub": bookpub,
                                "BookGenre": "",
                                "BookImg": bookimg,
                                "BookLink": booklink,
                                "BookRate": bookrate,
                                "BookPages": bookpages,
                                "BookDate": pubyear,
                                "BookLang": bookLanguage,
                                "Status": book_status,
                                "BookAdded": today()
                            }
                            resultsCount += 1
                            updated = False

                            myDB.upsert("books", newValueDict, controlValueDict)
                            logger.debug(u"Book found: " + book.find('title').text + " " + pubyear)

                            if 'nocover' in bookimg or 'nophoto' in bookimg:
                                # try to get a cover from librarything
                                workcover = getBookCover(bookid)
                                if workcover:
                                    logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": workcover}
                                    myDB.upsert("books", newValueDict, controlValueDict)
                                    updated = True

                            elif bookimg and bookimg.startswith('http'):
                                # cache the remote cover locally
                                link, success = cache_img("book", bookid, bookimg, refresh=refresh)
                                if success:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": link}
                                    myDB.upsert("books", newValueDict, controlValueDict)
                                    updated = True
                                else:
                                    logger.debug('Failed to cache image for %s' % bookimg)

                            seriesdict = {}
                            if lazylibrarian.CONFIG['ADD_SERIES']:
                                # prefer series info from librarything
                                seriesdict = getWorkSeries(bookid)
                                if seriesdict:
                                    logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
                                    updated = True
                                else:
                                    if series:
                                        seriesdict = {cleanName(unaccented(series)): seriesNum}
                                setSeries(seriesdict, bookid)

                            new_status = setStatus(bookid, seriesdict, bookstatus)
                            if not new_status == book_status:
                                book_status = new_status
                                updated = True

                            worklink = getWorkPage(bookid)
                            if worklink:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"WorkPage": worklink}
                                myDB.upsert("books", newValueDict, controlValueDict)

                            if not existing_book:
                                logger.debug(u"[%s] Added book: %s [%s] status %s" %
                                             (authorname, bookname, bookLanguage, book_status))
                                added_count += 1
                            elif updated:
                                logger.debug(u"[%s] Updated book: %s [%s] status %s" %
                                             (authorname, bookname, bookLanguage, book_status))
                                updated_count += 1
                        else:
                            book_ignore_count += 1

                # fetch the next page of the author's book list
                loopCount += 1
                URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + \
                      urllib.urlencode(self.params) + '&page=' + str(loopCount)
                resultxml = None
                try:
                    rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
                    if rootxml is None:
                        logger.debug('Error requesting next page of results')
                    else:
                        resultxml = rootxml.getiterator('book')
                        if not in_cache:
                            api_hits += 1
                except Exception as e:
                    resultxml = None
                    logger.error("Error finding next page of results: %s" % str(e))

                if resultxml:
                    if all(False for _ in resultxml):  # returns True if iterator is empty
                        resultxml = None

        deleteEmptySeries()

        # refresh the author's "last book" summary from the newest non-ignored book
        lastbook = myDB.match('SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID="%s" \
                    AND Status != "Ignored" order by BookDate DESC' % authorid)

        if lastbook:
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
            lastbookimg = lastbook['BookImg']
        else:
            lastbookname = ""
            lastbooklink = ""
            lastbookdate = ""
            lastbookimg = ""

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate,
            "LastBookImg": lastbookimg
        }
        myDB.upsert("authors", newValueDict, controlValueDict)

        # This is here because GoodReads sometimes has several entries with the same BookID!
        modified_count = added_count + updated_count

        logger.debug("Found %s result%s" % (total_count, plural(total_count)))
        logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
        logger.debug(
            "Removed %s bad character or no-name result%s" % (removedResults, plural(removedResults)))
        logger.debug("Removed %s duplicate result%s" % (duplicates, plural(duplicates)))
        logger.debug("Found %s book%s by author marked as Ignored" %
                     (book_ignore_count, plural(book_ignore_count)))
        logger.debug("Imported/Updated %s book%s" % (modified_count, plural(modified_count)))

        myDB.action('insert into stats values ("%s", %i, %i, %i, %i, %i, %i, %i, %i, %i)' %
                    (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits,
                     gb_lang_change, cache_hits, ignored, removedResults, not_cached, duplicates))

        if refresh:
            logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                        (authorname, added_count, plural(added_count),
                         updated_count, plural(updated_count)))
        else:
            logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                        (authorname, added_count, plural(added_count)))

    except Exception:
        logger.error('Unhandled exception in GR.get_author_books: %s' % traceback.format_exc())
def dbupgrade(db_current_version): try: myDB = database.DBConnection() db_version = 0 result = myDB.match('PRAGMA user_version') if result and result[0]: value = str(result[0]) if value.isdigit(): db_version = int(value) check = myDB.match('PRAGMA integrity_check') if check and check[0]: result = check[0] if result == 'ok': logger.debug('Database integrity check: %s' % result) else: logger.error('Database integrity check: %s' % result) # should probably abort now if db_version < db_current_version: myDB = database.DBConnection() if db_version < 1: if not has_column(myDB, "authors", "AuthorID"): # it's a new database. Create tables but no need for any upgrading db_version = db_current_version lazylibrarian.UPDATE_MSG = 'Creating new database, version %s' % db_version else: lazylibrarian.UPDATE_MSG = 'Updating database to version %s, current version is %s' % ( db_current_version, db_version) logger.info(lazylibrarian.UPDATE_MSG) myDB.action( 'CREATE TABLE IF NOT EXISTS authors (AuthorID TEXT UNIQUE, AuthorName TEXT UNIQUE, \ AuthorImg TEXT, AuthorLink TEXT, DateAdded TEXT, Status TEXT, LastBook TEXT, LastBookImg TEXT, \ LastLink Text, LastDate TEXT, HaveBooks INTEGER, TotalBooks INTEGER, AuthorBorn TEXT, \ AuthorDeath TEXT, UnignoredBooks INTEGER, Manual TEXT)') myDB.action('CREATE TABLE IF NOT EXISTS books (AuthorID TEXT, \ BookName TEXT, BookSub TEXT, BookDesc TEXT, BookGenre TEXT, BookIsbn TEXT, BookPub TEXT, \ BookRate INTEGER, BookImg TEXT, BookPages INTEGER, BookLink TEXT, BookID TEXT UNIQUE, BookFile TEXT, \ BookDate TEXT, BookLang TEXT, BookAdded TEXT, Status TEXT, WorkPage TEXT, Manual TEXT)' ) myDB.action( 'CREATE TABLE IF NOT EXISTS wanted (BookID TEXT, NZBurl TEXT, NZBtitle TEXT, NZBdate TEXT, \ NZBprov TEXT, Status TEXT, NZBsize TEXT, AuxInfo TEXT, NZBmode TEXT, Source TEXT, DownloadID TEXT)' ) myDB.action( 'CREATE TABLE IF NOT EXISTS pastissues AS SELECT * FROM wanted WHERE 0' ) # same columns myDB.action( 'CREATE TABLE IF NOT EXISTS magazines (Title 
TEXT UNIQUE, Regex TEXT, Status TEXT, \ MagazineAdded TEXT, LastAcquired TEXT, IssueDate TEXT, IssueStatus TEXT, Reject TEXT, \ LatestCover TEXT)') myDB.action( 'CREATE TABLE IF NOT EXISTS languages (isbn TEXT, lang TEXT)' ) myDB.action( 'CREATE TABLE IF NOT EXISTS issues (Title TEXT, IssueID TEXT UNIQUE, IssueAcquired TEXT, \ IssueDate TEXT, IssueFile TEXT)') myDB.action( 'CREATE TABLE IF NOT EXISTS stats (authorname text, GR_book_hits int, GR_lang_hits int, \ LT_lang_hits int, GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int, \ duplicates int)') myDB.action( 'CREATE TABLE IF NOT EXISTS series (SeriesID INTEGER PRIMARY KEY, SeriesName TEXT, \ Status TEXT)') myDB.action( 'CREATE TABLE IF NOT EXISTS member (SeriesID INTEGER, BookID TEXT, SeriesNum TEXT)' ) myDB.action( 'CREATE TABLE IF NOT EXISTS seriesauthors (SeriesID INTEGER, AuthorID TEXT)' ) # These are the incremental changes before database versioning was introduced. # Old database tables might already have these incorporated depending on version, so we need to check... if db_version < 1: if not has_column(myDB, "books", "BookSub"): lazylibrarian.UPDATE_MSG = 'Updating database to hold book subtitles.' 
logger.debug(lazylibrarian.UPDATE_MSG) myDB.action('ALTER TABLE books ADD COLUMN BookSub TEXT') if not has_column(myDB, "books", "BookSub"): lazylibrarian.UPDATE_MSG = 'Updating database to hold book publisher' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action('ALTER TABLE books ADD COLUMN BookPub TEXT') if not has_column(myDB, "books", "BookGenre"): lazylibrarian.UPDATE_MSG = 'Updating database to hold bookgenre' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action('ALTER TABLE books ADD COLUMN BookGenre TEXT') if not has_column(myDB, "books", "BookFile"): lazylibrarian.UPDATE_MSG = 'Updating database to hold book filename' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action('ALTER TABLE books ADD COLUMN BookFile TEXT') if not has_column(myDB, "wanted", "AuxInfo"): lazylibrarian.UPDATE_MSG = 'Updating database to hold AuxInfo' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action('ALTER TABLE wanted ADD COLUMN AuxInfo TEXT') if not has_column(myDB, "wanted", "NZBsize"): lazylibrarian.UPDATE_MSG = 'Updating database to hold NZBsize' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action('ALTER TABLE wanted ADD COLUMN NZBsize TEXT') if not has_column(myDB, "wanted", "NZBmode"): lazylibrarian.UPDATE_MSG = 'Updating database to hold NZBmode' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action('ALTER TABLE wanted ADD COLUMN NZBmode TEXT') if not has_column(myDB, "authors", "UnignoredBooks"): lazylibrarian.UPDATE_MSG = 'Updating database to hold UnignoredBooks' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action( 'ALTER TABLE authors ADD COLUMN UnignoredBooks INTEGER' ) if not has_column(myDB, "magazines", "IssueStatus"): lazylibrarian.UPDATE_MSG = 'Updating database to hold IssueStatus' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action( 'ALTER TABLE magazines ADD COLUMN IssueStatus TEXT') addedWorkPage = False if not has_column(myDB, "books", "WorkPage"): lazylibrarian.UPDATE_MSG = 'Updating database to hold WorkPage' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action('ALTER 
TABLE books ADD COLUMN WorkPage TEXT') addedWorkPage = True addedSeries = False if not has_column(myDB, "series", "SeriesID") and not has_column( myDB, "books", "Series"): lazylibrarian.UPDATE_MSG = 'Updating database to hold Series' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action('ALTER TABLE books ADD COLUMN Series TEXT') addedSeries = True # SeriesOrder shouldn't be an integer, some later written books # and novellas logically go inbetween books of the main series, # and their SeriesOrder is not an integer, eg 1.5 # so we need to update SeriesOrder to store as text. # Because sqlite can't drop columns we create a new column SeriesNum, # inherit the old column values, and use SeriesNum instead if not has_column(myDB, "books", "SeriesNum") and has_column( myDB, "books", "SeriesOrder"): # no SeriesNum column, so create one lazylibrarian.UPDATE_MSG = 'Updating books to hold SeriesNum' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action('ALTER TABLE books ADD COLUMN SeriesNum TEXT') myDB.action('UPDATE books SET SeriesNum = SeriesOrder') myDB.action('UPDATE books SET SeriesOrder = Null') addedIssues = False if not has_column(myDB, "issues", "Title"): lazylibrarian.UPDATE_MSG = 'Updating database to hold Issues table' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action( 'CREATE TABLE issues (Title TEXT, IssueID TEXT, IssueAcquired TEXT, IssueDate TEXT, IssueFile TEXT)' ) addedIssues = True if not has_column(myDB, "issues", "IssueID"): lazylibrarian.UPDATE_MSG = 'Updating Issues table to hold IssueID' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action('ALTER TABLE issues ADD COLUMN IssueID TEXT') addedIssues = True myDB.action('DROP TABLE if exists capabilities') if addedIssues: try: magazinescan.magazineScan() except Exception as e: logger.debug("Failed to scan magazines, %s" % str(e)) if addedWorkPage: try: lazylibrarian.UPDATE_MSG = 'Adding WorkPage to existing books' logger.debug(lazylibrarian.UPDATE_MSG) threading.Thread(target=bookwork.setWorkPages, 
name="ADDWORKPAGE", args=[]).start() except Exception as e: logger.debug("Failed to update WorkPages, %s" % str(e)) if addedSeries: try: books = myDB.select( 'SELECT BookID, BookName FROM books') if books: lazylibrarian.UPDATE_MSG = 'Adding series to existing books' logger.debug(lazylibrarian.UPDATE_MSG) tot = len(books) cnt = 0 for book in books: cnt += 1 lazylibrarian.UPDATE_MSG = 'Adding series to existing books: %s of %s' % ( cnt, tot) series, seriesNum = bookSeries( book["BookName"]) if series: controlValueDict = { "BookID": book["BookID"] } newValueDict = { "series": series, "seriesNum": seriesNum } myDB.upsert("books", newValueDict, controlValueDict) except Exception as e: logger.error('Error: ' + str(e)) if db_version < 2: try: results = myDB.select( 'SELECT BookID,NZBsize FROM wanted WHERE NZBsize LIKE "% MB"' ) if results: lazylibrarian.UPDATE_MSG = 'Removing units from wanted table' logger.debug(lazylibrarian.UPDATE_MSG) tot = len(results) cnt = 0 for units in results: cnt += 1 lazylibrarian.UPDATE_MSG = 'Removing units from wanted table: %s of %s' % ( cnt, tot) nzbsize = units["NZBsize"] nzbsize = nzbsize.split(' ')[0] myDB.action( 'UPDATE wanted SET NZBsize = "%s" WHERE BookID = "%s"' % (nzbsize, units["BookID"])) except Exception as e: logger.error('Error: ' + str(e)) if db_version < 3: if has_column(myDB, "books", "SeriesOrder"): lazylibrarian.UPDATE_MSG = 'Removing SeriesOrder from books table' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action( 'CREATE TABLE IF NOT EXISTS temp_books (AuthorID TEXT, AuthorName TEXT, AuthorLink TEXT, \ BookName TEXT, BookSub TEXT, BookDesc TEXT, BookGenre TEXT, BookIsbn TEXT, BookPub TEXT, \ BookRate INTEGER, BookImg TEXT, BookPages INTEGER, BookLink TEXT, BookID TEXT UNIQUE, \ BookFile TEXT, BookDate TEXT, BookLang TEXT, BookAdded TEXT, Status TEXT, Series TEXT, \ SeriesNum TEXT, WorkPage TEXT)') myDB.action( 'INSERT INTO temp_books SELECT AuthorID,AuthorName,AuthorLink,BookName,BookSub, \ 
BookDesc,BookGenre,BookIsbn,BookPub,BookRate,BookImg,BookPages,BookLink,BookID, \ BookFile,BookDate,BookLang,BookAdded,Status,Series,SeriesNum,WorkPage FROM books' ) myDB.action('DROP TABLE books') myDB.action('ALTER TABLE temp_books RENAME TO books') if not has_column(myDB, "pastissues", "BookID"): lazylibrarian.UPDATE_MSG = 'Moving magazine past issues into new table' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action( 'CREATE TABLE pastissues AS SELECT * FROM wanted WHERE Status="Skipped" AND length(AuxInfo) > 0' ) myDB.action( 'DELETE FROM wanted WHERE Status="Skipped" AND length(AuxInfo) > 0' ) if db_version < 4: if not has_column(myDB, "stats", "duplicates"): lazylibrarian.UPDATE_MSG = 'Updating stats table to hold duplicates' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action('ALTER TABLE stats ADD COLUMN duplicates INT') if db_version < 5: issues = myDB.select( 'SELECT IssueID,IssueDate from issues WHERE length(IssueDate) < 4 and length(IssueDate) > 0' ) if issues: lazylibrarian.UPDATE_MSG = 'Updating issues table to hold 4 digit issue numbers' logger.debug(lazylibrarian.UPDATE_MSG) tot = len(issues) cnt = 0 for issue in issues: cnt += 1 lazylibrarian.UPDATE_MSG = 'Updating issues table 4 digits: %s of %s' % ( cnt, tot) issueid = issue['IssueID'] issuedate = str(issue['IssueDate']) issuedate = issuedate.zfill(4) myDB.action( 'UPDATE issues SET IssueDate="%s" WHERE IssueID="%s"' % (issuedate, issueid)) mags = myDB.select( 'SELECT Title,IssueDate from magazines WHERE length(IssueDate) < 4 and length(IssueDate) > 0' ) if mags: lazylibrarian.UPDATE_MSG = 'Updating magazines table to 4 digits' logger.debug(lazylibrarian.UPDATE_MSG) tot = len(mags) cnt = 0 for mag in mags: cnt += 1 lazylibrarian.UPDATE_MSG = 'Updating magazines table to 4 digits: %s of %s' % ( cnt, tot) title = mag['Title'] issuedate = str(mag['IssueDate']) issuedate = issuedate.zfill(4) myDB.action( 'UPDATE magazines SET IssueDate="%s" WHERE Title="%s"' % (issuedate, title)) if db_version < 6: 
if not has_column(myDB, "books", "Manual"): lazylibrarian.UPDATE_MSG = 'Updating books table to hold Manual setting' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action('ALTER TABLE books ADD COLUMN Manual TEXT') if db_version < 7: if not has_column(myDB, "wanted", "Source"): lazylibrarian.UPDATE_MSG = 'Updating wanted table to hold Source and DownloadID' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action('ALTER TABLE wanted ADD COLUMN Source TEXT') myDB.action( 'ALTER TABLE wanted ADD COLUMN DownloadID TEXT') if db_version < 8: src = os.path.join(lazylibrarian.PROG_DIR, 'data/images/cache/') dst = lazylibrarian.CACHEDIR images = myDB.select( 'SELECT AuthorID, AuthorImg FROM authors WHERE AuthorImg LIKE "images/cache/%"' ) if images: logger.debug('Moving author images to new location') tot = len(images) cnt = 0 for image in images: cnt += 1 lazylibrarian.UPDATE_MSG = "Moving author images to new location: %s of %s" % ( cnt, tot) img = image['AuthorImg'] img = img[7:] myDB.action( 'UPDATE authors SET AuthorImg="%s" WHERE AuthorID="%s"' % (img, image['AuthorID'])) img = img[6:] srcfile = os.path.join(src, img) if os.path.isfile(srcfile): try: shutil.move(os.path.join(src, img), os.path.join(dst, img)) except Exception as e: logger.warn("dbupgrade: %s" % str(e)) logger.debug("Author Image cache updated") images = myDB.select( 'SELECT BookID, BookImg FROM books WHERE BookImg LIKE "images/cache/%"' ) if images: logger.debug('Moving book images to new location') tot = len(images) cnt = 0 for image in images: cnt += 1 lazylibrarian.UPDATE_MSG = "Moving book images to new location: %s of %s" % ( cnt, tot) img = image['BookImg'] img = img[7:] myDB.action( 'UPDATE books SET BookImg="%s" WHERE BookID="%s"' % (img, image['BookID'])) img = img[6:] srcfile = os.path.join(src, img) if os.path.isfile(srcfile): try: shutil.move(srcfile, os.path.join(dst, img)) except Exception as e: logger.warn("dbupgrade: %s" % str(e)) logger.debug("Book Image cache updated") if db_version < 9: if 
not has_column(myDB, "magazines", "Reject"): # remove frequency column, rename regex to reject, add new regex column for searches lazylibrarian.UPDATE_MSG = 'Updating magazines table' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action( 'CREATE TABLE IF NOT EXISTS temp_table (Title TEXT, Regex TEXT, Status TEXT, \ MagazineAdded TEXT, LastAcquired TEXT, IssueDate TEXT, IssueStatus TEXT, Reject TEXT)' ) myDB.action( 'INSERT INTO temp_table SELECT Title, Regex, Status, MagazineAdded, LastAcquired, \ IssueDate, IssueStatus, Regex FROM magazines') myDB.action('DROP TABLE magazines') myDB.action('ALTER TABLE temp_table RENAME TO magazines') myDB.action('UPDATE magazines SET Regex = Null') if db_version < 10: # make sure columns in pastissues match those in wanted table # needed when upgrading from old 3rd party packages (eg freenas) myDB.action('DROP TABLE pastissues') myDB.action( 'CREATE TABLE pastissues AS SELECT * FROM wanted WHERE 0' ) # same columns, but empty table if db_version < 11: # keep last book image if not has_column(myDB, "authors", "LastBookImg"): lazylibrarian.UPDATE_MSG = 'Updating author table to hold last book image' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action( 'ALTER TABLE authors ADD COLUMN LastBookImg TEXT') books = myDB.select( 'SELECT AuthorID, AuthorName, LastBook from authors') if books: for book in books: lazylibrarian.UPDATE_MSG = 'Updating last book image for %s' % book[ 'AuthorName'] if book['LastBook']: match = myDB.match( 'SELECT BookImg from books WHERE AuthorID="%s" AND BookName="%s"' % (book['AuthorID'], book['LastBook'])) if match: myDB.action( 'UPDATE authors SET LastBookImg="%s" WHERE AuthorID=%s' % (match['BookImg'], book['AuthorID'])) if db_version < 12: # keep last magazine issue image if not has_column(myDB, "Magazines", "LatestCover"): lazylibrarian.UPDATE_MSG = 'Updating magazine table to hold last issue image' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action( 'ALTER TABLE magazines ADD COLUMN LatestCover TEXT') mags 
= myDB.select( 'SELECT Title, LastAcquired from magazines') if mags: for mag in mags: lazylibrarian.UPDATE_MSG = 'Updating last issue image for %s' % mag[ 'Title'] match = myDB.match( 'SELECT IssueFile from issues WHERE IssueAcquired="%s" AND Title="%s"' % (mag['LastAcquired'], mag['Title'])) if match: coverfile = os.path.splitext( match['IssueFile'])[0] + '.jpg' if os.path.exists(coverfile): myDB.action( 'UPDATE magazines SET LatestCover="%s" WHERE Title="%s"' % (coverfile, mag['Title'])) if db_version < 13: if not has_column(myDB, "authors", "Manual"): lazylibrarian.UPDATE_MSG = 'Updating authors table to hold Manual setting' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action('ALTER TABLE authors ADD COLUMN Manual TEXT') if db_version < 14: src = lazylibrarian.CACHEDIR try: os.mkdir(os.path.join(src, 'author')) except OSError as e: if e.errno is not 17: # already exists is ok logger.debug('mkdir author cache reports: %s' % str(e)) query = 'SELECT AuthorName, AuthorID, AuthorImg FROM authors ' query += 'WHERE AuthorImg LIKE "cache/%" ' query += 'AND AuthorImg NOT LIKE "cache/author/%"' images = myDB.select(query) if images: tot = len(images) logger.debug('Moving %s author images to new location' % tot) cnt = 0 for image in images: cnt += 1 lazylibrarian.UPDATE_MSG = "Moving author images to new location: %s of %s" % ( cnt, tot) try: img = image['AuthorImg'] img = img.rsplit('/', 1)[1] srcfile = os.path.join(src, img) if os.path.isfile(srcfile): try: shutil.move( srcfile, os.path.join(src, "author", img)) myDB.action( 'UPDATE authors SET AuthorImg="cache/author/%s" WHERE AuthorID="%s"' % (img, image['AuthorID'])) except Exception as e: logger.warn("dbupgrade: %s" % str(e)) except Exception as e: logger.warn( 'Failed to update author image for %s: %s' % (image['AuthorName'], str(e))) logger.debug("Author Image cache updated") try: os.mkdir(os.path.join(src, 'book')) except OSError as e: if e.errno is not 17: # already exists is ok logger.debug('mkdir book cache 
reports: %s' % str(e)) query = 'SELECT BookName, BookID, BookImg FROM books ' query += 'WHERE BookImg LIKE "cache/%" ' query += 'AND BookImg NOT LIKE "cache/book/%"' images = myDB.select(query) if images: tot = len(images) logger.debug('Moving %s book images to new location' % tot) cnt = 0 for image in images: cnt += 1 lazylibrarian.UPDATE_MSG = "Moving book images to new location: %s of %s" % ( cnt, tot) try: img = image['BookImg'] img = img.rsplit('/', 1)[1] srcfile = os.path.join(src, img) if os.path.isfile(srcfile): try: shutil.move(srcfile, os.path.join(src, "book", img)) myDB.action( 'UPDATE books SET BookImg="cache/book/%s" WHERE BookID="%s"' % (img, image['BookID'])) except Exception as e: logger.warn("dbupgrade: %s" % str(e)) except Exception as e: logger.warn( 'Failed to update book image for %s: %s' % (image['BookName'], str(e))) logger.debug("Book Image cache updated") # at this point there should be no more .jpg files in the root of the cachedir # any that are still there are for books/authors deleted from database # or magazine latest issue cover files that get copied as required for image in os.listdir(src): if image.endswith('.jpg'): os.remove(os.path.join(src, image)) if db_version < 15: myDB.action( 'CREATE TABLE IF NOT EXISTS series (SeriesID INTEGER PRIMARY KEY, SeriesName TEXT, \ AuthorID TEXT, Status TEXT)') myDB.action( 'CREATE TABLE IF NOT EXISTS member (SeriesID INTEGER, BookID TEXT, SeriesNum TEXT)' ) if has_column(myDB, "books", "SeriesNum"): lazylibrarian.UPDATE_MSG = 'Populating series and member tables' books = myDB.select( 'SELECT BookID, Series, SeriesNum from books') if books: tot = len(books) logger.debug("Updating book series for %s book%s" % (tot, plural(tot))) cnt = 0 for book in books: cnt += 1 lazylibrarian.UPDATE_MSG = "Updating book series: %s of %s" % ( cnt, tot) seriesdict = getWorkSeries(book['BookID']) if not seriesdict: # no workpage series, use the current values if present if book['Series'] and book['SeriesNum']: 
seriesdict = { cleanName(unaccented(book['Series'])): book['SeriesNum'] } setSeries(seriesdict, book['BookID'], seriesauthors=False) # deleteEmptySeries # shouldn't be any on first run? lazylibrarian.UPDATE_MSG = "Book series update complete" logger.debug(lazylibrarian.UPDATE_MSG) lazylibrarian.UPDATE_MSG = 'Removing seriesnum from books table' myDB.action( 'CREATE TABLE IF NOT EXISTS temp_table (AuthorID TEXT, AuthorName TEXT, AuthorLink TEXT, \ BookName TEXT, BookSub TEXT, BookDesc TEXT, BookGenre TEXT, BookIsbn TEXT, BookPub TEXT, \ BookRate INTEGER, BookImg TEXT, BookPages INTEGER, BookLink TEXT, BookID TEXT UNIQUE, \ BookFile TEXT, BookDate TEXT, BookLang TEXT, BookAdded TEXT, Status TEXT, Series TEXT, \ WorkPage TEXT, Manual TEXT)') myDB.action( 'INSERT INTO temp_table SELECT AuthorID, AuthorName, AuthorLink, BookName, BookSub, \ BookDesc, BookGenre, BookIsbn, BookPub, BookRate, BookImg, BookPages, BookLink, BookID, \ BookFile, BookDate, BookLang, BookAdded, Status, Series, WorkPage, Manual from books' ) myDB.action('DROP TABLE books') myDB.action('ALTER TABLE temp_table RENAME TO books') lazylibrarian.UPDATE_MSG = 'Reorganisation of books table complete' if db_version < 16: if has_column(myDB, "books", "AuthorLink"): lazylibrarian.UPDATE_MSG = 'Removing series, authorlink and authorname from books table' myDB.action( 'CREATE TABLE IF NOT EXISTS temp_table (AuthorID TEXT, \ BookName TEXT, BookSub TEXT, BookDesc TEXT, BookGenre TEXT, BookIsbn TEXT, BookPub TEXT, \ BookRate INTEGER, BookImg TEXT, BookPages INTEGER, BookLink TEXT, BookID TEXT UNIQUE, \ BookFile TEXT, BookDate TEXT, BookLang TEXT, BookAdded TEXT, Status TEXT, WorkPage TEXT, \ Manual TEXT)') myDB.action( 'INSERT INTO temp_table SELECT AuthorID, BookName, BookSub, \ BookDesc, BookGenre, BookIsbn, BookPub, BookRate, BookImg, BookPages, BookLink, BookID, \ BookFile, BookDate, BookLang, BookAdded, Status, WorkPage, Manual from books' ) myDB.action('DROP TABLE books') myDB.action('ALTER TABLE 
temp_table RENAME TO books') lazylibrarian.UPDATE_MSG = 'Reorganisation of books table complete' if db_version < 17: if has_column(myDB, "series", "AuthorID"): lazylibrarian.UPDATE_MSG = 'Creating seriesauthors table' # In this version of the database there is only one author per series so use that as starting point myDB.action( 'CREATE TABLE seriesauthors (SeriesID INTEGER, AuthorID TEXT, UNIQUE (SeriesID,AuthorID))' ) series = myDB.select( 'SELECT SeriesID,AuthorID from series') cnt = 0 tot = len(series) for item in series: cnt += 1 lazylibrarian.UPDATE_MSG = "Updating seriesauthors: %s of %s" % ( cnt, tot) myDB.action( 'insert into seriesauthors (SeriesID, AuthorID) values (%s, %s)' % (item['SeriesID'], item['AuthorID']), suppress='UNIQUE') myDB.action('DROP TABLE temp_table') myDB.action( 'CREATE TABLE temp_table (SeriesID INTEGER PRIMARY KEY, SeriesName TEXT, \ Status TEXT)') myDB.action( 'INSERT INTO temp_table SELECT SeriesID, SeriesName, Status FROM series' ) myDB.action('DROP TABLE series') myDB.action('ALTER TABLE temp_table RENAME TO series') lazylibrarian.UPDATE_MSG = 'Reorganisation of series table complete' if db_version < 18: data = myDB.match('pragma index_list(seriesauthors)') if not data: lazylibrarian.UPDATE_MSG = 'Adding unique constraint to seriesauthors table' myDB.action('DROP TABLE IF EXISTS temp_table') myDB.action( 'ALTER TABLE seriesauthors RENAME to temp_table') myDB.action( 'CREATE TABLE seriesauthors (SeriesID INTEGER, AuthorID TEXT, UNIQUE (SeriesID,AuthorID))' ) series = myDB.select( 'SELECT SeriesID,AuthorID from temp_table') cnt = 0 tot = len(series) for item in series: cnt += 1 lazylibrarian.UPDATE_MSG = "Updating seriesauthors: %s of %s" % ( cnt, tot) myDB.action( 'insert into seriesauthors (SeriesID, AuthorID) values (%s, %s)' % (item['SeriesID'], item['AuthorID']), suppress='UNIQUE') myDB.action('DROP TABLE temp_table') lazylibrarian.UPDATE_MSG = 'Reorganisation of seriesauthors complete' # Now do any non-version-specific 
tidying try: authors = myDB.select( 'SELECT AuthorID FROM authors WHERE AuthorName IS NULL') if authors: logger.debug( 'Removing %s un-named author%s from database' % (len(authors), plural(len(authors)))) for author in authors: authorid = author["AuthorID"] myDB.action('DELETE from authors WHERE AuthorID="%s"' % authorid) myDB.action('DELETE from books WHERE AuthorID="%s"' % authorid) except Exception as e: logger.error('Error: ' + str(e)) myDB.action('PRAGMA user_version = %s' % db_current_version) lazylibrarian.UPDATE_MSG = 'Cleaning Database after upgrade' myDB.action('vacuum') lazylibrarian.UPDATE_MSG = 'Database updated to version %s' % db_current_version logger.info(lazylibrarian.UPDATE_MSG) restartJobs(start='Start') lazylibrarian.UPDATE_MSG = '' except Exception: logger.error('Unhandled exception in database update: %s' % traceback.format_exc()) lazylibrarian.UPDATE_MSG = ''
def find_book(self, bookid=None, queue=None):
    """Fetch a single book from the GoodReads API and add it to the database.

    Looks the book up by GoodReads *bookid*, resolves (or adds as "Ignored")
    its author, inserts/updates the book row with status "Wanted", then tries
    to improve the cover image and series/workpage info from other sources.

    Args:
        bookid: GoodReads book id (string). Required despite the default;
            the URL build concatenates it directly.
        queue: unused here; presumably kept for interface parity with other
            find_book implementations — TODO confirm against callers.

    Returns:
        None. All results are written to the database via myDB.

    Fix: the num_pages lookup used the malformed path '.book/num_pages'
    (ElementTree treats '.book' as a literal tag, so find() returns None and
    .text raises AttributeError). Corrected to './book/num_pages'.
    """
    myDB = database.DBConnection()
    URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(self.params)

    try:
        rootxml, in_cache = get_xml_request(URL)
        if rootxml is None:
            logger.debug("Error requesting book")
            return
    except Exception as e:
        logger.error("Error finding book: %s" % str(e))
        return

    bookLanguage = rootxml.find('./book/language_code').text
    bookname = rootxml.find('./book/title').text

    if not bookLanguage:
        bookLanguage = "Unknown"
    #
    # PAB user has said they want this book, don't block for unwanted language, just warn
    #
    valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
    if bookLanguage not in valid_langs:
        logger.debug('Book %s goodreads language does not match preference, %s' % (bookname, bookLanguage))

    # missing publication year is stored as the sentinel "0000"
    if rootxml.find('./book/publication_year').text is None:
        bookdate = "0000"
    else:
        bookdate = rootxml.find('./book/publication_year').text

    try:
        bookimg = rootxml.find('./book/img_url').text
        # goodreads placeholder image -> use our own placeholder instead
        if 'assets/nocover' in bookimg:
            bookimg = 'images/nocover.png'
    except (KeyError, AttributeError):
        bookimg = 'images/nocover.png'

    authorname = rootxml.find('./book/authors/author/name').text
    bookdesc = rootxml.find('./book/description').text
    bookisbn = rootxml.find('./book/isbn').text
    bookpub = rootxml.find('./book/publisher').text
    booklink = rootxml.find('./book/link').text
    bookrate = float(rootxml.find('./book/average_rating').text)
    # BUGFIX: was rootxml.find('.book/num_pages') -- '.book' is not a valid
    # ElementTree path step, so find() returned None and .text raised
    bookpages = rootxml.find('./book/num_pages').text

    name = authorname
    GR = GoodReads(name)
    author = GR.find_author_id()
    if author:
        AuthorID = author['authorid']
        match = myDB.match('SELECT AuthorID from authors WHERE AuthorID="%s"' % AuthorID)
        if not match:
            # goodreads may have returned a new id for an author we already
            # have under a different id -- match on name and reuse ours
            match = myDB.match('SELECT AuthorID from authors WHERE AuthorName="%s"' % author['authorname'])
            if match:
                logger.debug('%s: Changing authorid from %s to %s' %
                             (author['authorname'], AuthorID, match['AuthorID']))
                AuthorID = match['AuthorID']  # we have a different authorid for that authorname
            else:  # no author but request to add book, add author as "ignored"
                # User hit "add book" button from a search
                controlValueDict = {"AuthorID": AuthorID}
                newValueDict = {
                    "AuthorName": author['authorname'],
                    "AuthorImg": author['authorimg'],
                    "AuthorLink": author['authorlink'],
                    "AuthorBorn": author['authorborn'],
                    "AuthorDeath": author['authordeath'],
                    "DateAdded": today(),
                    "Status": "Ignored"
                }
                myDB.upsert("authors", newValueDict, controlValueDict)
    else:
        logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
        return

    # normalise the title and split off any subtitle before storing
    bookname = unaccented(bookname)
    bookname, booksub = split_title(authorname, bookname)
    dic = {':': '.', '"': '', '\'': ''}
    bookname = replace_all(bookname, dic).strip()
    booksub = replace_all(booksub, dic).strip()
    # series info usually lives in the subtitle if there is one
    if booksub:
        series, seriesNum = bookSeries(booksub)
    else:
        series, seriesNum = bookSeries(bookname)

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorID": AuthorID,
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": "",
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": bookLanguage,
        "Status": "Wanted",
        "BookAdded": today()
    }
    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s added to the books database" % bookname)

    if 'nocover' in bookimg or 'nophoto' in bookimg:
        # try to get a cover from librarything
        workcover = getBookCover(bookid)
        if workcover:
            logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)
    elif bookimg and bookimg.startswith('http'):
        # we have a remote cover url -> cache a local copy
        link, success = cache_img("book", bookid, bookimg)
        if success:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)
        else:
            logger.debug('Failed to cache image for %s' % bookimg)

    if lazylibrarian.CONFIG['ADD_SERIES']:
        # prefer series info from librarything
        seriesdict = getWorkSeries(bookid)
        if seriesdict:
            logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
        else:
            if series:
                seriesdict = {cleanName(unaccented(series)): seriesNum}
        setSeries(seriesdict, bookid)

    worklink = getWorkPage(bookid)
    if worklink:
        controlValueDict = {"BookID": bookid}
        newValueDict = {"WorkPage": worklink}
        myDB.upsert("books", newValueDict, controlValueDict)
def TORDownloadMethod(bookid=None, tor_title=None, tor_url=None, library='eBook'):
    """Send a torrent/magnet to whichever torrent downloader(s) are enabled.

    Fetches the .torrent data (for http urls), then hands it to the first
    configured client(s): blackhole dir, uTorrent, rTorrent, qBittorrent,
    Transmission, Synology, Deluge (webui or RPC daemon). On success the
    "wanted" and "books" tables are updated to "Snatched".

    Args:
        bookid: BookID of the book/magazine issue being snatched.
        tor_title: display name of the torrent (may be renamed by the client).
        tor_url: magnet link or http(s) url of the .torrent file.
        library: 'eBook', 'AudioBook' or 'Magazine' -- selects the reject list
            and which status column to update.

    Returns:
        (True, '') on success, (False, reason) on failure.

    Fix: the rejected-torrent branch used a bare ``return False`` while every
    other exit returns a ``(success, message)`` tuple; callers unpacking two
    values would raise. It now returns ``(False, rejected)``.
    """
    myDB = database.DBConnection()
    downloadID = False
    Source = ''
    torrent = ''
    full_url = tor_url  # keep the url as stored in "wanted" table

    if 'magnet:?' in tor_url:
        # discard any other parameters and just use the magnet link
        tor_url = 'magnet:?' + tor_url.split('magnet:?')[1]
    else:
        # h = HTMLParser()
        # tor_url = h.unescape(tor_url)
        # HTMLParser is probably overkill, we only seem to get &amp;
        # tor_url = tor_url.replace('&amp;', '&')
        if '&file=' in tor_url:
            # torznab results need to be re-encoded
            # had a problem with torznab utf-8 encoded strings not matching
            # our utf-8 strings because of long/short form differences
            url, value = tor_url.split('&file=', 1)
            value = makeUnicode(value)  # ensure unicode
            value = unicodedata.normalize('NFC', value)  # normalize to short form
            value = value.encode('unicode-escape')  # then escape the result
            value = makeUnicode(value)  # ensure unicode
            value = value.replace(' ', '%20')  # and encode any spaces
            tor_url = url + '&file=' + value

        # strip url back to the .torrent as some sites add extra parameters
        if not tor_url.endswith('.torrent') and '.torrent' in tor_url:
            tor_url = tor_url.split('.torrent')[0] + '.torrent'

        headers = {'Accept-encoding': 'gzip', 'User-Agent': getUserAgent()}
        proxies = proxyList()

        try:
            logger.debug("Fetching %s" % tor_url)
            r = requests.get(tor_url, headers=headers, timeout=90, proxies=proxies)
            if str(r.status_code).startswith('2'):
                torrent = r.content
                # sanity-check the payload: empty or tiny responses are
                # almost certainly error pages, not torrent metadata
                if not len(torrent):
                    res = "Got empty response for %s" % tor_url
                    logger.warn(res)
                    return False, res
                elif len(torrent) < 100:
                    res = "Only got %s bytes for %s" % (len(torrent), tor_url)
                    logger.warn(res)
                    return False, res
                else:
                    logger.debug("Got %s bytes for %s" % (len(torrent), tor_url))
            else:
                res = "Got a %s response for %s" % (r.status_code, tor_url)
                logger.warn(res)
                return False, res
        except requests.exceptions.Timeout:
            res = 'Timeout fetching file from url: %s' % tor_url
            logger.warn(res)
            return False, res
        except Exception as e:
            # some jackett providers redirect internally using http 301 to a magnet link
            # which requests can't handle, so throws an exception
            logger.debug("Requests exception: %s" % str(e))
            if "magnet:?" in str(e):
                # recover the magnet link from the exception text and carry on
                tor_url = 'magnet:?' + str(e).split('magnet:?')[1].strip("'")
                logger.debug("Redirecting to %s" % tor_url)
            else:
                if hasattr(e, 'reason'):
                    res = '%s fetching file from url: %s, %s' % (
                        type(e).__name__, tor_url, e.reason)
                else:
                    res = '%s fetching file from url: %s, %s' % (
                        type(e).__name__, tor_url, str(e))
                logger.warn(res)
                return False, res

    if not torrent and not tor_url.startswith('magnet:?'):
        res = "No magnet or data, cannot continue"
        logger.warn(res)
        return False, res

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_BLACKHOLE']:
        Source = "BLACKHOLE"
        logger.debug("Sending %s to blackhole" % tor_title)
        tor_name = cleanName(tor_title).replace(' ', '_')
        if tor_url and tor_url.startswith('magnet'):
            if lazylibrarian.CONFIG['TOR_CONVERT_MAGNET']:
                # convert the magnet to a "meta-<hash>.torrent" stub file
                hashid = calculate_torrent_hash(tor_url)
                if not hashid:
                    hashid = tor_name
                tor_name = 'meta-' + hashid + '.torrent'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                result = magnet2torrent(tor_url, tor_path)
                if result is not False:
                    logger.debug('Magnet file saved as: %s' % tor_path)
                    downloadID = Source
            else:
                # just drop the magnet link into a .magnet file
                tor_name += '.magnet'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                msg = ''
                try:
                    # msg tracks how far we got, for the failure diagnostic below
                    msg = 'Opening '
                    with open(tor_path, 'wb') as torrent_file:
                        msg += 'Writing '
                        if isinstance(torrent, text_type):
                            torrent = torrent.encode('iso-8859-1')
                        torrent_file.write(torrent)
                    msg += 'SettingPerm '
                    setperm(tor_path)
                    msg += 'Saved '
                    logger.debug('Magnet file saved: %s' % tor_path)
                    downloadID = Source
                except Exception as e:
                    res = "Failed to write magnet to file: %s %s" % (
                        type(e).__name__, str(e))
                    logger.warn(res)
                    logger.debug("Progress: %s Filename [%s]" % (msg, repr(tor_path)))
                    return False, res
        else:
            tor_name += '.torrent'
            tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
            msg = ''
            try:
                msg = 'Opening '
                with open(tor_path, 'wb') as torrent_file:
                    msg += 'Writing '
                    if isinstance(torrent, text_type):
                        torrent = torrent.encode('iso-8859-1')
                    torrent_file.write(torrent)
                msg += 'SettingPerm '
                setperm(tor_path)
                msg += 'Saved '
                logger.debug('Torrent file saved: %s' % tor_name)
                downloadID = Source
            except Exception as e:
                res = "Failed to write torrent to file: %s %s" % (
                    type(e).__name__, str(e))
                logger.warn(res)
                logger.debug("Progress: %s Filename [%s]" % (msg, repr(tor_path)))
                return False, res

    hashid = calculate_torrent_hash(tor_url, torrent)
    if not hashid:
        res = "Unable to calculate torrent hash from url/data"
        logger.error(res)
        logger.debug("url: %s" % tor_url)
        logger.debug("data: %s" % makeUnicode(str(torrent[:50])))
        return False, res

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_UTORRENT'] and lazylibrarian.CONFIG[
            'UTORRENT_HOST']:
        logger.debug("Sending %s to Utorrent" % tor_title)
        Source = "UTORRENT"
        downloadID, res = utorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = utorrent.nameTorrent(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_RTORRENT'] and lazylibrarian.CONFIG[
            'RTORRENT_HOST']:
        logger.debug("Sending %s to rTorrent" % tor_title)
        Source = "RTORRENT"
        if torrent:
            logger.debug("Sending %s data to rTorrent" % tor_title)
            downloadID, res = rtorrent.addTorrent(tor_title, hashid, data=torrent)
        else:
            logger.debug("Sending %s url to rTorrent" % tor_title)
            downloadID, res = rtorrent.addTorrent(
                tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = rtorrent.getName(downloadID)

    if lazylibrarian.CONFIG[
            'TOR_DOWNLOADER_QBITTORRENT'] and lazylibrarian.CONFIG[
            'QBITTORRENT_HOST']:
        Source = "QBITTORRENT"
        if torrent:
            logger.debug("Sending %s data to qBittorrent" % tor_title)
            status, res = qbittorrent.addFile(torrent, hashid, tor_title)
        else:
            logger.debug("Sending %s url to qBittorrent" % tor_title)
            status, res = qbittorrent.addTorrent(
                tor_url, hashid)  # returns True or False
        if status:
            downloadID = hashid
            tor_title = qbittorrent.getName(hashid)

    if lazylibrarian.CONFIG[
            'TOR_DOWNLOADER_TRANSMISSION'] and lazylibrarian.CONFIG[
            'TRANSMISSION_HOST']:
        Source = "TRANSMISSION"
        if torrent:
            logger.debug("Sending %s data to Transmission" % tor_title)
            # transmission needs b64encoded metainfo to be unicode, not bytes
            downloadID, res = transmission.addTorrent(None, metainfo=makeUnicode(
                b64encode(torrent)))
        else:
            logger.debug("Sending %s url to Transmission" % tor_title)
            downloadID, res = transmission.addTorrent(
                tor_url)  # returns id or False
        if downloadID:
            # transmission returns it's own int, but we store hashid instead
            downloadID = hashid
            tor_title = transmission.getTorrentFolder(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_SYNOLOGY'] and lazylibrarian.CONFIG['USE_SYNOLOGY'] and \
            lazylibrarian.CONFIG['SYNOLOGY_HOST']:
        logger.debug("Sending %s url to Synology" % tor_title)
        Source = "SYNOLOGY_TOR"
        downloadID, res = synology.addTorrent(tor_url)  # returns id or False
        if downloadID:
            tor_title = synology.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_DELUGE'] and lazylibrarian.CONFIG[
            'DELUGE_HOST']:
        if not lazylibrarian.CONFIG['DELUGE_USER']:
            # no username, talk to the webui
            Source = "DELUGEWEBUI"
            if torrent:
                logger.debug("Sending %s data to Deluge" % tor_title)
                downloadID, res = deluge.addTorrent(tor_title, data=b64encode(torrent))
            else:
                logger.debug("Sending %s url to Deluge" % tor_title)
                downloadID, res = deluge.addTorrent(
                    tor_url)  # can be link or magnet, returns hash or False
            if downloadID:
                tor_title = deluge.getTorrentFolder(downloadID)
            else:
                return False, res
        else:
            # have username, talk to the daemon
            Source = "DELUGERPC"
            client = DelugeRPCClient(lazylibrarian.CONFIG['DELUGE_HOST'],
                                     int(lazylibrarian.CONFIG['DELUGE_PORT']),
                                     lazylibrarian.CONFIG['DELUGE_USER'],
                                     lazylibrarian.CONFIG['DELUGE_PASS'])
            try:
                client.connect()
                args = {"name": tor_title}
                if tor_url.startswith('magnet'):
                    res = "Sending %s magnet to DelugeRPC" % tor_title
                    logger.debug(res)
                    downloadID = client.call('core.add_torrent_magnet', tor_url, args)
                elif torrent:
                    res = "Sending %s data to DelugeRPC" % tor_title
                    logger.debug(res)
                    downloadID = client.call('core.add_torrent_file', tor_title,
                                             b64encode(torrent), args)
                else:
                    res = "Sending %s url to DelugeRPC" % tor_title
                    logger.debug(res)
                    downloadID = client.call('core.add_torrent_url', tor_url, args)
                if downloadID:
                    if lazylibrarian.CONFIG['DELUGE_LABEL']:
                        _ = client.call(
                            'label.set_torrent', downloadID,
                            lazylibrarian.CONFIG['DELUGE_LABEL'].lower())
                    result = client.call('core.get_torrent_status', downloadID, {})
                    if 'name' in result:
                        tor_title = result['name']
                else:
                    res += ' failed'
                    logger.error(res)
                    return False, res
            except Exception as e:
                res = 'DelugeRPC failed %s %s' % (type(e).__name__, str(e))
                logger.error(res)
                return False, res

    if not Source:
        res = 'No torrent download method is enabled, check config.'
        logger.warn(res)
        return False, res

    if downloadID:
        if tor_title:
            if downloadID.upper() in tor_title.upper():
                logger.warn(
                    '%s: name contains hash, probably unresolved magnet' % Source)
            else:
                tor_title = unaccented_str(tor_title)
                # need to check against reject words list again as the name may have changed
                # library = magazine eBook AudioBook to determine which reject list
                # but we can't easily do the per-magazine rejects
                if library == 'Magazine':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_MAGS'], ',')
                elif library == 'eBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'], ',')
                elif library == 'AudioBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'], ',')
                else:
                    logger.debug("Invalid library [%s] in TORDownloadMethod" % library)
                    reject_list = []

                rejected = False
                lower_title = tor_title.lower()
                for word in reject_list:
                    if word in lower_title:
                        rejected = "Rejecting torrent name %s, contains %s" % (
                            tor_title, word)
                        logger.debug(rejected)
                        break
                if not rejected:
                    rejected = check_contents(Source, downloadID, library, tor_title)
                if rejected:
                    myDB.action(
                        'UPDATE wanted SET status="Failed",DLResult=? WHERE NZBurl=?',
                        (rejected, full_url))
                    delete_task(Source, downloadID, True)
                    # BUGFIX: was a bare "return False"; every other exit
                    # returns a (success, message) tuple
                    return False, rejected
                else:
                    logger.debug('%s setting torrent name to [%s]' % (Source, tor_title))
                    myDB.action('UPDATE wanted SET NZBtitle=? WHERE NZBurl=?',
                                (tor_title, full_url))

        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?',
                        (bookid, ))
        elif library == 'AudioBook':
            myDB.action(
                'UPDATE books SET audiostatus="Snatched" WHERE BookID=?',
                (bookid, ))
        myDB.action(
            'UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
            (Source, downloadID, full_url))
        return True, ''

    res = 'Failed to send torrent to %s' % Source
    logger.error(res)
    return False, res
def LibraryScan(startdir=None): """ Scan a directory tree adding new books into database Return how many books you added """ try: destdir = lazylibrarian.DIRECTORY('Destination') if not startdir: if not destdir: logger.warn('Cannot find destination directory: %s. Not scanning' % destdir) return 0 startdir = destdir if not os.path.isdir(startdir): logger.warn('Cannot find directory: %s. Not scanning' % startdir) return 0 if not internet(): logger.warn('Libraryscan: No internet connection') return 0 myDB = database.DBConnection() # keep statistics of full library scans if startdir == destdir: myDB.action('DELETE from stats') try: # remove any extra whitespace in authornames authors = myDB.select('SELECT AuthorID,AuthorName FROM authors WHERE AuthorName like "% %"') if authors: logger.info('Removing extra spaces from %s authorname%s' % (len(authors), plural(len(authors)))) for author in authors: authorid = author["AuthorID"] authorname = ' '.join(author['AuthorName'].split()) # Have we got author name both with-and-without extra spaces? 
If so, merge them duplicate = myDB.match( 'Select AuthorID,AuthorName FROM authors WHERE AuthorName="%s"' % authorname) if duplicate: myDB.action('DELETE from authors where authorname="%s"' % author['AuthorName']) if author['AuthorID'] != duplicate['AuthorID']: myDB.action('UPDATE books set AuthorID="%s" WHERE AuthorID="%s"' % (duplicate['AuthorID'], author['AuthorID'])) else: myDB.action( 'UPDATE authors set AuthorName="%s" WHERE AuthorID="%s"' % (authorname, authorid)) except Exception as e: logger.info('Error: ' + str(e)) logger.info('Scanning ebook directory: %s' % startdir) new_book_count = 0 modified_count = 0 rescan_count = 0 rescan_hits = 0 file_count = 0 author = "" if lazylibrarian.CONFIG['FULL_SCAN']: cmd = 'select AuthorName, BookName, BookFile, BookID from books,authors' cmd += ' where books.AuthorID = authors.AuthorID and books.Status="Open"' if not startdir == destdir: cmd += ' and BookFile like "' + startdir + '%"' books = myDB.select(cmd) status = lazylibrarian.CONFIG['NOTFOUND_STATUS'] logger.info('Missing books will be marked as %s' % status) for book in books: bookID = book['BookID'] bookfile = book['BookFile'] if not (bookfile and os.path.isfile(bookfile)): myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID)) myDB.action('update books set BookFile="" where BookID="%s"' % bookID) logger.warn('Book %s - %s updated as not found on disk' % (book['AuthorName'], book['BookName'])) # to save repeat-scans of the same directory if it contains multiple formats of the same book, # keep track of which directories we've already looked at processed_subdirectories = [] warned = False # have we warned about no new authors setting matchString = '' for char in lazylibrarian.CONFIG['EBOOK_DEST_FILE']: matchString = matchString + '\\' + char # massage the EBOOK_DEST_FILE config parameter into something we can use # with regular expression matching booktypes = '' count = -1 booktype_list = getList(lazylibrarian.CONFIG['EBOOK_TYPE']) for 
book_type in booktype_list: count += 1 if count == 0: booktypes = book_type else: booktypes = booktypes + '|' + book_type matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace( "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']' pattern = re.compile(matchString, re.VERBOSE) for r, d, f in os.walk(startdir): for directory in d[:]: # prevent magazine being scanned if directory.startswith("_") or directory.startswith("."): d.remove(directory) for files in f: file_count += 1 if isinstance(r, str): r = r.decode(lazylibrarian.SYS_ENCODING) subdirectory = r.replace(startdir, '') # Added new code to skip if we've done this directory before. # Made this conditional with a switch in config.ini # in case user keeps multiple different books in the same subdirectory if lazylibrarian.CONFIG['IMP_SINGLEBOOK'] and (subdirectory in processed_subdirectories): logger.debug("[%s] already scanned" % subdirectory) else: # If this is a book, try to get author/title/isbn/language # if epub or mobi, read metadata from the book # If metadata.opf exists, use that allowing it to override # embedded metadata. 
User may have edited metadata.opf # to merge author aliases together # If all else fails, try pattern match for author/title # and look up isbn/lang from LT or GR later match = 0 if is_valid_booktype(files): logger.debug("[%s] Now scanning subdirectory %s" % (startdir, subdirectory)) language = "Unknown" isbn = "" book = "" author = "" gr_id = "" gb_id = "" extn = os.path.splitext(files)[1] # if it's an epub or a mobi we can try to read metadata from it if (extn == ".epub") or (extn == ".mobi"): book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING) try: res = get_book_info(book_filename) except Exception as e: logger.debug('get_book_info failed for %s, %s' % (book_filename, str(e))) res = {} # title and creator are the minimum we need if 'title' in res and 'creator' in res: book = res['title'] author = res['creator'] if book and len(book) > 2 and author and len(author) > 2: match = 1 if 'language' in res: language = res['language'] if 'identifier' in res: isbn = res['identifier'] if 'type' in res: extn = res['type'] logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, extn)) if not match: logger.debug("Book meta incomplete in %s" % book_filename) # calibre uses "metadata.opf", LL uses "bookname - authorname.opf" # just look for any .opf file in the current directory since we don't know # LL preferred authorname/bookname at this point. 
# Allow metadata in file to override book contents as may be users pref metafile = opf_file(r) try: res = get_book_info(metafile) except Exception as e: logger.debug('get_book_info failed for %s, %s' % (metafile, str(e))) res = {} # title and creator are the minimum we need if 'title' in res and 'creator' in res: book = res['title'] author = res['creator'] if book and len(book) > 2 and author and len(author) > 2: match = 1 if 'language' in res: language = res['language'] if 'identifier' in res: isbn = res['identifier'] if 'gr_id' in res: gr_id = res['gr_id'] logger.debug("file meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, gr_id)) if not match: logger.debug("File meta incomplete in %s" % metafile) if not match: # no author/book from metadata file, and not embedded either match = pattern.match(files) if match: author = match.group("author") book = match.group("book") if len(book) <= 2 or len(author) <= 2: match = 0 if not match: logger.debug("Pattern match failed [%s]" % files) if match: # flag that we found a book in this subdirectory processed_subdirectories.append(subdirectory) # If we have a valid looking isbn, and language != "Unknown", add it to cache if language != "Unknown" and is_valid_isbn(isbn): logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn)) # we need to add it to language cache if not already # there, is_valid_isbn has checked length is 10 or 13 if len(isbn) == 10: isbnhead = isbn[0:3] else: isbnhead = isbn[3:6] match = myDB.match('SELECT lang FROM languages where isbn = "%s"' % isbnhead) if not match: myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, language)) logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead)) else: logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead)) author, authorid, new = addAuthorNameToDB(author) # get the author name as we know it... 
if author: # author exists, check if this book by this author is in our database # metadata might have quotes in book name # some books might be stored under a different author name # eg books by multiple authors, books where author is "writing as" # or books we moved to "merge" authors book = book.replace("'", "") # First try and find it under author and bookname # as we may have it under a different bookid or isbn to goodreads/googlebooks # which might have several bookid/isbn for the same book bookid = find_book_in_db(myDB, author, book) if not bookid: # Title or author name might not match or multiple authors # See if the gr_id, gb_id is already in our database if gr_id: bookid = gr_id elif gb_id: bookid = gb_id else: bookid = "" if bookid: match = myDB.match('SELECT BookID FROM books where BookID = "%s"' % bookid) if not match: msg = 'Unable to find book %s by %s in database, trying to add it using ' if bookid == gr_id: msg += "GoodReads ID " + gr_id if bookid == gb_id: msg += "GoogleBooks ID " + gb_id logger.debug(msg % (book, author)) if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and gr_id: GR_ID = GoodReads(gr_id) GR_ID.find_book(gr_id, None) elif lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks" and gb_id: GB_ID = GoogleBooks(gb_id) GB_ID.find_book(gb_id, None) # see if it's there now... match = myDB.match('SELECT BookID from books where BookID="%s"' % bookid) if not match: logger.debug("Unable to add bookid %s to database" % bookid) bookid = "" if not bookid and isbn: # See if the isbn is in our database match = myDB.match('SELECT BookID FROM books where BookIsbn = "%s"' % isbn) if match: bookid = match['BookID'] if not bookid: # get author name from parent directory of this book directory newauthor = os.path.basename(os.path.dirname(r)) # calibre replaces trailing periods with _ eg Smith Jr. -> Smith Jr_ if newauthor.endswith('_'): newauthor = newauthor[:-1] + '.' 
if author.lower() != newauthor.lower(): logger.debug("Trying authorname [%s]" % newauthor) bookid = find_book_in_db(myDB, newauthor, book) if bookid: logger.warn("%s not found under [%s], found under [%s]" % (book, author, newauthor)) # at this point if we still have no bookid, it looks like we # have author and book title but no database entry for it if not bookid: if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads": # Either goodreads doesn't have the book or it didn't match language prefs # Since we have the book anyway, try and reload it ignoring language prefs rescan_count += 1 base_url = 'http://www.goodreads.com/search.xml?q=' params = {"key": lazylibrarian.CONFIG['GR_API']} if author[1] in '. ': surname = author forename = '' while surname[1] in '. ': forename = forename + surname[0] + '.' surname = surname[2:].strip() if author != forename + ' ' + surname: logger.debug('Stripped authorname [%s] to [%s %s]' % (author, forename, surname)) author = forename + ' ' + surname author = ' '.join(author.split()) # ensure no extra whitespace searchname = author + ' ' + book searchname = cleanName(unaccented(searchname)) searchterm = urllib.quote_plus(searchname.encode(lazylibrarian.SYS_ENCODING)) set_url = base_url + searchterm + '&' + urllib.urlencode(params) try: rootxml, in_cache = get_xml_request(set_url) if not len(rootxml): logger.debug("Error requesting results from GoodReads") else: resultxml = rootxml.getiterator('work') for item in resultxml: booktitle = item.find('./best_book/title').text book_fuzz = fuzz.token_set_ratio(booktitle, book) if book_fuzz >= 98: logger.debug("Rescan found %s : %s" % (booktitle, language)) rescan_hits += 1 bookid = item.find('./best_book/id').text GR_ID = GoodReads(bookid) GR_ID.find_book(bookid, None) if language and language != "Unknown": # set language from book metadata logger.debug("Setting language from metadata %s : %s" % (booktitle, language)) myDB.action('UPDATE books SET BookLang="%s" WHERE BookID="%s"' % (language, 
bookid)) break if not bookid: logger.warn("GoodReads doesn't know about %s" % book) except Exception as e: logger.error("Error finding rescan results: %s" % str(e)) elif lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks": # if we get here using googlebooks it's because googlebooks # doesn't have the book. No point in looking for it again. logger.warn("GoogleBooks doesn't know about %s" % book) # see if it's there now... if bookid: cmd = 'SELECT books.Status, BookFile, AuthorName, BookName from books,authors ' cmd += 'where books.AuthorID = authors.AuthorID and BookID="%s"' % bookid check_status = myDB.match(cmd) if not check_status: logger.debug('Unable to find bookid %s in database' % bookid) else: if check_status['Status'] != 'Open': # we found a new book new_book_count += 1 myDB.action( 'UPDATE books set Status="Open" where BookID="%s"' % bookid) # store book location so we can check if it gets removed book_filename = os.path.join(r, files) if not check_status['BookFile']: # no previous location myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid)) # location may have changed since last scan elif book_filename != check_status['BookFile']: modified_count += 1 logger.warn("Updating book location for %s %s from %s to %s" % (author, book, check_status['BookFile'], book_filename)) logger.debug("%s %s matched %s BookID %s, [%s][%s]" % (author, book, check_status['Status'], bookid, check_status['AuthorName'], check_status['BookName'])) myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid)) # update cover file to cover.jpg in book folder (if exists) bookdir = os.path.dirname(book_filename) coverimg = os.path.join(bookdir, 'cover.jpg') if os.path.isfile(coverimg): cachedir = lazylibrarian.CACHEDIR cacheimg = os.path.join(cachedir, 'book', bookid + '.jpg') copyfile(coverimg, cacheimg) else: logger.warn( "Failed to match book [%s] by [%s] in database" % (book, author)) else: if not warned and not 
lazylibrarian.CONFIG['ADD_AUTHOR']: logger.warn("Add authors to database is disabled") warned = True logger.info("%s/%s new/modified book%s found and added to the database" % (new_book_count, modified_count, plural(new_book_count + modified_count))) logger.info("%s file%s processed" % (file_count, plural(file_count))) if startdir == destdir: # On full library scans, check for missing workpages setWorkPages() # and books with unknown language nolang = myDB.match( "select count('BookID') as counter from Books where status='Open' and BookLang='Unknown'") nolang = nolang['counter'] if nolang: logger.warn("Found %s book%s in your library with unknown language" % (nolang, plural(nolang))) # show stats if new books were added stats = myDB.match( "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \ sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats") st= {'GR_book_hits': stats['sum(GR_book_hits)'], 'GB_book_hits': stats['sum(GR_book_hits)'], 'GR_lang_hits': stats['sum(GR_lang_hits)'], 'LT_lang_hits': stats['sum(LT_lang_hits)'], 'GB_lang_change': stats['sum(GB_lang_change)'], 'cache_hits': stats['sum(cache_hits)'], 'bad_lang': stats['sum(bad_lang)'], 'bad_char': stats['sum(bad_char)'], 'uncached': stats['sum(uncached)'], 'duplicates': stats['sum(duplicates)']} for item in st.keys(): if st[item] is None: st[item] = 0 if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks": logger.debug("GoogleBooks was hit %s time%s for books" % (st['GR_book_hits'], plural(st['GR_book_hits']))) logger.debug("GoogleBooks language was changed %s time%s" % (st['GB_lang_change'], plural(st['GB_lang_change']))) if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads": logger.debug("GoodReads was hit %s time%s for books" % (st['GR_book_hits'], plural(st['GR_book_hits']))) logger.debug("GoodReads was hit %s time%s for languages" % (st['GR_lang_hits'], plural(st['GR_lang_hits']))) logger.debug("LibraryThing was hit %s time%s for 
languages" % (st['LT_lang_hits'], plural(st['LT_lang_hits']))) logger.debug("Language cache was hit %s time%s" % (st['cache_hits'], plural(st['cache_hits']))) logger.debug("Unwanted language removed %s book%s" % (st['bad_lang'], plural(st['bad_lang']))) logger.debug("Unwanted characters removed %s book%s" % (st['bad_char'], plural(st['bad_char']))) logger.debug("Unable to cache language for %s book%s with missing ISBN" % (st['uncached'], plural(st['uncached']))) logger.debug("Found %s duplicate book%s" % (st['duplicates'], plural(st['duplicates']))) logger.debug("Rescan %s hit%s, %s miss" % (rescan_hits, plural(rescan_hits), rescan_count - rescan_hits)) logger.debug("Cache %s hit%s, %s miss" % (lazylibrarian.CACHE_HIT, plural(lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS)) cachesize = myDB.match("select count('ISBN') as counter from languages") logger.debug("ISBN Language cache holds %s entries" % cachesize['counter']) # Cache any covers and images images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"') if len(images): logger.info("Caching cover%s for %i book%s" % (plural(len(images)), len(images), plural(len(images)))) for item in images: bookid = item['bookid'] bookimg = item['bookimg'] # bookname = item['bookname'] newimg, success = cache_img("book", bookid, bookimg) if success: myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid)) images = myDB.select('select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"') if len(images): logger.info("Caching image%s for %i author%s" % (plural(len(images)), len(images), plural(len(images)))) for item in images: authorid = item['authorid'] authorimg = item['authorimg'] # authorname = item['authorname'] newimg, success = cache_img("author", authorid, authorimg) if success: myDB.action('update authors set AuthorImg="%s" where AuthorID="%s"' % (newimg, authorid)) # On full scan, update bookcounts for all authors, not just new ones - 
refresh may have located # new books for existing authors especially if switched provider gb/gr or changed wanted languages authors = myDB.select('select AuthorID from authors') else: # On single author/book import, just update bookcount for that author authors = myDB.select('select AuthorID from authors where AuthorName = "%s"' % author.replace('"', '""')) logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors)))) for author in authors: update_totals(author['AuthorID']) logger.info('Library scan complete') return new_book_count except Exception: logger.error('Unhandled exception in libraryScan: %s' % traceback.format_exc())
def TORDownloadMethod(bookid=None, tor_title=None, tor_url=None):
    """Fetch a torrent (or pass through a magnet link) and hand it to every
    torrent downloader enabled in config.

    bookid    -- BookID used to mark the book "Snatched" in the books table
    tor_title -- display name for the download; may be replaced by the name
                 reported back from the downloader
    tor_url   -- .torrent url or magnet link; the original value is kept in
                 full_url to match the "wanted" table entry

    Returns True if some downloader accepted the torrent, False on any failure.

    NOTE(review): a second, newer TORDownloadMethod definition appears later in
    this file; at import time the later definition wins — confirm this older
    copy is still intended to be present.
    """
    myDB = database.DBConnection()
    downloadID = False
    Source = ''
    full_url = tor_url  # keep the url as stored in "wanted" table
    if tor_url and tor_url.startswith('magnet'):
        torrent = tor_url  # allow magnet link to write to blackhole and hash to utorrent/rtorrent
    else:
        # h = HTMLParser()
        # tor_url = h.unescape(tor_url)
        # HTMLParser is probably overkill, we only seem to get &
        # tor_url = tor_url.replace('&', '&')
        if '&file=' in tor_url:
            # torznab results need to be re-encoded
            # had a problem with torznab utf-8 encoded strings not matching
            # our utf-8 strings because of long/short form differences
            url, value = tor_url.split('&file=', 1)
            if isinstance(value, str):
                value = value.decode('utf-8')  # make unicode
            value = unicodedata.normalize('NFC', value)  # normalize to short form
            value = value.encode('unicode-escape')  # then escape the result
            value = value.replace(' ', '%20')  # and encode any spaces
            tor_url = url + '&file=' + value
        # strip url back to the .torrent as some sites add parameters
        if not tor_url.endswith('.torrent'):
            if '.torrent' in tor_url:
                tor_url = tor_url.split('.torrent')[0] + '.torrent'

        # download the .torrent file, honouring any configured proxy,
        # and transparently un-gzip the response if the server compressed it
        request = urllib2.Request(ur'%s' % tor_url)
        if lazylibrarian.CONFIG['PROXY_HOST']:
            request.set_proxy(lazylibrarian.CONFIG['PROXY_HOST'], lazylibrarian.CONFIG['PROXY_TYPE'])
        request.add_header('Accept-encoding', 'gzip')
        request.add_header('User-Agent', USER_AGENT)

        try:
            response = urllib2.urlopen(request, timeout=90)
            if response.info().get('Content-Encoding') == 'gzip':
                buf = StringIO(response.read())
                f = gzip.GzipFile(fileobj=buf)
                torrent = f.read()
            else:
                torrent = response.read()
        except socket.timeout:
            logger.warn('Timeout fetching torrent from url: %s' % tor_url)
            return False
        except urllib2.URLError as e:
            logger.warn('Error fetching torrent from url: %s, %s' % (tor_url, e.reason))
            return False
        except ValueError as e:
            logger.warn('Error, invalid url: [%s] %s' % (full_url, str(e)))
            return False

    # Each enabled downloader below is tried in turn; Source/downloadID keep
    # the values from the LAST downloader that ran (they are overwritten, not
    # accumulated), so only the final attempt is recorded in the database.
    if lazylibrarian.CONFIG['TOR_DOWNLOADER_BLACKHOLE']:
        Source = "BLACKHOLE"
        logger.debug("Sending %s to blackhole" % tor_title)
        tor_name = cleanName(tor_title).replace(' ', '_')
        if tor_url and tor_url.startswith('magnet'):
            if lazylibrarian.CONFIG['TOR_CONVERT_MAGNET']:
                # convert the magnet to a real .torrent metafile first
                hashid = CalcTorrentHash(tor_url)
                tor_name = 'meta-' + hashid + '.torrent'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                result = magnet2torrent(tor_url, tor_path)
                if result is not False:
                    logger.debug('Magnet file saved as: %s' % tor_path)
                    downloadID = Source
            else:
                # write the raw magnet link into the blackhole directory
                tor_name += '.magnet'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                try:
                    with open(tor_path, 'wb') as torrent_file:
                        torrent_file.write(torrent)
                    logger.debug('Magnet file saved: %s' % tor_path)
                    setperm(tor_path)
                    downloadID = Source
                except Exception as e:
                    logger.debug("Failed to write magnet to file %s, %s" % (tor_path, str(e)))
                    return False
        else:
            # write the downloaded .torrent into the blackhole directory
            tor_name += '.torrent'
            tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
            try:
                with open(tor_path, 'wb') as torrent_file:
                    torrent_file.write(torrent)
                setperm(tor_path)
                logger.debug('Torrent file saved: %s' % tor_name)
                downloadID = Source
            except Exception as e:
                logger.debug("Failed to write torrent to file %s, %s" % (tor_path, str(e)))
                return False

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_UTORRENT'] and lazylibrarian.CONFIG['UTORRENT_HOST']:
        logger.debug("Sending %s to Utorrent" % tor_title)
        Source = "UTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = utorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = utorrent.nameTorrent(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_RTORRENT'] and lazylibrarian.CONFIG['RTORRENT_HOST']:
        logger.debug("Sending %s to rTorrent" % tor_title)
        Source = "RTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = rtorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = rtorrent.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_QBITTORRENT'] and lazylibrarian.CONFIG['QBITTORRENT_HOST']:
        logger.debug("Sending %s to qbittorrent" % tor_title)
        Source = "QBITTORRENT"
        hashid = CalcTorrentHash(torrent)
        status = qbittorrent.addTorrent(tor_url)  # returns hash or False
        if status:
            downloadID = hashid
            tor_title = qbittorrent.getName(hashid)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_TRANSMISSION'] and lazylibrarian.CONFIG['TRANSMISSION_HOST']:
        logger.debug("Sending %s to Transmission" % tor_title)
        Source = "TRANSMISSION"
        downloadID = transmission.addTorrent(tor_url)  # returns id or False
        if downloadID:
            # transmission returns it's own int, but we store hashid instead
            downloadID = CalcTorrentHash(torrent)
            tor_title = transmission.getTorrentFolder(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_SYNOLOGY'] and lazylibrarian.CONFIG['USE_SYNOLOGY'] and lazylibrarian.CONFIG['SYNOLOGY_HOST']:
        logger.debug("Sending %s to Synology" % tor_title)
        Source = "SYNOLOGY_TOR"
        downloadID = synology.addTorrent(tor_url)  # returns id or False
        if downloadID:
            tor_title = synology.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_DELUGE'] and lazylibrarian.CONFIG['DELUGE_HOST']:
        logger.debug("Sending %s to Deluge" % tor_title)
        if not lazylibrarian.CONFIG['DELUGE_USER']:
            # no username, talk to the webui
            Source = "DELUGEWEBUI"
            downloadID = deluge.addTorrent(tor_url)  # returns hash or False
            if downloadID:
                tor_title = deluge.getTorrentFolder(downloadID)
        else:
            # have username, talk to the daemon
            Source = "DELUGERPC"
            client = DelugeRPCClient(lazylibrarian.CONFIG['DELUGE_HOST'],
                                     int(lazylibrarian.CONFIG['DELUGE_PORT']),
                                     lazylibrarian.CONFIG['DELUGE_USER'],
                                     lazylibrarian.CONFIG['DELUGE_PASS'])
            try:
                client.connect()
                args = {"name": tor_title}
                if tor_url.startswith('magnet'):
                    downloadID = client.call('core.add_torrent_magnet', tor_url, args)
                else:
                    downloadID = client.call('core.add_torrent_url', tor_url, args)
                if downloadID:
                    if lazylibrarian.CONFIG['DELUGE_LABEL']:
                        _ = client.call('label.set_torrent', downloadID, lazylibrarian.CONFIG['DELUGE_LABEL'])
                    result = client.call('core.get_torrent_status', downloadID, {})
                    # for item in result:
                    #     logger.debug ('Deluge RPC result %s: %s' % (item, result[item]))
                    if 'name' in result:
                        tor_title = result['name']
            except Exception as e:
                logger.debug('DelugeRPC failed %s' % str(e))
                return False

    if not Source:
        logger.warn('No torrent download method is enabled, check config.')
        return False

    if downloadID:
        # record the snatch; a hash embedded in the title usually means the
        # magnet was never resolved to a real name, so keep the old title then
        myDB.action('UPDATE books SET status = "Snatched" WHERE BookID="%s"' % bookid)
        myDB.action('UPDATE wanted SET status = "Snatched", Source = "%s", DownloadID = "%s" WHERE NZBurl="%s"' %
                    (Source, downloadID, full_url))
        if tor_title:
            if downloadID.upper() in tor_title.upper():
                logger.warn('%s: name contains hash, probably unresolved magnet' % Source)
            else:
                tor_title = unaccented_str(tor_title)
                logger.debug('%s setting torrent name to [%s]' % (Source, tor_title))
                myDB.action('UPDATE wanted SET NZBtitle = "%s" WHERE NZBurl="%s"' % (tor_title, full_url))
        return True
    else:
        logger.error(u'Failed to download torrent from %s, %s' % (Source, tor_url))
        myDB.action('UPDATE wanted SET status = "Failed" WHERE NZBurl="%s"' % full_url)
        return False
def _book_search_terms(book, short=False):
    """Return (authorname, bookname) search terms for a book dict.

    Normalizes the author name (drops leading initials and initial dots) and
    the title (uses the subtitle when title == author, removes a duplicated
    leading author name). When short is True, also truncates the title at the
    first '(' for a "shortbook" search.
    """
    authorname = book['authorName']
    # strip any leading initials, eg "L. E. Modesitt" -> "Modesitt"
    # len() guard so an empty or single-character author name can't raise IndexError
    while len(authorname) > 1 and authorname[1] in '. ':
        authorname = authorname[2:].strip()  # and leading whitespace
    # middle initials can't have a dot
    authorname = authorname.replace('. ', ' ')
    authorname = cleanName(authorname)
    bookname = cleanName(book['bookName'])
    if bookname == authorname and book['bookSub']:
        # books like "Spike Milligan: Man of Letters"
        # where we split the title/subtitle on ':'
        bookname = cleanName(book['bookSub'])
    if bookname.startswith(authorname) and len(bookname) > len(authorname):
        # books like "Spike Milligan In his own words"
        # where we don't want to look for "Spike Milligan Spike Milligan In his own words"
        bookname = bookname[len(authorname) + 1:]
    bookname = bookname.strip()
    if short and '(' in bookname:
        # shortened title, eg drop a trailing "(Series Name #3)"
        bookname = bookname.split('(')[0].strip()
    return authorname, bookname


def ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode):
    """Build the newznab/torznab query parameter dict for one provider.

    provider   -- provider config dict (BOOKSEARCH/GENERALSEARCH/MAGSEARCH,
                  BOOKCAT/MAGCAT, EXTENDED keys)
    api_key    -- provider api key, passed through as "apikey"
    book       -- book dict; needs authorName/bookName/bookSub for book
                  searches, or 'searchterm' for mag/general searches
    searchType -- "book", "shortbook", "mag", or anything else for a
                  general search
    searchMode -- only used for log messages

    Returns the params dict, or None if the provider has no suitable
    search capability configured.
    """
    params = None
    if searchType in ("book", "shortbook"):
        # "shortbook" differs from "book" only in truncating the title at '('
        authorname, bookname = _book_search_terms(book, short=(searchType == "shortbook"))
        if provider['BOOKSEARCH'] and provider['BOOKCAT']:  # if specific booksearch, use it
            params = {
                "t": provider['BOOKSEARCH'],
                "apikey": api_key,
                "title": bookname,
                "author": authorname,
                "cat": provider['BOOKCAT']
            }
        elif provider['GENERALSEARCH'] and provider['BOOKCAT']:  # if not, try general search
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "q": authorname + ' ' + bookname,
                "cat": provider['BOOKCAT']
            }
    elif searchType == "mag":
        if provider['MAGSEARCH'] and provider['MAGCAT']:  # if specific magsearch, use it
            params = {
                "t": provider['MAGSEARCH'],
                "apikey": api_key,
                "cat": provider['MAGCAT'],
                "q": book['searchterm'],
                "extended": provider['EXTENDED'],
            }
        elif provider['GENERALSEARCH'] and provider['MAGCAT']:
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "cat": provider['MAGCAT'],
                "q": book['searchterm'],
                "extended": provider['EXTENDED'],
            }
    else:
        if provider['GENERALSEARCH']:
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                # this is a general search
                "q": book['searchterm'],
                "extended": provider['EXTENDED'],
            }
    if params:
        logger.debug('[NewzNabPlus] - %s Search parameters set to %s' % (searchMode, str(params)))
    else:
        logger.debug('[NewzNabPlus] - %s No matching search parameters' % searchMode)
    return params
def TORDownloadMethod(bookid=None, tor_title=None, tor_url=None, library='eBook'):
    """Fetch a torrent (or pass through a magnet link) and hand it to every
    torrent downloader enabled in config.

    bookid    -- BookID used to mark the entry "Snatched" in the books table
    tor_title -- display name; may be replaced by the name the downloader
                 reports back, and is then re-checked against reject words
    tor_url   -- .torrent url or magnet link; original value kept in full_url
                 to match the "wanted" table entry
    library   -- 'eBook', 'AudioBook' or 'magazine'; selects which status
                 column to update and which reject-word list to apply

    Returns True if a downloader accepted the torrent (and the name passed the
    reject list), False on any failure or rejection.
    """
    myDB = database.DBConnection()
    downloadID = False
    Source = ''
    full_url = tor_url  # keep the url as stored in "wanted" table
    if tor_url and tor_url.startswith('magnet'):
        torrent = tor_url  # allow magnet link to write to blackhole and hash to utorrent/rtorrent
    else:
        # h = HTMLParser()
        # tor_url = h.unescape(tor_url)
        # HTMLParser is probably overkill, we only seem to get &
        # tor_url = tor_url.replace('&', '&')
        if '&file=' in tor_url:
            # torznab results need to be re-encoded
            # had a problem with torznab utf-8 encoded strings not matching
            # our utf-8 strings because of long/short form differences
            url, value = tor_url.split('&file=', 1)
            value = makeUnicode(value)  # ensure unicode
            value = unicodedata.normalize('NFC', value)  # normalize to short form
            value = value.encode('unicode-escape')  # then escape the result
            value = value.replace(' ', '%20')  # and encode any spaces
            tor_url = url + '&file=' + value
        # strip url back to the .torrent as some sites add parameters
        if not tor_url.endswith('.torrent'):
            if '.torrent' in tor_url:
                tor_url = tor_url.split('.torrent')[0] + '.torrent'

        # download the .torrent file, honouring any configured proxy
        headers = {'Accept-encoding': 'gzip', 'User-Agent': USER_AGENT}
        proxies = proxyList()
        try:
            r = requests.get(tor_url, headers=headers, timeout=90, proxies=proxies)
        except requests.exceptions.Timeout:
            logger.warn('Timeout fetching file from url: %s' % tor_url)
            return False
        except Exception as e:
            if hasattr(e, 'reason'):
                logger.warn('%s fetching file from url: %s, %s' % (type(e).__name__, tor_url, e.reason))
            else:
                logger.warn('%s fetching file from url: %s, %s' % (type(e).__name__, tor_url, str(e)))
            return False
        torrent = r.content

    # Each enabled downloader below is tried in turn; Source/downloadID keep
    # the values from the LAST downloader that ran (they are overwritten, not
    # accumulated), so only the final attempt is recorded in the database.
    if lazylibrarian.CONFIG['TOR_DOWNLOADER_BLACKHOLE']:
        Source = "BLACKHOLE"
        logger.debug("Sending %s to blackhole" % tor_title)
        tor_name = cleanName(tor_title).replace(' ', '_')
        if tor_url and tor_url.startswith('magnet'):
            if lazylibrarian.CONFIG['TOR_CONVERT_MAGNET']:
                # convert the magnet to a real .torrent metafile first
                hashid = CalcTorrentHash(tor_url)
                tor_name = 'meta-' + hashid + '.torrent'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                result = magnet2torrent(tor_url, tor_path)
                if result is not False:
                    logger.debug('Magnet file saved as: %s' % tor_path)
                    downloadID = Source
            else:
                # write the raw magnet link into the blackhole directory;
                # msg tracks how far we got for the failure log below
                tor_name += '.magnet'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                msg = ''
                try:
                    msg = 'Opening '
                    with open(tor_path, 'wb') as torrent_file:
                        msg += 'Writing '
                        if isinstance(torrent, unicode):
                            torrent = torrent.encode('iso-8859-1')
                        torrent_file.write(torrent)
                    msg += 'SettingPerm'
                    setperm(tor_path)
                    msg += 'Saved'
                    logger.debug('Magnet file saved: %s' % tor_path)
                    downloadID = Source
                except Exception as e:
                    logger.debug("Failed to write magnet to file: %s %s" % (type(e).__name__, str(e)))
                    logger.debug("Progress: %s" % msg)
                    logger.debug("Filename [%s]" % (repr(tor_path)))
                    return False
        else:
            # write the downloaded .torrent into the blackhole directory;
            # msg tracks how far we got for the failure log below
            tor_name += '.torrent'
            tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
            msg = ''
            try:
                msg = 'Opening '
                with open(tor_path, 'wb') as torrent_file:
                    msg += 'Writing '
                    if isinstance(torrent, unicode):
                        torrent = torrent.encode('iso-8859-1')
                    torrent_file.write(torrent)
                msg += 'SettingPerm '
                setperm(tor_path)
                msg += 'Saved'
                logger.debug('Torrent file saved: %s' % tor_name)
                downloadID = Source
            except Exception as e:
                logger.debug("Failed to write torrent to file: %s %s" % (type(e).__name__, str(e)))
                logger.debug("Progress: %s" % msg)
                logger.debug("Filename [%s]" % (repr(tor_path)))
                return False

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_UTORRENT'] and lazylibrarian.CONFIG['UTORRENT_HOST']:
        logger.debug("Sending %s to Utorrent" % tor_title)
        Source = "UTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = utorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = utorrent.nameTorrent(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_RTORRENT'] and lazylibrarian.CONFIG['RTORRENT_HOST']:
        logger.debug("Sending %s to rTorrent" % tor_title)
        Source = "RTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = rtorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = rtorrent.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_QBITTORRENT'] and lazylibrarian.CONFIG['QBITTORRENT_HOST']:
        logger.debug("Sending %s to qbittorrent" % tor_title)
        Source = "QBITTORRENT"
        hashid = CalcTorrentHash(torrent)
        status = qbittorrent.addTorrent(tor_url, hashid)  # returns True or False
        if status:
            downloadID = hashid
            tor_title = qbittorrent.getName(hashid)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_TRANSMISSION'] and lazylibrarian.CONFIG['TRANSMISSION_HOST']:
        logger.debug("Sending %s to Transmission" % tor_title)
        Source = "TRANSMISSION"
        downloadID = transmission.addTorrent(tor_url)  # returns id or False
        if downloadID:
            # transmission returns it's own int, but we store hashid instead
            downloadID = CalcTorrentHash(torrent)
            tor_title = transmission.getTorrentFolder(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_SYNOLOGY'] and lazylibrarian.CONFIG['USE_SYNOLOGY'] and \
            lazylibrarian.CONFIG['SYNOLOGY_HOST']:
        logger.debug("Sending %s to Synology" % tor_title)
        Source = "SYNOLOGY_TOR"
        downloadID = synology.addTorrent(tor_url)  # returns id or False
        if downloadID:
            tor_title = synology.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_DELUGE'] and lazylibrarian.CONFIG['DELUGE_HOST']:
        logger.debug("Sending %s to Deluge" % tor_title)
        if not lazylibrarian.CONFIG['DELUGE_USER']:
            # no username, talk to the webui
            Source = "DELUGEWEBUI"
            downloadID = deluge.addTorrent(tor_url)  # returns hash or False
            if downloadID:
                tor_title = deluge.getTorrentFolder(downloadID)
        else:
            # have username, talk to the daemon
            Source = "DELUGERPC"
            client = DelugeRPCClient(lazylibrarian.CONFIG['DELUGE_HOST'],
                                     lazylibrarian.CONFIG['DELUGE_URL_BASE'],
                                     int(lazylibrarian.CONFIG['DELUGE_PORT']),
                                     lazylibrarian.CONFIG['DELUGE_USER'],
                                     lazylibrarian.CONFIG['DELUGE_PASS'])
            try:
                client.connect()
                args = {"name": tor_title}
                if tor_url.startswith('magnet'):
                    downloadID = client.call('core.add_torrent_magnet', tor_url, args)
                else:
                    downloadID = client.call('core.add_torrent_url', tor_url, args)
                if downloadID:
                    if lazylibrarian.CONFIG['DELUGE_LABEL']:
                        _ = client.call('label.set_torrent', downloadID, lazylibrarian.CONFIG['DELUGE_LABEL'])
                    result = client.call('core.get_torrent_status', downloadID, {})
                    # for item in result:
                    #     logger.debug ('Deluge RPC result %s: %s' % (item, result[item]))
                    if 'name' in result:
                        tor_title = result['name']
            except Exception as e:
                logger.debug('DelugeRPC failed %s %s' % (type(e).__name__, str(e)))
                return False

    if not Source:
        logger.warn('No torrent download method is enabled, check config.')
        return False

    if downloadID:
        if tor_title:
            if downloadID.upper() in tor_title.upper():
                logger.warn('%s: name contains hash, probably unresolved magnet' % Source)
            else:
                tor_title = unaccented_str(tor_title)
                # need to check against reject words list again as the name may have changed
                # library = magazine eBook AudioBook to determine which reject list
                # but we can't easily do the per-magazine rejects
                if library == 'magazine':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_MAGS'])
                elif library == 'eBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
                elif library == 'AudioBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
                else:
                    logger.debug("Invalid library [%s] in TORDownloadMethod" % library)
                    reject_list = []

                rejected = False
                lower_title = tor_title.lower()
                for word in reject_list:
                    if word in lower_title:
                        rejected = True
                        logger.debug("Rejecting torrent name %s, contains %s" % (tor_title, word))
                        break
                if rejected:
                    # name failed the reject list: mark failed and remove the
                    # task from the downloader we just sent it to
                    myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (full_url, ))
                    delete_task(Source, downloadID, True)
                    return False
                else:
                    logger.debug('%s setting torrent name to [%s]' % (Source, tor_title))
                    myDB.action('UPDATE wanted SET NZBtitle=? WHERE NZBurl=?', (tor_title, full_url))

        # record the snatch against the right status column for the library type
        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?', (bookid, ))
        elif library == 'AudioBook':
            myDB.action('UPDATE books SET audiostatus="Snatched" WHERE BookID=?', (bookid, ))
        myDB.action('UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
                    (Source, downloadID, full_url))
        return True

    logger.error('Failed to download torrent from %s, %s' % (Source, tor_url))
    myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (full_url, ))
    return False
def getSeriesAuthors(seriesid):
    """ Get a list of authors contributing to a series
        and import those authors (and their books) into the database
        Return how many authors you added

        Looks each series member up on GoodReads (author+title first, then
        title only) and, when a title matches with fuzz >= 98, imports that
        author via addAuthorToDB. The count returned is the change in the
        authors table row count, so it can be 0 if every author was already
        present. """
    myDB = database.DBConnection()
    # snapshot the author count so we can report how many were added
    result = myDB.match("select count('AuthorID') as counter from authors")
    start = int(result['counter'])
    result = myDB.match('select SeriesName from series where SeriesID="%s"' % seriesid)
    seriesname = result['SeriesName']
    members = getSeriesMembers(seriesid)
    if members:
        # NOTE(review): this second DBConnection looks redundant — myDB is
        # already open above; confirm whether it can be removed
        myDB = database.DBConnection()
        for member in members:
            # order = member[0]
            bookname = member[1]
            authorname = member[2]
            # first pass: search GoodReads by book title + author name
            base_url = 'http://www.goodreads.com/search.xml?q='
            params = {"key": lazylibrarian.CONFIG['GR_API']}
            searchname = bookname + ' ' + authorname
            searchname = cleanName(unaccented(searchname))
            searchterm = urllib.quote_plus(searchname.encode(lazylibrarian.SYS_ENCODING))
            set_url = base_url + searchterm + '&' + urllib.urlencode(params)
            authorid = ''
            try:
                rootxml, in_cache = get_xml_request(set_url)
                if len(rootxml):
                    resultxml = rootxml.getiterator('work')
                    for item in resultxml:
                        booktitle = item.find('./best_book/title').text
                        book_fuzz = fuzz.token_set_ratio(booktitle, bookname)
                        if book_fuzz >= 98:
                            author = item.find('./best_book/author/name').text
                            authorid = item.find('./best_book/author/id').text
                            logger.debug("Author Search found %s %s, authorid %s" %
                                         (author, booktitle, authorid))
                            break
                if not authorid:
                    # try again with title only
                    searchname = cleanName(unaccented(bookname))
                    searchterm = urllib.quote_plus(searchname.encode(lazylibrarian.SYS_ENCODING))
                    set_url = base_url + searchterm + '&' + urllib.urlencode(params)
                    rootxml, in_cache = get_xml_request(set_url)
                    if len(rootxml):
                        resultxml = rootxml.getiterator('work')
                        for item in resultxml:
                            booktitle = item.find('./best_book/title').text
                            book_fuzz = fuzz.token_set_ratio(booktitle, bookname)
                            if book_fuzz >= 98:
                                author = item.find('./best_book/author/name').text
                                authorid = item.find('./best_book/author/id').text
                                logger.debug("Title Search found %s %s, authorid %s" %
                                             (author, booktitle, authorid))
                                break
                if not authorid:
                    logger.warn("GoodReads doesn't know about %s %s" % (authorname, bookname))
            except Exception as e:
                logger.error("Error finding goodreads results: %s" % str(e))

            if authorid:
                # import the author (and their books) into the database
                lazylibrarian.importer.addAuthorToDB(refresh=False, authorid=authorid)

    # report how many authors the imports actually added
    result = myDB.match("select count('AuthorID') as counter from authors")
    finish = int(result['counter'])
    newauth = finish - start
    logger.info("Added %s new author%s for %s" % (newauth, plural(newauth), seriesname))
    return newauth