def setBookAuthors(book):
    """Link every listed author of a book to it in the bookauthors table.

    Looks up each qualifying author by name in the authors table, adding new
    authors to the database where possible, then inserts an
    (AuthorID, BookID) reference, suppressing duplicates.

    Parameters:
        book: mapping with at least 'bookid' and 'bookname' keys.

    Returns:
        (newauthors, newrefs): number of authors newly added to the database
        and number of new bookauthors references created.
    """
    myDB = database.DBConnection()
    newauthors = 0
    newrefs = 0
    try:
        authorlist = getBookAuthors(book['bookid'])
        for author in authorlist:
            authtype = author['type']
            if authtype in ['primary author', 'main author', 'secondary author']:
                if author['role'] in ['Author', '—'] and author['work'] == 'all editions':
                    name = formatAuthorName(unaccented(author['name']))
                    # escape embedded double-quotes so the sql stays valid,
                    # matching how addAuthorNameToDB queries by name
                    exists = myDB.match(
                        'select authorid from authors where authorname = "%s"' %
                        name.replace('"', '""'))
                    if exists:
                        authorid = exists['authorid']
                    else:
                        # try to add new author to database by name
                        name, authorid, new = lazylibrarian.importer.addAuthorNameToDB(
                            name, False, False)
                        if new and authorid:
                            newauthors += 1
                    if authorid:
                        # suppress duplicates in bookauthors
                        myDB.action(
                            'INSERT into bookauthors (AuthorID, BookID) VALUES ("%s", "%s")' %
                            (authorid, book['bookid']), suppress='UNIQUE')
                        newrefs += 1
    except Exception:
        # was a bare "except:"; narrowed so SystemExit/KeyboardInterrupt propagate
        logger.debug("Error parsing authorlist for " + book['bookname'])
    return newauthors, newrefs
def _findAuthor(self, **kwargs):
    """Search the configured book API for an author and store the results.

    Requires a 'name' keyword argument; runs the provider search on a worker
    thread, waits for it, and leaves the queued result in self.data.
    """
    if 'name' not in kwargs:
        self.data = 'Missing parameter: name'
        return

    authorname = formatAuthorName(kwargs['name'])
    queue = Queue.Queue()
    # choose the searcher to match the configured BOOK_API
    # (anything other than GoogleBooks falls through to GoodReads)
    if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
        searcher = GoogleBooks(authorname)
        threadname = 'API-GBRESULTS'
    else:
        searcher = GoodReads(authorname)
        threadname = 'API-GRRESULTS'

    worker = threading.Thread(target=searcher.find_results,
                              name=threadname, args=[authorname, queue])
    worker.start()
    worker.join()
    self.data = queue.get()
def find_author_id(self, refresh=False):
    """Look up the GoodReads author id for self.name.

    Queries the goodreads author_url API endpoint; on a match, returns the
    result of self.get_author_info(authorid), otherwise an empty list.

    Parameters:
        refresh: when True, bypass the local xml request cache.
    """
    author = self.name
    author = formatAuthorName(author)
    # googlebooks gives us author names with long form unicode characters
    if isinstance(author, str):
        author = author.decode('utf-8')  # make unicode
    author = unicodedata.normalize('NFC', author)  # normalize to short form
    # build the request url AFTER normalising so we query goodreads for the
    # same name we log and compare against (it was previously built from the
    # un-normalised name); quote the utf-8 encoding explicitly since
    # urllib.quote cannot handle non-ascii unicode
    URL = 'http://www.goodreads.com/api/author_url/' + \
          urllib.quote(author.encode('utf-8')) + '?' + urllib.urlencode(self.params)
    logger.debug("Searching for author with name: %s" % author)
    authorlist = []
    try:
        rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
    except Exception as e:
        logger.error("Error finding authorid: %s, %s" % (URL, str(e)))
        return authorlist
    if rootxml is None:
        logger.debug("Error requesting authorid")
        return authorlist

    resultxml = rootxml.getiterator('author')
    if not len(resultxml):
        logger.warn('No authors found with name: %s' % author)
    else:
        # In spite of how this looks, goodreads only returns one result, even if there are multiple matches
        # we just have to hope we get the right one. eg search for "James Lovelock" returns "James E. Lovelock"
        # who only has one book listed under googlebooks, the rest are under "James Lovelock"
        # goodreads has all his books under "James E. Lovelock". Can't come up with a good solution yet.
        # For now we'll have to let the user handle this by selecting/adding the author manually
        for author in resultxml:
            authorid = author.attrib.get("id")
            authorlist = self.get_author_info(authorid)
    return authorlist
def addAuthorNameToDB(author=None, refresh=False, addbooks=True):
    """Look up an author by name, importing them from GoodReads if missing.

    Gets the author's name in a consistent format and looks them up in the
    database; if not found (and ADD_AUTHOR is enabled) tries to import them.

    Returns:
        (authorname, authorid, new) where new=False if the author was already
        in the db and new=True if added by this call. authorname is our
        preferred name; empty strings are returned if not found or unable
        to add.
    """
    myDB = database.DBConnection()
    new = False
    # guard against author=None as well as too-short names
    # (the previous len(author) check raised TypeError on the None default)
    if not author or len(author) < 2:
        logger.debug('Invalid Author Name [%s]' % author)
        return "", "", False

    author = formatAuthorName(author)
    # Check if the author exists, and import the author if not
    check_exist_author = myDB.match(
        'SELECT AuthorID FROM authors where AuthorName="%s"' % author.replace('"', '""'))
    if not check_exist_author and lazylibrarian.CONFIG['ADD_AUTHOR']:
        logger.debug('Author %s not found in database, trying to add' % author)
        # no match for supplied author, but we're allowed to add new ones
        GR = GoodReads(author)
        try:
            author_gr = GR.find_author_id()
        except Exception as e:
            logger.warn("Error finding author id for [%s] %s" % (author, str(e)))
            return "", "", False

        # only try to add if GR data matches found author data
        if author_gr:
            authorname = author_gr['authorname']
            # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
            match_auth = author.replace('.', ' ')
            match_auth = ' '.join(match_auth.split())
            match_name = authorname.replace('.', ' ')
            match_name = ' '.join(match_name.split())
            match_name = unaccented(match_name)
            match_auth = unaccented(match_auth)
            # allow a degree of fuzziness to cater for different accented character handling.
            # some author names have accents,
            # filename may have the accented or un-accented version of the character
            # The currently non-configurable value of fuzziness might need to go in config
            # We stored GoodReads unmodified author name in author_gr,
            # so store in LL db under that
            # fuzz.ratio doesn't lowercase for us
            match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
            if match_fuzz < 90:
                logger.debug(
                    "Failed to match author [%s] to authorname [%s] fuzz [%d]" %
                    (author, match_name, match_fuzz))

            # To save loading hundreds of books by unknown authors at GR or GB, ignore unknown
            if (author != "Unknown") and (match_fuzz >= 90):
                # use "intact" name for author that we stored in
                # GR author_dict, not one of the various mangled versions
                # otherwise the books appear to be by a different author!
                author = author_gr['authorname']
                authorid = author_gr['authorid']
                # this new authorname may already be in the
                # database, so check again
                check_exist_author = myDB.match(
                    'SELECT AuthorID FROM authors where AuthorID="%s"' % authorid)
                if check_exist_author:
                    logger.debug('Found goodreads authorname %s in database' % author)
                else:
                    logger.info("Adding new author [%s]" % author)
                    try:
                        addAuthorToDB(authorname=author, refresh=refresh,
                                      authorid=authorid, addbooks=addbooks)
                        check_exist_author = myDB.match(
                            'SELECT AuthorID FROM authors where AuthorID="%s"' % authorid)
                        if check_exist_author:
                            new = True
                    except Exception:
                        logger.debug('Failed to add author [%s] to db' % author)

    # check author exists in db, either newly loaded or already there
    if not check_exist_author:
        logger.debug("Failed to match author [%s] in database" % author)
        return "", "", False
    return author, check_exist_author['AuthorID'], new
def search_rss_book(books=None, reset=False):
    """Search the configured RSS feeds (and GoodReads wishlists) for books.

    Parameters:
        books: list of dicts with a 'bookid' key to search for; None means
               a backlog search of every book marked "Wanted".
        reset: when True, restart the scheduled search_rss_book job on exit.

    Wishlist items found are added to the db and/or marked "Wanted"; wanted
    books are then matched against the RSS provider results.
    """
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if books is None:
                threading.currentThread().name = "SEARCHALLRSS"
            else:
                threading.currentThread().name = "SEARCHRSS"

        if not (lazylibrarian.USE_RSS()):
            logger.warn('RSS search is disabled')
            scheduleJob(action='Stop', target='search_rss_book')
            return
        if not internet():
            logger.warn('Search RSS Book: No internet connection')
            return

        myDB = database.DBConnection()
        resultlist, wishproviders = IterateOverGoodReads()
        if not wishproviders:
            logger.debug('No rss wishlists are set')
        else:
            # for each item in resultlist, add to database if necessary, and mark as wanted
            # hoisted out of the loop: was logged once per item
            logger.debug('Processing %s item%s in wishlists' %
                         (len(resultlist), plural(len(resultlist))))
            for book in resultlist:
                # we get rss_author, rss_title, rss_isbn, rss_bookid (goodreads bookid)
                # we can just use bookid if goodreads, or try isbn and name matching
                # on author/title if googlebooks
                # not sure if anyone would use a goodreads wishlist if not using goodreads interface...
                if book['rss_bookid'] and lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                    bookmatch = myDB.match(
                        'select Status,BookName from books where bookid="%s"' %
                        book['rss_bookid'])
                    if bookmatch:
                        bookstatus = bookmatch['Status']
                        bookname = bookmatch['BookName']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(u'Found book %s, already marked as "%s"' %
                                        (bookname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(u'Found book %s, marking as "Wanted"' % bookname)
                            # was {"BookID": bookid}: bookid is unbound in this branch
                            controlValueDict = {"BookID": book['rss_bookid']}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    else:
                        import_book(book['rss_bookid'])
                else:
                    item = {}
                    headers = []
                    item['Title'] = book['rss_title']
                    if book['rss_bookid']:
                        item['BookID'] = book['rss_bookid']
                        headers.append('BookID')
                    if book['rss_isbn']:
                        item['ISBN'] = book['rss_isbn']
                        headers.append('ISBN')
                    bookmatch = finditem(item, book['rss_author'], headers)
                    if bookmatch:  # it's already in the database
                        authorname = bookmatch['AuthorName']
                        bookname = bookmatch['BookName']
                        bookid = bookmatch['BookID']
                        bookstatus = bookmatch['Status']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(u'Found book %s by %s, already marked as "%s"' %
                                        (bookname, authorname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(u'Found book %s by %s, marking as "Wanted"' %
                                        (bookname, authorname))
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    else:  # not in database yet
                        results = ''
                        if book['rss_isbn']:
                            results = search_for(book['rss_isbn'])
                        if results:
                            result = results[0]
                            if result['isbn_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                                logger.info("Found (%s%%) %s: %s" %
                                            (result['isbn_fuzz'], result['authorname'],
                                             result['bookname']))
                                import_book(result['bookid'])
                                bookmatch = True
                        if not results:
                            searchterm = "%s <ll> %s" % (
                                item['Title'], formatAuthorName(book['rss_author']))
                            results = search_for(unaccented(searchterm))
                        if results:
                            result = results[0]
                            if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                                    and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                                logger.info("Found (%s%% %s%%) %s: %s" %
                                            (result['author_fuzz'], result['book_fuzz'],
                                             result['authorname'], result['bookname']))
                                import_book(result['bookid'])
                                bookmatch = True
                        if not bookmatch:
                            msg = "Skipping book %s by %s" % (item['Title'], book['rss_author'])
                            # noinspection PyUnboundLocalVariable
                            if not results:
                                msg += ', No results returned'
                                logger.warn(msg)
                            else:
                                msg += ', No match found'
                                logger.warn(msg)
                                msg = "Closest match (%s%% %s%%) %s: %s" % (
                                    result['author_fuzz'], result['book_fuzz'],
                                    result['authorname'], result['bookname'])
                                logger.warn(msg)

        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded from books,authors '
            cmd += 'WHERE books.AuthorID = authors.AuthorID and books.Status="Wanted" order by BookAdded desc'
            searchbooks = myDB.select(cmd)
        else:
            # The user has added a new book
            searchbooks = []
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub from books,authors '
                cmd += 'WHERE books.AuthorID = authors.AuthorID and BookID="%s" ' % book['bookid']
                cmd += 'AND books.Status="Wanted"'
                searchbook = myDB.select(cmd)
                for terms in searchbook:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            return

        resultlist, nproviders = IterateOverRSSSites()
        if not nproviders:
            if not wishproviders:
                logger.warn('No rss providers are set, check config')
            return  # No point in continuing

        logger.info('RSS Searching for %i book%s' %
                    (len(searchbooks), plural(len(searchbooks))))
        rss_count = 0
        for book in searchbooks:
            authorname, bookname = get_searchterm(book, "book")
            found = processResultList(resultlist, authorname, bookname, book, 'book')

            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in bookname:  # anything to shorten?
                authorname, bookname = get_searchterm(book, "shortbook")
                found = processResultList(resultlist, authorname, bookname, book, 'shortbook')

            if not found:
                logger.debug("Searches returned no results. Adding book %s - %s to queue." %
                             (authorname, bookname))
            # NOTE(review): 'found > True' is only true if processResultList
            # returns an int > 1; a plain boolean True never increments the
            # counter -- confirm against processResultList's return value
            if found > True:
                rss_count += 1

        logger.info("RSS Search for Wanted items complete, found %s book%s" %
                    (rss_count, plural(rss_count)))

        if reset:
            scheduleJob(action='Restart', target='search_rss_book')

    except Exception:
        logger.error('Unhandled exception in search_rss_book: %s' % traceback.format_exc())
def import_CSV(search_dir=None):
    """
    Find a csv file in the search_dir and process all the books in it,
    adding authors to the database if not found
    and marking the books as "Wanted"

    Returns False if search_dir is missing/invalid, otherwise a summary
    message string (or None if no csv file was found).
    """
    try:
        if not search_dir:
            logger.warn("Alternate Directory not configured")
            return False
        elif not os.path.isdir(search_dir):
            logger.warn("Alternate Directory [%s] not found" % search_dir)
            return False

        csvFile = csv_file(search_dir)
        headers = None
        content = {}

        if not csvFile:
            logger.warn(u"No CSV file found in %s" % search_dir)
        else:
            logger.debug(u'Reading file %s' % csvFile)
            # use a context manager so the file handle is closed when we are
            # done (the open() call previously leaked it)
            with open(csvFile) as csv_fp:
                reader = csv.reader(csv_fp)
                for row in reader:
                    if reader.line_num == 1:
                        # If we are on the first line, create the headers list from the first row
                        headers = row
                    else:
                        # Otherwise, the key in the content dictionary is the first item in the
                        # row and we can create the sub-dictionary by using the zip() function.
                        # we include the key in the dictionary as our exported csv files use
                        # bookid as the key
                        content[row[0]] = dict(zip(headers, row))

            # We can now get to the content by using the resulting dictionary, so to see
            # the list of lines, we can do: print content.keys() to get a list of keys
            # To see the list of fields available for each book: print headers
            if 'Author' not in headers or 'Title' not in headers:
                logger.warn(u'Invalid CSV file found %s' % csvFile)
                return

            myDB = database.DBConnection()
            bookcount = 0
            authcount = 0
            skipcount = 0
            logger.debug(u"CSV: Found %s book%s in csv file" %
                         (len(content.keys()), plural(len(content.keys()))))
            for item in content.keys():
                authorname = formatAuthorName(content[item]['Author'])
                authmatch = myDB.match('SELECT * FROM authors where AuthorName="%s"' % authorname)

                if authmatch:
                    logger.debug(u"CSV: Author %s found in database" % authorname)
                else:
                    logger.debug(u"CSV: Author %s not found" % authorname)
                    authcount += 1

                bookmatch = finditem(content[item], authorname, headers)
                result = ''
                if bookmatch:
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    bookstatus = bookmatch['Status']
                    if bookstatus in ['Open', 'Wanted', 'Have']:
                        logger.info(u'Found book %s by %s, already marked as "%s"' %
                                    (bookname, authorname, bookstatus))
                    else:  # skipped/ignored
                        logger.info(u'Found book %s by %s, marking as "Wanted"' %
                                    (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        bookcount += 1
                else:
                    searchterm = "%s <ll> %s" % (content[item]['Title'],
                                                 formatAuthorName(authorname))
                    results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]
                        if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                                and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                            logger.info("Found (%s%% %s%%) %s: %s" %
                                        (result['author_fuzz'], result['book_fuzz'],
                                         result['authorname'], result['bookname']))
                            import_book(result['bookid'])
                            bookcount += 1
                            bookmatch = True

                if not bookmatch:
                    msg = "Skipping book %s by %s" % (content[item]['Title'],
                                                      content[item]['Author'])
                    if not result:
                        msg += ', No results returned'
                        logger.warn(msg)
                    else:
                        msg += ', No match found'
                        logger.warn(msg)
                        msg = "Closest match (%s%% %s%%) %s: %s" % (
                            result['author_fuzz'], result['book_fuzz'],
                            result['authorname'], result['bookname'])
                        logger.warn(msg)
                    skipcount += 1

            msg = "Added %i new author%s, marked %i book%s as 'Wanted', %i book%s not found" % \
                  (authcount, plural(authcount), bookcount, plural(bookcount),
                   skipcount, plural(skipcount))
            logger.info(msg)
            return msg
    except Exception:
        msg = 'Unhandled exception in importCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg