Ejemplo n.º 1
0
    def addKeyword(self, type=None, title=None, frequency=None, **args):
        """Add a magazine to the database and start searching for it.

        Only ``type == 'magazine'`` is handled; for any other type the
        method opens a database connection, does nothing else and returns
        None.

        :param type: kind of keyword being added (the name shadows the
            builtin but is kept for interface compatibility)
        :param title: magazine title, used as the key of the magazines row
        :param frequency: stored unchanged in the "Frequency" column
        :param args: any extra request parameters; ignored
        :raises cherrypy.HTTPRedirect: to "config" when no title was given,
            otherwise to "magazines" after the row has been stored and the
            enabled search threads have been started
        """
        myDB = database.DBConnection()
        if type != 'magazine':
            return

        # Truthiness also covers title=None (the default), which previously
        # raised TypeError from len(None) instead of redirecting.
        if not title:
            raise cherrypy.HTTPRedirect("config")

        controlValueDict = {"Title": title}
        newValueDict = {
            "Frequency": frequency,
            "Regex": None,
            "Status": "Active",
            "MagazineAdded": formatter.today(),
            "IssueStatus": "Wanted"
        }
        myDB.upsert("magazines", newValueDict, controlValueDict)

        # The search functions are called with (books, mags); books is False
        # here because only the new magazine is being searched for.
        mags = [{"bookid": title}]
        books = False
        if lazylibrarian.USE_NZB:
            threading.Thread(target=search_nzb_book,
                             args=[books, mags]).start()
        if lazylibrarian.USE_TOR:
            threading.Thread(target=search_tor_book,
                             args=[books, mags]).start()
        logger.debug("Searching for magazine with title: " + str(title))
        raise cherrypy.HTTPRedirect("magazines")
Ejemplo n.º 2
0
    def addKeyword(self, type=None, title=None, frequency=None, **args):
        """Store a new magazine and kick off searches for it.

        Only ``type == 'magazine'`` is handled; for any other type the
        method opens a database connection, does nothing else and returns
        None.

        :param type: kind of keyword being added (the name shadows the
            builtin but is kept for interface compatibility)
        :param title: magazine title, used as the key of the magazines row
        :param frequency: stored unchanged in the "Frequency" column
        :param args: any extra request parameters; ignored
        :raises cherrypy.HTTPRedirect: to "config" when no title was given,
            otherwise to "magazines" after the row has been stored and the
            enabled search threads have been started
        """
        myDB = database.DBConnection()
        if type != 'magazine':
            return

        # Truthiness also covers title=None (the default), which previously
        # raised TypeError from len(None) instead of redirecting.
        if not title:
            raise cherrypy.HTTPRedirect("config")

        controlValueDict = {"Title": title}
        newValueDict = {
            "Frequency": frequency,
            "Regex": None,
            "Status": "Active",
            "MagazineAdded": formatter.today(),
            "IssueStatus": "Wanted"
        }
        myDB.upsert("magazines", newValueDict, controlValueDict)

        # The search functions are called with (books, mags); books is False
        # here because only the new magazine is being searched for.
        mags = [{"bookid": title}]
        books = False
        if lazylibrarian.USE_NZB:
            threading.Thread(target=search_nzb_book, args=[books, mags]).start()
        if lazylibrarian.USE_TOR:
            threading.Thread(target=search_tor_book, args=[books, mags]).start()
        logger.debug("Searching for magazine with title: " + str(title))
        raise cherrypy.HTTPRedirect("magazines")
Ejemplo n.º 3
0
def addAuthorToDB(authorname=None, refresh=False):
    """Add an author to the database, or reload an existing one.

    The author row is first marked "Loading" (a new row gets the placeholder
    AuthorID "0: <name>"), then the author is looked up through GoodReads.
    If the lookup succeeds the row is filled in, the author's books are
    fetched from the configured BOOK_API and the totals are updated; if it
    fails the row is deleted again.

    :param authorname: name of the author, used as the key of the row
    :param refresh: passed through to the GoodReads / book API calls
    :returns: None
    """
    myDB = database.DBConnection()

    GR = GoodReads(authorname)

    # Bound parameters instead of hand-escaped string formatting, so names
    # containing quotes cannot break (or alter) the statement.
    dbauthor = myDB.match("SELECT * from authors WHERE AuthorName=?",
                          (authorname,))
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.debug("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.debug("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id(refresh=refresh)
    if not author:
        logger.warn(u"Nothing found for %s" % authorname)
        # Remove the "Loading" row written above. The previous statement
        # interpolated the name inside double quotes without any escaping.
        myDB.action('DELETE from authors WHERE AuthorName=?', (authorname,))
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    if 'nophoto' in authorimg:
        # placeholder image, try to find a real one
        authorimg = getAuthorImage(authorid)
    if authorimg and authorimg.startswith('http'):
        # keep the remote link if caching does not return a replacement
        newimg = cache_cover(authorid, authorimg)
        if newimg:
            authorimg = newimg
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    update_totals(authorid)
    logger.debug("[%s] Author update complete" % authorname)
Ejemplo n.º 4
0
def addAuthorToDB(authorname=None):
    """Add an author and their books to the database.

    The author row is marked "Loading" (a new row gets the placeholder
    AuthorID "0: <name>"), the author is looked up through GoodReads, the
    author's books are fetched, and finally the row is set to "Active" with
    the book count and the most recent book.

    :param authorname: name of the author, used as the key of the row
    :returns: None
    """
    threading.currentThread().name = "DBIMPORT"
    search_type = 'author'
    myDB = database.DBConnection()

    GR = GoodReads(authorname, search_type)

    query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace("'", "''")
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        logger.error("Nothing found")
        # Without an author there is no authorid; carrying on used to fail
        # with a NameError on the first use of authorid below.
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    GR.get_author_books(authorid)

    lastbook = myDB.action("SELECT BookName, BookLink, BookDate from books WHERE AuthorID='%s' order by BookDate DESC" % authorid).fetchone()
    bookCount = myDB.select("SELECT COUNT(BookName) as counter FROM books WHERE AuthorID='%s'" % authorid)
    for count in bookCount:
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "TotalBooks": count['counter']
        }
        # lastbook is None when the author has no books in the table;
        # only record the latest book when there is one.
        if lastbook is not None:
            newValueDict["LastBook"] = lastbook['BookName']
            newValueDict["LastLink"] = lastbook['BookLink']
            newValueDict["LastDate"] = lastbook['BookDate']

        myDB.upsert("authors", newValueDict, controlValueDict)
        logger.info("Processing complete: Added %s books to the database" % str(count['counter']))
Ejemplo n.º 5
0
def addAuthorToDB(authorname=None, refresh=False):
    """Add an author to the database, or reload an existing one.

    The author row is first marked "Loading" (a new row gets the placeholder
    AuthorID "0: <name>"), then the author is looked up through GoodReads.
    If the lookup succeeds the row is filled in, the author's books are
    fetched from the configured BOOK_API and the totals are updated; if it
    fails the row is deleted again.

    :param authorname: name of the author, used as the key of the row
    :param refresh: passed through to the GoodReads / book API calls
    :returns: None
    """
    myDB = database.DBConnection()

    GR = GoodReads(authorname)

    # Bound parameters instead of hand-escaped string formatting, so names
    # containing quotes cannot break (or alter) the statement.
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?",
                           (authorname,)).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.debug("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.debug("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id(refresh=refresh)
    if not author:
        logger.warn(u"Nothing found for %s" % authorname)
        # Remove the "Loading" row written above. The previous statement
        # interpolated the name inside double quotes without any escaping.
        myDB.action('DELETE from authors WHERE AuthorName=?', (authorname,))
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    if 'nophoto' in authorimg:
        # placeholder image, try to find a real one
        authorimg = getAuthorImage(authorid)
    if authorimg and authorimg.startswith('http'):
        # keep the remote link if caching does not return a replacement
        newimg = cache_cover(authorid, authorimg)
        if newimg:
            authorimg = newimg
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    update_totals(authorid)
    logger.debug("[%s] Author update complete" % authorname)
Ejemplo n.º 6
0
def addAuthorToDB(authorname=None, refresh=False):
    """Add an author to the database, or reload an existing one.

    The author row is first marked "Loading" (a new row gets the placeholder
    AuthorID "0: <name>"), then the author is looked up through GoodReads.
    If the lookup succeeds the row is filled in and the author's books are
    fetched from the configured BOOK_API; if it fails the row is deleted
    again.

    :param authorname: name of the author, used as the key of the row
    :param refresh: passed through to the GoodReads / book API calls
    :returns: None
    """
    threading.currentThread().name = "DBIMPORT"

    myDB = database.DBConnection()

    GR = GoodReads(authorname)

    # Bound parameters instead of hand-escaped string formatting, so names
    # containing quotes cannot break (or alter) the statement.
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?",
                           (authorname,)).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.debug("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.debug("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id(refresh=refresh)
    if not author:
        logger.warn(u"Nothing found for %s" % authorname)
        # Remove the "Loading" row written above. The previous statement
        # interpolated the name inside double quotes without any escaping.
        myDB.action('DELETE from authors WHERE AuthorName=?', (authorname,))
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    logger.debug("[%s] Author update complete" % authorname)
Ejemplo n.º 7
0
def addAuthorToDB(authorname=None, refresh=False):
    """Add an author to the database, or reload an existing one.

    The author row is marked "Loading" (a new row gets the placeholder
    AuthorID "0: <name>"), the author is looked up through GoodReads, and
    the author's books are fetched from the configured BOOK_API.

    :param authorname: name of the author, used as the key of the row
    :param refresh: passed through to the book API calls
    :returns: None
    """
    threading.currentThread().name = "DBIMPORT"

    myDB = database.DBConnection()

    GR = GoodReads(authorname)

    query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace("'", "''")
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.info("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.info("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        logger.error("Nothing found")
        # Without an author there is no authorid; carrying on used to fail
        # with a NameError on the first use of authorid below.
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    logger.info("[%s] Author update complete" % authorname)
Ejemplo n.º 8
0
 def addKeyword(self, type=None, title=None, frequency=None, **args):
     """Add a magazine to the database.

     Only ``type == 'magazine'`` is handled; for any other type the method
     opens a database connection, does nothing else and returns None.

     :param type: kind of keyword being added (the name shadows the builtin
         but is kept for interface compatibility)
     :param title: magazine title, used as the key of the magazines row
     :param frequency: stored unchanged in the "Frequency" column
     :param args: any extra request parameters; ignored
     :raises cherrypy.HTTPRedirect: to "config" when no title was given,
         otherwise to "magazines" after the row has been stored
     """
     myDB = database.DBConnection()
     if type != 'magazine':
         return

     # Truthiness also covers title=None (the default), which previously
     # raised TypeError from len(None) instead of redirecting.
     if not title:
         raise cherrypy.HTTPRedirect("config")

     controlValueDict = {"Title": title}
     newValueDict = {
         "Frequency": frequency,
         "Regex": None,
         "Status": "Active",
         "MagazineAdded": formatter.today(),
     }
     myDB.upsert("magazines", newValueDict, controlValueDict)
     raise cherrypy.HTTPRedirect("magazines")
Ejemplo n.º 9
0
    def _addMagazine(self, **kwargs):
        if 'name' not in kwargs:
            self.data = 'Missing parameter: name'
            return
        else:
            self.id = kwargs['name']

        controlValueDict = {"Title": self.id}
        newValueDict = {
            "Frequency": None,
            "Regex": None,
            "Status": "Active",
            "MagazineAdded": formatter.today(),
            "IssueStatus": "Wanted"
        }
        myDB.upsert("magazines", newValueDict, controlValueDict)
Ejemplo n.º 10
0
    def _addMagazine(self, **kwargs):
        if 'name' not in kwargs:
            self.data = 'Missing parameter: name'
            return
        else:
            self.id = kwargs['name']

        controlValueDict = {"Title": self.id}
        newValueDict = {
            "Regex": None,
            "Status": "Active",
            "MagazineAdded": today(),
            "IssueStatus": "Wanted",
            "Reject": None
        }
        myDB.upsert("magazines", newValueDict, controlValueDict)
Ejemplo n.º 11
0
    def _addMagazine(self, **kwargs):
        if 'name' not in kwargs:
            self.data = 'Missing parameter: name'
            return
        else:
            self.id = kwargs['name']

        myDB = database.DBConnection()
        controlValueDict = {"Title": self.id}
        newValueDict = {
            "Regex": None,
            "Status": "Active",
            "MagazineAdded": today(),
            "IssueStatus": "Wanted",
            "Reject": None
        }
        myDB.upsert("magazines", newValueDict, controlValueDict)
Ejemplo n.º 12
0
def import_book(pp_path=None, bookID=None):
    """Move a downloaded book into the library folder structure.

    Kept as a separate function so books can also be imported from an
    alternate directory given just the bookID, e.g.::

        if import_book(source_directory, bookID):
            ppcount = ppcount + 1

    :param pp_path: directory that currently holds the book's files
    :param bookID: BookID of the book in the books table
    :returns: True when the book was processed; False when processing
        failed or the bookID is not in the books table
    """
    myDB = database.DBConnection()
    data = myDB.select('SELECT * from books WHERE BookID="%s"' % bookID)
    if not data:
        # Previously fell off the end and returned None; return the
        # documented False instead (still falsy for existing callers).
        logger.debug('BookID %s not found in database, nothing imported' % bookID)
        return False

    authorname = data[0]['AuthorName']
    bookname = data[0]['BookName']

    if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
        logger.warn('Please check your EBOOK_DEST_FOLDER setting')
        # note: this rewrites the module-level setting, not a local copy
        lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace('$Title', bookname)
    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace('$Title', bookname)
    # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
    # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
    dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
    dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING)

    processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name, bookID)

    if not processBook:
        logger.error('Postprocessing for %s has failed.' % global_name)
        logger.error('Warning - Residual files remain in %s' % pp_path)
        return False

    # update nzbs
    controlValueDict = {"BookID": bookID}
    newValueDict = {"Status": "Processed", "NZBDate": formatter.today()}  # say when we processed it
    myDB.upsert("wanted", newValueDict, controlValueDict)
    processExtras(myDB, dest_path, global_name, data)
    return True
Ejemplo n.º 13
0
def addAuthorToDB(authorname=None):
    """Add an author and their books to the database.

    The author row is marked "Loading" (a new row gets the placeholder
    AuthorID "0: <name>"), the author is looked up through GoodReads, the
    row is filled in and the author's books are fetched.

    :param authorname: name of the author, used as the key of the row
    :returns: None
    """
    threading.currentThread().name = "DBIMPORT"
    search_type = 'author'
    myDB = database.DBConnection()

    GR = GoodReads(authorname, search_type)

    query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace(
        "'", "''")
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        logger.error("Nothing found")
        # Without an author there is no authorid; carrying on used to fail
        # with a NameError in the get_author_books call below.
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    GR.get_author_books(authorid)
Ejemplo n.º 14
0
def addAuthorToDB(authorname=None):
    """Add an author and their books to the database.

    The author row is marked "Loading" (a new row gets the placeholder
    AuthorID "0: <name>"), the author is looked up through GoodReads, the
    row is filled in and the author's books are fetched.

    :param authorname: name of the author, used as the key of the row
    :returns: None
    """
    threading.currentThread().name = "DBIMPORT"
    search_type = 'author'
    myDB = database.DBConnection()

    GR = GoodReads(authorname, search_type)

    query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace("'", "''")
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        logger.error("Nothing found")
        # Without an author there is no authorid; carrying on used to fail
        # with a NameError in the get_author_books call below.
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    GR.get_author_books(authorid)
Ejemplo n.º 15
0
def _caps_search_available(data, path):
    """Return True/False for the 'available' attribute of the element at
    path, or None when the element or the attribute is missing."""
    node = data.find(path)
    if node is None or 'available' not in node.attrib:
        return None
    return node.attrib['available'] == 'yes'


def _apply_capabilities(provider, data):
    """Fill the search/category entries of provider from a parsed caps xml tree."""
    #############################################################################
    # book search isn't mentioned in the caps xml returned by
    # nzbplanet,jackett,oznzb,usenet-crawler, so we can't use it as a test
    # but the newznab+ ones usually support t=book and categories in 7000 range
    # whereas nZEDb ones don't support t=book and use categories in 8000 range
    # also some providers give searchtype but no supportedparams, so we still
    # can't tell what queries will be accepted
    # also category names can be lowercase or Mixed, magazine subcat name isn't
    # consistent, and subcat can be just subcat or category/subcat subcat > lang
    # eg "Magazines" "Mags" or "Books/Magazines" "Mags > French"
    # Load all languages for now as we don't know which the user might want
    #############################################################################
    #
    #  set some defaults
    #
    provider['GENERALSEARCH'] = ''
    provider['EXTENDED'] = '1'
    provider['BOOKCAT'] = ''
    provider['MAGCAT'] = ''
    provider['BOOKSEARCH'] = ''
    provider['MAGSEARCH'] = ''

    if _caps_search_available(data, 'searching/search'):
        provider['GENERALSEARCH'] = 'search'

    # NOTE: getiterator() was removed from xml.etree in Python 3.9; use
    # iter() instead if this module is ever moved to Python 3.
    for cat in data.getiterator('category'):
        if cat.attrib.get('name', '').lower() != 'books':
            continue

        bookcat = cat.attrib['id']  # keep main bookcat for later
        provider['BOOKCAT'] = bookcat
        provider['MAGCAT'] = ''

        # An explicit book-search declaration wins; otherwise guess from the
        # category id: 7000 looks like newznab+ (should support book-search),
        # anything else looks like nZEDb (probably no book-search).
        declared = _caps_search_available(data, 'searching/book-search')
        if declared is None:
            declared = (bookcat == '7000')
        provider['BOOKSEARCH'] = 'books' if declared else ''

        for subcat in cat.getiterator('subcat'):
            # a subcat without a name can't match anything, skip it quietly
            subname = subcat.attrib.get('name', '').lower()
            if 'ebook' in subname:
                provider['BOOKCAT'] = "%s,%s" % (provider['BOOKCAT'], subcat.attrib['id'])
            if 'magazines' in subname or 'mags' in subname:
                if provider['MAGCAT']:
                    provider['MAGCAT'] = "%s,%s" % (provider['MAGCAT'], subcat.attrib['id'])
                else:
                    provider['MAGCAT'] = subcat.attrib['id']
        # if no specific magazine subcategory, use books
        if not provider['MAGCAT']:
            provider['MAGCAT'] = bookcat


def get_capabilities(provider):
    """
    Query provider for caps if none loaded yet, or if config entry is too
    old and not set manually.

    provider is a dict; HOST, API, UPDATED and MANUAL are read, and on a
    successful query GENERALSEARCH, EXTENDED, BOOKCAT, MAGCAT, BOOKSEARCH,
    MAGSEARCH and UPDATED are rewritten. Network, HTTP and xml problems are
    logged and leave provider unchanged.

    Returns the same provider dict in every case.
    """
    match = False
    if len(provider['UPDATED']) == 10:  # any stored values?
        match = True
        if (formatter.age(provider['UPDATED']) > lazylibrarian.CACHE_AGE) and not provider['MANUAL']:
            logger.debug('Stored capabilities for %s are too old' % provider['HOST'])
            match = False

    if match:
        logger.debug('Using stored capabilities for %s' % provider['HOST'])
        return provider

    host = provider['HOST']
    if not str(host)[:4] == "http":
        host = 'http://' + host
    URL = host + '/api?t=caps&apikey=' + provider['API']
    logger.debug('Requesting capabilities for %s' % URL)

    request = urllib2.Request(URL)
    if lazylibrarian.PROXY_HOST:
        request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
    request.add_header('User-Agent', common.USER_AGENT)
    resp = ""
    try:
        resp = urllib2.urlopen(request, timeout=30)  # don't get stuck
    except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as e:
        logger.debug("Error getting capabilities: %s" % e)
        resp = ""
    if not resp:
        return provider

    if not str(resp.getcode()).startswith("2"):  # anything but 200 OK etc
        logger.warn(u"Unable to get capabilities for %s: Got %s" % (URL, resp.getcode()))
        return provider

    logger.debug(u"Got capabilities for %s" % request.get_full_url())
    try:
        source_xml = resp.read()  # .decode('utf-8')
        data = ElementTree.fromstring(source_xml)
    except Exception:
        logger.debug(u"Error getting xml from %s" % URL)
        data = None

    # data is None after a read/parse failure; calling len() on it used to
    # raise TypeError instead of reaching the "No data returned" warning.
    if data is None or not len(data):
        logger.warn(u"Unable to get capabilities for %s: No data returned" % URL)
        return provider

    logger.debug(u"Parsing xml for capabilities of %s" % URL)
    _apply_capabilities(provider, data)
    logger.debug("Categories: Books %s : Mags %s" % (provider['BOOKCAT'], provider['MAGCAT']))
    provider['UPDATED'] = formatter.today()
    return provider
Ejemplo n.º 16
0
    def find_book(self, bookid=None, bookstatus="None"):
        """Fetch one volume from GoogleBooks and add it to the books table.

        The book's author is looked up through GoodReads; if that author is
        not in the authors table yet (by id or by name) a new author row is
        created. After the book row is written, the cover, series and work
        page are updated where available.

        :param bookid: GoogleBooks volume id, also used as BookID
        :param bookstatus: status for the new book row. A falsy value is
            replaced by CONFIG['NEWBOOK_STATUS']; note the default is the
            string "None", which is truthy and therefore stored as given.
        :returns: None. Returns early, without adding anything, when the
            request gives no result, the book has no author, or GoodReads
            has no id for the author.
        """
        myDB = database.DBConnection()
        if not lazylibrarian.CONFIG['GB_API']:
            logger.warn('No GoogleBooks API key, check config')
        URL = 'https://www.googleapis.com/books/v1/volumes/' + \
              str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
        jsonresults, in_cache = gb_json_request(URL)

        if jsonresults is None:
            logger.debug('No results found for %s' % bookid)
            return

        if not bookstatus:
            bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

        book = bookdict(jsonresults)
        dic = {':': '.', '"': '', '\'': ''}
        bookname = replace_all(book['name'], dic)

        bookname = unaccented(bookname)
        bookname = bookname.strip()  # strip whitespace

        if not book['author']:
            logger.debug('Book %s does not contain author field, skipping' % bookname)
            return
        # The preference checks below only log: the user asked for this
        # specific book, so it is added even when it doesn't match.
        # warn if language is in ignore list, but user said they wanted this book
        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        if book['lang'] not in valid_langs and 'All' not in valid_langs:
            logger.debug('Book %s googlebooks language does not match preference, %s' % (bookname, book['lang']))

        if lazylibrarian.CONFIG['NO_PUBDATE']:
            if not book['date'] or book['date'] == '0000':
                logger.warn('Book %s Publication date does not match preference, %s' % (bookname, book['date']))

        if lazylibrarian.CONFIG['NO_FUTURE']:
            # date may be empty (see the check above); only compare a real
            # value, as None > str raises TypeError on Python 3
            if book['date'] and book['date'] > today()[:4]:
                logger.warn('Book %s Future publication date does not match preference, %s' % (bookname, book['date']))

        authorname = book['author']
        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if author:
            AuthorID = author['authorid']
            match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
            if not match:
                match = myDB.match('SELECT AuthorID from authors WHERE AuthorName=?', (author['authorname'],))
                if match:
                    logger.debug('%s: Changing authorid from %s to %s' %
                                 (author['authorname'], AuthorID, match['AuthorID']))
                    AuthorID = match['AuthorID']  # we have a different authorid for that authorname
                else:  # no author but request to add book, add author with newauthor status
                    # User hit "add book" button from a search or a wishlist import
                    newauthor_status = 'Active'
                    if lazylibrarian.CONFIG['NEWAUTHOR_STATUS'] in ['Skipped', 'Ignored']:
                        newauthor_status = 'Paused'
                    controlValueDict = {"AuthorID": AuthorID}
                    newValueDict = {
                        "AuthorName": author['authorname'],
                        "AuthorImg": author['authorimg'],
                        "AuthorLink": author['authorlink'],
                        "AuthorBorn": author['authorborn'],
                        "AuthorDeath": author['authordeath'],
                        "DateAdded": today(),
                        "Status": newauthor_status
                    }
                    authorname = author['authorname']
                    myDB.upsert("authors", newValueDict, controlValueDict)
                    if lazylibrarian.CONFIG['NEWAUTHOR_BOOKS']:
                        self.get_author_books(AuthorID, entrystatus=lazylibrarian.CONFIG['NEWAUTHOR_STATUS'])
        else:
            logger.warn("No AuthorID for %s, unable to add book %s" % (book['author'], bookname))
            return

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorID": AuthorID,
            "BookName": bookname,
            "BookSub": book['sub'],
            "BookDesc": book['desc'],
            "BookIsbn": book['isbn'],
            "BookPub": book['pub'],
            "BookGenre": book['genre'],
            "BookImg": book['img'],
            "BookLink": book['link'],
            "BookRate": float(book['rate']),
            "BookPages": book['pages'],
            "BookDate": book['date'],
            "BookLang": book['lang'],
            "Status": bookstatus,
            "AudioStatus": lazylibrarian.CONFIG['NEWAUDIO_STATUS'],
            "BookAdded": today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.info("%s by %s added to the books database" % (bookname, authorname))

        # img may be empty; test it before the substring checks, which
        # raise TypeError on None
        if book['img'] and ('nocover' in book['img'] or 'nophoto' in book['img']):
            # try to get a cover from another source
            workcover, source = getBookCover(bookid)
            if workcover:
                logger.debug('Updated cover for %s using %s' % (bookname, source))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)

            elif book['img'].startswith('http'):
                link, success, _ = cache_img("book", bookid, book['img'])
                if success:
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"BookImg": link}
                    myDB.upsert("books", newValueDict, controlValueDict)
                else:
                    logger.debug('Failed to cache image for %s' % book['img'])

        serieslist = []
        if book['series']:
            serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))]
        if lazylibrarian.CONFIG['ADD_SERIES']:
            # series found by getWorkSeries replace the one from googlebooks
            newserieslist = getWorkSeries(bookid)
            if newserieslist:
                serieslist = newserieslist
                logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
        setSeries(serieslist, bookid)

        worklink = getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
Ejemplo n.º 17
0
def addBookToDB(bookid, authorname):
    """Fetch metadata for one book and its author and store both in the database.

    The book row keyed by ``bookid`` is first flagged as "Loading"; if
    ``GR.find_book()`` returns data, a row keyed by the returned book id is
    upserted with status "Skipped".  The author row keyed by ``authorname``
    is then upserted from ``GR.find_author_id()``.  A failed lookup is logged
    as a warning and the corresponding upsert is skipped.

    :param bookid: identifier of the book row to create or refresh.
    :param authorname: author name handed to GoodReads and used as author key.
    """
    search_type = 'book'  # was named ``type``, which shadowed the builtin
    myDB = database.DBConnection()
    GR = GoodReads(authorname, search_type)
    # NOTE(review): GB is not used below; kept in case the constructor matters.
    GB = GoogleBooks(bookid, search_type)

    # process book
    dbbook = myDB.action('SELECT * from books WHERE BookID=?',
                         [bookid]).fetchone()
    controlValueDict = {"BookID": bookid}

    if dbbook is None:
        newValueDict = {"BookID": "BookID: %s" % (bookid), "Status": "Loading"}
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("books", newValueDict, controlValueDict)

    book = GR.find_book()

    if not book:
        # %-formatting so a non-string bookid cannot raise TypeError here
        logger.warn("Error fetching bookinfo for BookID: %s" % bookid)

    else:
        controlValueDict = {"BookID": book['bookid']}
        newValueDict = {
            "AuthorName": book['authorname'],
            "BookName": book['bookname'],
            "BookDesc": book['bookdesc'],
            "BookIsbn": book['bookisbn'],
            "BookImg": book['bookimg'],
            "BookLink": book['booklink'],
            "BookRate": book['bookrate'],
            "BookPages": book['bookpages'],
            "BookDate": book['bookdate'],
            "BookLang": book['booklang'],
            "Status": "Skipped",
            "BookAdded": formatter.today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)

    # process author
    # The placeholder must not be quoted: '?' inside quotes is a string
    # literal, so the statement had no parameter to bind authorname to.
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?",
                           [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}

    # NOTE(review): unlike the book section, this "Loading" placeholder is
    # never upserted; it is replaced below or dropped when the lookup fails.
    if dbauthor is None:
        newValueDict = {
            "AuthorName": "Authorname: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}

    author = GR.find_author_id()

    if not author:
        logger.warn("Error fetching authorinfo with name: %s" % authorname)

    else:
        controlValueDict = {"AuthorName": authorname}
        newValueDict = {
            "AuthorID": author['authorid'],
            "AuthorLink": author['authorlink'],
            "AuthorImg": author['authorimg'],
            "AuthorBorn": author['authorborn'],
            "AuthorDeath": author['authordeath'],
            "DateAdded": formatter.today(),
            "Status": "Loading"
        }
        myDB.upsert("authors", newValueDict, controlValueDict)
Ejemplo n.º 18
0
    def get_author_books(self, authorid=None, authorname=None, refresh=False):
        """Import the Google Books volumes found for ``authorname``.

        The author row is flagged "Loading", then the API is queried with an
        ``inauthor:"<authorname>"`` search, advancing ``startIndex`` by 40 per
        request until it reaches the reported ``totalItems``, a page comes
        back without items, or an HTTP error occurs.  A volume is skipped when
        it has no author, no language, a language that is not listed in
        ``lazylibrarian.IMP_PREFLANG``, or a title whose first character is
        neither a word character nor ``-``.  Books whose stored status is
        "Ignored" are counted but not rewritten; everything else is upserted
        into ``books`` (new books get status "Skipped").  Finally the author
        row is set to "Active" with refreshed totals and latest-book fields.

        :param authorid: AuthorID used as key for the authors and books rows.
        :param authorname: author name used in the search and in log lines.
        :param refresh: only selects the wording of the final log message.
        :return: ``books_dict``; nothing is ever appended to it, so ``[]``.
        """
        books_dict = []
        set_url = self.url + urllib.quote('inauthor:' + '"' + authorname + '"')

        api_hits = 0
        logger.info('[%s] Now processing books with Google Books API' % authorname)

        # Artist is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)

        startindex = 0
        resultcount = 0
        removedResults = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0
        number_results = 1  # placeholder so the first page is always requested

        # Loop invariants: parse the preferred languages once, and keep the
        # title clean-up (German umlauts / sharp s transliterated, ':' removed)
        # as a table instead of a chain of temporaries.
        valid_langs = [lang.strip() for lang in lazylibrarian.IMP_PREFLANG.split(',')]
        title_replacements = (
            (u'\xf6', u'oe'), (u'\xe4', u'ae'), (u'\xdf', u'ss'),
            (u'\xc4', u'Ae'), (u'\xdc', u'Ue'), (u'\xd6', u'Oe'),
            (u'\xfc', u'ue'), (':', ''),
        )

        try:
            # The end-of-results check used to live inside the per-item loop,
            # which cut the last page short after its first item.
            while startindex < number_results:

                self.params['startIndex'] = startindex
                URL = set_url + '&' + urllib.urlencode(self.params)

                try:
                    jsonresults = json.JSONDecoder().decode(urllib2.urlopen(URL, timeout=30).read())
                except HTTPError as err:
                    logger.error('Google API returned HTTP Error - probably time/rate limiting - [%s]' % err.msg)
                    # without a fresh response there is nothing valid to process
                    break

                api_hits = api_hits + 1
                number_results = jsonresults.get('totalItems', 0)
                logger.debug('[%s] Searching url: %s' % (authorname, URL))
                if number_results == 0:
                    logger.info('Found no results for %s' % authorname)
                    break

                startindex = startindex + 40

                items = jsonresults.get('items')
                if not items:
                    # a page without items means there is nothing left to read
                    break

                for item in items:

                    total_count = total_count + 1
                    info = item.get('volumeInfo', {})

                    # skip if no author, no author is no book.
                    if not info.get('authors'):
                        logger.debug('Skipped a result without authorfield.')
                        continue

                    # skip if language is missing or not in the preferred list
                    if 'language' not in info:
                        ignored = ignored + 1
                        logger.debug('Skipped a result where no language is found')
                        continue
                    booklang = info['language']
                    if booklang not in valid_langs:
                        logger.debug('Skipped a book with language %s' % booklang)
                        ignored = ignored + 1
                        continue

                    # these three fields are required for the database row
                    try:
                        bookid = item['id']
                        bookname = info['title']
                        booklink = info['canonicalVolumeLink']
                    except KeyError as err:
                        logger.debug('Skipped a result without required field %s' % err)
                        continue

                    # optional fields fall back to the defaults used so far
                    bookpub = info.get('publisher')
                    booksub = info.get('subtitle')
                    bookdate = info.get('publishedDate', '0000-00-00')
                    bookimg = info.get('imageLinks', {}).get('thumbnail', 'images/nocover.png')
                    bookrate = float(info.get('averageRating', 0))
                    bookpages = info.get('pageCount', 0)
                    bookdesc = info.get('description')

                    categories = info.get('categories')
                    bookgenre = categories[0] if categories else None

                    # only the first identifier is considered, and only if it is an ISBN-10
                    identifiers = info.get('industryIdentifiers')
                    if identifiers and identifiers[0].get('type') == 'ISBN_10':
                        bookisbn = identifiers[0].get('identifier')
                    else:
                        bookisbn = None

                    # Darkie67: replacing German Umlauts and filtering out ":"
                    for old, new in title_replacements:
                        bookname = bookname.replace(old, new)

                    # bound parameter instead of string-built SQL
                    find_book_status = myDB.action(
                        "SELECT * FROM books WHERE BookID=?", [bookid]).fetchall()
                    if find_book_status:
                        for resulted in find_book_status:
                            book_status = resulted['Status']
                    else:
                        book_status = "Skipped"

                    if re.match(r'[^\w-]', bookname):
                        # remove books with bad characters in title
                        removedResults = removedResults + 1
                        continue

                    if book_status == "Ignored":
                        book_ignore_count = book_ignore_count + 1
                        continue

                    controlValueDict = {"BookID": bookid}
                    newValueDict = {
                        "AuthorName":   authorname,
                        "AuthorID":     authorid,
                        "AuthorLink":   "",
                        "BookName":     bookname,
                        "BookSub":      booksub,
                        "BookDesc":     bookdesc,
                        "BookIsbn":     bookisbn,
                        "BookPub":      bookpub,
                        "BookGenre":    bookgenre,
                        "BookImg":      bookimg,
                        "BookLink":     booklink,
                        "BookRate":     bookrate,
                        "BookPages":    bookpages,
                        "BookDate":     bookdate,
                        "BookLang":     booklang,
                        "Status":       book_status,
                        "BookAdded":    formatter.today(),
                        "Series":       None,
                        "SeriesOrder":  None
                    }
                    resultcount = resultcount + 1

                    myDB.upsert("books", newValueDict, controlValueDict)
                    logger.debug(u"book found " + bookname + " " + bookdate)
                    if not find_book_status:
                        logger.info("[%s] Added book: %s" % (authorname, bookname))
                        added_count = added_count + 1
                    else:
                        updated_count = updated_count + 1
                        logger.info("[%s] Updated book: %s" % (authorname, bookname))

        except KeyError as err:
            # best effort as before, but no longer silent
            logger.debug('[%s] Stopped processing books, missing key %s' % (authorname, err))

        logger.info('[%s] The Google Books API was hit %s times to populate book list' % (authorname, str(api_hits)))

        lastbook = myDB.action(
            "SELECT BookName, BookLink, BookDate from books WHERE AuthorID=? "
            "AND Status != 'Ignored' order by BookDate DESC", [authorid]).fetchone()
        unignoredbooks = myDB.action(
            "SELECT COUNT(BookName) as unignored FROM books WHERE AuthorID=? "
            "AND Status != 'Ignored'", [authorid]).fetchone()
        bookCount = myDB.action(
            "SELECT COUNT(BookName) as counter FROM books WHERE AuthorID=?",
            [authorid]).fetchone()

        # the author may have no (unignored) books at all
        if lastbook:
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
        else:
            lastbookname = None
            lastbooklink = None
            lastbookdate = None

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "TotalBooks": bookCount['counter'],
            "UnignoredBooks": unignoredbooks['unignored'],
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate
        }
        myDB.upsert("authors", newValueDict, controlValueDict)

        logger.debug("Found %s total books for author" % total_count)
        logger.debug("Removed %s bad language results for author" % ignored)
        logger.debug("Removed %s bad character results for author" % removedResults)
        logger.debug("Ignored %s books by author marked as Ignored" % book_ignore_count)
        logger.debug("Imported/Updated %s books for author" % resultcount)

        if refresh:
            logger.info("[%s] Book processing complete: Added %s books / Updated %s books" % (authorname, str(added_count), str(updated_count)))
        else:
            logger.info("[%s] Book processing complete: Added %s books to the database" % (authorname, str(added_count)))
        return books_dict
Ejemplo n.º 19
0
						    "AuthorID":     authorid,
						    "AuthorLink":   "",
						    "BookName":     book.find('title').text,
						    "BookSub":      "",
						    "BookDesc":     book.find('description').text,
						    "BookIsbn":     book.find('isbn').text,
						    "BookPub":      book.find('publisher').text,
						    "BookGenre":    "",
						    "BookImg":      bookimg,
						    "BookLink":     book.find('link').text,
						    "BookRate":     float(book.find('average_rating').text),
						    "BookPages":    book.find('num_pages').text,
						    "BookDate":     pubyear,
						    "BookLang":     bookLanguage,
						    "Status":       "Skipped",
						    "BookAdded":    formatter.today()
						}

						myDB.upsert("books", newValueDict, controlValueDict)
						logger.debug(u"book found " + book.find('title').text + " " + pubyear)
						resultsCount = resultsCount + 1
						
						lastbook = myDB.action("SELECT BookName, BookLink, BookDate from books WHERE AuthorID='%s' order by BookDate DESC" % authorid).fetchone()
						bookCount = myDB.select("SELECT COUNT(BookName) as counter FROM books WHERE AuthorID='%s'" % authorid)			
						for count in bookCount:
						    controlValueDict = {"AuthorID": authorid}
						    newValueDict = {
						            "Status": "Active",
						            "TotalBooks": count['counter'],
						            "LastBook": lastbook['BookName'],
						            "LastLink": lastbook['BookLink'],
Ejemplo n.º 20
0
    def get_author_books(self, authorid=None, authorname=None, refresh=False):
        """Import the Google Books volumes found for ``authorname``.

        The author row is flagged "Loading", then ``self.get_request`` is
        called for an ``inauthor:"<name>"`` search (accents removed with
        ``unidecode``), advancing ``startIndex`` by 40 per request until it
        reaches the reported ``totalItems``, a page has no items, or an HTTP
        error occurs.

        Unless "All" is listed in ``lazylibrarian.IMP_PREFLANG``, each volume
        goes through a language check: when it has an ISBN-10 and Google
        reports "en" or no language, the ``languages`` table is consulted
        using the first three ISBN characters, falling back to a LibraryThing
        request whose answer is cached in that table.  Volumes whose language
        is not preferred are skipped.

        Remaining volumes are upserted into ``books`` unless their title
        starts with a character that is neither a word character nor ``-`` or
        their stored status is "Ignored".  Cover, series and work-page data
        are then filled in through ``bookwork``.  Finally the author row is
        set to "Active" and one row of counters is inserted into ``stats``.

        :param authorid: AuthorID used as key for the authors and books rows.
        :param authorname: author name used in the search and in log lines.
        :param refresh: only selects the wording of the final log message.
        :return: ``books_dict``; nothing is ever appended to it, so ``[]``.
        """
        logger.debug(
            '[%s] Now processing books with Google Books API' %
            authorname)
        # google doesnt like accents in author names
        aname = unidecode(u'%s' % authorname)

        set_url = self.url + urllib.quote('inauthor:' + '"' + aname + '"')

        books_dict = []
        api_hits = 0
        gr_lang_hits = 0
        lt_lang_hits = 0
        gb_lang_change = 0
        cache_hits = 0
        not_cached = 0

        # Artist is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)

        startindex = 0
        resultcount = 0
        removedResults = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0
        number_results = 1  # placeholder so the first page is always requested

        valid_langs = [valid_lang.strip()
                       for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]

        try:
            while startindex < number_results:

                self.params['startIndex'] = startindex
                URL = set_url + '&' + urllib.urlencode(self.params)

                try:
                    jsonresults, in_cache = self.get_request(URL)
                except HTTPError as err:
                    logger.warn(
                        'Google Books API Error [%s]: Check your API key or wait a while' %
                        err.reason)
                    break

                if jsonresults is None:
                    number_results = 0
                else:
                    if not in_cache:
                        api_hits = api_hits + 1
                    number_results = jsonresults.get('totalItems', 0)

                if number_results == 0:
                    logger.warn('Found no results for %s' % (authorname))
                    break

                logger.debug(
                    'Found %s results for %s' %
                    (number_results, authorname))

                startindex = startindex + 40

                items = jsonresults.get('items')
                if not items:
                    # a page without items means there is nothing left to read
                    break

                for item in items:

                    total_count = total_count + 1
                    info = item.get('volumeInfo', {})

                    # skip if no author, no author is no book.
                    if not info.get('authors'):
                        logger.debug('Skipped a result without authorfield.')
                        continue

                    # only the first identifier is considered, and only if it is an ISBN-10
                    bookisbn = ""
                    identifiers = info.get('industryIdentifiers')
                    if identifiers and identifiers[0].get('type') == 'ISBN_10':
                        bookisbn = identifiers[0].get('identifier', "")

                    isbnhead = ""
                    if len(bookisbn) == 10:
                        isbnhead = bookisbn[0:3]

                    booklang = info.get('language', "Unknown")

                    # do we care about language?
                    if "All" not in valid_langs:
                        if bookisbn != "":
                            # seems google lies to us, sometimes tells us books
                            # are in english when they are not
                            if booklang == "Unknown" or booklang == "en":
                                googlelang = booklang
                                match = myDB.action(
                                    'SELECT lang FROM languages where isbn=?',
                                    [isbnhead]).fetchone()
                                if match:
                                    booklang = match['lang']
                                    cache_hits = cache_hits + 1
                                    logger.debug(
                                        "Found cached language [%s] for [%s]" %
                                        (booklang, isbnhead))

                                else:
                                    # no match in cache, try searching librarything for a language code using the isbn
                                    # if no language found, librarything return value is "invalid" or "unknown"
                                    # librarything returns plain text, not xml
                                    BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + \
                                        bookisbn
                                    try:
                                        time.sleep(1)  # sleep 1 second to respect librarything api terms
                                        resp = urllib2.urlopen(BOOK_URL, timeout=30).read()
                                        lt_lang_hits = lt_lang_hits + 1
                                        logger.debug(
                                            "LibraryThing reports language [%s] for %s" % (resp, isbnhead))

                                        if (resp != 'invalid' and resp != 'unknown'):
                                            booklang = resp  # found a language code
                                            # resp is remote text: bind it, never format it into SQL
                                            myDB.action('insert into languages values (?, ?)',
                                                        [isbnhead, booklang])
                                            logger.debug(u"LT language: " + booklang)
                                    except Exception as e:
                                        booklang = ""
                                        logger.error("Error finding language: %s" % e)

                                # NOTE: this is a substring test against the literal, kept as it was
                                if googlelang == "en" and booklang not in "en-US, en-GB, eng":
                                    # these are all english, may need to expand
                                    # this list
                                    booknamealt = info.get('title')
                                    logger.debug("%s Google thinks [%s], we think [%s]" %
                                                 (booknamealt, googlelang, booklang))
                                    gb_lang_change = gb_lang_change + 1
                            else:
                                match = myDB.action(
                                    'SELECT lang FROM languages where isbn=?',
                                    [isbnhead]).fetchone()
                                if not match:
                                    myDB.action(
                                        'insert into languages values (?, ?)',
                                        [isbnhead, booklang])
                                    logger.debug(u"GB language: " + booklang)

                        # skip if language is in ignore list
                        if booklang not in valid_langs:
                            booknamealt = info.get('title')
                            logger.debug(
                                'Skipped [%s] with language %s' %
                                (booklang and booknamealt, booklang)
                                if False else
                                'Skipped [%s] with language %s' %
                                (booknamealt, booklang))
                            ignored = ignored + 1
                            continue

                    # these three fields are required for the database row;
                    # bookid was never assigned before, which raised NameError
                    try:
                        bookid = item['id']
                        bookname = info['title']
                        booklink = info['canonicalVolumeLink']
                    except KeyError as err:
                        logger.debug('Skipped a result without required field %s' % err)
                        continue

                    bookpub = info.get('publisher')

                    # Reset per item so a volume without subtitle cannot reuse
                    # (or fail on) the series values of a previous volume.
                    series = None
                    seriesNum = None
                    booksub = info.get('subtitle')
                    if booksub is not None:
                        # presumably subtitles look like "... (<series> Series <num>)"
                        try:
                            series = booksub.split('(')[1].split(' Series ')[0]
                        except IndexError:
                            series = None
                        try:
                            seriesNum = booksub.split('(')[1].split(' Series ')[1].split(')')[0]
                            if seriesNum[0] == '#':
                                seriesNum = seriesNum[1:]
                        except IndexError:
                            seriesNum = None

                    # optional fields fall back to the defaults used so far
                    bookdate = info.get('publishedDate', '0000-00-00')
                    bookimg = info.get('imageLinks', {}).get('thumbnail', 'images/nocover.png')
                    bookrate = float(info.get('averageRating', 0))
                    bookpages = info.get('pageCount', 0)
                    bookdesc = info.get('description')

                    categories = info.get('categories')
                    bookgenre = categories[0] if categories else None

                    bookname = bookname.replace(':', '').replace('"', '').replace("'", "")
                    bookname = unidecode(u'%s' % bookname)
                    bookname = bookname.strip()  # strip whitespace

                    # bound parameter instead of string-built SQL
                    find_book_status = myDB.action(
                        'SELECT * FROM books WHERE BookID=?', [bookid]).fetchall()
                    if find_book_status:
                        for resulted in find_book_status:
                            book_status = resulted['Status']
                    else:
                        book_status = lazylibrarian.NEWBOOK_STATUS

                    if re.match(r'[^\w-]', bookname):
                        # remove books with bad characters in title
                        logger.debug(
                            "[%s] removed book for bad characters" %
                            (bookname))
                        removedResults = removedResults + 1
                        continue

                    if book_status == "Ignored":
                        book_ignore_count = book_ignore_count + 1
                        continue

                    controlValueDict = {"BookID": bookid}
                    newValueDict = {
                        "AuthorName": authorname,
                        "AuthorID": authorid,
                        "AuthorLink": "",
                        "BookName": bookname,
                        "BookSub": booksub,
                        "BookDesc": bookdesc,
                        "BookIsbn": bookisbn,
                        "BookPub": bookpub,
                        "BookGenre": bookgenre,
                        "BookImg": bookimg,
                        "BookLink": booklink,
                        "BookRate": bookrate,
                        "BookPages": bookpages,
                        "BookDate": bookdate,
                        "BookLang": booklang,
                        "Status": book_status,
                        "BookAdded": formatter.today(),
                        "Series": series,
                        "SeriesNum": seriesNum
                    }
                    resultcount = resultcount + 1

                    myDB.upsert("books", newValueDict, controlValueDict)
                    logger.debug(u"Book found: " + bookname + " " + bookdate)

                    if 'nocover' in bookimg or 'nophoto' in bookimg:
                        # try to get a cover from librarything
                        workcover = bookwork.getBookCover(bookid)
                        if workcover:
                            logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"BookImg": workcover}
                            myDB.upsert("books", newValueDict, controlValueDict)

                    elif bookimg.startswith('http'):
                        link = bookwork.cache_cover(bookid, bookimg)
                        if link is not None:
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"BookImg": link}
                            myDB.upsert("books", newValueDict, controlValueDict)

                    if seriesNum is None:
                        # try to get series info from librarything
                        series, seriesNum = bookwork.getWorkSeries(bookid)
                        if seriesNum:
                            logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {
                                "Series": series,
                                "SeriesNum": seriesNum
                            }
                            myDB.upsert("books", newValueDict, controlValueDict)

                    worklink = bookwork.getWorkPage(bookid)
                    if worklink:
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"WorkPage": worklink}
                        myDB.upsert("books", newValueDict, controlValueDict)

                    if not find_book_status:
                        logger.debug("[%s] Added book: %s [%s]" % (authorname, bookname, booklang))
                        added_count = added_count + 1
                    else:
                        updated_count = updated_count + 1
                        logger.debug("[%s] Updated book: %s" % (authorname, bookname))

        except KeyError as err:
            # best effort as before, but no longer silent
            logger.debug('[%s] Stopped processing books, missing key %s' % (authorname, err))

        logger.debug('[%s] The Google Books API was hit %s times to populate book list' %
                     (authorname, str(api_hits)))

        lastbook = myDB.action(
            "SELECT BookName, BookLink, BookDate from books WHERE AuthorID=? "
            "AND Status != 'Ignored' order by BookDate DESC", [authorid]).fetchone()

        if lastbook:  # maybe there are no books [remaining] for this author
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
        else:
            lastbookname = None
            lastbooklink = None
            lastbookdate = None

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate
        }

        myDB.upsert("authors", newValueDict, controlValueDict)

        logger.debug("Found %s total books for author" % total_count)
        logger.debug("Removed %s bad language results for author" % ignored)
        logger.debug(
            "Removed %s bad character results for author" %
            removedResults)
        logger.debug(
            "Ignored %s books by author marked as Ignored" %
            book_ignore_count)
        logger.debug("Imported/Updated %s books for author" % resultcount)

        # bound parameters: an author name containing quotes broke the old statement
        myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?)',
                    [authorname, api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                     cache_hits, ignored, removedResults, not_cached])

        if refresh:
            logger.info("[%s] Book processing complete: Added %s books / Updated %s books" %
                        (authorname, str(added_count), str(updated_count)))
        else:
            logger.info("[%s] Book processing complete: Added %s books to the database" %
                        (authorname, str(added_count)))

        return books_dict
Ejemplo n.º 21
0
    def find_book(self, bookid=None, queue=None):
        """Fetch one Google Books volume and add it to the database as "Wanted".

        The volume ``bookid`` is requested from the Google Books API, its
        first listed author is looked up through ``GoodReads`` to obtain an
        AuthorID, and the book row is upserted.  Optional volume fields fall
        back to defaults (``None``, ``0``, ``'0000-00-00'``, the "nocover"
        image, language "Unknown").  Nothing is written when the volume has
        no author or when GoodReads returns no author record.

        :param bookid: Google Books volume id; also the key of the book row.
        :param queue: accepted but not used by this method.
        :return: ``None``.
        """
        threading.currentThread().name = "GB-ADD-BOOK"
        myDB = database.DBConnection()

        URL = 'https://www.googleapis.com/books/v1/volumes/' + str(
            bookid) + "?key=" + lazylibrarian.GB_API
        jsonresults = json.JSONDecoder().decode(
            urllib2.urlopen(URL, timeout=30).read())

        info = jsonresults['volumeInfo']
        bookname = info['title']

        # Without an author there is no AuthorID to attach the book to; the
        # old code only logged here and then failed further down.
        authors = info.get('authors')
        if not authors:
            logger.warn('Book %s does not contain author field, not adding it' % bookname)
            return
        authorname = authors[0]

        # A missing language is stored as "Unknown" instead of leaving
        # booklang undefined; a non-preferred language is only reported.
        booklang = info.get('language')
        if booklang is None:
            logger.debug('Book does not have language field')
            booklang = "Unknown"
        else:
            valid_langs = [
                valid_lang.strip()
                for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')
            ]
            if booklang not in valid_langs:
                logger.debug('Book %s language does not match preference' %
                             bookname)

        # optional fields fall back to the defaults used so far
        bookpub = info.get('publisher')
        booksub = info.get('subtitle')
        bookdate = info.get('publishedDate', '0000-00-00')
        bookimg = info.get('imageLinks', {}).get('thumbnail', 'images/nocover.png')
        bookrate = float(info.get('averageRating', 0))
        bookpages = info.get('pageCount', 0)
        bookdesc = info.get('description')

        categories = info.get('categories')
        bookgenre = categories[0] if categories else None

        # only the first identifier is considered, and only if it is an ISBN-10
        identifiers = info.get('industryIdentifiers')
        if identifiers and identifiers[0].get('type') == 'ISBN_10':
            bookisbn = identifiers[0].get('identifier')
        else:
            bookisbn = None

        booklink = info['canonicalVolumeLink']

        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if not author:
            # AuthorID used to be left undefined in this case (NameError)
            logger.warn('No author record found for %s, %s not added' %
                        (authorname, bookname))
            return
        AuthorID = author['authorid']

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorName": authorname,
            "AuthorID": AuthorID,
            "AuthorLink": "",
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": bookgenre,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": booklang,
            "Status": "Wanted",
            "BookAdded": formatter.today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.info("%s added to the books database" % bookname)
Ejemplo n.º 22
0
def processDir(force=False, reset=False):
    """Post-process snatched downloads found in the download directory.

    Every row of the ``wanted`` table with Status "Snatched" is fuzzy-matched
    (token_set_ratio >= 95) against the entries of the download directory.
    A matched entry is looked up as a book first and as a magazine second,
    handed to processDestination(), and the database is updated on success.
    A download that fails processing is renamed with a ``.fail`` suffix so it
    is not picked up (and failed) again.  Afterwards every remaining entry
    whose name contains ``LL.(<bookid>)`` is passed to import_book().

    force -- when False and nothing is marked as snatched, the 'processDir'
             scheduled job is stopped instead of scanning the directory.
    reset -- when True the 'processDir' scheduled job is restarted at the end.

    Returns False if the download directory cannot be listed, otherwise None.
    """
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    # fall back to the current working directory if no usable DOWNLOAD_DIR is configured
    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError:
        logger.error('Could not access [%s] directory ' % processpath)
        return False

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    # Characters we don't want in a filename.  They are removed BEFORE (maybe) adding
    # DESTINATION_DIR as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
    # Shared by the book and the magazine branch below.
    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}

    if force is False and len(snatched) == 0:
        logger.info('Nothing marked as snatched. Stopping postprocessor job.')
        common.schedule_job(action='Stop', target='processDir')
    elif len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
    else:
        ppcount = 0
        for book in snatched:
            found = False
            for fname in downloads:
                if not fname.endswith('.fail'):  # has this failed before?
                    # this is to get round unicode differences in torrent filenames.
                    # there might be a better way...
                    if isinstance(fname, str):
                        matchname = fname.decode('utf-8')
                    else:
                        matchname = fname
                    # ignore the "LL.(bookid)" suffix when comparing names
                    if 'LL.(' in matchname:
                        matchname = matchname.split('LL.(')[0]
                    match = fuzz.token_set_ratio(matchname, book['NZBtitle'])
                    if match >= 95:
                        pp_path = os.path.join(processpath, fname)
                        logger.debug('Found folder %s for %s' % (pp_path, book['NZBtitle']))
                        found = True
                        break
            if found:
                data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID'])
                if data:
                    authorname = data[0]['AuthorName']
                    bookname = data[0]['BookName']
                    if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                        logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                        lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

                    # Default destination path, should be allowed change per config file.
                    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    # dest_path = authorname+'/'+bookname
                    # global_name = bookname + ' - ' + authorname
                    dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
                    dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                        lazylibrarian.SYS_ENCODING)
                else:
                    # not a known book: for magazines the "BookID" column holds the magazine title
                    data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                    if data:
                        # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                        # files are downloading, there will be an error in post-processing, trying to go to the
                        # same directory.
                        mostrecentissue = data[0]['IssueDate']  # keep for processing issues arriving out of order
                        mag_name = formatter.latinToAscii(formatter.replace_all(book['BookID'], dic))
                        # book auxinfo is a cleaned date, eg 2015-01-01
                        dest_path = lazylibrarian.MAG_DEST_FOLDER.replace('$IssueDate', book['AuxInfo']).replace('$Title', mag_name)
                        # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                        if lazylibrarian.MAG_RELATIVE:
                            if dest_path[0] not in '._':
                                dest_path = '_' + dest_path
                            dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                                lazylibrarian.SYS_ENCODING)
                        else:
                            dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                        # authorname/bookname of None mark this download as a magazine further down
                        authorname = None
                        bookname = None
                        global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                            '$Title', mag_name)
                        # global_name = book['AuxInfo']+' - '+title
                    else:
                        logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                        continue
            else:
                logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
                continue

            # try:
            #    os.chmod(dest_path, 0777)
            # except Exception, e:
            #    logger.debug("Could not chmod post-process directory: " + str(dest_path))

            processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

            if processBook:
                logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
                # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
                controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Processed", "NZBDate": formatter.now()}  # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)

                if bookname is not None:  # it's a book, if None it's a magazine
                    processExtras(myDB, dest_path, global_name, data)
                else:
                    # update mags
                    controlValueDict = {"Title": book['BookID']}
                    if mostrecentissue > book['AuxInfo']:  # check this in case processing issues arriving out of order
                        newValueDict = {"LastAcquired": formatter.today(), "IssueStatus": "Open"}
                    else:
                        newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": formatter.today(),
                                        "IssueStatus": "Open"}
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # dest_path is where we put the magazine after processing, but we don't have the full filename
                    # so look for any "book" in that directory
                    dest_file = book_file(dest_path, booktype='mag')
                    controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                    newValueDict = {
                        "IssueAcquired": formatter.today(),
                        "IssueFile": dest_file,
                        "IssueID": magazinescan.create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                    }
                    myDB.upsert("issues", newValueDict, controlValueDict)

                    # create a thumbnail cover for the new issue
                    magazinescan.create_cover(dest_file)

                logger.info('Successfully processed: %s' % global_name)
                ppcount += 1
                notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now())
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s.fail' % pp_path)
                # at this point, as it failed we should move it or it will get postprocessed
                # again (and fail again)
                try:
                    os.rename(pp_path, pp_path + '.fail')
                except Exception:
                    # best effort only; but unlike a bare "except:" this lets
                    # KeyboardInterrupt / SystemExit propagate
                    logger.debug("Unable to rename %s" % pp_path)

        downloads = os.listdir(processpath)  # check in case we processed/deleted some above
        for directory in downloads:
            if "LL.(" in directory and not directory.endswith('.fail'):
                # the book id is whatever sits between "LL.(" and the next ")"
                bookID = str(directory).split("LL.(")[1].split(")")[0]
                logger.debug("Book with id: " + str(bookID) + " is in downloads")
                pp_path = os.path.join(processpath, directory)

                # a bare file (not a folder) is imported from the download directory itself
                if os.path.isfile(pp_path):
                    pp_path = os.path.join(processpath)

                if os.path.isdir(pp_path):
                    logger.debug('Found LL folder %s.' % pp_path)
                if import_book(pp_path, bookID):
                    ppcount += 1

        if ppcount:
            logger.info('%s books/mags have been processed.' % ppcount)
        else:
            logger.info('No snatched books/mags have been found')
    if reset == True:
        common.schedule_job(action='Restart', target='processDir')
Ejemplo n.º 23
0
def addAuthorToDB(authorname=None):
    """Look up *authorname* on GoodReads and import the author and their books.

    The author row is first upserted with Status "Loading" (with a
    placeholder AuthorID of "0: <name>" if the author is not in the database
    yet).  If GoodReads knows the author, the row is filled in, every book
    returned by get_author_books() is upserted with Status "Skipped", and
    finally the author row is set to "Active" together with its book count
    and most recent book.

    If GoodReads does not know the author an error is logged and the function
    returns; the "Loading" row written earlier is left as it is.
    """
    threading.currentThread().name = "DBIMPORT"
    search_type = 'author'
    myDB = database.DBConnection()

    GR = GoodReads(authorname, search_type)

    # single quotes are doubled so the name can sit inside a quoted SQL literal
    query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace(
        "'", "''")
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        # Without an author there is no authorid to fetch books for; carrying
        # on would only raise NameError on the unbound authorid below.
        logger.error("Nothing found")
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    books = GR.get_author_books(authorid)
    for book in books:
        controlValueDict = {"BookID": book['bookid']}
        newValueDict = {
            "AuthorName": book['authorname'],
            "AuthorID": authorid,
            # was authorimg: the link column should carry the author link
            "AuthorLink": authorlink,
            "BookName": book['bookname'],
            "BookSub": book['booksub'],
            "BookDesc": book['bookdesc'],
            "BookIsbn": book['bookisbn'],
            "BookPub": book['bookpub'],
            "BookGenre": book['bookgenre'],
            "BookImg": book['bookimg'],
            "BookLink": book['booklink'],
            "BookRate": book['bookrate'],
            "BookPages": book['bookpages'],
            "BookDate": book['bookdate'],
            "BookLang": book['booklang'],
            "Status": "Skipped",
            "BookAdded": formatter.today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)

    # most recent book first; fetchone() gives None when the author has no books
    lastbook = myDB.action(
        "SELECT BookName, BookLink, BookDate from books WHERE AuthorID='%s' order by BookDate DESC"
        % authorid).fetchone()
    bookCount = myDB.select(
        "SELECT COUNT(BookName) as counter FROM books WHERE AuthorID='%s'" %
        authorid)

    if lastbook is None:
        last_name = last_link = last_date = None
    else:
        last_name = lastbook['BookName']
        last_link = lastbook['BookLink']
        last_date = lastbook['BookDate']

    for count in bookCount:
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "TotalBooks": count['counter'],
            "LastBook": last_name,
            "LastLink": last_link,
            "LastDate": last_date
        }

        myDB.upsert("authors", newValueDict, controlValueDict)
        logger.info("Processing complete: Added %s books to the database" %
                    str(count['counter']))
Ejemplo n.º 24
0
    def get_author_books(self, authorid=None, authorname=None, refresh=False):
        """Fetch the books GoodReads lists for an author and upsert them into the database.

        Pages through the GoodReads author/list feed.  For every book the
        language is worked out (local ``languages`` table first, then
        LibraryThing, then the GoodReads book page); books whose language is
        not listed in lazylibrarian.IMP_PREFLANG are skipped unless that
        setting contains "All".  The remaining books are upserted into the
        ``books`` table, after which cover and series details are filled in
        through ``bookwork`` where possible.

        authorid   -- GoodReads author id; concatenated into the request URL,
                      so it has to be a string.
        authorname -- only used in log messages.
        refresh    -- not used by this method.

        Returns books_dict.  Nothing in this method adds to it, so the result
        is always an empty list.
        """
        api_hits = 0
        gr_lang_hits = 0
        lt_lang_hits = 0
        gb_lang_change = 0
        cache_hits = 0
        not_cached = 0
        URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(self.params)

        # Author is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)
        books_dict = []
        try:
            rootxml, in_cache = self.get_request(URL)
        except Exception as e:
            logger.error("Error fetching author books: %s" % e)
            return books_dict
        if rootxml is None:
            logger.debug("Error requesting author books")
            return books_dict
        if not in_cache:
            api_hits += 1
        resultxml = rootxml.getiterator('book')

        valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]

        if not len(resultxml):
            logger.warn('[%s] No books found for author with ID: %s' % (authorname, authorid))
        else:
            logger.debug("[%s] Now processing books with GoodReads API" % authorname)

            resultsCount = 0
            removedResults = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            book_ignore_count = 0
            total_count = 0
            logger.debug(u"url " + URL)

            authorNameResult = rootxml.find('./author/name').text
            logger.debug(u"author name " + authorNameResult)
            loopCount = 1

            # one pass of this loop per page of results; resultxml is set to None
            # at the bottom once a page cannot be fetched or comes back empty
            while resultxml is not None:
                for book in resultxml:
                    total_count += 1

                    if book.find('publication_year').text is None:
                        pubyear = "0000"
                    else:
                        pubyear = book.find('publication_year').text

                    try:
                        bookimg = book.find('image_url').text
                        if 'nocover' in bookimg:
                            bookimg = 'images/nocover.png'
                    except (KeyError, AttributeError, TypeError):
                        # AttributeError: no <image_url> element (find() gave None)
                        # TypeError: the element is there but empty (.text is None)
                        # NB the old "except KeyError,AttributeError:" only caught KeyError
                        bookimg = 'images/nocover.png'

                    # PAB this next section tries to get the book language using the isbn13 to
                    # look it up. If no isbn13 we skip the book entirely, rather than including it
                    # with an "Unknown" language. Changed this so we can still include the book
                    # with language set to "Unknown". There is a setting in config.ini to allow or
                    # skip books with "Unknown" language if you really don't want to include them.
                    # Not all GR books have isbn13 filled in, but all have a GR bookid, which we've
                    # already got, so use that.
                    # Also, with GR API rules we can only call the API once per second, which slows
                    # us down a lot when all we want is to get the language. We sleep for one second
                    # per book that GR knows about for each author you have in your library. The
                    # libraryThing API has the same 1 second restriction, and is limited to 1000
                    # hits per day, but has fewer books with unknown language. To get around this
                    # and speed up the process, see if we already have a book in the database with
                    # a similar start to the ISBN. The way ISBNs work, digits 3-5 of a 13 char ISBN
                    # or digits 0-2 of a 10 digit ISBN indicate the region/language so if two books
                    # have the same 3 digit isbn code, they _should_ be the same language.
                    # I ran a simple python script on my library of 1500 books, and these codes
                    # were 100% correct on matching book languages, no mis-matches. It did result
                    # in a small number of books with "unknown" language being wrongly matched but
                    # most "unknown" were matched to the correct language.
                    # We could look up ISBNs we already know about in the database, but this only
                    # holds books in the languages we want to keep, which reduces the number of
                    # cache hits, so we create a new database table, holding ALL results including
                    # the ISBNs for languages we don't want and books we reject.
                    # The new table is created (if not exists) in init.py so by the time we get
                    # here there is an existing table.
                    # If we haven't an already matching partial ISBN, look up language code from
                    # libraryThing "http://www.librarything.com/api/thingLang.php?isbn=1234567890"
                    # If you find a matching language, add it to the database.  If "unknown" or
                    # "invalid", try GR as maybe GR can provide a match.
                    # If both LT and GR return unknown, add isbn to db as "unknown". No point in
                    # repeatedly asking LT for a code it's told you it doesn't know.
                    # As an extra option, if language includes "All" in config.ini, we can skip
                    # this whole section and process everything much faster by not querying for
                    # language at all.
                    # It does mean we include a lot of unwanted foreign translations in the
                    # database, but it's _much_ faster.

                    bookLanguage = "Unknown"
                    find_field = "id"
                    isbn = ""
                    isbnhead = ""
                    if "All" not in valid_langs:  # do we care about language
                        if book.find('isbn').text is not None:
                            find_field = "isbn"
                            isbn = book.find('isbn').text
                            isbnhead = isbn[0:3]
                        else:
                            if book.find('isbn13').text is not None:
                                find_field = "isbn13"
                                isbn = book.find('isbn13').text
                                isbnhead = isbn[3:6]
                        if find_field != 'id':  # isbn or isbn13 found

                            match = myDB.action('SELECT lang FROM languages where isbn = "%s"' %
                                                (isbnhead)).fetchone()
                            if match:
                                bookLanguage = match['lang']
                                cache_hits += 1
                                logger.debug("Found cached language [%s] for %s [%s]" %
                                             (bookLanguage, find_field, isbnhead))
                            else:
                                # no match in cache, try searching librarything for a language code using the isbn
                                # if no language found, librarything return value is "invalid" or "unknown"
                                # returns plain text, not xml
                                BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + isbn
                                try:
                                    time_now = int(time.time())
                                    if time_now <= lazylibrarian.LAST_LIBRARYTHING:  # called within the last second?
                                        time.sleep(1)  # sleep 1 second to respect librarything api terms

                                    resp = urllib2.urlopen(BOOK_URL, timeout=30).read()
                                    lazylibrarian.LAST_LIBRARYTHING = time_now
                                    lt_lang_hits += 1
                                    logger.debug("LibraryThing reports language [%s] for %s" % (resp, isbnhead))

                                    if resp == 'invalid' or resp == 'unknown':
                                        find_field = "id"  # reset the field to force search on goodreads
                                    else:
                                        bookLanguage = resp  # found a language code
                                        myDB.action('insert into languages values ("%s", "%s")' %
                                                    (isbnhead, bookLanguage))
                                        logger.debug(u"LT language: " + bookLanguage)
                                except Exception as e:
                                    find_field = "id"  # reset the field to search on goodreads
                                    logger.error("Error finding LT language result: %s" % e)

                        if find_field == 'id':
                            # [or bookLanguage == "Unknown"] no earlier match, we'll have to search the goodreads api
                            try:
                                if book.find(find_field).text is not None:
                                    BOOK_URL = 'http://www.goodreads.com/book/show?id=' + \
                                        book.find(find_field).text + '&' + urllib.urlencode(self.params)
                                    logger.debug(u"Book URL: " + BOOK_URL)

                                    try:
                                        time_now = int(time.time())
                                        if time_now <= lazylibrarian.LAST_GOODREADS:
                                            time.sleep(1)

                                        BOOK_rootxml, in_cache = self.get_request(BOOK_URL)
                                        if BOOK_rootxml is None:
                                            logger.debug('Error requesting book language code')
                                            bookLanguage = ""
                                        else:
                                            if not in_cache:
                                                # only update last_goodreads if the result wasn't found in the cache
                                                lazylibrarian.LAST_GOODREADS = time_now
                                            bookLanguage = BOOK_rootxml.find('./book/language_code').text
                                    except Exception as e:
                                        logger.error("Error finding book results: %s" % e)
                                    # NOTE(review): if get_request() raised above, in_cache still
                                    # holds the value left by an earlier request
                                    if not in_cache:
                                        gr_lang_hits += 1
                                    if not bookLanguage:
                                        bookLanguage = "Unknown"

                                    if isbnhead != "":
                                        # we have an isbn prefix, so remember the answer (even "Unknown")
                                        myDB.action('insert into languages values ("%s", "%s")' %
                                                    (isbnhead, bookLanguage))
                                        logger.debug("GoodReads reports language [%s] for %s" %
                                                     (bookLanguage, isbnhead))
                                    else:
                                        # GR didn't give an isbn so we can't cache it, just use language for this book
                                        not_cached += 1

                                    logger.debug(u"GR language: " + bookLanguage)
                                else:
                                    logger.debug("No %s provided for [%s]" % (find_field, book.find('title').text))
                                    # continue

                            except Exception as e:
                                logger.debug(u"An error has occured: %s" % e)

                        if bookLanguage not in valid_langs:
                            logger.debug('Skipped a book with language %s' % bookLanguage)
                            ignored += 1
                            continue

                    bookname = book.find('title').text
                    bookid = book.find('id').text
                    bookdesc = book.find('description').text
                    bookisbn = book.find('isbn').text
                    bookpub = book.find('publisher').text
                    booklink = book.find('link').text
                    bookrate = float(book.find('average_rating').text)
                    bookpages = book.find('num_pages').text

                    # \(            Must have (
                    # ([\S\s]+)     followed by a group of one or more characters of any kind
                    # ,? #          followed by optional comma, then space hash
                    # (             start next group
                    # \d+           must have one or more digits
                    # \.?           then optional decimal point, (. must be escaped)
                    # -?            optional dash for a range
                    # \d{0,}        zero or more digits
                    # )             end group

                    result = re.search(r"\(([\S\s]+),? #(\d+\.?-?\d{0,})", bookname)
                    if result:
                        series = result.group(1)
                        # the greedy first group swallows the optional comma, so trim it here
                        if series[-1] == ',':
                            series = series[:-1]
                        seriesNum = result.group(2)
                    else:
                        series = None
                        seriesNum = None

                    # keep the status of a book we already have, otherwise use the configured default
                    find_book_status = myDB.select('SELECT * FROM books WHERE BookID = "%s"' % bookid)
                    if find_book_status:
                        for resulted in find_book_status:
                            book_status = resulted['Status']
                    else:
                        book_status = lazylibrarian.NEWBOOK_STATUS

                    bookname = bookname.replace(':', '').replace('"', '').replace("'", "")
                    bookname = unidecode(u'%s' % bookname)
                    bookname = bookname.strip()  # strip whitespace

                    # re.match only looks at the start of the string, so this rejects
                    # titles whose first character is not a word character or a dash
                    if not re.match(r'[^\w-]', bookname):  # remove books with bad characters in title
                        if book_status != "Ignored":
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {
                                "AuthorName": authorNameResult,
                                "AuthorID": authorid,
                                "AuthorLink": None,
                                "BookName": bookname,
                                "BookSub": None,
                                "BookDesc": bookdesc,
                                "BookIsbn": bookisbn,
                                "BookPub": bookpub,
                                "BookGenre": None,
                                "BookImg": bookimg,
                                "BookLink": booklink,
                                "BookRate": bookrate,
                                "BookPages": bookpages,
                                "BookDate": pubyear,
                                "BookLang": bookLanguage,
                                "Status": book_status,
                                "BookAdded": formatter.today(),
                                "Series": series,
                                "SeriesNum": seriesNum
                            }

                            resultsCount += 1

                            myDB.upsert("books", newValueDict, controlValueDict)
                            logger.debug(u"Book found: " + book.find('title').text + " " + pubyear)

                            if 'nocover' in bookimg or 'nophoto' in bookimg:
                                # try to get a cover from librarything
                                workcover = bookwork.getWorkCover(bookid)
                                if workcover:
                                    logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": workcover}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            elif bookimg.startswith('http'):
                                # store the link cache_cover() hands back if it differs from the remote one
                                link = bookwork.cache_cover(bookid, bookimg)
                                if link != bookimg:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": link}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            if seriesNum is None:
                                # try to get series info from librarything
                                series, seriesNum = bookwork.getWorkSeries(bookid)
                                if seriesNum:
                                    logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {
                                        "Series": series,
                                        "SeriesNum": seriesNum
                                    }
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            if not find_book_status:
                                logger.debug(u"[%s] Added book: %s" % (authorname, bookname))
                                added_count += 1
                            else:
                                logger.debug(u"[%s] Updated book: %s" % (authorname, bookname))
                                updated_count += 1
                        else:
                            book_ignore_count += 1
                    else:
                        logger.debug(u"removed result [" + bookname + "] for bad characters")
                        removedResults += 1

                # ask for the next page of results
                loopCount += 1
                URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + \
                      urllib.urlencode(self.params) + '&page=' + str(loopCount)
                resultxml = None
                try:
                    rootxml, in_cache = self.get_request(URL)
                    if rootxml is None:
                        logger.debug('Error requesting next page of results')
                    else:
                        resultxml = rootxml.getiterator('book')
                        if not in_cache:
                            api_hits += 1
                except Exception as e:
                    resultxml = None
                    logger.error("Error finding next page of results: %s" % e)

                if resultxml is not None:
                    if all(False for book in resultxml):  # returns True if iterator is empty
                        resultxml = None

        # same return value as the error paths above instead of an implicit None
        return books_dict
Ejemplo n.º 25
0
    def find_book(self, bookid=None, queue=None):
        """Look up a single book on GoodReads and store it with status "Wanted".

        Requests the GoodReads page for ``bookid``, upserts a row into the
        ``books`` table, then tries to improve the cover image and the series
        information through ``bookwork``.

        bookid -- GoodReads book id, concatenated into the request URL
        queue  -- not used by this method

        Returns None. A failed request, a missing title or a failed author
        lookup is logged and ends the method before anything is written.
        """
        threading.currentThread().name = "GR-ADD-BOOK"
        myDB = database.DBConnection()

        URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(self.params)

        try:
            rootxml, in_cache = self.get_request(URL)
            if rootxml is None:
                logger.debug("Error requesting book")
                return
        except Exception as e:
            logger.error("Error finding book: %s" % e)
            return

        def book_text(tag):
            # Text of ./book/<tag>, or None when the element is absent or empty,
            # so an incomplete response cannot raise AttributeError.
            node = rootxml.find('./book/' + tag)
            if node is None:
                return None
            return node.text

        bookname = book_text('title')
        if not bookname:
            # the title is needed for the series regex and the database row
            logger.debug("Book %s has no title, unable to add it" % bookid)
            return

        bookLanguage = book_text('language_code')
        if not bookLanguage:
            bookLanguage = "Unknown"
        #
        # PAB user has said they want this book, don't block for bad language, just warn
        #
        valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
        if bookLanguage not in valid_langs:
            logger.debug('Book %s language does not match preference' % bookname)

        bookdate = book_text('publication_year')
        if bookdate is None:
            bookdate = "0000"

        # a missing/empty image or the GoodReads placeholder both mean "no cover"
        bookimg = book_text('img_url')
        if not bookimg or bookimg == 'http://www.goodreads.com/assets/nocover/111x148.png':
            bookimg = 'images/nocover.png'

        authorname = book_text('authors/author/name')
        bookdesc = book_text('description')
        bookisbn = book_text('isbn')
        bookpub = book_text('publisher')
        booklink = book_text('link')
        try:
            bookrate = float(book_text('average_rating'))
        except (TypeError, ValueError):
            # rating absent or not numeric
            bookrate = 0.0
        bookpages = book_text('num_pages')

        # Without an author id the row cannot be linked to an author, so give up
        # here instead of failing later on an unassigned AuthorID.
        author = GoodReads(authorname).find_author_id() if authorname else None
        if not author:
            logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
            return
        AuthorID = author['authorid']

        # Titles may carry series info in a trailing bracket, eg "Title (Series, #2)"
        result = re.search(r"\(([\S\s]+),? #(\d+\.?-?\d{0,})", bookname)
        if result:
            series = result.group(1)
            if series.endswith(','):
                series = series[:-1]
            seriesNum = result.group(2)
        else:
            series = None
            seriesNum = None

        bookname = bookname.replace(':', '').replace('"', '').replace("'", "")
        bookname = unidecode(u'%s' % bookname)
        bookname = bookname.strip()  # strip whitespace

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorName": authorname,
            "AuthorID": AuthorID,
            "AuthorLink": None,
            "BookName": bookname,
            "BookSub": None,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": None,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": bookLanguage,
            "Status": "Wanted",
            "BookAdded": formatter.today(),
            "Series": series,
            "SeriesNum": seriesNum
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.debug("%s added to the books database" % bookname)

        if 'nocover' in bookimg or 'nophoto' in bookimg:
            # try to get a cover from librarything
            workcover = bookwork.getWorkCover(bookid)
            if workcover:
                logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)

        elif bookimg.startswith('http'):
            # only update the row when cache_cover hands back a different link
            link = bookwork.cache_cover(bookid, bookimg)
            if link != bookimg:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": link}
                myDB.upsert("books", newValueDict, controlValueDict)

        if seriesNum is None:
            #  try to get series info from librarything
            series, seriesNum = bookwork.getWorkSeries(bookid)
            if seriesNum:
                logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                controlValueDict = {"BookID": bookid}
                newValueDict = {
                    "Series": series,
                    "SeriesNum": seriesNum
                }
                myDB.upsert("books", newValueDict, controlValueDict)
Ejemplo n.º 26
0
def processDir(reset=False):
    """Match snatched items against the download directory and post-process them.

    For every row of ``wanted`` with Status "Snatched", the NZBtitle is
    fuzzy-matched against the names in the download directory. The best
    scoring folder is handed to processDestination() and the ``wanted``,
    ``books``/``magazines`` and ``issues`` tables are updated according to the
    outcome. Afterwards any remaining entry whose name contains "LL.(<id>)" is
    passed to import_book().

    reset -- when true, scheduleJob(action='Restart', target='processDir') is
             called at the end; the early returns below skip that call.
    """
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "POSTPROCESS"

    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
        return

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if not snatched:
        logger.info('Nothing marked as snatched.')
        scheduleJob(action='Stop', target='processDir')
        return

    if not downloads:
        logger.info('No downloads are found. Nothing to process.')
        return

    logger.info("Checking %s download%s for %s snatched file%s" %
        (len(downloads), plural(len(downloads)), len(snatched), plural(len(snatched))))

    # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
    # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}

    ppcount = 0
    for book in snatched:
        matches = []
        for fname in downloads:
            if not fname.endswith('.fail'):  # has this failed before?
                # this is to get round differences in torrent filenames.
                # Torrents aren't always returned with the name we searched for
                # there might be a better way...
                if isinstance(fname, str):
                    matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                else:
                    matchname = fname
                if ' LL.(' in matchname:
                    matchname = matchname.split(' LL.(')[0]
                matchtitle = book['NZBtitle']
                match = 0
                if matchtitle:
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                if match >= lazylibrarian.DLOAD_RATIO:
                    fname = matchname
                    if os.path.isfile(os.path.join(processpath, fname)):
                        # handle single file downloads here...
                        # move the file into a folder named after it, so the
                        # directory check below can pick it up
                        if is_valid_booktype(fname, booktype="book") \
                            or is_valid_booktype(fname, booktype="mag"):
                            dirname = os.path.join(processpath, os.path.splitext(fname)[0])
                            if not os.path.exists(dirname):
                                try:
                                    os.makedirs(dirname)
                                except OSError as why:
                                    logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror))
                            if os.path.exists(dirname):
                                try:
                                    shutil.move(os.path.join(processpath, fname), os.path.join(dirname, fname))
                                    fname = os.path.splitext(fname)[0]
                                except Exception as why:
                                    logger.debug("Failed to move file %s to %s, %s" %
                                        (fname, dirname, str(why)))
                    if os.path.isdir(os.path.join(processpath, fname)):
                        pp_path = os.path.join(processpath, fname)
                        logger.debug('Found folder (%s%%) %s for %s' % (match, pp_path, book['NZBtitle']))
                        matches.append([match, pp_path, book])
                else:
                    logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle))
            else:
                logger.debug('Skipping %s' % fname)

        if matches:
            highest = max(matches, key=lambda x: x[0])
            match = highest[0]
            pp_path = highest[1]
            book = highest[2]
            logger.info(u'Best match (%s%%): %s for %s' %
                    (match, pp_path, book['NZBtitle']))

            # BookID is passed as a bound parameter: for magazines it is the title,
            # which may contain quote characters that would break a formatted query.
            data = myDB.select('SELECT * from books WHERE BookID=?', (book['BookID'],))
            if data:
                authorname = data[0]['AuthorName']
                bookname = data[0]['BookName']
                if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                    logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                    lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

                # Default destination path, should be allowed change per config file.
                dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                    '$Title', bookname)
                global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                    '$Title', bookname)
                global_name = unaccented(global_name)
                # dest_path = authorname+'/'+bookname
                # global_name = bookname + ' - ' + authorname
                dest_path = unaccented_str(replace_all(dest_path, dic))
                dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                    lazylibrarian.SYS_ENCODING)
            else:
                data = myDB.select('SELECT * from magazines WHERE Title=?', (book['BookID'],))
                if data:
                    # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                    # files are downloading, there will be an error in post-processing, trying to go to the
                    # same directory.
                    mostrecentissue = data[0]['IssueDate']  # keep for processing issues arriving out of order
                    mag_name = unaccented_str(replace_all(book['BookID'], dic))
                    # book auxinfo is a cleaned date, eg 2015-01-01
                    dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                        '$IssueDate',
                        book['AuxInfo']).replace('$Title', mag_name)
                    # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                    if lazylibrarian.MAG_RELATIVE:
                        if dest_path[0] not in '._':
                            dest_path = '_' + dest_path
                        dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                            lazylibrarian.SYS_ENCODING)
                    else:
                        dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                    # authorname/bookname of None mark this item as a magazine below
                    authorname = None
                    bookname = None
                    global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                        '$Title', mag_name)
                    global_name = unaccented(global_name)
                    # global_name = book['AuxInfo']+' - '+title
                else:
                    logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                    continue
        else:
            logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
            continue

        processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

        if processBook:
            logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
            # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {"Status": "Processed", "NZBDate": now()}  # say when we processed it
            myDB.upsert("wanted", newValueDict, controlValueDict)

            if bookname is not None:  # it's a book, if None it's a magazine
                if len(lazylibrarian.IMP_CALIBREDB):
                    logger.debug('Calibre should have created the extras for us')
                else:
                    processExtras(myDB, dest_path, global_name, data)
            else:
                # update mags
                controlValueDict = {"Title": book['BookID']}
                if mostrecentissue:
                    if mostrecentissue.isdigit() and str(book['AuxInfo']).isdigit():
                        older = int(mostrecentissue) > int(book['AuxInfo'])  # issuenumber
                    else:
                        older = mostrecentissue > book['AuxInfo']  # YYYY-MM-DD
                else:
                    older = False
                if older:  # check this in case processing issues arriving out of order
                    newValueDict = {"LastAcquired": today(), "IssueStatus": "Open"}
                else:
                    newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": today(),
                                    "IssueStatus": "Open"}
                myDB.upsert("magazines", newValueDict, controlValueDict)
                # dest_path is where we put the magazine after processing, but we don't have the full filename
                # so look for any "book" in that directory
                dest_file = book_file(dest_path, booktype='mag')
                controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                newValueDict = {"IssueAcquired": today(),
                                "IssueFile": dest_file,
                                "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                }
                myDB.upsert("issues", newValueDict, controlValueDict)

                # create a thumbnail cover for the new issue
                create_cover(dest_file)

            logger.info('Successfully processed: %s' % global_name)
            ppcount = ppcount + 1
            notify_download("%s at %s" % (global_name, now()))
        else:
            logger.error('Postprocessing for %s has failed.' % global_name)
            logger.error('Warning - Residual files remain in %s.fail' % pp_path)
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {"Status": "Failed", "NZBDate": now()}
            myDB.upsert("wanted", newValueDict, controlValueDict)
            # if it's a book, reset status so we try for a different version
            # if it's a magazine, user can select a different one from pastissues table
            if bookname is not None:
                myDB.action('UPDATE books SET status = "Wanted" WHERE BookID=?', (book['BookID'],))

            # at this point, as it failed we should move it or it will get postprocessed
            # again (and fail again)
            try:
                os.rename(pp_path, pp_path + '.fail')
            except Exception as why:
                # best effort only, but say why so a stuck folder can be diagnosed
                logger.debug("Unable to rename %s, %s" % (pp_path, str(why)))

    downloads = os.listdir(processpath)  # check in case we processed/deleted some above
    for directory in downloads:
        if "LL.(" in directory and not directory.endswith('.fail'):
            bookID = str(directory).split("LL.(")[1].split(")")[0]
            logger.debug("Book with id: " + str(bookID) + " is in downloads")
            pp_path = os.path.join(processpath, directory)

            # a single file carrying the LL marker: import from the download directory itself
            if os.path.isfile(pp_path):
                pp_path = os.path.join(processpath)

            if (os.path.isdir(pp_path)):
                logger.debug('Found LL folder %s.' % pp_path)
            if import_book(pp_path, bookID):
                ppcount = ppcount + 1

    if ppcount == 0:
        logger.info('No snatched books/mags have been found')
    else:
        logger.info('%s book%s/mag%s processed.' % (ppcount, plural(ppcount), plural(ppcount)))

    if reset:
        scheduleJob(action='Restart', target='processDir')
Ejemplo n.º 27
0
    def find_book(self, bookid=None, bookstatus="None"):
        """Fetch one volume from the Google Books API and add it to the books table.

        bookid     -- Google Books volume id, appended to the request URL
        bookstatus -- Status stored for the book; when empty, or the literal
                      string "None" (the signature default),
                      CONFIG['NEWBOOK_STATUS'] is used instead

        Returns None. Nothing is written when the request yields no result,
        the volume has no author, or GoodReads cannot supply an author id.
        Language and publication-date preferences only produce log messages
        here; they do not block the book.
        """
        myDB = database.DBConnection()
        if not lazylibrarian.CONFIG['GB_API']:
            logger.warn('No GoogleBooks API key, check config')
        URL = 'https://www.googleapis.com/books/v1/volumes/' + \
              str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
        jsonresults, in_cache = gb_json_request(URL)

        if jsonresults is None:
            logger.debug('No results found for %s' % bookid)
            return

        # The default argument is the truthy string "None"; treat it as "not given"
        # so the configured status is applied rather than storing "None".
        if not bookstatus or bookstatus == "None":
            bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

        book = bookdict(jsonresults)
        dic = {':': '.', '"': '', '\'': ''}
        bookname = replace_all(book['name'], dic)

        bookname = unaccented(bookname)
        bookname = bookname.strip()  # strip whitespace

        if not book['author']:
            logger.debug('Book %s does not contain author field, skipping' % bookname)
            return
        # warn if language is in ignore list, but user said they wanted this book
        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        if book['lang'] not in valid_langs and 'All' not in valid_langs:
            logger.debug('Book %s googlebooks language does not match preference, %s' %
                         (bookname, book['lang']))

        if lazylibrarian.CONFIG['NO_PUBDATE']:
            if not book['date'] or book['date'] == '0000':
                logger.warn('Book %s Publication date does not match preference, %s' %
                            (bookname, book['date']))

        if lazylibrarian.CONFIG['NO_FUTURE']:
            # Compare years only: a full date such as "2024-05-01" sorts after the
            # bare year "2024" and would be reported as a future publication.
            if book['date'] and book['date'][:4] > today()[:4]:
                logger.warn('Book %s Future publication date does not match preference, %s' %
                            (bookname, book['date']))

        authorname = book['author']
        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if author:
            AuthorID = author['authorid']
            match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
            if not match:
                match = myDB.match('SELECT AuthorID from authors WHERE AuthorName=?',
                                   (author['authorname'],))
                if match:
                    logger.debug('%s: Changing authorid from %s to %s' %
                                 (author['authorname'], AuthorID, match['AuthorID']))
                    AuthorID = match['AuthorID']  # we have a different authorid for that authorname
                else:  # no author but request to add book, add author with newauthor status
                    # User hit "add book" button from a search or a wishlist import
                    newauthor_status = 'Active'
                    if lazylibrarian.CONFIG['NEWAUTHOR_STATUS'] in ['Skipped', 'Ignored']:
                        newauthor_status = 'Paused'
                    controlValueDict = {"AuthorID": AuthorID}
                    newValueDict = {
                        "AuthorName": author['authorname'],
                        "AuthorImg": author['authorimg'],
                        "AuthorLink": author['authorlink'],
                        "AuthorBorn": author['authorborn'],
                        "AuthorDeath": author['authordeath'],
                        "DateAdded": today(),
                        "Status": newauthor_status
                    }
                    authorname = author['authorname']
                    myDB.upsert("authors", newValueDict, controlValueDict)
                    if lazylibrarian.CONFIG['NEWAUTHOR_BOOKS']:
                        self.get_author_books(AuthorID,
                                              entrystatus=lazylibrarian.CONFIG['NEWAUTHOR_STATUS'])
        else:
            logger.warn("No AuthorID for %s, unable to add book %s" % (book['author'], bookname))
            return

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorID": AuthorID,
            "BookName": bookname,
            "BookSub": book['sub'],
            "BookDesc": book['desc'],
            "BookIsbn": book['isbn'],
            "BookPub": book['pub'],
            "BookGenre": book['genre'],
            "BookImg": book['img'],
            "BookLink": book['link'],
            "BookRate": float(book['rate']),
            "BookPages": book['pages'],
            "BookDate": book['date'],
            "BookLang": book['lang'],
            "Status": bookstatus,
            "AudioStatus": lazylibrarian.CONFIG['NEWAUDIO_STATUS'],
            "BookAdded": today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.info("%s by %s added to the books database" % (bookname, authorname))

        if 'nocover' in book['img'] or 'nophoto' in book['img']:
            # try to get a cover from another source
            workcover, source = getBookCover(bookid)
            if workcover:
                logger.debug('Updated cover for %s using %s' % (bookname, source))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)

        # A real remote cover gets cached locally. This branch pairs with the outer
        # "no cover" test; nested under "if workcover" it could only ever see
        # placeholder images.
        elif book['img'] and book['img'].startswith('http'):
            link, success, _ = cache_img("book", bookid, book['img'])
            if success:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": link}
                myDB.upsert("books", newValueDict, controlValueDict)
            else:
                logger.debug('Failed to cache image for %s' % book['img'])

        serieslist = []
        if book['series']:
            serieslist = [('', book['seriesNum'],
                           cleanName(unaccented(book['series']), '&/'))]
        if lazylibrarian.CONFIG['ADD_SERIES']:
            # series found by getWorkSeries replace the one taken from the volume data
            newserieslist = getWorkSeries(bookid)
            if newserieslist:
                serieslist = newserieslist
                logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
        setSeries(serieslist, bookid)

        worklink = getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
Ejemplo n.º 28
0
    def get_author_books(self,
                         authorid=None,
                         authorname=None,
                         bookstatus="Skipped",
                         entrystatus='Active',
                         refresh=False):
        # noinspection PyBroadException
        try:
            logger.debug('[%s] Now processing books with Google Books API' %
                         authorname)
            # google doesnt like accents in author names
            set_url = self.url + quote(
                'inauthor:"%s"' % unaccented_str(authorname))

            api_hits = 0
            gr_lang_hits = 0
            lt_lang_hits = 0
            gb_lang_change = 0
            cache_hits = 0
            not_cached = 0
            startindex = 0
            resultcount = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            book_ignore_count = 0
            total_count = 0
            number_results = 1

            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            # Artist is loading
            myDB = database.DBConnection()
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            try:
                while startindex < number_results:

                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urlencode(self.params)

                    try:
                        jsonresults, in_cache = gb_json_request(
                            URL, useCache=not refresh)
                        if jsonresults is None:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                    except Exception as err:
                        if hasattr(err, 'reason'):
                            errmsg = err.reason
                        else:
                            errmsg = str(err)
                        logger.warn(
                            'Google Books API Error [%s]: Check your API key or wait a while'
                            % errmsg)
                        break

                    if number_results == 0:
                        logger.warn('Found no results for %s' % authorname)
                        break
                    else:
                        logger.debug('Found %s result%s for %s' %
                                     (number_results, plural(number_results),
                                      authorname))

                    startindex += 40

                    for item in jsonresults['items']:

                        total_count += 1
                        book = bookdict(item)
                        # skip if no author, no author is no book.
                        if not book['author']:
                            logger.debug(
                                'Skipped a result without authorfield.')
                            continue

                        isbnhead = ""
                        if len(book['isbn']) == 10:
                            isbnhead = book['isbn'][0:3]
                        elif len(book['isbn']) == 13:
                            isbnhead = book['isbn'][3:6]

                        booklang = book['lang']
                        # do we care about language?
                        if "All" not in valid_langs:
                            if book['isbn']:
                                # seems google lies to us, sometimes tells us books are in english when they are not
                                if booklang == "Unknown" or booklang == "en":
                                    googlelang = booklang
                                    match = False
                                    lang = myDB.match(
                                        'SELECT lang FROM languages where isbn=?',
                                        (isbnhead, ))
                                    if lang:
                                        booklang = lang['lang']
                                        cache_hits += 1
                                        logger.debug(
                                            "Found cached language [%s] for [%s]"
                                            % (booklang, isbnhead))
                                        match = True
                                    if not match:  # no match in cache, try lookup dict
                                        if isbnhead:
                                            if len(
                                                    book['isbn']
                                            ) == 13 and book[
                                                    'isbn'].startswith('979'):
                                                for lang in lazylibrarian.isbn_979_dict:
                                                    if isbnhead.startswith(
                                                            lang):
                                                        booklang = lazylibrarian.isbn_979_dict[
                                                            lang]
                                                        logger.debug(
                                                            "ISBN979 returned %s for %s"
                                                            % (booklang,
                                                               isbnhead))
                                                        match = True
                                                        break
                                            elif (len(book['isbn']) == 10) or \
                                                    (len(book['isbn']) == 13 and book['isbn'].startswith('978')):
                                                for lang in lazylibrarian.isbn_978_dict:
                                                    if isbnhead.startswith(
                                                            lang):
                                                        booklang = lazylibrarian.isbn_978_dict[
                                                            lang]
                                                        logger.debug(
                                                            "ISBN979 returned %s for %s"
                                                            % (booklang,
                                                               isbnhead))
                                                        match = True
                                                        break
                                            if match:
                                                myDB.action(
                                                    'insert into languages values (?, ?)',
                                                    (isbnhead, booklang))

                                    if not match:
                                        booklang = thingLang(book['isbn'])
                                        lt_lang_hits += 1
                                        if booklang:
                                            match = True
                                            myDB.action(
                                                'insert into languages values (?, ?)',
                                                (isbnhead, booklang))

                                    if match:
                                        # We found a better language match
                                        if googlelang == "en" and booklang not in [
                                                "en-US", "en-GB", "eng"
                                        ]:
                                            # these are all english, may need to expand this list
                                            logger.debug(
                                                "%s Google thinks [%s], we think [%s]"
                                                % (book['name'], googlelang,
                                                   booklang))
                                            gb_lang_change += 1
                                    else:  # No match anywhere, accept google language
                                        booklang = googlelang

                            # skip if language is in ignore list
                            if booklang not in valid_langs:
                                logger.debug('Skipped [%s] with language %s' %
                                             (book['name'], booklang))
                                ignored += 1
                                continue

                        rejected = 0
                        check_status = False
                        book_status = bookstatus  # new_book status, or new_author status
                        audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
                        added = today()
                        locked = False
                        existing_book = None
                        bookname = book['name']
                        bookid = item['id']
                        if not bookname:
                            logger.debug(
                                'Rejecting bookid %s for %s, no bookname' %
                                (bookid, authorname))
                            removedResults += 1
                            rejected = 1
                        else:
                            bookname = replace_all(unaccented(bookname), {
                                ':': '.',
                                '"': '',
                                '\'': ''
                            }).strip()
                            # GoodReads sometimes has multiple bookids for the same book (same author/title, different
                            # editions) and sometimes uses the same bookid if the book is the same but the title is
                            # slightly different. Not sure if googlebooks does too, but we only want one...
                            cmd = 'SELECT Status,AudioStatus,Manual,BookAdded FROM books WHERE BookID=?'
                            existing_book = myDB.match(cmd, (bookid, ))
                            if existing_book:
                                book_status = existing_book['Status']
                                audio_status = existing_book['AudioStatus']
                                locked = existing_book['Manual']
                                added = existing_book['BookAdded']
                                if locked is None:
                                    locked = False
                                elif locked.isdigit():
                                    locked = bool(int(locked))
                            else:
                                if rejected in [3, 4, 5]:
                                    book_status = 'Ignored'
                                    audio_status = 'Ignored'
                                else:
                                    book_status = bookstatus  # new_book status, or new_author status
                                    audio_status = lazylibrarian.CONFIG[
                                        'NEWAUDIO_STATUS']
                                added = today()
                                locked = False

                        if not rejected and re.match(
                                '[^\w-]', bookname
                        ):  # remove books with bad characters in title
                            logger.debug(
                                "[%s] removed book for bad characters" %
                                bookname)
                            removedResults += 1
                            rejected = 2

                        if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                            # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                            if book['date'] > today()[:len(book['date'])]:
                                logger.debug(
                                    'Rejecting %s, future publication date %s'
                                    % (bookname, book['date']))
                                removedResults += 1
                                rejected = 3

                        if not rejected and lazylibrarian.CONFIG['NO_PUBDATE']:
                            if not book['date']:
                                logger.debug(
                                    'Rejecting %s, no publication date' %
                                    bookname)
                                removedResults += 1
                                rejected = 4

                        if not rejected and lazylibrarian.CONFIG['NO_ISBN']:
                            if not isbnhead:
                                logger.debug('Rejecting %s, no isbn' %
                                             bookname)
                                removedResults += 1
                                rejected = 5

                        if not rejected:
                            cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                            cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                            match = myDB.match(cmd, (bookname.replace(
                                '"', '""'), authorname.replace('"', '""')))
                            if match:
                                if match['BookID'] != bookid:  # we have a different book with this author/title already
                                    logger.debug(
                                        'Rejecting bookid %s for [%s][%s] already got %s'
                                        % (match['BookID'], authorname,
                                           bookname, bookid))
                                    rejected = 6
                                    duplicates += 1

                        if not rejected:
                            cmd = 'SELECT AuthorName,BookName FROM books,authors'
                            cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                            match = myDB.match(cmd, (bookid, ))
                            if match:  # we have a book with this bookid already
                                if bookname != match[
                                        'BookName'] or authorname != match[
                                            'AuthorName']:
                                    logger.debug(
                                        'Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]'
                                        % (bookid, authorname, bookname,
                                           match['AuthorName'],
                                           match['BookName']))
                                else:
                                    logger.debug(
                                        'Rejecting bookid %s for [%s][%s] already got this book in database'
                                        % (bookid, authorname, bookname))
                                    check_status = True
                                duplicates += 1
                                rejected = 7

                        if check_status or not rejected or (
                                lazylibrarian.CONFIG['IMP_IGNORE']
                                and rejected in [3, 4, 5]):  # dates, isbn
                            if not locked:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorID": authorid,
                                    "BookName": bookname,
                                    "BookSub": book['sub'],
                                    "BookDesc": book['desc'],
                                    "BookIsbn": book['isbn'],
                                    "BookPub": book['pub'],
                                    "BookGenre": book['genre'],
                                    "BookImg": book['img'],
                                    "BookLink": book['link'],
                                    "BookRate": float(book['rate']),
                                    "BookPages": book['pages'],
                                    "BookDate": book['date'],
                                    "BookLang": booklang,
                                    "Status": book_status,
                                    "AudioStatus": audio_status,
                                    "BookAdded": added
                                }
                                resultcount += 1

                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                                logger.debug("Book found: " + bookname + " " +
                                             book['date'])
                                updated = False
                                if 'nocover' in book[
                                        'img'] or 'nophoto' in book['img']:
                                    # try to get a cover from another source
                                    workcover, source = getBookCover(bookid)
                                    if workcover:
                                        logger.debug(
                                            'Updated cover for %s using %s' %
                                            (bookname, source))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": workcover}
                                        myDB.upsert("books", newValueDict,
                                                    controlValueDict)
                                        updated = True

                                elif book['img'] and book['img'].startswith(
                                        'http'):
                                    link, success, _ = cache_img(
                                        "book",
                                        bookid,
                                        book['img'],
                                        refresh=refresh)
                                    if success:
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": link}
                                        myDB.upsert("books", newValueDict,
                                                    controlValueDict)
                                        updated = True
                                    else:
                                        logger.debug(
                                            'Failed to cache image for %s' %
                                            book['img'])

                                serieslist = []
                                if book['series']:
                                    serieslist = [
                                        ('', book['seriesNum'],
                                         cleanName(unaccented(book['series']),
                                                   '&/'))
                                    ]
                                if lazylibrarian.CONFIG['ADD_SERIES']:
                                    newserieslist = getWorkSeries(bookid)
                                    if newserieslist:
                                        serieslist = newserieslist
                                        logger.debug(
                                            'Updated series: %s [%s]' %
                                            (bookid, serieslist))
                                        updated = True
                                setSeries(serieslist, bookid)

                                new_status = setStatus(bookid, serieslist,
                                                       bookstatus)

                                if not new_status == book_status:
                                    book_status = new_status
                                    updated = True

                                worklink = getWorkPage(bookid)
                                if worklink:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"WorkPage": worklink}
                                    myDB.upsert("books", newValueDict,
                                                controlValueDict)

                                if not existing_book:
                                    logger.debug(
                                        "[%s] Added book: %s [%s] status %s" %
                                        (authorname, bookname, booklang,
                                         book_status))
                                    added_count += 1
                                elif updated:
                                    logger.debug(
                                        "[%s] Updated book: %s [%s] status %s"
                                        % (authorname, bookname, booklang,
                                           book_status))
                                    updated_count += 1
                            else:
                                book_ignore_count += 1
            except KeyError:
                pass

            deleteEmptySeries()
            logger.debug(
                '[%s] The Google Books API was hit %s time%s to populate book list'
                % (authorname, api_hits, plural(api_hits)))
            cmd = 'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID=?'
            cmd += ' AND Status != "Ignored" order by BookDate DESC'
            lastbook = myDB.match(cmd, (authorid, ))

            if lastbook:  # maybe there are no books [remaining] for this author
                lastbookname = lastbook['BookName']
                lastbooklink = lastbook['BookLink']
                lastbookdate = lastbook['BookDate']
                lastbookimg = lastbook['BookImg']
            else:
                lastbookname = ""
                lastbooklink = ""
                lastbookdate = ""
                lastbookimg = ""

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {
                "Status": entrystatus,
                "LastBook": lastbookname,
                "LastLink": lastbooklink,
                "LastDate": lastbookdate,
                "LastBookImg": lastbookimg
            }

            myDB.upsert("authors", newValueDict, controlValueDict)

            logger.debug("Found %s total book%s for author" %
                         (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s for author" %
                         (ignored, plural(ignored)))
            logger.debug(
                "Removed %s bad character or no-name result%s for author" %
                (removedResults, plural(removedResults)))
            logger.debug("Removed %s duplicate result%s for author" %
                         (duplicates, plural(duplicates)))
            logger.debug("Found %s book%s by author marked as Ignored" %
                         (book_ignore_count, plural(book_ignore_count)))
            logger.debug("Imported/Updated %s book%s for author" %
                         (resultcount, plural(resultcount)))

            myDB.action(
                'insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                (authorname.replace('"', '""'), api_hits, gr_lang_hits,
                 lt_lang_hits, gb_lang_change, cache_hits, ignored,
                 removedResults, not_cached, duplicates))

            if refresh:
                logger.info(
                    "[%s] Book processing complete: Added %s book%s / Updated %s book%s"
                    % (authorname, added_count, plural(added_count),
                       updated_count, plural(updated_count)))
            else:
                logger.info(
                    "[%s] Book processing complete: Added %s book%s to the database"
                    % (authorname, added_count, plural(added_count)))

        except Exception:
            logger.error('Unhandled exception in GB.get_author_books: %s' %
                         traceback.format_exc())
Ejemplo n.º 29
0
def processDir(force=False, reset=False):
    """Post-process snatched downloads found in the download directory.

    For every row of the ``wanted`` table with Status "Snatched", look for a
    matching entry in the download directory (fuzzy match on the NZB title),
    work out the destination folder/filename for the book or magazine, hand
    the folder to ``processDestination`` and update the database on success.
    Folders that fail to process are renamed with a ``.fail`` suffix so they
    are skipped on the next run.  Afterwards any remaining "LL.(bookid)"
    entries in the download directory are passed to ``import_book``.

    :param force: when False and nothing is marked as snatched, the
        scheduled 'processDir' job is stopped instead of scanning.
    :param reset: when true, the 'processDir' job is restarted at the end.
    :return: False if the download directory cannot be listed, otherwise None.
    """
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    # fall back to the current working directory if no usable download dir is configured
    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
        return False

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if force is False and len(snatched) == 0:
        logger.info('Nothing marked as snatched. Stopping postprocessor job.')
        common.schedule_job(action='Stop', target='processDir')
    elif len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
    else:
        logger.debug("Checking %s downloads for %s snatched files" % (len(downloads), len(snatched)))
        ppcount = 0
        # Characters we don't want in a destination name. Applied BEFORE joining with
        # DESTINATION_DIR as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
        dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
               ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
        for book in snatched:
            found = False
            for fname in downloads:
                if not fname.endswith('.fail'):  # has this failed before?
                    # this is to get round differences in torrent filenames.
                    # Torrents aren't always returned with the name we searched for
                    # there might be a better way...
                    if isinstance(fname, str):
                        matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                    else:
                        matchname = fname
                    # compare only the part before any " LL.(bookid)" suffix
                    if ' LL.(' in matchname:
                        matchname = matchname.split(' LL.(')[0]
                    matchtitle = book['NZBtitle']
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                    if match >= 95:
                        fname = matchname
                        if os.path.isfile(os.path.join(processpath, fname)):
                            # handle single file downloads here...
                            # move the file into a folder named after it (minus extension)
                            # so the rest of the code only has to deal with folders
                            if formatter.is_valid_booktype(fname, booktype="book") \
                                    or formatter.is_valid_booktype(fname, booktype="mag"):
                                dirname = os.path.join(processpath, os.path.splitext(fname)[0])
                                if not os.path.exists(dirname):
                                    try:
                                        os.makedirs(dirname)
                                    except OSError as why:
                                        logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror))
                                if os.path.exists(dirname):
                                    try:
                                        shutil.move(os.path.join(processpath, fname), os.path.join(dirname, fname))
                                        fname = os.path.splitext(fname)[0]
                                    except Exception as why:
                                        logger.debug("Failed to move file %s to %s, %s" %
                                                     (fname, dirname, str(why)))
                        if os.path.isdir(os.path.join(processpath, fname)):
                            pp_path = os.path.join(processpath, fname)
                            logger.debug('Found folder %s for %s' % (pp_path, book['NZBtitle']))
                            found = True
                            break
                    else:
                        logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle))
                else:
                    logger.debug('Skipping %s' % fname)
            if found:
                # NOTE(review): this query (and the magazines one below) is built by string
                # interpolation, so a value containing a double quote will break it. Switch to
                # a parameterised query if myDB.select accepts an args sequence - confirm.
                data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID'])
                if data:
                    authorname = data[0]['AuthorName']
                    bookname = data[0]['BookName']
                    if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                        logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                        lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

                    # Default destination path, should be allowed change per config file.
                    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = common.remove_accents(global_name)
                    # dest_path = authorname+'/'+bookname
                    # global_name = bookname + ' - ' + authorname
                    dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
                    dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                        lazylibrarian.SYS_ENCODING)
                else:
                    # not a known book, for magazines the wanted.BookID column holds the magazine title
                    data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                    if data:
                        # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                        # files are downloading, there will be an error in post-processing, trying to go to the
                        # same directory.
                        mostrecentissue = data[0]['IssueDate']  # keep for processing issues arriving out of order
                        mag_name = formatter.latinToAscii(formatter.replace_all(book['BookID'], dic))
                        # book auxinfo is a cleaned date, eg 2015-01-01
                        dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                            '$IssueDate',
                            book['AuxInfo']).replace('$Title',
                                                     mag_name)
                        # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                        if lazylibrarian.MAG_RELATIVE:
                            if dest_path[0] not in '._':
                                dest_path = '_' + dest_path
                            dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                                lazylibrarian.SYS_ENCODING)
                        else:
                            dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                        # authorname/bookname of None mark this entry as a magazine further down
                        authorname = None
                        bookname = None
                        global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                            '$Title', mag_name)
                        global_name = common.remove_accents(global_name)
                        # global_name = book['AuxInfo']+' - '+title
                    else:
                        # a folder was found, but BookID matched neither the books nor the magazines table
                        logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                        continue
            else:
                logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
                continue

            # try:
            #    os.chmod(dest_path, 0777)
            # except Exception, e:
            #    logger.debug("Could not chmod post-process directory: " + str(dest_path))

            processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

            if processBook:
                logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
                # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
                controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Processed", "NZBDate": formatter.now()}  # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)

                if bookname is not None:  # it's a book, if None it's a magazine
                    processExtras(myDB, dest_path, global_name, data)
                else:
                    # update mags
                    controlValueDict = {"Title": book['BookID']}
                    if mostrecentissue > book['AuxInfo']:  # check this in case processing issues arriving out of order
                        newValueDict = {"LastAcquired": formatter.today(), "IssueStatus": "Open"}
                    else:
                        newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": formatter.today(),
                                        "IssueStatus": "Open"}
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # dest_path is where we put the magazine after processing, but we don't have the full filename
                    # so look for any "book" in that directory
                    dest_file = book_file(dest_path, booktype='mag')
                    controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                    newValueDict = {"IssueAcquired": formatter.today(),
                                    "IssueFile": dest_file,
                                    "IssueID": magazinescan.create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                    }
                    myDB.upsert("issues", newValueDict, controlValueDict)

                    # create a thumbnail cover for the new issue
                    magazinescan.create_cover(dest_file)

                logger.info('Successfully processed: %s' % global_name)
                ppcount = ppcount + 1
                notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now())
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s.fail' % pp_path)
                # at this point, as it failed we should move it or it will get postprocessed
                # again (and fail again)
                try:
                    os.rename(pp_path, pp_path + '.fail')
                except Exception as why:
                    # best effort only, but don't swallow KeyboardInterrupt/SystemExit
                    # and do say why the rename failed
                    logger.debug("Unable to rename %s, %s" % (pp_path, str(why)))

        downloads = os.listdir(processpath)  # check in case we processed/deleted some above
        for directory in downloads:
            if "LL.(" in directory and not directory.endswith('.fail'):
                # the book id is embedded in the name as "LL.(bookid)"
                bookID = str(directory).split("LL.(")[1].split(")")[0]
                logger.debug("Book with id: " + str(bookID) + " is in downloads")
                pp_path = os.path.join(processpath, directory)

                # for a single file, import from the download directory itself
                if os.path.isfile(pp_path):
                    pp_path = os.path.join(processpath)

                if (os.path.isdir(pp_path)):
                    logger.debug('Found LL folder %s.' % pp_path)
                if import_book(pp_path, bookID):
                    ppcount = ppcount + 1

        if ppcount:
            logger.info('%s books/mags have been processed.' % ppcount)
        else:
            logger.info('No snatched books/mags have been found')
    if reset:
        common.schedule_job(action='Restart', target='processDir')
Ejemplo n.º 30
0
def get_capabilities(provider):
    """
    query provider for caps if none loaded yet, or if config entry is too old and not set manually.

    Stored values are reused when provider['UPDATED'] holds a 10 character
    value that is not older than lazylibrarian.CACHE_AGE, or when the entry
    is flagged provider['MANUAL'].  Otherwise the provider's
    "/api?t=caps" page is fetched and parsed, the GENERALSEARCH, EXTENDED,
    BOOKCAT, MAGCAT, BOOKSEARCH, MAGSEARCH and UPDATED keys of ``provider``
    are rewritten in place and the config is saved.

    :param provider: dict-like provider entry; reads HOST, API, UPDATED, MANUAL.
    :return: the same ``provider`` object (left unchanged if the caps could
        not be fetched or parsed).
    """
    match = False
    if len(provider['UPDATED']) == 10:  # any stored values?
        match = True
        if (age(provider['UPDATED']) > lazylibrarian.CACHE_AGE) and not provider['MANUAL']:
            logger.debug('Stored capabilities for %s are too old' % provider['HOST'])
            match = False

    if match:
        logger.debug('Using stored capabilities for %s' % provider['HOST'])
        return provider

    host = provider['HOST']
    if not str(host)[:4] == "http":
        host = 'http://' + host
    # avoid building "http://host//api" when the configured host ends with a slash
    if host[-1:] == '/':
        host = host[:-1]
    URL = host + '/api?t=caps&apikey=' + provider['API']
    logger.debug('Requesting capabilities for %s' % URL)

    source_xml, success = fetchURL(URL)
    data = ''
    if success:
        try:
            data = ElementTree.fromstring(source_xml)
        except ElementTree.ParseError:
            # a reply that isn't well-formed xml is treated the same as no reply
            logger.debug(u"Error parsing xml from %s, %s" % (URL, source_xml))
    else:
        logger.debug(u"Error getting xml from %s, %s" % (URL, source_xml))

    # len() of an Element is its number of children, so an empty root counts as "no data"
    if not len(data):
        logger.warn(u"Unable to get capabilities for %s: No data returned" % URL)
        return provider

    logger.debug(u"Parsing xml for capabilities of %s" % URL)

    #
    # book search isn't mentioned in the caps xml returned by
    # nzbplanet,jackett,oznzb,usenet-crawler, so we can't use it as a test
    # but the newznab+ ones usually support t=book and categories in 7000 range
    # whereas nZEDb ones don't support t=book and use categories in 8000 range
    # also some providers give searchtype but no supportedparams, so we still
    # can't tell what queries will be accepted
    # also category names can be lowercase or Mixed, magazine subcat name isn't
    # consistent, and subcat can be just subcat or category/subcat subcat > lang
    # eg "Magazines" "Mags" or "Books/Magazines" "Mags > French"
    # Load all languages for now as we don't know which the user might want
    #
    #
    #  set some defaults
    #
    provider['GENERALSEARCH'] = 'search'
    provider['EXTENDED'] = '1'
    provider['BOOKCAT'] = ''
    provider['MAGCAT'] = ''
    provider['BOOKSEARCH'] = ''
    provider['MAGSEARCH'] = ''
    #
    search = data.find('searching/search')
    if search is not None and search.attrib.get('available') == 'yes':
        provider['GENERALSEARCH'] = 'search'

    # Element.iter() replaces getiterator(), which was removed in python 3.9
    for cat in data.iter('category'):
        if cat.attrib.get('name', '').lower() != 'books':
            continue
        bookcat = cat.attrib['id']  # keep main bookcat for later
        provider['BOOKCAT'] = bookcat
        provider['MAGCAT'] = ''
        # 7000 looks like newznab+, should support book-search
        # anything else looks like nZEDb, probably no book-search
        if bookcat == '7000':
            provider['BOOKSEARCH'] = 'book'
        else:
            provider['BOOKSEARCH'] = ''
        # but check in case the caps say otherwise
        search = data.find('searching/book-search')
        if search is not None and 'available' in search.attrib:
            if search.attrib['available'] == 'yes':
                provider['BOOKSEARCH'] = 'book'
            else:
                provider['BOOKSEARCH'] = ''
        for subcat in cat.iter('subcat'):
            # a subcat without a name can't be classified, so it is skipped
            subname = subcat.attrib.get('name', '').lower()
            if 'ebook' in subname:
                provider['BOOKCAT'] = "%s,%s" % (provider['BOOKCAT'], subcat.attrib['id'])
            if 'magazines' in subname or 'mags' in subname:
                if provider['MAGCAT']:
                    provider['MAGCAT'] = "%s,%s" % (provider['MAGCAT'], subcat.attrib['id'])
                else:
                    provider['MAGCAT'] = subcat.attrib['id']
        # if no specific magazine subcategory, use books
        if not provider['MAGCAT']:
            provider['MAGCAT'] = bookcat
    logger.debug("Categories: Books %s : Mags %s" % (provider['BOOKCAT'], provider['MAGCAT']))
    provider['UPDATED'] = today()
    lazylibrarian.config_write()
    return provider
Ejemplo n.º 31
0
def get_capabilities(provider, force=False):
    """
    Query a provider for its capabilities unless usable stored values exist.

    Stored values are reused when ``force`` is false, provider['UPDATED'] holds
    a 10-character value (presumably a YYYY-MM-DD date - confirm against
    today()), and either the entry is flagged MANUAL or its age does not exceed
    CONFIG['CACHE_AGE'].  Otherwise ``<host>/api?t=caps`` is fetched (retried
    with the api key if the first request fails) and the search types and
    category ids found in the reply are written into ``provider``.

    :param provider: dict of provider settings. Reads HOST, API, UPDATED and
                     MANUAL; writes GENERALSEARCH, EXTENDED, BOOKCAT, MAGCAT,
                     AUDIOCAT, BOOKSEARCH, MAGSEARCH, AUDIOSEARCH and UPDATED.
    :param force: when True, ignore stored values and always query the provider.
    :return: the same provider dict, updated in place if new data was parsed.
    """
    use_stored = False
    if not force and len(provider['UPDATED']) == 10:  # any stored values?
        use_stored = True
        if (age(provider['UPDATED']) >
                lazylibrarian.CONFIG['CACHE_AGE']) and not provider['MANUAL']:
            logger.debug('Stored capabilities for %s are too old' %
                         provider['HOST'])
            use_stored = False

    if use_stored:
        logger.debug('Using stored capabilities for %s' % provider['HOST'])
        return provider

    host = provider['HOST']
    if not str(host)[:4] == "http":
        host = 'http://' + host
    if host[-1:] == '/':
        host = host[:-1]
    URL = host + '/api?t=caps'

    # most providers will give you caps without an api key
    logger.debug('Requesting capabilities for %s' % URL)
    source_xml, success = fetchURL(URL)
    # If it failed, retry with api key
    if not success and provider['API']:
        URL = URL + '&apikey=' + provider['API']
        logger.debug('Requesting capabilities for %s' % URL)
        source_xml, success = fetchURL(URL)

    data = ''
    if success:
        try:
            data = ElementTree.fromstring(source_xml)
        except ElementTree.ParseError:
            logger.debug("Error parsing xml from %s, %s" %
                         (URL, source_xml))
    else:
        logger.debug("Error getting xml from %s, %s" % (URL, source_xml))

    # len() of an Element is its number of children, so an empty reply
    # (or the '' fallback above) is treated as "no data"
    if not len(data):
        logger.warn("Unable to get capabilities for %s: No data returned" %
                    URL)
        return provider

    logger.debug("Parsing xml for capabilities of %s" % URL)

    #
    # book search isn't mentioned in the caps xml returned by
    # nzbplanet,jackett,oznzb,usenet-crawler, so we can't use it as a test
    # but the newznab+ ones usually support t=book and categories in 7000 range
    # whereas nZEDb ones don't support t=book and use categories in 8000 range
    # also some providers give searchtype but no supportedparams, so we still
    # can't tell what queries will be accepted
    # also category names can be lowercase or Mixed, magazine subcat name isn't
    # consistent, and subcat can be just subcat or category/subcat subcat > lang
    # eg "Magazines" "Mags" or "Books/Magazines" "Mags > French"
    # Load all languages for now as we don't know which the user might want
    #
    #
    #  set some defaults
    #
    provider['GENERALSEARCH'] = 'search'
    provider['EXTENDED'] = '1'
    provider['BOOKCAT'] = ''
    provider['MAGCAT'] = ''
    provider['AUDIOCAT'] = ''
    provider['BOOKSEARCH'] = ''
    provider['MAGSEARCH'] = ''
    provider['AUDIOSEARCH'] = ''
    #
    search = data.find('searching/search')
    if search is not None and search.attrib.get('available') == 'yes':
        provider['GENERALSEARCH'] = 'search'

    # iter() replaces getiterator(), which was removed in Python 3.9
    for cat in data.iter('category'):
        if 'name' not in cat.attrib:
            continue
        catname = cat.attrib['name'].lower()

        if catname == 'audio':
            provider['AUDIOCAT'] = cat.attrib['id']
            for subcat in cat.iter('subcat'):
                if 'audiobook' in subcat.attrib['name'].lower():
                    provider['AUDIOCAT'] = "%s,%s" % (
                        provider['AUDIOCAT'], subcat.attrib['id'])

        elif catname == 'books':
            # keep main bookcat for starting magazines later
            bookcat = cat.attrib['id']
            provider['BOOKCAT'] = bookcat
            provider['MAGCAT'] = ''
            # set default booksearch
            if bookcat == '7000':
                # looks like newznab+, should support book-search
                provider['BOOKSEARCH'] = 'book'
            else:
                # looks like nZEDb, probably no book-search
                provider['BOOKSEARCH'] = ''
            # but check in case we got some settings back.
            # Must compare with None: an Element without children is falsy,
            # so a plain truth test would skip a valid <book-search/> node.
            search = data.find('searching/book-search')
            if search is not None and 'available' in search.attrib:
                if search.attrib['available'] == 'yes':
                    provider['BOOKSEARCH'] = 'book'
                else:
                    provider['BOOKSEARCH'] = ''

            for subcat in cat.iter('subcat'):
                subname = subcat.attrib['name'].lower()
                if 'ebook' in subname:
                    provider['BOOKCAT'] = "%s,%s" % (
                        provider['BOOKCAT'], subcat.attrib['id'])
                if 'magazines' in subname or 'mags' in subname:
                    if provider['MAGCAT']:
                        provider['MAGCAT'] = "%s,%s" % (
                            provider['MAGCAT'], subcat.attrib['id'])
                    else:
                        provider['MAGCAT'] = subcat.attrib['id']
            # if no specific magazine subcategory, use books
            if not provider['MAGCAT']:
                provider['MAGCAT'] = bookcat

    logger.debug("Categories: Books %s : Mags %s : Audio %s" %
                 (provider['BOOKCAT'], provider['MAGCAT'],
                  provider['AUDIOCAT']))
    provider['UPDATED'] = today()
    # restore the thread name after config_write()
    # NOTE(review): presumably config_write() renames the current thread - confirm
    threadname = threading.currentThread().name
    lazylibrarian.config_write()
    threading.currentThread().name = threadname
    return provider
Ejemplo n.º 32
0
def addAuthorToDB(authorname=None, refresh=False):
    """
    Add an author to the database, and get a list of all their books.
    If the author already exists in the database, refresh their details and booklist.

    :param authorname: name of the author to add or refresh
    :param refresh: passed through to the author and book lookups
    :return: None. Exceptions raised while processing are logged with a
             traceback and not propagated to the caller.
    """
    if not authorname:
        # nothing to look up; avoid creating a placeholder row for "None"
        logger.warn(u"addAuthorToDB called without an author name")
        return

    try:
        myDB = database.DBConnection()

        GR = GoodReads(authorname)

        # bound parameters instead of string-built SQL, so quotes in the
        # author name cannot break (or alter) the statement
        dbauthor = myDB.match("SELECT * from authors WHERE AuthorName=?", (authorname,))
        controlValueDict = {"AuthorName": authorname}

        if not dbauthor:
            # placeholder AuthorID until the real one is known
            newValueDict = {
                "AuthorID": "0: %s" % (authorname),
                "Status": "Loading"
            }
            logger.debug("Now adding new author: %s to database" % authorname)
        else:
            newValueDict = {"Status": "Loading"}
            logger.debug("Now updating author: %s" % authorname)
        myDB.upsert("authors", newValueDict, controlValueDict)

        author = GR.find_author_id(refresh=refresh)
        if not author:
            logger.warn(u"Nothing found for %s" % authorname)
            # remove the placeholder row written above
            myDB.action('DELETE from authors WHERE AuthorName=?', (authorname,))
            return

        authorid = author['authorid']
        authorlink = author['authorlink']
        authorimg = author['authorimg']
        controlValueDict = {"AuthorName": authorname}
        newValueDict = {
            "AuthorID": authorid,
            "AuthorLink": authorlink,
            "AuthorImg": authorimg,
            "AuthorBorn": author['authorborn'],
            "AuthorDeath": author['authordeath'],
            "DateAdded": today(),
            "Status": "Loading"
        }
        myDB.upsert("authors", newValueDict, controlValueDict)

        # replace a 'nophoto' image if possible, then cache remote images
        new_img = False
        if authorimg and 'nophoto' in authorimg:
            authorimg = getAuthorImage(authorid)
            new_img = True
        if authorimg and authorimg.startswith('http'):
            newimg = cache_cover(authorid, authorimg)
            if newimg:
                authorimg = newimg
                new_img = True

        if new_img:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"AuthorImg": authorimg}
            myDB.upsert("authors", newValueDict, controlValueDict)

        # process books
        if lazylibrarian.BOOK_API == "GoogleBooks":
            book_api = GoogleBooks()
            book_api.get_author_books(authorid, authorname, refresh=refresh)
        elif lazylibrarian.BOOK_API == "GoodReads":
            GR.get_author_books(authorid, authorname, refresh=refresh)

        # update totals works for existing authors only.
        # New authors need their totals updating after libraryscan or import of books.
        if dbauthor:
            update_totals(authorid)
        logger.debug("[%s] Author update complete" % authorname)
    except Exception:
        # top-level boundary: log the full traceback rather than propagate
        logger.error('Unhandled exception in addAuthorToDB: %s' % traceback.format_exc())
Ejemplo n.º 33
0
    def find_book(self, bookid=None, queue=None):
        """
        Fetch a single book from GoodReads by id and store it as "Wanted".

        Requests the book page, reads the book fields from the returned xml,
        looks up the author id, writes the row to the "books" table and then
        tries to improve the cover, series and work-page data.

        :param bookid: GoodReads book id (string, appended to the request url)
        :param queue: not used by this method
        :return: None; returns early if the request fails or no author id
                 can be found for the book
        """
        threading.currentThread().name = "GR-ADD-BOOK"
        myDB = database.DBConnection()

        URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(
            self.params)

        try:
            rootxml, in_cache = self.get_request(URL)
            if rootxml is None:
                logger.debug("Error requesting book")
                return
        except Exception as e:
            logger.error("Error finding book: %s" % e)
            return

        bookLanguage = rootxml.find('./book/language_code').text
        bookname = rootxml.find('./book/title').text

        if not bookLanguage:
            bookLanguage = "Unknown"
        #
        # PAB user has said they want this book, don't block for bad language, just warn
        #
        valid_langs = [
            valid_lang.strip()
            for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')
        ]
        if bookLanguage not in valid_langs:
            logger.debug('Book %s language does not match preference' %
                         bookname)

        bookdate = rootxml.find('./book/publication_year').text
        if bookdate is None:
            bookdate = "0000"

        # a missing node, empty text or the goodreads placeholder all mean
        # "no cover"; bookimg must end up a string for the checks further down
        img_node = rootxml.find('./book/img_url')
        bookimg = img_node.text if img_node is not None else None
        if not bookimg or bookimg == 'http://www.goodreads.com/assets/nocover/111x148.png':
            bookimg = 'images/nocover.png'

        authorname = rootxml.find('./book/authors/author/name').text
        bookdesc = rootxml.find('./book/description').text
        bookisbn = rootxml.find('./book/isbn').text
        bookpub = rootxml.find('./book/publisher').text
        booklink = rootxml.find('./book/link').text
        bookrate = float(rootxml.find('./book/average_rating').text)
        # path normalised to './book/...' like the other lookups (was '.book/num_pages')
        bookpages = rootxml.find('./book/num_pages').text

        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if not author:
            # without an author id the row cannot be written (AuthorID was
            # previously left unbound here and raised NameError)
            logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
            return
        AuthorID = author['authorid']

        # series name and number from a title like "Name (Series, #3)"
        result = re.search(r"\(([\S\s]+),? #(\d+\.?-?\d{0,})", bookname)
        if result:
            series = result.group(1)
            if series[-1] == ',':
                series = series[:-1]
            seriesNum = result.group(2)
        else:
            series = None
            seriesNum = None

        bookname = bookname.replace(':', '').replace('"', '').replace("'", "")
        bookname = unidecode(u'%s' % bookname)
        bookname = bookname.strip()  # strip whitespace

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorName": authorname,
            "AuthorID": AuthorID,
            "AuthorLink": None,
            "BookName": bookname,
            "BookSub": None,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": None,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": bookLanguage,
            "Status": "Wanted",
            "BookAdded": formatter.today(),
            "Series": series,
            "SeriesNum": seriesNum
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.debug("%s added to the books database" % bookname)

        if 'nocover' in bookimg or 'nophoto' in bookimg:
            # try to get a cover from librarything
            workcover = bookwork.getBookCover(bookid)
            if workcover:
                logger.debug(u'Updated cover for %s to %s' %
                             (bookname, workcover))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)

        elif bookimg.startswith('http'):
            link = bookwork.cache_cover(bookid, bookimg)
            if link is not None:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": link}
                myDB.upsert("books", newValueDict, controlValueDict)

        if seriesNum is None:
            #  try to get series info from librarything
            series, seriesNum = bookwork.getWorkSeries(bookid)
            if seriesNum:
                logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"Series": series, "SeriesNum": seriesNum}
                myDB.upsert("books", newValueDict, controlValueDict)

        worklink = bookwork.getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
Ejemplo n.º 34
0
def addBookToDB(bookid, authorname):
    """
    Add (or refresh) one book and its author in the database.

    Marks the book row as "Loading", asks GoodReads for the book details and
    stores them with status "Skipped", then looks up the author and stores the
    author details with status "Loading".

    :param bookid: id of the book to add
    :param authorname: name of the book's author
    :return: None
    """
    search_type = 'book'  # was named `type`, which shadowed the builtin
    myDB = database.DBConnection()
    GR = GoodReads(authorname, search_type)

    # process book
    dbbook = myDB.action('SELECT * from books WHERE BookID=?', [bookid]).fetchone()
    controlValueDict = {"BookID": bookid}

    if dbbook is None:
        # NOTE(review): this puts BookID in both the value and the key dict
        # of the upsert - confirm that upsert() copes with the duplicate column
        newValueDict = {
            "BookID":   "BookID: %s" % (bookid),
            "Status":       "Loading"
            }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("books", newValueDict, controlValueDict)

    book = GR.find_book()

    if not book:
        # %-formatting so a non-string bookid cannot raise TypeError here
        logger.warn("Error fetching bookinfo for BookID: %s" % bookid)

    else:
        controlValueDict = {"BookID": book['bookid']}
        newValueDict = {
            "AuthorName":   book['authorname'],
            "BookName":     book['bookname'],
            "BookDesc":     book['bookdesc'],
            "BookIsbn":     book['bookisbn'],
            "BookImg":      book['bookimg'],
            "BookLink":     book['booklink'],
            "BookRate":     book['bookrate'],
            "BookPages":    book['bookpages'],
            "BookDate":     book['bookdate'],
            "BookLang":     book['booklang'],
            "Status":       "Skipped",
            "BookAdded":    formatter.today()
            }

        myDB.upsert("books", newValueDict, controlValueDict)

    # process author
    # the placeholder must not be quoted: '?' inside quotes is a literal
    # string, leaving the statement with no parameter to bind authorname to
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?", [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}

    # NOTE(review): this dict is never written to the database (no upsert
    # before it is replaced or the function ends) - confirm whether an
    # upsert, as in the book section above, was intended
    if dbauthor is None:
        newValueDict = {
            "AuthorName":   "Authorname: %s" % (authorname),
            "Status":       "Loading"
            }
    else:
        newValueDict = {"Status": "Loading"}

    author = GR.find_author_id()

    if not author:
        logger.warn("Error fetching authorinfo with name: %s" % authorname)

    else:
        controlValueDict = {"AuthorName": authorname}
        newValueDict = {
            "AuthorID":     author['authorid'],
            "AuthorLink":   author['authorlink'],
            "AuthorImg":    author['authorimg'],
            "AuthorBorn":   author['authorborn'],
            "AuthorDeath":  author['authordeath'],
            "DateAdded":    formatter.today(),
            "Status":       "Loading"
            }
        myDB.upsert("authors", newValueDict, controlValueDict)
Ejemplo n.º 35
0
    def find_book(self, bookid=None, queue=None):
        """
        Fetch a single book from GoodReads by id and store it as "Wanted".

        Requests the book page, reads the book fields from the returned xml,
        splits the title into name and subtitle at the first ': ', looks up
        the author id, writes the row to the "books" table and then tries to
        improve the cover, series and work-page data.

        :param bookid: GoodReads book id (string, appended to the request url)
        :param queue: not used by this method
        :return: None; returns early if the request fails or no author id
                 can be found for the book
        """
        myDB = database.DBConnection()

        URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(
            self.params)

        try:
            rootxml, in_cache = get_xml_request(URL)
            if rootxml is None:
                logger.debug("Error requesting book")
                return
        except Exception as e:
            logger.error("Error finding book: %s" % e)
            return

        bookLanguage = rootxml.find('./book/language_code').text
        bookname = rootxml.find('./book/title').text

        if not bookLanguage:
            bookLanguage = "Unknown"
        #
        # PAB user has said they want this book, don't block for bad language, just warn
        #
        valid_langs = [
            valid_lang.strip()
            for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')
        ]
        if bookLanguage not in valid_langs:
            logger.debug('Book %s language does not match preference' %
                         bookname)

        bookdate = rootxml.find('./book/publication_year').text
        if bookdate is None:
            bookdate = "0000"

        # a missing node, empty text or a goodreads placeholder all mean
        # "no cover"; an empty <img_url/> used to raise TypeError on the
        # substring test because its text is None
        img_node = rootxml.find('./book/img_url')
        bookimg = img_node.text if img_node is not None else None
        if not bookimg or 'assets/nocover' in bookimg:
            bookimg = 'images/nocover.png'

        authorname = rootxml.find('./book/authors/author/name').text
        bookdesc = rootxml.find('./book/description').text
        bookisbn = rootxml.find('./book/isbn').text
        bookpub = rootxml.find('./book/publisher').text
        booklink = rootxml.find('./book/link').text
        bookrate = float(rootxml.find('./book/average_rating').text)
        # path normalised to './book/...' like the other lookups (was '.book/num_pages')
        bookpages = rootxml.find('./book/num_pages').text

        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if not author:
            # without an author id the row cannot be written (AuthorID was
            # previously left unbound here and raised NameError)
            logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
            return
        AuthorID = author['authorid']

        # split "Title: Subtitle" at the first ': '
        booksub = ''
        bookname = unaccented(bookname)
        if ': ' in bookname:
            bookname, booksub = bookname.split(': ', 1)

        dic = {':': '', '"': '', '\'': ''}
        bookname = replace_all(bookname, dic).strip()
        booksub = replace_all(booksub, dic).strip()
        # prefer series info from the subtitle when there is one
        if booksub:
            series, seriesNum = bookSeries(booksub)
        else:
            series, seriesNum = bookSeries(bookname)

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorName": authorname,
            "AuthorID": AuthorID,
            "AuthorLink": None,
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": None,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": bookLanguage,
            "Status": "Wanted",
            "BookAdded": today(),
            "Series": series,
            "SeriesNum": seriesNum
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.debug("%s added to the books database" % bookname)

        if 'nocover' in bookimg or 'nophoto' in bookimg:
            # try to get a cover from librarything
            workcover = getBookCover(bookid)
            if workcover:
                logger.debug(u'Updated cover for %s to %s' %
                             (bookname, workcover))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)

        elif bookimg.startswith('http'):
            link = cache_cover(bookid, bookimg)
            if link is not None:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": link}
                myDB.upsert("books", newValueDict, controlValueDict)

        if seriesNum is None:
            #  try to get series info from librarything
            series, seriesNum = getWorkSeries(bookid)
            if seriesNum:
                logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"Series": series, "SeriesNum": seriesNum}
                myDB.upsert("books", newValueDict, controlValueDict)

        worklink = getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
Ejemplo n.º 36
0
    def find_book(self, bookid=None, queue=None):
        """
        Fetch a single book from GoodReads by id and store it as "Wanted".

        Requests the book page, reads the book fields from the returned xml and
        resolves the author: an author id already in the database is used as
        is, a different stored id for the same author name takes precedence,
        and an unknown author is added with status "Ignored".  The book row is
        then written and the cover, series and work-page data are updated.

        :param bookid: GoodReads book id (string, appended to the request url)
        :param queue: not used by this method
        :return: None; returns early if the request fails or no author id
                 can be found for the book
        """
        myDB = database.DBConnection()

        URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(self.params)

        try:
            rootxml, in_cache = get_xml_request(URL)
            if rootxml is None:
                logger.debug("Error requesting book")
                return
        except Exception as e:
            logger.error("Error finding book: %s" % str(e))
            return

        bookLanguage = rootxml.find('./book/language_code').text
        bookname = rootxml.find('./book/title').text

        if not bookLanguage:
            bookLanguage = "Unknown"
        #
        # PAB user has said they want this book, don't block for unwanted language, just warn
        #
        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        if bookLanguage not in valid_langs:
            logger.debug('Book %s goodreads language does not match preference, %s' % (bookname, bookLanguage))

        bookdate = rootxml.find('./book/publication_year').text
        if bookdate is None:
            bookdate = "0000"

        # a missing node, empty text or a goodreads placeholder all mean
        # "no cover"; an empty <img_url/> used to raise TypeError on the
        # substring test because its text is None
        img_node = rootxml.find('./book/img_url')
        bookimg = img_node.text if img_node is not None else None
        if not bookimg or 'assets/nocover' in bookimg:
            bookimg = 'images/nocover.png'

        authorname = rootxml.find('./book/authors/author/name').text
        bookdesc = rootxml.find('./book/description').text
        bookisbn = rootxml.find('./book/isbn').text
        bookpub = rootxml.find('./book/publisher').text
        booklink = rootxml.find('./book/link').text
        bookrate = float(rootxml.find('./book/average_rating').text)
        # path normalised to './book/...' like the other lookups (was '.book/num_pages')
        bookpages = rootxml.find('./book/num_pages').text

        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if not author:
            logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
            return

        AuthorID = author['authorid']
        # bound parameters instead of string-built SQL: the id and name come
        # from an external service and may contain quote characters
        match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
        if not match:
            match = myDB.match('SELECT AuthorID from authors WHERE AuthorName=?', (author['authorname'],))
            if match:
                logger.debug('%s: Changing authorid from %s to %s' %
                             (author['authorname'], AuthorID, match['AuthorID']))
                AuthorID = match['AuthorID']    # we have a different authorid for that authorname
            else:
                # no author but request to add book, add author as "ignored"
                # User hit "add book" button from a search
                controlValueDict = {"AuthorID": AuthorID}
                newValueDict = {
                    "AuthorName": author['authorname'],
                    "AuthorImg": author['authorimg'],
                    "AuthorLink": author['authorlink'],
                    "AuthorBorn": author['authorborn'],
                    "AuthorDeath": author['authordeath'],
                    "DateAdded": today(),
                    "Status": "Ignored"
                }
                myDB.upsert("authors", newValueDict, controlValueDict)

        bookname = unaccented(bookname)
        bookname, booksub = split_title(authorname, bookname)
        dic = {':': '.', '"': '', '\'': ''}
        bookname = replace_all(bookname, dic).strip()
        booksub = replace_all(booksub, dic).strip()
        # prefer series info from the subtitle when there is one
        if booksub:
            series, seriesNum = bookSeries(booksub)
        else:
            series, seriesNum = bookSeries(bookname)

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorID": AuthorID,
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": "",
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": bookLanguage,
            "Status": "Wanted",
            "BookAdded": today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.info("%s added to the books database" % bookname)

        if 'nocover' in bookimg or 'nophoto' in bookimg:
            # try to get a cover from librarything
            workcover = getBookCover(bookid)
            if workcover:
                logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)

        elif bookimg.startswith('http'):
            link, success = cache_img("book", bookid, bookimg)
            if success:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": link}
                myDB.upsert("books", newValueDict, controlValueDict)
            else:
                logger.debug('Failed to cache image for %s' % bookimg)

        if lazylibrarian.CONFIG['ADD_SERIES']:
            # prefer series info from librarything
            seriesdict = getWorkSeries(bookid)
            if seriesdict:
                logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
            elif series:
                # fall back to the series parsed from the title/subtitle
                seriesdict = {cleanName(unaccented(series)): seriesNum}
            setSeries(seriesdict, bookid)

        worklink = getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
Ejemplo n.º 37
0
def addAuthorToDB(authorname=None,
                  refresh=False,
                  authorid=None,
                  addbooks=True):
    """
    Add an author to the database by name or id, and optionally get a list of all their books
    If author already exists in database, refresh their details and optionally booklist

    The author is looked up at GoodReads by authorid first (if given). If that
    finds nothing and the author is already in the database, the stored name is
    used for a second lookup by name. While the lookup is running the author row
    has status "Loading"; a row created only for this lookup is deleted again if
    GoodReads returns nothing.

    :param authorname: name to look up, used when no authorid is given
    :param refresh: passed on to the GoodReads name lookup, the author image
                    cache and the book import
    :param authorid: author id to look up, tried before authorname
    :param addbooks: True to import the author's books with the configured
                     BOOK_API, False to only add the author (status 'Ignored')
    :return: status message on completion or on an unhandled exception,
             None if no matching author could be found
    """
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "AddAuthorToDB"
    try:
        myDB = database.DBConnection()
        match = False
        authorimg = ''
        new_author = not refresh
        entry_status = ''

        if authorid:
            dbauthor = myDB.match("SELECT * from authors WHERE AuthorID=?",
                                  (authorid, ))
            if not dbauthor:
                # placeholder name until GoodReads tells us the real one
                authorname = 'unknown author'
                logger.debug("Adding new author id %s to database" % authorid)
                new_author = True
            else:
                entry_status = dbauthor['Status']
                authorname = dbauthor['authorname']
                logger.debug("Updating author %s " % authorname)
                new_author = False

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            GR = GoodReads(authorname)
            author = GR.get_author_info(authorid=authorid)
            if author:
                authorname = author['authorname']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorID": authorid}
                newValueDict = {
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today()
                }
                # don't overwrite details of authors flagged as manually edited
                if not dbauthor or not dbauthor['manual']:
                    newValueDict["AuthorName"] = author['authorname']
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']

                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn(u"Nothing found for %s" % authorid)
                if not dbauthor:
                    # remove the "Loading" row we created above
                    myDB.action('DELETE from authors WHERE AuthorID=?',
                                (authorid, ))
                    # 'unknown author' is only a placeholder, do not fall
                    # through and search GoodReads for it by name
                    authorname = None

        if authorname and not match:
            authorname = ' '.join(
                authorname.split())  # ensure no extra whitespace
            GR = GoodReads(authorname)
            author = GR.find_author_id(refresh=refresh)

            query = "SELECT * from authors WHERE AuthorName=?"
            # the name is bound as a parameter, so quotes must NOT be doubled
            # here or names containing an apostrophe never match
            dbauthor = myDB.match(query, (authorname, ))
            if author and not dbauthor:  # may have different name for same authorid (spelling?)
                query = "SELECT * from authors WHERE AuthorID=?"
                dbauthor = myDB.match(query, (author['authorid'], ))
                if dbauthor:
                    # only an author we already have under another spelling
                    # has a stored name to switch to; a new author has no row
                    authorname = dbauthor['AuthorName']

            controlValueDict = {"AuthorName": authorname}

            if not dbauthor:
                # temporary id until the GoodReads id is stored below
                newValueDict = {
                    "AuthorID": "0: %s" % authorname,
                    "Status": "Loading"
                }
                logger.debug("Now adding new author: %s to database" %
                             authorname)
                entry_status = lazylibrarian.CONFIG['NEWAUTHOR_STATUS']
                new_author = True
            else:
                newValueDict = {"Status": "Loading"}
                logger.debug("Now updating author: %s" % authorname)
                entry_status = dbauthor['Status']
                new_author = False
            myDB.upsert("authors", newValueDict, controlValueDict)

            if author:
                authorid = author['authorid']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorName": authorname}
                newValueDict = {
                    "AuthorID": author['authorid'],
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today(),
                    "Status": "Loading"
                }
                # don't overwrite details of authors flagged as manually edited
                if not dbauthor or not dbauthor['manual']:
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']

                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn(u"Nothing found for %s" % authorname)
                if not dbauthor:
                    # remove the "Loading" row we created above
                    myDB.action('DELETE from authors WHERE AuthorName=?',
                                (authorname, ))
                return
        if not match:
            logger.error(
                "AddAuthorToDB: No matching result for authorname or authorid")
            return

        # if author is set to manual, should we allow replacing 'nophoto' ?
        new_img = False
        match = myDB.match("SELECT Manual from authors WHERE AuthorID=?",
                           (authorid, ))
        if not match or not match['Manual']:
            if authorimg and 'nophoto' in authorimg:
                newimg = getAuthorImage(authorid)
                if newimg:
                    authorimg = newimg
                    new_img = True

        # allow caching
        if authorimg and authorimg.startswith('http'):
            newimg, success = cache_img("author",
                                        authorid,
                                        authorimg,
                                        refresh=refresh)
            if success:
                authorimg = newimg
                new_img = True
            else:
                logger.debug('Failed to cache image for %s' % authorimg)

        if new_img:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"AuthorImg": authorimg}
            myDB.upsert("authors", newValueDict, controlValueDict)

        if addbooks:
            # audiostatus = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
            if new_author:
                bookstatus = lazylibrarian.CONFIG['NEWAUTHOR_STATUS']
            else:
                bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

            if entry_status not in ['Active', 'Wanted', 'Ignored', 'Paused']:
                entry_status = 'Active'  # default for invalid/unknown or "loading"
            # process books
            if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                book_api = GoogleBooks()
                book_api.get_author_books(authorid,
                                          authorname,
                                          bookstatus,
                                          entrystatus=entry_status,
                                          refresh=refresh)
            elif lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                book_api = GoodReads(authorname)
                book_api.get_author_books(authorid,
                                          authorname,
                                          bookstatus,
                                          entrystatus=entry_status,
                                          refresh=refresh)

            # update totals works for existing authors only.
            # New authors need their totals updating after libraryscan or import of books.
            if not new_author:
                update_totals(authorid)

            if new_author and lazylibrarian.CONFIG['GR_FOLLOWNEW']:
                res = grfollow(authorid, True)
                if res.startswith('Unable'):
                    logger.warn(res)
                try:
                    # the follow id is whatever comes after "followid=" in the reply
                    followid = res.split("followid=")[1]
                    logger.debug('%s marked followed' % authorname)
                except IndexError:
                    # no "followid=" in the reply, store an empty follow id
                    followid = ''
                myDB.action('UPDATE authors SET GRfollow=? WHERE AuthorID=?',
                            (followid, authorid))
        else:
            # if we're not loading any books, mark author as ignored
            entry_status = 'Ignored'

        # replace the temporary "Loading" status with the final one
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": entry_status}
        myDB.upsert("authors", newValueDict, controlValueDict)

        msg = "[%s] Author update complete, status %s" % (authorname,
                                                          entry_status)
        logger.info(msg)
        return msg
    except Exception:
        # top-level boundary: log the traceback and hand it back to the caller
        msg = 'Unhandled exception in addAuthorToDB: %s' % traceback.format_exc(
        )
        logger.error(msg)
        return msg
Ejemplo n.º 38
0
    def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped", refresh=False):
        try:
            api_hits = 0
            gr_lang_hits = 0
            lt_lang_hits = 0
            gb_lang_change = 0
            cache_hits = 0
            not_cached = 0
            URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(self.params)

            # Artist is loading
            myDB = database.DBConnection()
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            try:
                rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
            except Exception as e:
                logger.error("Error fetching author books: %s" % str(e))
                return
            if rootxml is None:
                logger.debug("Error requesting author books")
                return
            if not in_cache:
                api_hits += 1
            resultxml = rootxml.getiterator('book')

            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])

            resultsCount = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            book_ignore_count = 0
            total_count = 0

            if not len(resultxml):
                logger.warn('[%s] No books found for author with ID: %s' % (authorname, authorid))
            else:
                logger.debug("[%s] Now processing books with GoodReads API" % authorname)
                logger.debug(u"url " + URL)

                authorNameResult = rootxml.find('./author/name').text
                # Goodreads sometimes puts extra whitepase in the author names!
                authorNameResult =  ' '.join(authorNameResult.split())
                logger.debug(u"GoodReads author name [%s]" % authorNameResult)
                loopCount = 1

                while resultxml:
                    for book in resultxml:
                        total_count += 1

                        if book.find('publication_year').text is None:
                            pubyear = "0000"
                        else:
                            pubyear = book.find('publication_year').text

                        try:
                            bookimg = book.find('image_url').text
                            if 'nocover' in bookimg:
                                bookimg = 'images/nocover.png'
                        except (KeyError, AttributeError):
                            bookimg = 'images/nocover.png'

                        bookLanguage = "Unknown"
                        find_field = "id"
                        isbn = ""
                        isbnhead = ""
                        if "All" not in valid_langs:  # do we care about language
                            if book.find('isbn').text:
                                find_field = "isbn"
                                isbn = book.find('isbn').text
                                isbnhead = isbn[0:3]
                            else:
                                if book.find('isbn13').text:
                                    find_field = "isbn13"
                                    isbn = book.find('isbn13').text
                                    isbnhead = isbn[3:6]
                            # Try to use shortcut of ISBN identifier codes described here...
                            # https://en.wikipedia.org/wiki/List_of_ISBN_identifier_groups
                            if isbnhead:
                                if find_field == "isbn13" and isbn.startswith('979'):
                                    for item in lazylibrarian.isbn_979_dict:
                                        if isbnhead.startswith(item):
                                            bookLanguage = lazylibrarian.isbn_979_dict[item]
                                            break
                                    if bookLanguage != "Unknown":
                                        logger.debug("ISBN979 returned %s for %s" % (bookLanguage, isbnhead))
                                elif (find_field == "isbn") or (find_field == "isbn13" and isbn.startswith('978')):
                                    for item in lazylibrarian.isbn_978_dict:
                                        if isbnhead.startswith(item):
                                            bookLanguage = lazylibrarian.isbn_978_dict[item]
                                            break
                                    if bookLanguage != "Unknown":
                                        logger.debug("ISBN978 returned %s for %s" % (bookLanguage, isbnhead))

                            if bookLanguage == "Unknown" and isbnhead:
                                # Nothing in the isbn dictionary, try any cached results
                                match = myDB.match('SELECT lang FROM languages where isbn = "%s"' % isbnhead)
                                if match:
                                    bookLanguage = match['lang']
                                    cache_hits += 1
                                    logger.debug("Found cached language [%s] for %s [%s]" %
                                                 (bookLanguage, find_field, isbnhead))
                                else:
                                    # no match in cache, try searching librarything for a language code using the isbn
                                    # if no language found, librarything return value is "invalid" or "unknown"
                                    # returns plain text, not xml
                                    BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + isbn
                                    try:
                                        librarything_wait()
                                        resp = urllib2.urlopen(BOOK_URL, timeout=30).read()
                                        lt_lang_hits += 1
                                        logger.debug("LibraryThing reports language [%s] for %s" % (resp, isbnhead))

                                        if 'invalid' in resp or 'Unknown' in resp:
                                            bookLanguage = "Unknown"
                                        else:
                                            bookLanguage = resp  # found a language code
                                            myDB.action('insert into languages values ("%s", "%s")' %
                                                        (isbnhead, bookLanguage))
                                            logger.debug(u"LT language %s: %s" % (isbnhead, bookLanguage))
                                    except Exception as e:
                                        logger.error("Error finding LT language result for [%s], %s" % (isbn, str(e)))

                            if bookLanguage == "Unknown":
                                # still  no earlier match, we'll have to search the goodreads api
                                try:
                                    if book.find(find_field).text:
                                        BOOK_URL = 'http://www.goodreads.com/book/show?id=' + \
                                                   book.find(find_field).text + '&' + urllib.urlencode(self.params)
                                        logger.debug(u"Book URL: " + BOOK_URL)

                                        time_now = int(time.time())
                                        if time_now <= lazylibrarian.LAST_GOODREADS:
                                            time.sleep(1)

                                        bookLanguage = ""
                                        try:
                                            BOOK_rootxml, in_cache = get_xml_request(BOOK_URL)
                                            if BOOK_rootxml is None:
                                                logger.debug('Error requesting book language code')
                                            else:
                                                if not in_cache:
                                                    # only update last_goodreads if the result wasn't found in the cache
                                                    lazylibrarian.LAST_GOODREADS = time_now
                                                try:
                                                    bookLanguage = BOOK_rootxml.find('./book/language_code').text
                                                except Exception as e:
                                                    logger.debug("Error finding language_code in book xml: %s" % str(e))
                                        except Exception as e:
                                            logger.debug("Error getting book xml: %s" % str(e))

                                        if not in_cache:
                                            gr_lang_hits += 1
                                        if not bookLanguage:
                                            bookLanguage = "Unknown"
                                            # At this point, give up?
                                            # WhatWork on author/title doesn't give us a language.
                                            # It might give us the "original language" of the book (but not always)
                                            # and our copy might not be in the original language anyway
                                            # eg "The Girl With the Dragon Tattoo" original language Swedish
                                            # If we have an isbn, try WhatISBN to get alternatives
                                            # in case any of them give us a language, but it seems if thinglang doesn't
                                            # have a language for the first isbn code, it doesn't for any of the
                                            # alternatives either
                                            # Goodreads search results don't include the language. Although sometimes
                                            # it's in the html page, it's not in the xml results

                                        if isbnhead != "":
                                            # if GR didn't give an isbn we can't cache it, just use language for this book
                                            myDB.action('insert into languages values ("%s", "%s")' %
                                                        (isbnhead, bookLanguage))
                                            logger.debug("GoodReads reports language [%s] for %s" %
                                                         (bookLanguage, isbnhead))
                                        else:
                                            not_cached += 1

                                        logger.debug(u"GR language: " + bookLanguage)
                                    else:
                                        logger.debug("No %s provided for [%s]" % (find_field, book.find('title').text))
                                        # continue

                                except Exception as e:
                                    logger.debug(u"Goodreads language search failed: %s" % str(e))

                            if bookLanguage not in valid_langs:
                                logger.debug('Skipped %s with language %s' % (book.find('title').text, bookLanguage))
                                ignored += 1
                                continue

                        bookname = book.find('title').text
                        bookid = book.find('id').text
                        bookdesc = book.find('description').text
                        bookisbn = book.find('isbn').text
                        bookpub = book.find('publisher').text
                        booklink = book.find('link').text
                        bookrate = float(book.find('average_rating').text)
                        bookpages = book.find('num_pages').text
                        bookname = unaccented(bookname)

                        bookname, booksub = split_title(authorNameResult, bookname)

                        dic = {':': '.', '"': ''}  # do we need to strip apostrophes , '\'': ''}
                        bookname = replace_all(bookname, dic)
                        bookname = bookname.strip()  # strip whitespace
                        booksub = replace_all(booksub, dic)
                        booksub = booksub.strip()  # strip whitespace
                        if booksub:
                            series, seriesNum = bookSeries(booksub)
                        else:
                            series, seriesNum = bookSeries(bookname)

                        rejected = False
                        check_status = False

                        if re.match('[^\w-]', bookname):  # reject books with bad characters in title
                            logger.debug(u"removed result [" + bookname + "] for bad characters")
                            removedResults += 1
                            rejected = True

                        if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                            if pubyear > today()[:4]:
                                logger.debug('Rejecting %s, future publication date %s' % (bookname, pubyear))
                                removedResults += 1
                                rejected = True

                        if not rejected and not bookname:
                            logger.debug('Rejecting bookid %s for %s, no bookname' %
                                         (bookid, authorNameResult))
                            removedResults += 1
                            rejected = True

                        if not rejected:
                            cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                            cmd += ' and BookName = "%s" COLLATE NOCASE and AuthorName = "%s" COLLATE NOCASE' % \
                                    (bookname, authorNameResult.replace('"', '""'))
                            match = myDB.match(cmd)
                            if match:
                                if match['BookID'] != bookid:
                                    # we have a different book with this author/title already
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                                 (match['BookID'], authorNameResult, bookname, bookid))
                                    duplicates += 1
                                    rejected = True

                        if not rejected:
                            cmd = 'SELECT AuthorName,BookName FROM books,authors'
                            cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=%s' % bookid
                            match = myDB.match(cmd)
                            if match:
                                # we have a book with this bookid already
                                if bookname != match['BookName'] or authorNameResult != match['AuthorName']:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                                 (bookid, authorNameResult, bookname,
                                                 match['AuthorName'], match['BookName']))
                                else:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                                 (bookid, authorNameResult, bookname))
                                    check_status = True
                                duplicates += 1
                                rejected = True

                        if check_status or not rejected:
                            existing_book = myDB.match('SELECT Status,Manual FROM books WHERE BookID = "%s"' % bookid)
                            if existing_book:
                                book_status = existing_book['Status']
                                locked = existing_book['Manual']
                                if locked is None:
                                    locked = False
                                elif locked.isdigit():
                                    locked = bool(int(locked))
                            else:
                                book_status = bookstatus  # new_book status, or new_author status
                                locked = False

                            # Is the book already in the database?
                            # Leave alone if locked or status "ignore"
                            if not locked and book_status != "Ignored":
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorID": authorid,
                                    "BookName": bookname,
                                    "BookSub": booksub,
                                    "BookDesc": bookdesc,
                                    "BookIsbn": bookisbn,
                                    "BookPub": bookpub,
                                    "BookGenre": "",
                                    "BookImg": bookimg,
                                    "BookLink": booklink,
                                    "BookRate": bookrate,
                                    "BookPages": bookpages,
                                    "BookDate": pubyear,
                                    "BookLang": bookLanguage,
                                    "Status": book_status,
                                    "BookAdded": today()
                                }

                                resultsCount += 1
                                updated = False

                                myDB.upsert("books", newValueDict, controlValueDict)
                                logger.debug(u"Book found: " + book.find('title').text + " " + pubyear)

                                if 'nocover' in bookimg or 'nophoto' in bookimg:
                                    # try to get a cover from librarything
                                    workcover = getBookCover(bookid)
                                    if workcover:
                                        logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": workcover}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True

                                elif bookimg and bookimg.startswith('http'):
                                    link, success = cache_img("book", bookid, bookimg, refresh=refresh)
                                    if success:
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": link}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True
                                    else:
                                        logger.debug('Failed to cache image for %s' % bookimg)

                                seriesdict = {}
                                if lazylibrarian.CONFIG['ADD_SERIES']:
                                    # prefer series info from librarything
                                    seriesdict = getWorkSeries(bookid)
                                    if seriesdict:
                                        logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
                                        updated = True
                                    else:
                                        if series:
                                            seriesdict = {cleanName(unaccented(series)): seriesNum}
                                    setSeries(seriesdict, bookid)

                                new_status = setStatus(bookid, seriesdict, bookstatus)

                                if not new_status == book_status:
                                    book_status = new_status
                                    updated = True

                                worklink = getWorkPage(bookid)
                                if worklink:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"WorkPage": worklink}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                                if not existing_book:
                                    logger.debug(u"[%s] Added book: %s [%s] status %s" %
                                                (authorname, bookname, bookLanguage, book_status))
                                    added_count += 1
                                elif updated:
                                    logger.debug(u"[%s] Updated book: %s [%s] status %s" %
                                                (authorname, bookname, bookLanguage, book_status))
                                    updated_count += 1
                            else:
                                book_ignore_count += 1

                    loopCount += 1
                    URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + \
                          urllib.urlencode(self.params) + '&page=' + str(loopCount)
                    resultxml = None
                    try:
                        rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
                        if rootxml is None:
                            logger.debug('Error requesting next page of results')
                        else:
                            resultxml = rootxml.getiterator('book')
                            if not in_cache:
                                api_hits += 1
                    except Exception as e:
                        resultxml = None
                        logger.error("Error finding next page of results: %s" % str(e))

                    if resultxml:
                        if all(False for _ in resultxml):  # returns True if iterator is empty
                            resultxml = None

            deleteEmptySeries()
            lastbook = myDB.match('SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID="%s" \
                                AND Status != "Ignored" order by BookDate DESC' % authorid)
            if lastbook:
                lastbookname = lastbook['BookName']
                lastbooklink = lastbook['BookLink']
                lastbookdate = lastbook['BookDate']
                lastbookimg = lastbook['BookImg']
            else:
                lastbookname = ""
                lastbooklink = ""
                lastbookdate = ""
                lastbookimg = ""

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {
                "Status": "Active",
                "LastBook": lastbookname,
                "LastLink": lastbooklink,
                "LastDate": lastbookdate,
                "LastBookImg": lastbookimg
            }
            myDB.upsert("authors", newValueDict, controlValueDict)

            # This is here because GoodReads sometimes has several entries with the same BookID!
            modified_count = added_count + updated_count

            logger.debug("Found %s result%s" % (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
            logger.debug(
                "Removed %s bad character or no-name result%s" %
                (removedResults, plural(removedResults)))
            logger.debug("Removed %s duplicate result%s" % (duplicates, plural(duplicates)))
            logger.debug("Found %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count)))
            logger.debug("Imported/Updated %s book%s" % (modified_count, plural(modified_count)))

            myDB.action('insert into stats values ("%s", %i, %i, %i, %i, %i, %i, %i, %i, %i)' %
                        (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                         cache_hits, ignored, removedResults, not_cached, duplicates))

            if refresh:
                logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                            (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
            else:
                logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                            (authorname, added_count, plural(added_count)))

        except Exception:
            logger.error('Unhandled exception in GR.get_author_books: %s' % traceback.format_exc())
Ejemplo n.º 39
0
    def find_book(self, bookid=None, queue=None):
        """Look up a single GoodReads book by id and store it as "Wanted".

        Requests the GoodReads ``book/show`` page for ``bookid``, reads the
        book fields out of the returned XML, resolves the author's id with a
        ``GoodReads`` author lookup and upserts one row into the ``books``
        table.

        :param bookid: GoodReads book id; concatenated into the request URL.
        :param queue: accepted for interface compatibility, not used here.
        :return: None. Returns early (after logging) if the request fails,
            no document is returned, or no author id can be resolved.
        """
        threading.currentThread().name = "GR-ADD-BOOK"
        myDB = database.DBConnection()

        URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(
            self.params)

        try:
            rootxml, in_cache = self.get_request(URL)
        except Exception as e:
            logger.error("Error finding book: " + str(e))
            return

        if rootxml is None:
            # Nothing to parse; every lookup below would fail on None.
            logger.error("Error finding book: no data returned for bookid %s" % bookid)
            return

        bookLanguage = rootxml.find('./book/language_code').text
        bookname = rootxml.find('./book/title').text

        if not bookLanguage:
            bookLanguage = "Unknown"

        # PAB user has said they want this book, don't block for bad language, just warn
        valid_langs = [valid_lang.strip()
                       for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
        if bookLanguage not in valid_langs:
            logger.debug('Book %s language does not match preference' %
                         bookname)

        bookdate = rootxml.find('./book/publication_year').text
        if bookdate is None:
            bookdate = "0000"

        try:
            # NOTE(review): other code in this file reads 'image_url' from
            # GoodReads book elements; confirm 'img_url' is the right tag here.
            # A missing element falls through to the placeholder cover below.
            bookimg = rootxml.find('./book/img_url').text
            if bookimg == 'http://www.goodreads.com/assets/nocover/111x148.png':
                bookimg = 'images/nocover.png'
        except (KeyError, AttributeError):
            bookimg = 'images/nocover.png'

        authorname = rootxml.find('./book/authors/author/name').text
        bookdesc = rootxml.find('./book/description').text
        bookisbn = rootxml.find('./book/isbn').text
        bookpub = rootxml.find('./book/publisher').text
        booklink = rootxml.find('./book/link').text
        bookrate = float(rootxml.find('./book/average_rating').text)
        # Fixed path: the original '.book/num_pages' was missing the slash, so
        # find() returned None and .text raised AttributeError.
        bookpages = rootxml.find('./book/num_pages').text

        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if not author:
            # Previously AuthorID was left unbound here and building the row
            # below raised NameError; bail out with a clear message instead.
            logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
            return
        AuthorID = author['authorid']

        bookname = bookname.replace(':', '').replace('"', '').replace("'", "")
        bookname = unidecode(u'%s' % bookname)
        bookname = bookname.strip()  # strip whitespace

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorName": authorname,
            "AuthorID": AuthorID,
            "AuthorLink": None,
            "BookName": bookname,
            "BookSub": None,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": None,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": bookLanguage,
            "Status": "Wanted",
            "BookAdded": formatter.today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.debug("%s added to the books database" % bookname)
Ejemplo n.º 40
0
    def get_author_books(self, authorid=None, authorname=None, refresh=False):
        """Import every GoodReads book of one author into the ``books`` table.

        Marks the author as "Loading", then walks the paged GoodReads
        ``author/list`` results. For each book it works out a language
        (``languages`` table cache, then LibraryThing, then GoodReads), skips
        languages outside ``lazylibrarian.IMP_PREFLANG`` unless that list
        contains "All", rejects unusable or duplicate titles, and upserts the
        remainder together with cover, series and work-page details. Finally
        it refreshes the author's "last book" fields, sets the author back to
        "Active" and writes one row of counters into ``stats``.

        :param authorid: GoodReads author id; concatenated into request URLs
            and SQL text.
        :param authorname: author name used in log messages and the stats row.
        :param refresh: when true, page requests bypass the cache
            (``useCache=not refresh``) and the final log line reports updates.
        :return: ``books_dict``, which is always an empty list here.
        """

        api_hits = 0
        gr_lang_hits = 0
        lt_lang_hits = 0
        gb_lang_change = 0
        cache_hits = 0
        not_cached = 0
        # These counters are read unconditionally by the summary at the end,
        # so they must exist even when the author has no books at all
        # (previously they were only set in the "books found" branch and the
        # summary raised NameError for an empty result).
        resultsCount = 0
        removedResults = 0
        duplicates = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0
        URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(self.params)

        # Artist is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)
        books_dict = []
        try:
            rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
        except Exception as e:
            logger.error("Error fetching author books: %s" % e)
            return books_dict
        if rootxml is None:
            logger.debug("Error requesting author books")
            return books_dict
        if not in_cache:
            api_hits = api_hits + 1
        resultxml = rootxml.getiterator('book')

        valid_langs = ([valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')])

        if not len(resultxml):
            logger.warn('[%s] No books found for author with ID: %s' % (authorname, authorid))
        else:
            logger.debug("[%s] Now processing books with GoodReads API" % authorname)

            logger.debug(u"url " + URL)

            authorNameResult = rootxml.find('./author/name').text
            logger.debug(u"author name " + authorNameResult)
            loopCount = 1

            # One pass per result page; resultxml is set to None once a page
            # cannot be fetched or contains no books.
            while resultxml is not None:
                for book in resultxml:
                    total_count = total_count + 1

                    if (book.find('publication_year').text is None):
                        pubyear = "0000"
                    else:
                        pubyear = book.find('publication_year').text

                    try:
                        bookimg = book.find('image_url').text
                        if ('nocover' in bookimg):
                            bookimg = 'images/nocover.png'
                    except (KeyError,AttributeError):
                        bookimg = 'images/nocover.png'

    # PAB this next section tries to get the book language using the isbn13 to look it up. If no isbn13 we skip the
    # book entirely, rather than including it with an "Unknown" language. Changed this so we can still include the book
    # with language set to "Unknown". There is a setting in config.ini to allow or skip books with "Unknown" language
    # if you really don't want to include them.
    # Not all GR books have isbn13 filled in, but all have a GR bookid, which we've already got, so use that.
    # Also, with GR API rules we can only call the API once per second, which slows us down a lot when all we want
    # is to get the language. We sleep for one second per book that GR knows about for each author you have in your
    # library. The libraryThing API has the same 1 second restriction, and is limited to 1000 hits per day, but has
    # fewer books with unknown language. To get around this and speed up the process, see if we already have a book
    # in the database with a similar start to the ISBN. The way ISBNs work, digits 3-5 of a 13 char ISBN or digits 0-2
    # of a 10 digit ISBN indicate the region/language so if two books have the same 3 digit isbn code, they _should_
    # be the same language.
    # I ran a simple python script on my library of 1500 books, and these codes were 100% correct on matching book
    # languages, no mis-matches. It did result in a small number of books with "unknown" language being wrongly matched
    # but most "unknown" were matched to the correct language.
    # We could look up ISBNs we already know about in the database, but this only holds books in the languages we want
    # to keep, which reduces the number of cache hits, so we create a new database table, holding ALL results including
    # the ISBNs for languages we don't want and books we reject.
    # The new table is created (if not exists) in init.py so by the time we get here there is an existing table.
    # If we haven't an already matching partial ISBN, look up language code from libraryThing
    # "http://www.librarything.com/api/thingLang.php?isbn=1234567890"
    # If you find a matching language, add it to the database.  If "unknown" or "invalid", try GR as maybe GR can
    # provide a match.
    # If both LT and GR return unknown, add isbn to db as "unknown". No point in repeatedly asking LT for a code
    # it's told you it doesn't know.
    # As an extra option, if language includes "All" in config.ini, we can skip this whole section and process
    # everything much faster by not querying for language at all.
    # It does mean we include a lot of unwanted foreign translations in the database, but it's _much_ faster.

                    bookLanguage = "Unknown"
                    find_field = "id"
                    isbn = ""
                    isbnhead = ""
                    if "All" not in valid_langs:  # do we care about language
                        if (book.find('isbn').text is not None):
                            find_field = "isbn"
                            isbn = book.find('isbn').text
                            isbnhead = isbn[0:3]
                        else:
                            if (book.find('isbn13').text is not None):
                                find_field = "isbn13"
                                isbn = book.find('isbn13').text
                                isbnhead = isbn[3:6]
                        if (find_field != 'id'):  # isbn or isbn13 found

                            match = myDB.action('SELECT lang FROM languages where isbn = "%s"' %
                                                (isbnhead)).fetchone()
                            if (match):
                                bookLanguage = match['lang']
                                cache_hits = cache_hits + 1
                                logger.debug("Found cached language [%s] for %s [%s]" %
                                             (bookLanguage, find_field, isbnhead))
                            else:
                                # no match in cache, try searching librarything for a language code using the isbn
                                # if no language found, librarything return value is "invalid" or "unknown"
                                # returns plain text, not xml
                                BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + isbn
                                try:
                                    librarything_wait()
                                    resp = urllib2.urlopen(BOOK_URL, timeout=30).read()
                                    lt_lang_hits = lt_lang_hits + 1
                                    logger.debug("LibraryThing reports language [%s] for %s" % (resp, isbnhead))

                                    if ('invalid' in resp or 'Unknown' in resp):
                                        find_field = "id"  # reset the field to force search on goodreads
                                    else:
                                        bookLanguage = resp  # found a language code
                                        myDB.action('insert into languages values ("%s", "%s")' %
                                                    (isbnhead, bookLanguage))
                                        logger.debug(u"LT language %s: %s" % (isbnhead, bookLanguage))
                                except Exception as e:
                                    logger.error("Error finding LT language result for [%s], %s" % (isbn, e))
                                    find_field = "id"  # reset the field to search on goodreads

                        if (find_field == 'id'):
                            # [or bookLanguage == "Unknown"] no earlier match, we'll have to search the goodreads api
                            try:
                                if (book.find(find_field).text is not None):
                                    BOOK_URL = 'http://www.goodreads.com/book/show?id=' + \
                                        book.find(find_field).text + '&' + urllib.urlencode(self.params)
                                    logger.debug(u"Book URL: " + BOOK_URL)

                                    try:
                                        # crude rate limit: wait if the last uncached
                                        # GoodReads hit was within the current second
                                        time_now = int(time.time())
                                        if time_now <= lazylibrarian.LAST_GOODREADS:
                                            time.sleep(1)

                                        BOOK_rootxml, in_cache = get_xml_request(BOOK_URL)
                                        if BOOK_rootxml is None:
                                            logger.debug('Error requesting book language code')
                                            bookLanguage = ""
                                        else:
                                            if not in_cache:
                                                # only update last_goodreads if the result wasn't found in the cache
                                                lazylibrarian.LAST_GOODREADS = time_now
                                            bookLanguage = BOOK_rootxml.find('./book/language_code').text
                                    except Exception as e:
                                        logger.error("Error finding book results: %s" % e)
                                    # NOTE(review): if the request above raised, in_cache still
                                    # holds the value from the previous request.
                                    if not in_cache:
                                        gr_lang_hits = gr_lang_hits + 1
                                    if not bookLanguage:
                                        bookLanguage = "Unknown"

                                    if (isbnhead != ""):
                                        # we have an isbn prefix, so remember GoodReads' answer for it
                                        myDB.action('insert into languages values ("%s", "%s")' %
                                                    (isbnhead, bookLanguage))
                                        logger.debug("GoodReads reports language [%s] for %s" %
                                                     (bookLanguage, isbnhead))
                                    else:
                                        # GR didn't give an isbn so we can't cache it, just use language for this book
                                        not_cached = not_cached + 1

                                    logger.debug(u"GR language: " + bookLanguage)
                                else:
                                    logger.debug("No %s provided for [%s]" % (find_field, book.find('title').text))
                                    # continue

                            except Exception as e:
                                logger.debug(u"An error has occured: %s" % e)

                        if bookLanguage not in valid_langs:
                            logger.debug('Skipped a book with language %s' % bookLanguage)
                            ignored = ignored + 1
                            continue
                    bookname = book.find('title').text
                    bookid = book.find('id').text
                    bookdesc = book.find('description').text
                    bookisbn = book.find('isbn').text
                    bookpub = book.find('publisher').text
                    booklink = book.find('link').text
                    bookrate = float(book.find('average_rating').text)
                    bookpages = book.find('num_pages').text
                    bookname = unaccented(bookname)
                    # "Title: Subtitle" is split on the first ': '
                    if ': ' in bookname:
                        parts = bookname.split(': ', 1)
                        bookname = parts[0]
                        booksub = parts[1]
                    else:
                        booksub = ''
                    dic = {':': '', '"': '', '\'': ''}
                    bookname = replace_all(bookname, dic)
                    bookname = bookname.strip()  # strip whitespace
                    booksub = replace_all(booksub, dic)
                    booksub = booksub.strip()  # strip whitespace
                    if booksub:
                        series,seriesNum = bookSeries(booksub)
                    else:
                        series,seriesNum = bookSeries(bookname)

                    # GoodReads sometimes has multiple bookids for the same book (same author/title, different editions)
                    # and sometimes uses the same bookid if the book is the same but the title is slightly different
                    # We use bookid, then reject if another author/title has a different bookid so we just keep one...
                    find_book_status = myDB.select('SELECT * FROM books WHERE BookID = "%s"' % bookid)
                    if find_book_status:
                        for resulted in find_book_status:
                            book_status = resulted['Status']
                            locked = resulted ['Manual']
                    else:
                        book_status = lazylibrarian.NEWBOOK_STATUS
                        locked = False

                    rejected = False

                    # re.match anchors at the start of the string, so this only
                    # inspects the first character of the title
                    if re.match('[^\w-]', bookname):  # reject books with bad characters in title
                        logger.debug(u"removed result [" + bookname + "] for bad characters")
                        removedResults = removedResults + 1
                        rejected = True

                    if not rejected and not bookname:
                        logger.debug('Rejecting bookid %s for %s, no bookname' %
                                (bookid, authorNameResult))
                        removedResults = removedResults + 1
                        rejected = True

                    if not rejected:
                        find_books = myDB.select('SELECT * FROM books WHERE BookName = "%s" and AuthorName = "%s"' %
                                                    (bookname, authorNameResult))
                        if find_books:
                            for find_book in find_books:
                                if find_book['BookID'] != bookid:
                                    # we have a book with this author/title already
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                        (find_book['BookID'], authorNameResult, bookname, bookid))
                                    duplicates = duplicates + 1
                                    rejected = True
                                    break

                    if not rejected:
                        find_books = myDB.select('SELECT * FROM books WHERE BookID = "%s"' % bookid)
                        if find_books:
                            # we have a book with this bookid already
                            logger.debug('Rejecting bookid %s for [%s][%s] already got this bookid in database' %
                                (bookid, authorNameResult, bookname))
                            duplicates = duplicates + 1
                            rejected = True
                            # Skip only this book. The original 'break' here left the
                            # whole per-page loop, silently dropping every remaining
                            # book on the page after the first known BookID.
                            continue

                    if not rejected:
                        if book_status != "Ignored":
                            if not locked:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorName": authorNameResult,
                                    "AuthorID": authorid,
                                    "AuthorLink": None,
                                    "BookName": bookname,
                                    "BookSub": booksub,
                                    "BookDesc": bookdesc,
                                    "BookIsbn": bookisbn,
                                    "BookPub": bookpub,
                                    "BookGenre": None,
                                    "BookImg": bookimg,
                                    "BookLink": booklink,
                                    "BookRate": bookrate,
                                    "BookPages": bookpages,
                                    "BookDate": pubyear,
                                    "BookLang": bookLanguage,
                                    "Status": book_status,
                                    "BookAdded": today(),
                                    "Series": series,
                                    "SeriesNum": seriesNum
                                }

                                resultsCount = resultsCount + 1

                                myDB.upsert("books", newValueDict, controlValueDict)
                                logger.debug(u"Book found: " + book.find('title').text + " " + pubyear)

                            if 'nocover' in bookimg or 'nophoto' in bookimg:
                                # try to get a cover from librarything
                                workcover = getBookCover(bookid)
                                if workcover:
                                    logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": workcover}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            elif bookimg and bookimg.startswith('http'):
                                link = cache_cover(bookid, bookimg)
                                if link is not None:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": link}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            if seriesNum is None:
                                # try to get series info from librarything
                                series, seriesNum = getWorkSeries(bookid)
                                if seriesNum:
                                    logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {
                                        "Series": series,
                                        "SeriesNum": seriesNum
                                    }
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            worklink = getWorkPage(bookid)
                            if worklink:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"WorkPage": worklink}
                                myDB.upsert("books", newValueDict, controlValueDict)

                            if not find_book_status:
                                logger.debug(u"[%s] Added book: %s" % (authorname, bookname))
                                added_count = added_count + 1
                            else:
                                logger.debug(u"[%s] Updated book: %s" % (authorname, bookname))
                                updated_count = updated_count + 1
                        else:
                            book_ignore_count = book_ignore_count + 1

                # fetch the next page of the author's book list
                loopCount = loopCount + 1
                URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + \
                      urllib.urlencode(self.params) + '&page=' + str(loopCount)
                resultxml = None
                try:
                    rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
                    if rootxml is None:
                        logger.debug('Error requesting next page of results')
                    else:
                        resultxml = rootxml.getiterator('book')
                        if not in_cache:
                            api_hits = api_hits + 1
                except Exception as e:
                    resultxml = None
                    logger.error("Error finding next page of results: %s" % e)

                if resultxml is not None:
                    if all(False for book in resultxml):  # returns True if iterator is empty
                        resultxml = None

        # Most recent non-ignored book becomes the author's "last book"
        lastbook = myDB.action('SELECT BookName, BookLink, BookDate from books WHERE AuthorID="%s" \
                                AND Status != "Ignored" order by BookDate DESC' % authorid).fetchone()
        if lastbook:
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
        else:
            lastbookname = None
            lastbooklink = None
            lastbookdate = None

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate
        }
        myDB.upsert("authors", newValueDict, controlValueDict)

        # This is here because GoodReads sometimes has several entries with the same BookID!
        modified_count = added_count + updated_count

        logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
        logger.debug("Removed %s bad language result%s for author" % (ignored, plural(ignored)))
        logger.debug("Removed %s bad character or no-name result%s for author" % (removedResults, plural(removedResults)))
        logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates)))
        logger.debug("Ignored %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count)))
        logger.debug("Imported/Updated %s book%s for author" % (modified_count, plural(modified_count)))

        # The name sits inside a double-quoted SQL literal, so embedded double
        # quotes are doubled to keep the statement well formed.
        myDB.action('insert into stats values ("%s", %i, %i, %i, %i, %i, %i, %i, %i, %i)' %
                    (("%s" % authorname).replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                     cache_hits, ignored, removedResults, not_cached, duplicates))

        if refresh:
            logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                        (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
        else:
            logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                        (authorname, added_count, plural(added_count)))

        return books_dict
Ejemplo n.º 41
0
def addAuthorToDB(authorname=None):
    """Add or refresh an author and their Google Books results in the database.

    Marks the author row as "Loading", looks the author up with
    ``GoodReads.find_author_id()`` and stores the returned details, then
    upserts every ``GoogleBooks.find_results()`` entry whose author name
    matches exactly (status "Skipped"). Finally the author row is set to
    "Active" with the book count and the most recent book.

    :param authorname: author name used for both lookups and as the key of
        the ``authors`` row.
    :return: None. Returns early, leaving the author in "Loading", when
        GoodReads finds nothing for the name.
    """
    search_type = 'author'
    myDB = database.DBConnection()

    GR = GoodReads(authorname, search_type)
    GB = GoogleBooks(authorname, search_type)

    # The name is embedded in single-quoted SQL literals below, so double any
    # single quote (e.g. O'Brien) to keep the statements well formed.
    sql_authorname = ("%s" % authorname).replace("'", "''")

    query = "SELECT * from authors WHERE AuthorName='%s'" % sql_authorname
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # placeholder id until GoodReads supplies the real one
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        # Without an author record there is no authorid/authorlink to attach
        # to the books; carrying on used to fail with NameError further down.
        logger.error("Nothing found")
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    bookscount = 0
    books = GB.find_results()
    for book in books:

        # this is for rare cases where google returns multiple authors who share nameparts
        if book['authorname'] != authorname:
            continue

        controlValueDict = {"BookID": book['bookid']}
        newValueDict = {
            "AuthorName": book['authorname'],
            "AuthorID": authorid,
            # was authorimg: the link column should carry the author link
            "AuthorLink": authorlink,
            "BookName": book['bookname'],
            "BookDesc": book['bookdesc'],
            "BookIsbn": book['bookisbn'],
            "BookImg": book['bookimg'],
            "BookLink": book['booklink'],
            "BookRate": book['bookrate'],
            "BookPages": book['bookpages'],
            "BookDate": book['bookdate'],
            "BookLang": book['booklang'],
            "Status": "Skipped",
            "BookAdded": formatter.today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        bookscount = bookscount + 1

    lastbook = myDB.action(
        "SELECT BookName, BookLink, BookDate from books WHERE AuthorName='%s' order by BookDate DESC"
        % sql_authorname).fetchone()
    if lastbook:
        lastbookname = lastbook['BookName']
        lastbooklink = lastbook['BookLink']
        lastbookdate = lastbook['BookDate']
    else:
        # author has no books stored; subscripting None used to raise TypeError
        lastbookname = None
        lastbooklink = None
        lastbookdate = None

    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "Status": "Active",
        "TotalBooks": bookscount,
        "LastBook": lastbookname,
        "LastLink": lastbooklink,
        "LastDate": lastbookdate
    }

    myDB.upsert("authors", newValueDict, controlValueDict)
    logger.info("Processing complete: Added %s books to the database" %
                bookscount)
Ejemplo n.º 42
0
def addAuthorToDB(authorname=None, refresh=False):
    """
    Add an author to the database by name (or refresh an existing one) and load their books.

    The author row is first upserted with Status "Loading". The author is then
    looked up through GoodReads; if nothing is found the row is deleted and the
    function returns early. Otherwise the author details are stored, the book
    list is fetched through the API selected by lazylibrarian.BOOK_API, and the
    HaveBooks / TotalBooks / UnignoredBooks counters on the author row are
    recalculated from the books table.

    :param authorname: name of the author, used as the key into the authors table
    :param refresh: passed through to the GoodReads / GoogleBooks lookups
    :return: None
    """
    threading.currentThread().name = "DBIMPORT"

    myDB = database.DBConnection()

    GR = GoodReads(authorname)

    # Every statement below filters on the author name. Build the predicate once
    # as a single-quoted SQL string literal with embedded quotes doubled, so that
    # names containing ' or " cannot break the statement, and so that SQLite can
    # never interpret the value as a (double-quoted) identifier.
    where_author = "AuthorName='%s'" % authorname.replace("'", "''")

    dbauthor = myDB.action("SELECT * from authors WHERE " + where_author).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # placeholder id until the real one is known
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.debug("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.debug("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id(refresh=refresh)
    if not author:
        logger.warn(u"Nothing found for %s" % authorname)
        # drop the row again (including the placeholder written above)
        myDB.action("DELETE from authors WHERE " + where_author)
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    # recalculate the per-author counters from the books table
    count_books = 'SELECT count("BookID") as counter from books WHERE ' + where_author

    havebooks = myDB.action(
        count_books + ' AND (Status="Have" OR Status="Open")').fetchone()
    myDB.action(
        ('UPDATE authors set HaveBooks="%s" where ' % havebooks['counter']) + where_author)

    totalbooks = myDB.action(count_books).fetchone()
    myDB.action(
        ('UPDATE authors set TotalBooks="%s" where ' % totalbooks['counter']) + where_author)

    unignoredbooks = myDB.action(
        count_books + ' AND Status!="Ignored"').fetchone()
    myDB.action(
        ('UPDATE authors set UnignoredBooks="%s" where ' % unignoredbooks['counter']) + where_author)

    logger.debug("[%s] Author update complete" % authorname)
Ejemplo n.º 43
0
def addAuthorToDB(authorname=None,
                  refresh=False,
                  authorid=None,
                  addbooks=True):
    """
    Add an author to the database by name or id, and optionally get a list of all their books
    If author already exists in database, refresh their details and optionally booklist

    :param authorname: author name; used for the lookup when no authorid is given
                       or when the lookup by id found nothing
    :param refresh: passed through to the GoodReads / GoogleBooks / image-cache calls;
                    also seeds new_author (a refresh is initially treated as "not new")
    :param authorid: author id; when given it is tried before the name
    :param addbooks: when True fetch the author's books, otherwise mark the author "Ignored"
    :return: a status message string on completion or on an unhandled exception;
             None when no author could be matched
    """
    threadname = threading.currentThread().name
    # only rename anonymous worker threads, leave explicitly named ones alone
    if "Thread-" in threadname:
        threading.currentThread().name = "AddAuthorToDB"
    try:
        myDB = database.DBConnection()
        match = False
        authorimg = ''
        new_author = not refresh

        # --- first attempt: look the author up by id ---
        if authorid:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}

            dbauthor = myDB.match("SELECT * from authors WHERE AuthorID='%s'" %
                                  authorid)
            if not dbauthor:
                authorname = 'unknown author'
                logger.debug("Now adding new author id: %s to database" %
                             authorid)
                new_author = True
            else:
                authorname = dbauthor['authorname']
                logger.debug("Now updating author %s " % authorname)
                new_author = False

            myDB.upsert("authors", newValueDict, controlValueDict)

            GR = GoodReads(authorname)
            author = GR.get_author_info(authorid=authorid)
            if author:
                authorname = author['authorname']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorID": authorid}
                newValueDict = {
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today()
                }
                # rows flagged as manual keep their name/image/dates
                if not dbauthor or (dbauthor and not dbauthor['manual']):
                    newValueDict["AuthorName"] = author['authorname']
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']

                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn(u"Nothing found for %s" % authorid)
                # remove the "Loading" row we just created for an unknown id
                if not dbauthor:
                    myDB.action('DELETE from authors WHERE AuthorID="%s"' %
                                authorid)
                # NOTE(review): for an unknown id authorname is now 'unknown author',
                # so the name lookup below runs with that value - confirm this is intended

        # --- second attempt: look the author up by name ---
        if authorname and not match:
            authorname = ' '.join(
                authorname.split())  # ensure no extra whitespace
            GR = GoodReads(authorname)

            # single-quoted SQL literal with embedded quotes doubled; reused for
            # the DELETE below so both statements quote the name the same way
            sql_authorname = authorname.replace("'", "''")
            query = "SELECT * from authors WHERE AuthorName='%s'" % sql_authorname
            dbauthor = myDB.match(query)
            controlValueDict = {"AuthorName": authorname}

            if not dbauthor:
                # placeholder id until the real one is known
                newValueDict = {
                    "AuthorID": "0: %s" % authorname,
                    "Status": "Loading"
                }
                logger.debug("Now adding new author: %s to database" %
                             authorname)
                new_author = True
            else:
                newValueDict = {"Status": "Loading"}
                logger.debug("Now updating author: %s" % authorname)
                new_author = False
            myDB.upsert("authors", newValueDict, controlValueDict)

            author = GR.find_author_id(refresh=refresh)
            if author:
                authorid = author['authorid']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorName": authorname}
                newValueDict = {
                    "AuthorID": author['authorid'],
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today(),
                    "Status": "Loading"
                }
                # rows flagged as manual keep their image/dates
                if not dbauthor or (dbauthor and not dbauthor['manual']):
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']

                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn(u"Nothing found for %s" % authorname)
                # remove the placeholder row we just created for an unknown name
                if not dbauthor:
                    myDB.action("DELETE from authors WHERE AuthorName='%s'" %
                                sql_authorname)
                return
        if not match:
            logger.error(
                "AddAuthorToDB: No matching result for authorname or authorid")
            return

        # if author is set to manual, should we allow replacing 'nophoto' ?
        new_img = False
        # note: 'match' is reused here to hold the database row, not the earlier flag
        match = myDB.match("SELECT Manual from authors WHERE AuthorID='%s'" %
                           authorid)
        if not match or not match['Manual']:
            if authorimg and 'nophoto' in authorimg:
                newimg = getAuthorImage(authorid)
                if newimg:
                    authorimg = newimg
                    new_img = True

        # allow caching
        if authorimg and authorimg.startswith('http'):
            newimg, success = cache_img("author",
                                        authorid,
                                        authorimg,
                                        refresh=refresh)
            if success:
                authorimg = newimg
                new_img = True
            else:
                logger.debug('Failed to cache image for %s' % authorimg)

        # only write the image back if it was replaced or cached above
        if new_img:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"AuthorImg": authorimg}
            myDB.upsert("authors", newValueDict, controlValueDict)

        if addbooks:
            # status handed to get_author_books depends on whether the author is new
            if new_author:
                bookstatus = lazylibrarian.CONFIG['NEWAUTHOR_STATUS']
            else:
                bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

            # process books
            if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                book_api = GoogleBooks()
                book_api.get_author_books(authorid,
                                          authorname,
                                          bookstatus,
                                          refresh=refresh)
            elif lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                GR = GoodReads(authorname)
                GR.get_author_books(authorid,
                                    authorname,
                                    bookstatus,
                                    refresh=refresh)

            # update totals works for existing authors only.
            # New authors need their totals updating after libraryscan or import of books.
            if not new_author:
                update_totals(authorid)
        else:
            # if we're not loading any books, mark author as ignored
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Ignored"}
            myDB.upsert("authors", newValueDict, controlValueDict)

        msg = "[%s] Author update complete" % authorname
        logger.debug(msg)
        return msg
    except Exception:
        # top-level boundary: log the full traceback and hand it back to the caller
        msg = 'Unhandled exception in addAuthorToDB: %s' % traceback.format_exc(
        )
        logger.error(msg)
        return msg
Ejemplo n.º 44
0
    def get_author_books(self,
                         authorid=None,
                         authorname=None,
                         bookstatus="Skipped",
                         refresh=False):
        try:
            logger.debug('[%s] Now processing books with Google Books API' %
                         authorname)
            # google doesnt like accents in author names
            set_url = self.url + urllib.quote(
                'inauthor:"%s"' % unaccented_str(authorname))

            api_hits = 0
            gr_lang_hits = 0
            lt_lang_hits = 0
            gb_lang_change = 0
            cache_hits = 0
            not_cached = 0
            startindex = 0
            resultcount = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            book_ignore_count = 0
            total_count = 0
            number_results = 1

            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            # Artist is loading
            myDB = database.DBConnection()
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            try:
                while startindex < number_results:

                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urllib.urlencode(self.params)

                    try:
                        jsonresults, in_cache = get_json_request(
                            URL, useCache=not refresh)
                        if not jsonresults:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                    except HTTPError as err:
                        logger.warn(
                            'Google Books API Error [%s]: Check your API key or wait a while'
                            % err.reason)
                        break

                    if number_results == 0:
                        logger.warn('Found no results for %s' % authorname)
                        break
                    else:
                        logger.debug('Found %s result%s for %s' %
                                     (number_results, plural(number_results),
                                      authorname))

                    startindex += 40

                    for item in jsonresults['items']:

                        total_count += 1

                        # skip if no author, no author is no book.
                        try:
                            _ = item['volumeInfo']['authors'][0]
                        except KeyError:
                            logger.debug(
                                'Skipped a result without authorfield.')
                            continue

                        try:
                            if item['volumeInfo']['industryIdentifiers'][0][
                                    'type'] == 'ISBN_10':
                                bookisbn = item['volumeInfo'][
                                    'industryIdentifiers'][0]['identifier']
                            else:
                                bookisbn = ""
                        except KeyError:
                            bookisbn = ""

                        isbnhead = ""
                        if len(bookisbn) == 10:
                            isbnhead = bookisbn[0:3]
                        elif len(bookisbn) == 13:
                            isbnhead = bookisbn[3:6]

                        try:
                            booklang = item['volumeInfo']['language']
                        except KeyError:
                            booklang = "Unknown"

                        # do we care about language?
                        if "All" not in valid_langs:
                            if bookisbn != "":
                                # seems google lies to us, sometimes tells us books
                                # are in english when they are not
                                if booklang == "Unknown" or booklang == "en":
                                    googlelang = booklang
                                    match = False
                                    lang = myDB.match(
                                        'SELECT lang FROM languages where isbn = "%s"'
                                        % isbnhead)
                                    if lang:
                                        booklang = lang['lang']
                                        cache_hits += 1
                                        logger.debug(
                                            "Found cached language [%s] for [%s]"
                                            % (booklang, isbnhead))
                                        match = True
                                    if not match:
                                        # no match in cache, try lookup dict
                                        if isbnhead:
                                            if len(
                                                    bookisbn
                                            ) == 13 and bookisbn.startswith(
                                                    '979'):
                                                for lang in lazylibrarian.isbn_979_dict:
                                                    if isbnhead.startswith(
                                                            lang):
                                                        booklang = lazylibrarian.isbn_979_dict[
                                                            lang]
                                                        logger.debug(
                                                            "ISBN979 returned %s for %s"
                                                            % (booklang,
                                                               isbnhead))
                                                        match = True
                                                        break

                                            elif (len(bookisbn) == 10) or \
                                                    (len(bookisbn) == 13 and bookisbn.startswith('978')):
                                                for lang in lazylibrarian.isbn_978_dict:
                                                    if isbnhead.startswith(
                                                            lang):
                                                        booklang = lazylibrarian.isbn_978_dict[
                                                            lang]
                                                        logger.debug(
                                                            "ISBN979 returned %s for %s"
                                                            % (booklang,
                                                               isbnhead))
                                                        match = True
                                                        break

                                            if match:
                                                myDB.action(
                                                    'insert into languages values ("%s", "%s")'
                                                    % (isbnhead, booklang))
                                                logger.debug(u"GB language: " +
                                                             booklang)

                                    if not match:
                                        # try searching librarything for a language code using the isbn
                                        # if no language found, librarything return value is "invalid" or "unknown"
                                        # librarything returns plain text, not xml
                                        BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + bookisbn
                                        try:
                                            librarything_wait()
                                            resp = urllib2.urlopen(
                                                BOOK_URL, timeout=30).read()
                                            lt_lang_hits += 1
                                            logger.debug(
                                                "LibraryThing reports language [%s] for %s"
                                                % (resp, isbnhead))

                                            if resp != 'invalid' and resp != 'unknown':
                                                booklang = resp  # found a language code
                                                match = True
                                                myDB.action(
                                                    'insert into languages values ("%s", "%s")'
                                                    % (isbnhead, booklang))
                                                logger.debug(u"LT language: " +
                                                             booklang)
                                        except Exception as e:
                                            booklang = ""
                                            logger.error(
                                                "Error finding language: %s" %
                                                str(e))

                                    if match:
                                        # We found a better language match
                                        if googlelang == "en" and booklang not in [
                                                "en-US", "en-GB", "eng"
                                        ]:
                                            # these are all english, may need to expand this list
                                            booknamealt = item['volumeInfo'][
                                                'title']
                                            logger.debug(
                                                "%s Google thinks [%s], we think [%s]"
                                                % (booknamealt, googlelang,
                                                   booklang))
                                            gb_lang_change += 1
                                    else:  # No match anywhere, accept google language
                                        booklang = googlelang

                            # skip if language is in ignore list
                            if booklang not in valid_langs:
                                booknamealt = item['volumeInfo']['title']
                                logger.debug('Skipped [%s] with language %s' %
                                             (booknamealt, booklang))
                                ignored += 1
                                continue

                        try:
                            bookpub = item['volumeInfo']['publisher']
                        except KeyError:
                            bookpub = ""

                        try:
                            booksub = item['volumeInfo']['subtitle']
                        except KeyError:
                            booksub = ""

                        if not booksub:
                            series = ""
                            seriesNum = ""
                        else:
                            try:
                                series = booksub.split('(')[1].split(
                                    ' Series ')[0]
                            except IndexError:
                                series = ""
                            if series.endswith(')'):
                                series = series[:-1]
                            try:
                                seriesNum = booksub.split('(')[1].split(
                                    ' Series ')[1].split(')')[0]
                                if seriesNum[0] == '#':
                                    seriesNum = seriesNum[1:]
                            except IndexError:
                                seriesNum = ""

                            if not seriesNum and '#' in series:
                                words = series.rsplit('#', 1)
                                series = words[0].strip()
                                seriesNum = words[1].strip()
                            if not seriesNum and ' ' in series:
                                words = series.rsplit(' ', 1)
                                # has to be unicode for isnumeric()
                                if (u"%s" % words[1]).isnumeric():
                                    series = words[0]
                                    seriesNum = words[1]

                        try:
                            bookdate = item['volumeInfo']['publishedDate']
                        except KeyError:
                            bookdate = '0000-00-00'

                        try:
                            bookimg = item['volumeInfo']['imageLinks'][
                                'thumbnail']
                        except KeyError:
                            bookimg = 'images/nocover.png'

                        try:
                            bookrate = item['volumeInfo']['averageRating']
                        except KeyError:
                            bookrate = 0

                        try:
                            bookpages = item['volumeInfo']['pageCount']
                        except KeyError:
                            bookpages = 0

                        try:
                            bookgenre = item['volumeInfo']['categories'][0]
                        except KeyError:
                            bookgenre = ""

                        try:
                            bookdesc = item['volumeInfo']['description']
                        except KeyError:
                            bookdesc = ""

                        bookname = item['volumeInfo']['title']
                        bookname = unaccented(bookname)
                        dic = {':': '.', '"': '', '\'': ''}
                        bookname = replace_all(bookname, dic)
                        bookname = bookname.strip()  # strip whitespace

                        booklink = item['volumeInfo']['canonicalVolumeLink']
                        bookrate = float(bookrate)
                        bookid = item['id']

                        # GoodReads sometimes has multiple bookids for the same book (same author/title, different editions)
                        # and sometimes uses the same bookid if the book is the same but the title is slightly different
                        #
                        # Not sure if googlebooks does too, but we only want one...
                        existing_book = myDB.match(
                            'SELECT Status,Manual FROM books WHERE BookID = "%s"'
                            % bookid)
                        if existing_book:
                            book_status = existing_book['Status']
                            locked = existing_book['Manual']
                            if locked is None:
                                locked = False
                            elif locked.isdigit():
                                locked = bool(int(locked))
                        else:
                            book_status = bookstatus  # new_book status, or new_author status
                            locked = False

                        rejected = False
                        check_status = False

                        if re.match(
                                '[^\w-]', bookname
                        ):  # remove books with bad characters in title
                            logger.debug(
                                "[%s] removed book for bad characters" %
                                bookname)
                            removedResults += 1
                            rejected = True

                        if not rejected and not bookname:
                            logger.debug(
                                'Rejecting bookid %s for %s, no bookname' %
                                (bookid, authorname))
                            removedResults += 1
                            rejected = True

                        if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                            # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                            if bookdate > today()[:len(bookdate)]:
                                logger.debug(
                                    'Rejecting %s, future publication date %s'
                                    % (bookname, bookdate))
                                removedResults += 1
                                rejected = True

                        if not rejected:
                            cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                            cmd += ' and BookName = "%s" COLLATE NOCASE and AuthorName = "%s" COLLATE NOCASE'% \
                                    (bookname.replace('"', '""'), authorname.replace('"', '""'))
                            match = myDB.match(cmd)
                            if match:
                                if match['BookID'] != bookid:
                                    # we have a different book with this author/title already
                                    logger.debug(
                                        'Rejecting bookid %s for [%s][%s] already got %s'
                                        % (match['BookID'], authorname,
                                           bookname, bookid))
                                    rejected = True
                                    duplicates += 1

                        if not rejected:
                            cmd = 'SELECT AuthorName,BookName FROM books,authors'
                            cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID="%s"' % bookid
                            match = myDB.match(cmd)
                            if match:
                                # we have a book with this bookid already
                                if bookname != match[
                                        'BookName'] or authorname != match[
                                            'AuthorName']:
                                    logger.debug(
                                        'Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]'
                                        % (bookid, authorname, bookname,
                                           match['AuthorName'],
                                           match['BookName']))
                                else:
                                    logger.debug(
                                        'Rejecting bookid %s for [%s][%s] already got this book in database'
                                        % (bookid, authorname, bookname))
                                    check_status = True
                                duplicates += 1
                                rejected = True

                        if check_status or not rejected:
                            if book_status != "Ignored" and not locked:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorID": authorid,
                                    "BookName": bookname,
                                    "BookSub": booksub,
                                    "BookDesc": bookdesc,
                                    "BookIsbn": bookisbn,
                                    "BookPub": bookpub,
                                    "BookGenre": bookgenre,
                                    "BookImg": bookimg,
                                    "BookLink": booklink,
                                    "BookRate": bookrate,
                                    "BookPages": bookpages,
                                    "BookDate": bookdate,
                                    "BookLang": booklang,
                                    "Status": book_status,
                                    "BookAdded": today()
                                }
                                resultcount += 1

                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                                logger.debug(u"Book found: " + bookname + " " +
                                             bookdate)
                                updated = False
                                if 'nocover' in bookimg or 'nophoto' in bookimg:
                                    # try to get a cover from librarything
                                    workcover = getBookCover(bookid)
                                    if workcover:
                                        logger.debug(
                                            u'Updated cover for %s to %s' %
                                            (bookname, workcover))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": workcover}
                                        myDB.upsert("books", newValueDict,
                                                    controlValueDict)
                                        updated = True

                                elif bookimg and bookimg.startswith('http'):
                                    link, success = cache_img("book",
                                                              bookid,
                                                              bookimg,
                                                              refresh=refresh)
                                    if success:
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": link}
                                        myDB.upsert("books", newValueDict,
                                                    controlValueDict)
                                        updated = True
                                    else:
                                        logger.debug(
                                            'Failed to cache image for %s' %
                                            bookimg)

                                seriesdict = {}
                                if lazylibrarian.CONFIG['ADD_SERIES']:
                                    # prefer series info from librarything
                                    seriesdict = getWorkSeries(bookid)
                                    if seriesdict:
                                        logger.debug(
                                            u'Updated series: %s [%s]' %
                                            (bookid, seriesdict))
                                        updated = True
                                    else:
                                        # librarything doesn't have series info. Any in the title?
                                        if series:
                                            seriesdict = {
                                                cleanName(unaccented(series)):
                                                seriesNum
                                            }
                                    setSeries(seriesdict, bookid)

                                new_status = setStatus(bookid, seriesdict,
                                                       bookstatus)

                                if not new_status == book_status:
                                    book_status = new_status
                                    updated = True

                                worklink = getWorkPage(bookid)
                                if worklink:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"WorkPage": worklink}
                                    myDB.upsert("books", newValueDict,
                                                controlValueDict)

                                if not existing_book:
                                    logger.debug(
                                        "[%s] Added book: %s [%s] status %s" %
                                        (authorname, bookname, booklang,
                                         book_status))
                                    added_count += 1
                                elif updated:
                                    logger.debug(
                                        "[%s] Updated book: %s [%s] status %s"
                                        % (authorname, bookname, booklang,
                                           book_status))
                                    updated_count += 1
                            else:
                                book_ignore_count += 1
            except KeyError:
                pass

            deleteEmptySeries()
            logger.debug(
                '[%s] The Google Books API was hit %s time%s to populate book list'
                % (authorname, api_hits, plural(api_hits)))

            lastbook = myDB.match(
                'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID="%s" \
                               AND Status != "Ignored" order by BookDate DESC'
                % authorid)

            if lastbook:  # maybe there are no books [remaining] for this author
                lastbookname = lastbook['BookName']
                lastbooklink = lastbook['BookLink']
                lastbookdate = lastbook['BookDate']
                lastbookimg = lastbook['BookImg']
            else:
                lastbookname = ""
                lastbooklink = ""
                lastbookdate = ""
                lastbookimg = ""

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {
                "Status": "Active",
                "LastBook": lastbookname,
                "LastLink": lastbooklink,
                "LastDate": lastbookdate,
                "LastBookImg": lastbookimg
            }

            myDB.upsert("authors", newValueDict, controlValueDict)

            logger.debug("Found %s total book%s for author" %
                         (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s for author" %
                         (ignored, plural(ignored)))
            logger.debug(
                "Removed %s bad character or no-name result%s for author" %
                (removedResults, plural(removedResults)))
            logger.debug("Removed %s duplicate result%s for author" %
                         (duplicates, plural(duplicates)))
            logger.debug("Found %s book%s by author marked as Ignored" %
                         (book_ignore_count, plural(book_ignore_count)))
            logger.debug("Imported/Updated %s book%s for author" %
                         (resultcount, plural(resultcount)))

            myDB.action(
                'insert into stats values ("%s", %i, %i, %i, %i, %i, %i, %i, %i, %i)'
                % (authorname.replace('"', '""'), api_hits, gr_lang_hits,
                   lt_lang_hits, gb_lang_change, cache_hits, ignored,
                   removedResults, not_cached, duplicates))

            if refresh:
                logger.info(
                    "[%s] Book processing complete: Added %s book%s / Updated %s book%s"
                    % (authorname, added_count, plural(added_count),
                       updated_count, plural(updated_count)))
            else:
                logger.info(
                    "[%s] Book processing complete: Added %s book%s to the database"
                    % (authorname, added_count, plural(added_count)))

        except Exception:
            logger.error('Unhandled exception in GB.get_author_books: %s' %
                         traceback.format_exc())
Ejemplo n.º 45
0
    def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped",
                         entrystatus='Active', refresh=False):
        """Import or refresh one author's books using the Google Books API.

        Pages through the ``inauthor:"<name>"`` search results 40 at a time.
        Each result is filtered by language, then checked against a series of
        rejection rules, and the survivors are upserted into the ``books``
        table. Finally the author row is updated with its latest book and one
        row of counters is inserted into ``stats``.

        Rejection codes held in the local ``rejected``:
            0 accepted, 1 no book name, 2 bad leading character in the name,
            3 future publication date, 4 no publication date, 5 no isbn,
            6 another bookid already has this author/title,
            7 this bookid is already in the database.
        Codes 3, 4 and 5 are still imported (as "Ignored") when
        ``CONFIG['IMP_IGNORE']`` is set.

        :param authorid: AuthorID of the ``authors`` row being populated.
        :param authorname: author name used in the search query and in logs.
        :param bookstatus: status for books that are not in the database yet.
        :param entrystatus: status written to the author row when finished.
        :param refresh: when True the json request is made with
            ``useCache=False``; the flag is also passed to ``cache_img`` and
            selects the wording of the final log line.
        :return: None. Unexpected exceptions are logged, not raised.
        """
        # noinspection PyBroadException
        try:
            logger.debug('[%s] Now processing books with Google Books API' % authorname)
            # google doesnt like accents in author names
            set_url = self.url + quote('inauthor:"%s"' % unaccented_str(authorname))

            # counters reported in the debug summary and the stats table
            # (gr_lang_hits and not_cached are never incremented in this method
            # but are still written to stats)
            api_hits = 0
            gr_lang_hits = 0
            lt_lang_hits = 0
            gb_lang_change = 0
            cache_hits = 0
            not_cached = 0
            startindex = 0
            resultcount = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            book_ignore_count = 0
            total_count = 0
            number_results = 1  # forces the first pass through the paging loop

            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            # Author is loading
            myDB = database.DBConnection()
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            try:
                while startindex < number_results:

                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urlencode(self.params)

                    try:
                        jsonresults, in_cache = gb_json_request(URL, useCache=not refresh)
                        if jsonresults is None:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                    except Exception as err:
                        if hasattr(err, 'reason'):
                            errmsg = err.reason
                        else:
                            errmsg = str(err)
                        logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                        break

                    if number_results == 0:
                        logger.warn('Found no results for %s' % authorname)
                        break
                    else:
                        logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname))

                    startindex += 40

                    # a page without 'items' raises KeyError, which ends the paging (handled below)
                    for item in jsonresults['items']:

                        total_count += 1
                        book = bookdict(item)
                        # skip if no author, no author is no book.
                        if not book['author']:
                            logger.debug('Skipped a result without authorfield.')
                            continue

                        # isbnhead is the slice used as key into the language lookups
                        isbnhead = ""
                        if len(book['isbn']) == 10:
                            isbnhead = book['isbn'][0:3]
                        elif len(book['isbn']) == 13:
                            isbnhead = book['isbn'][3:6]

                        booklang = book['lang']
                        # do we care about language?
                        if "All" not in valid_langs:
                            if book['isbn']:
                                # seems google lies to us, sometimes tells us books are in english when they are not
                                if booklang == "Unknown" or booklang == "en":
                                    googlelang = booklang
                                    match = False
                                    lang = myDB.match('SELECT lang FROM languages where isbn=?', (isbnhead,))
                                    if lang:
                                        booklang = lang['lang']
                                        cache_hits += 1
                                        logger.debug("Found cached language [%s] for [%s]" % (booklang, isbnhead))
                                        match = True
                                    if not match:  # no match in cache, try lookup dict
                                        if isbnhead:
                                            if len(book['isbn']) == 13 and book['isbn'].startswith('979'):
                                                for lang in lazylibrarian.isbn_979_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_979_dict[lang]
                                                        logger.debug("ISBN979 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            elif (len(book['isbn']) == 10) or \
                                                    (len(book['isbn']) == 13 and book['isbn'].startswith('978')):
                                                for lang in lazylibrarian.isbn_978_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_978_dict[lang]
                                                        # this is the 978 table, label the log line accordingly
                                                        logger.debug("ISBN978 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            if match:
                                                myDB.action('insert into languages values (?, ?)',
                                                            (isbnhead, booklang))

                                    if not match:
                                        booklang = thingLang(book['isbn'])
                                        lt_lang_hits += 1
                                        if booklang:
                                            match = True
                                            myDB.action('insert into languages values (?, ?)', (isbnhead, booklang))

                                    if match:
                                        # We found a better language match
                                        if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]:
                                            # these are all english, may need to expand this list
                                            logger.debug("%s Google thinks [%s], we think [%s]" %
                                                         (book['name'], googlelang, booklang))
                                            gb_lang_change += 1
                                    else:  # No match anywhere, accept google language
                                        booklang = googlelang

                            # skip if language is in ignore list
                            if booklang not in valid_langs:
                                logger.debug('Skipped [%s] with language %s' % (book['name'], booklang))
                                ignored += 1
                                continue

                        # defaults for a book we have not seen before
                        rejected = 0
                        check_status = False
                        book_status = bookstatus  # new_book status, or new_author status
                        audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
                        added = today()
                        locked = False
                        existing_book = None
                        bookname = book['name']
                        bookid = item['id']
                        if not bookname:
                            logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorname))
                            removedResults += 1
                            rejected = 1
                        else:
                            bookname = replace_all(unaccented(bookname), {':': '.', '"': '', '\'': ''}).strip()
                            # GoodReads sometimes has multiple bookids for the same book (same author/title, different
                            # editions) and sometimes uses the same bookid if the book is the same but the title is
                            # slightly different. Not sure if googlebooks does too, but we only want one...
                            cmd = 'SELECT Status,AudioStatus,Manual,BookAdded FROM books WHERE BookID=?'
                            existing_book = myDB.match(cmd, (bookid,))
                            if existing_book:
                                # keep whatever the database already says about this book
                                book_status = existing_book['Status']
                                audio_status = existing_book['AudioStatus']
                                locked = existing_book['Manual']
                                added = existing_book['BookAdded']
                                if locked is None:
                                    locked = False
                                elif locked.isdigit():
                                    locked = bool(int(locked))

                        # remove books with bad characters in title
                        # (re.match only tests the first character of the name)
                        if not rejected and re.match(r'[^\w-]', bookname):
                            logger.debug("[%s] removed book for bad characters" % bookname)
                            removedResults += 1
                            rejected = 2

                        if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                            # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                            if book['date'] > today()[:len(book['date'])]:
                                logger.debug('Rejecting %s, future publication date %s' % (bookname, book['date']))
                                removedResults += 1
                                rejected = 3

                        if not rejected and lazylibrarian.CONFIG['NO_PUBDATE']:
                            if not book['date']:
                                logger.debug('Rejecting %s, no publication date' % bookname)
                                removedResults += 1
                                rejected = 4

                        if not rejected and lazylibrarian.CONFIG['NO_ISBN']:
                            if not isbnhead:
                                logger.debug('Rejecting %s, no isbn' % bookname)
                                removedResults += 1
                                rejected = 5

                        if rejected in (3, 4, 5) and not existing_book:
                            # A new book rejected for dates/isbn is only stored when IMP_IGNORE is set
                            # (see the upsert condition below) and then it goes in as Ignored.
                            # This test used to sit before 'rejected' could be non-zero, so never fired.
                            book_status = 'Ignored'
                            audio_status = 'Ignored'

                        if not rejected:
                            cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                            cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                            # values are bound by the placeholders, so they must not be quote-escaped here
                            match = myDB.match(cmd, (bookname, authorname))
                            if match:
                                if match['BookID'] != bookid:  # we have a different book with this author/title already
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                                 (bookid, authorname, bookname, match['BookID']))
                                    rejected = 6
                                    duplicates += 1

                        if not rejected:
                            cmd = 'SELECT AuthorName,BookName FROM books,authors'
                            cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                            match = myDB.match(cmd, (bookid,))
                            if match:  # we have a book with this bookid already
                                if bookname != match['BookName'] or authorname != match['AuthorName']:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                                 (bookid, authorname, bookname, match['AuthorName'], match['BookName']))
                                else:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                                 (bookid, authorname, bookname))
                                    # same book: still fall through to refresh its details
                                    check_status = True
                                duplicates += 1
                                rejected = 7

                        if check_status or not rejected or (
                                lazylibrarian.CONFIG['IMP_IGNORE'] and rejected in [3, 4, 5]):  # dates, isbn
                            if not locked:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorID": authorid,
                                    "BookName": bookname,
                                    "BookSub": book['sub'],
                                    "BookDesc": book['desc'],
                                    "BookIsbn": book['isbn'],
                                    "BookPub": book['pub'],
                                    "BookGenre": book['genre'],
                                    "BookImg": book['img'],
                                    "BookLink": book['link'],
                                    "BookRate": float(book['rate']),
                                    "BookPages": book['pages'],
                                    "BookDate": book['date'],
                                    "BookLang": booklang,
                                    "Status": book_status,
                                    "AudioStatus": audio_status,
                                    "BookAdded": added
                                }
                                resultcount += 1

                                myDB.upsert("books", newValueDict, controlValueDict)
                                logger.debug("Book found: " + bookname + " " + book['date'])
                                updated = False
                                if 'nocover' in book['img'] or 'nophoto' in book['img']:
                                    # try to get a cover from another source
                                    workcover, source = getBookCover(bookid)
                                    if workcover:
                                        logger.debug('Updated cover for %s using %s' % (bookname, source))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": workcover}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True

                                elif book['img'] and book['img'].startswith('http'):
                                    link, success, _ = cache_img("book", bookid, book['img'], refresh=refresh)
                                    if success:
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": link}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True
                                    else:
                                        logger.debug('Failed to cache image for %s' % book['img'])

                                # series from the google result, replaced by getWorkSeries() if that finds any
                                serieslist = []
                                if book['series']:
                                    serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))]
                                if lazylibrarian.CONFIG['ADD_SERIES']:
                                    newserieslist = getWorkSeries(bookid)
                                    if newserieslist:
                                        serieslist = newserieslist
                                        logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
                                        updated = True
                                setSeries(serieslist, bookid)

                                new_status = setStatus(bookid, serieslist, bookstatus)

                                if new_status != book_status:
                                    book_status = new_status
                                    updated = True

                                worklink = getWorkPage(bookid)
                                if worklink:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"WorkPage": worklink}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                                if not existing_book:
                                    logger.debug("[%s] Added book: %s [%s] status %s" %
                                                 (authorname, bookname, booklang, book_status))
                                    added_count += 1
                                elif updated:
                                    logger.debug("[%s] Updated book: %s [%s] status %s" %
                                                 (authorname, bookname, booklang, book_status))
                                    updated_count += 1
                            else:
                                book_ignore_count += 1
            except KeyError as err:
                # best effort: a missing key in the response just ends the import loop,
                # but leave a trace of which key it was
                logger.debug('[%s] Stopped reading results, missing key %s' % (authorname, err))

            deleteEmptySeries()
            logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' %
                         (authorname, api_hits, plural(api_hits)))
            cmd = 'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID=?'
            cmd += ' AND Status != "Ignored" order by BookDate DESC'
            lastbook = myDB.match(cmd, (authorid,))

            if lastbook:  # maybe there are no books [remaining] for this author
                lastbookname = lastbook['BookName']
                lastbooklink = lastbook['BookLink']
                lastbookdate = lastbook['BookDate']
                lastbookimg = lastbook['BookImg']
            else:
                lastbookname = ""
                lastbooklink = ""
                lastbookdate = ""
                lastbookimg = ""

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {
                "Status": entrystatus,
                "LastBook": lastbookname,
                "LastLink": lastbooklink,
                "LastDate": lastbookdate,
                "LastBookImg": lastbookimg
            }

            myDB.upsert("authors", newValueDict, controlValueDict)

            logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s for author" % (ignored, plural(ignored)))
            logger.debug("Removed %s bad character or no-name result%s for author" %
                         (removedResults, plural(removedResults)))
            logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates)))
            logger.debug("Found %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count)))
            logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount)))

            # authorname is bound by a placeholder, so it is stored as-is (no quote doubling)
            myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                        (authorname, api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                         cache_hits, ignored, removedResults, not_cached, duplicates))

            if refresh:
                logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                            (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
            else:
                logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                            (authorname, added_count, plural(added_count)))

        except Exception:
            logger.error('Unhandled exception in GB.get_author_books: %s' % traceback.format_exc())
Ejemplo n.º 46
0
    def find_book(bookid=None, queue=None):
        """Fetch one Google Books volume and add it to the database as "Wanted".

        Requests the volume ``bookid`` from the Google Books API, resolves the
        first listed author through ``GoodReads(...).find_author_id()`` and
        upserts the book into ``books``. If the author is not in ``authors``
        yet, an author row is created with status "Ignored". Afterwards the
        cover, series and work page are filled in where available.

        :param bookid: Google Books volume id; also used as BookID.
        :param queue: not used by this function.
        :return: None. Returns early (after logging) when the request gives no
            result, the volume has no author, or no author id can be found.
        """
        myDB = database.DBConnection()
        if not lazylibrarian.CONFIG['GB_API']:
            logger.warn('No GoogleBooks API key, check config')
        URL = 'https://www.googleapis.com/books/v1/volumes/' + \
              str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
        jsonresults, _ = get_json_request(URL)

        if not jsonresults:
            logger.debug('No results found for %s' % bookid)
            return

        info = jsonresults['volumeInfo']

        bookname = info['title']
        dic = {':': '.', '"': '', '\'': ''}
        bookname = replace_all(bookname, dic)

        bookname = unaccented(bookname)
        bookname = bookname.strip()  # strip whitespace

        try:
            authorname = info['authors'][0]
        except (KeyError, IndexError):  # field missing, or present but empty
            logger.debug('Book %s does not contain author field, skipping' %
                         bookname)
            return
        try:
            # warn if language is in ignore list, but user said they wanted this book
            booklang = info['language']
            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            if booklang not in valid_langs and 'All' not in valid_langs:
                logger.debug(
                    'Book %s googlebooks language does not match preference, %s'
                    % (bookname, booklang))
        except KeyError:
            logger.debug('Book does not have language field')
            booklang = "Unknown"

        bookpub = info.get('publisher', "")

        # series name/number are parsed from a subtitle like "... (Name Series #3)"
        series = ""
        seriesNum = ""
        try:
            booksub = info['subtitle']
            try:
                series = booksub.split('(')[1].split(' Series ')[0]
            except IndexError:
                series = ""
            try:
                seriesNum = booksub.split('(')[1].split(' Series ')[1].split(
                    ')')[0]
                # an empty seriesNum raises IndexError here and is reset below
                if seriesNum[0] == '#':
                    seriesNum = seriesNum[1:]
            except IndexError:
                seriesNum = ""
        except KeyError:
            booksub = ""

        bookdate = info.get('publishedDate', '0000-00-00')

        try:
            bookimg = info['imageLinks']['thumbnail']
        except KeyError:
            bookimg = 'images/nocover.png'

        bookrate = info.get('averageRating', 0)
        bookpages = info.get('pageCount', 0)

        try:
            bookgenre = info['categories'][0]
        except (KeyError, IndexError):
            bookgenre = ""

        bookdesc = info.get('description', "")

        # only the first identifier is looked at, and only if it is an ISBN_10
        try:
            identifier = info['industryIdentifiers'][0]
            if identifier['type'] == 'ISBN_10':
                bookisbn = identifier['identifier']
            else:
                bookisbn = ""
        except (KeyError, IndexError):
            bookisbn = ""

        booklink = info['canonicalVolumeLink']
        bookrate = float(bookrate)

        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if author:
            AuthorID = author['authorid']
            # bind values with placeholders: author names may contain quotes
            match = myDB.match(
                'SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
            if not match:
                match = myDB.match(
                    'SELECT AuthorID from authors WHERE AuthorName=?',
                    (author['authorname'],))
                if match:
                    logger.debug(
                        '%s: Changing authorid from %s to %s' %
                        (author['authorname'], AuthorID, match['AuthorID']))
                    AuthorID = match[
                        'AuthorID']  # we have a different authorid for that authorname
                else:  # no author but request to add book, add author as "ignored"
                    # User hit "add book" button from a search
                    controlValueDict = {"AuthorID": AuthorID}
                    newValueDict = {
                        "AuthorName": author['authorname'],
                        "AuthorImg": author['authorimg'],
                        "AuthorLink": author['authorlink'],
                        "AuthorBorn": author['authorborn'],
                        "AuthorDeath": author['authordeath'],
                        "DateAdded": today(),
                        "Status": "Ignored"
                    }
                    myDB.upsert("authors", newValueDict, controlValueDict)
        else:
            logger.warn("No AuthorID for %s, unable to add book %s" %
                        (authorname, bookname))
            return

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorID": AuthorID,
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": bookgenre,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": booklang,
            "Status": "Wanted",
            "BookAdded": today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.info("%s added to the books database" % bookname)

        if 'nocover' in bookimg or 'nophoto' in bookimg:
            # try to get a cover from librarything
            workcover = getBookCover(bookid)
            if workcover:
                logger.debug(u'Updated cover for %s to %s' %
                             (bookname, workcover))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)

        # This branch must pair with the nocover test above, not with 'if workcover':
        # nested inside it, a real http thumbnail was never cached.
        elif bookimg and bookimg.startswith('http'):
            link, success = cache_img("book", bookid, bookimg)
            if success:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": link}
                myDB.upsert("books", newValueDict, controlValueDict)
            else:
                logger.debug('Failed to cache image for %s' % bookimg)

        if lazylibrarian.CONFIG['ADD_SERIES']:
            # prefer series info from librarything
            seriesdict = getWorkSeries(bookid)
            if seriesdict:
                logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
            else:
                # nothing from getWorkSeries, fall back to what the subtitle gave us
                if series:
                    seriesdict = {cleanName(unaccented(series)): seriesNum}
            setSeries(seriesdict, bookid)

        worklink = getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
Ejemplo n.º 47
0
def processDir(reset=False):
    """Post-process downloads for every 'wanted' entry marked as Snatched.

    For each snatched entry the download directory is scanned for the
    file/folder whose name best fuzzy-matches the snatched title (asking the
    torrent client for the current name first, where possible).  A match at or
    above lazylibrarian.DLOAD_RATIO is handed to processDestination(); the
    database is then updated to "Processed" or "Failed" accordingly.
    Afterwards any remaining "LL.(bookid)" items in the download directory are
    imported, and snatched entries older than lazylibrarian.TASK_AGE hours are
    marked as failed and removed from the downloader.

    Args:
        reset: when true, restart the scheduled 'processDir' job at the end.

    Returns:
        None.  Any unexpected exception is logged rather than propagated.
    """
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            threading.currentThread().name = "POSTPROCESS"
        processpath = lazylibrarian.DIRECTORY('Download')

        logger.debug('Checking [%s] for files to post process' % processpath)

        try:
            downloads = os.listdir(processpath)
        except OSError as why:
            logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
            return

        myDB = database.DBConnection()
        snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

        if not snatched:
            logger.info('Nothing marked as snatched.')
            scheduleJob(action='Stop', target='processDir')
            return

        if not downloads:
            logger.info('No downloads are found. Nothing to process yet.')
            return

        # extensions of failed or incomplete downloads that must be left alone
        skip_extns = ['.fail', '.part', '.bts', '.!ut']
        # Characters we don't want in the destination name. These are removed BEFORE
        # joining to a base directory as windows drive identifiers have a colon, eg c:
        dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
               ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}

        logger.info("Checking %s download%s for %s snatched file%s" %
                    (len(downloads), plural(len(downloads)), len(snatched), plural(len(snatched))))
        ppcount = 0
        for book in snatched:
            # if torrent, see if we can get current status from the downloader as the name
            # may have been changed once magnet resolved, or download started or completed
            # depending on torrent downloader. Usenet doesn't change the name. We like usenet.
            torrentname = ''
            try:
                logger.debug("%s was sent to %s" % (book['NZBtitle'], book['Source']))
                if book['Source'] == 'TRANSMISSION':
                    torrentname = transmission.getTorrentFolder(book['DownloadID'])
                elif book['Source'] == 'UTORRENT':
                    torrentname = utorrent.nameTorrent(book['DownloadID'])
                elif book['Source'] == 'RTORRENT':
                    torrentname = rtorrent.getName(book['DownloadID'])
                elif book['Source'] == 'QBITTORRENT':
                    torrentname = qbittorrent.getName(book['DownloadID'])
                elif book['Source'] == 'SYNOLOGY_TOR':
                    torrentname = synology.getName(book['DownloadID'])
                elif book['Source'] == 'DELUGEWEBUI':
                    torrentname = deluge.getTorrentFolder(book['DownloadID'])
                elif book['Source'] == 'DELUGERPC':
                    client = DelugeRPCClient(lazylibrarian.DELUGE_HOST,
                                             int(lazylibrarian.DELUGE_PORT),
                                             lazylibrarian.DELUGE_USER,
                                             lazylibrarian.DELUGE_PASS)
                    try:
                        client.connect()
                        result = client.call('core.get_torrent_status', book['DownloadID'], {})
                        if 'name' in result:
                            torrentname = unaccented_str(result['name'])
                    except Exception as e:
                        logger.debug('DelugeRPC failed %s' % str(e))
            except Exception as e:
                # best effort only: fall back to the title we stored when snatching
                logger.debug("Failed to get updated torrent name from %s for %s: %s" %
                             (book['Source'], book['DownloadID'], str(e)))

            matchtitle = unaccented_str(book['NZBtitle'])
            if torrentname and torrentname != matchtitle:
                logger.debug("%s Changing [%s] to [%s]" % (book['Source'], matchtitle, torrentname))
                # torrentname comes from the downloader, so bind it as a parameter
                # instead of interpolating it into the SQL text
                myDB.action('UPDATE wanted SET NZBtitle=? WHERE NZBurl=?', [torrentname, book['NZBurl']])
                matchtitle = torrentname

            # here we could also check percentage downloaded or eta or status?
            # If downloader says it hasn't completed, no need to look for it.

            matches = []
            logger.info('Looking for %s in %s' % (matchtitle, processpath))
            for fname in downloads:
                # skip if failed before or incomplete torrents, or incomplete btsync
                extn = os.path.splitext(fname)[1]
                if extn not in skip_extns:
                    # This is to get round differences in torrent filenames.
                    # Usenet is ok, but Torrents aren't always returned with the name we searched for
                    # We ask the torrent downloader for the torrent name, but don't always get an answer
                    # so we try to do a "best match" on the name, there might be a better way...
                    if isinstance(fname, str):
                        matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                    else:
                        matchname = fname
                    # compare without the " LL.(bookid)" suffix
                    if ' LL.(' in matchname:
                        matchname = matchname.split(' LL.(')[0]

                    match = 0
                    if matchtitle:
                        if ' LL.(' in matchtitle:
                            matchtitle = matchtitle.split(' LL.(')[0]
                        match = fuzz.token_set_ratio(matchtitle, matchname)
                    if match and match >= lazylibrarian.DLOAD_RATIO:
                        fname = matchname
                        if os.path.isfile(os.path.join(processpath, fname)):
                            # handle single file downloads here. Book/mag file in download root.
                            # move the file into its own subdirectory so we don't move/delete things that aren't ours
                            logger.debug('filename [%s] is a file' % os.path.join(processpath, fname))
                            if is_valid_booktype(fname, booktype="book") \
                                    or is_valid_booktype(fname, booktype="mag"):
                                logger.debug('filename [%s] is a valid book/mag' % os.path.join(processpath, fname))
                                if bts_file(processpath):
                                    logger.debug("Skipping %s, found a .bts file" % processpath)
                                else:
                                    fname = os.path.splitext(fname)[0]
                                    dirname = os.path.join(processpath, fname)
                                    if not os.path.exists(dirname):
                                        try:
                                            os.makedirs(dirname)
                                            setperm(dirname)
                                        except OSError as why:
                                            logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror))
                                    if os.path.exists(dirname):
                                        # move the book and any related files too
                                        # ie other book formats, or opf, jpg with same title
                                        # can't move metadata.opf or cover.jpg or similar
                                        # as can't be sure they are ours
                                        # not sure if we need a new listdir here, or whether we can use the old one
                                        list_dir = os.listdir(processpath)
                                        for ourfile in list_dir:
                                            if ourfile.startswith(fname):
                                                if is_valid_booktype(ourfile, booktype="book") \
                                                    or is_valid_booktype(ourfile, booktype="mag") \
                                                        or os.path.splitext(ourfile)[1].lower() in ['.opf', '.jpg']:
                                                    try:
                                                        if lazylibrarian.DESTINATION_COPY:
                                                            shutil.copyfile(os.path.join(processpath, ourfile),
                                                                            os.path.join(dirname, ourfile))
                                                            setperm(os.path.join(dirname, ourfile))
                                                        else:
                                                            shutil.move(os.path.join(processpath, ourfile),
                                                                        os.path.join(dirname, ourfile))
                                                            setperm(os.path.join(dirname, ourfile))
                                                    except Exception as why:
                                                        logger.debug("Failed to copy/move file %s to %s, %s" %
                                                                     (ourfile, dirname, str(why)))

                        pp_path = os.path.join(processpath, fname)
                        if os.path.isdir(pp_path):
                            logger.debug('Found folder (%s%%) %s for %s' % (match, pp_path, matchtitle))
                            if not os.listdir(pp_path):
                                logger.debug("Skipping %s, folder is empty" % pp_path)
                            elif bts_file(pp_path):
                                logger.debug("Skipping %s, found a .bts file" % pp_path)
                            else:
                                matches.append([match, pp_path, book])
                    else:
                        pp_path = os.path.join(processpath, fname)
                        matches.append([match, pp_path, book])  # so we can report closest match
                else:
                    logger.debug('Skipping %s' % fname)

            # pick the best candidate; it may still be below the acceptance ratio
            match = 0
            if matches:
                highest = max(matches, key=lambda x: x[0])
                match = highest[0]
                pp_path = highest[1]
                book = highest[2]
            if match and match >= lazylibrarian.DLOAD_RATIO:
                logger.debug(u'Found match (%s%%): %s for %s' % (match, pp_path, book['NZBtitle']))
                # NOTE(review): still string-built SQL; SOURCE does not show whether
                # myDB.match accepts bind arguments - parameterise if it does
                data = myDB.match('SELECT * from books WHERE BookID="%s"' % book['BookID'])
                if data:  # it's a book
                    logger.debug(u'Processing book %s' % book['BookID'])
                    authorname = data['AuthorName']
                    bookname = data['BookName']
                    if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                        logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                        lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')
                    # Default destination path, should be allowed change per config file.
                    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = unaccented(global_name)
                    dest_path = unaccented_str(replace_all(dest_path, dic))
                    # NOTE(review): the book destination is rooted at processpath
                    # (the download directory) - confirm this is intended
                    dest_path = os.path.join(processpath, dest_path).encode(lazylibrarian.SYS_ENCODING)
                else:
                    # NOTE(review): string-built SQL, see note on the books query above
                    data = myDB.match('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                    if data:  # it's a magazine
                        logger.debug(u'Processing magazine %s' % book['BookID'])
                        # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                        # files are downloading, there will be an error in post-processing, trying to go to the
                        # same directory.
                        mostrecentissue = data['IssueDate']  # keep for processing issues arriving out of order
                        mag_name = unaccented_str(replace_all(book['BookID'], dic))
                        # book auxinfo is a cleaned date, eg 2015-01-01
                        dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                            '$IssueDate', book['AuxInfo']).replace('$Title', mag_name)

                        if lazylibrarian.MAG_RELATIVE:
                            if dest_path[0] not in '._':
                                dest_path = '_' + dest_path
                            dest_path = os.path.join(processpath, dest_path).encode(
                                lazylibrarian.SYS_ENCODING)
                        else:
                            dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                        # authorname/bookname of None marks this entry as a magazine below
                        authorname = None
                        bookname = None
                        global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                            '$Title', mag_name)
                        global_name = unaccented(global_name)
                    else:  # not recognised
                        logger.debug('Nothing in database matching "%s"' % book['BookID'])
                        continue
            else:
                logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
                if match:
                    logger.debug(u'Closest match (%s%%): %s' % (match, pp_path))
                continue

            processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

            if processBook:
                logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
                # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
                controlValueDict = {"BookID": book['BookID'], "NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Processed", "NZBDate": now()}  # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)

                if bookname:
                    # it's a book, if None it's a magazine
                    if len(lazylibrarian.IMP_CALIBREDB):
                        logger.debug('Calibre should have created the extras for us')
                    else:
                        processExtras(myDB, dest_path, global_name, data)
                else:
                    # update mags
                    controlValueDict = {"Title": book['BookID']}
                    if mostrecentissue:
                        if mostrecentissue.isdigit() and str(book['AuxInfo']).isdigit():
                            older = int(mostrecentissue) > int(book['AuxInfo'])  # issuenumber
                        else:
                            older = mostrecentissue > book['AuxInfo']  # YYYY-MM-DD
                    else:
                        older = False
                    if older:  # check this in case processing issues arriving out of order
                        newValueDict = {"LastAcquired": today(), "IssueStatus": "Open"}
                    else:
                        newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": today(),
                                        "IssueStatus": "Open"}
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # dest_path is where we put the magazine after processing, but we don't have the full filename
                    # so look for any "book" in that directory
                    dest_file = book_file(dest_path, booktype='mag')
                    controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                    newValueDict = {"IssueAcquired": today(),
                                    "IssueFile": dest_file,
                                    "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                    }
                    myDB.upsert("issues", newValueDict, controlValueDict)

                    # create a thumbnail cover for the new issue
                    create_cover(dest_file)

                # calibre or ll copied/moved the files we want, now delete source files

                to_delete = True
                if book['NZBmode'] in ['torrent', 'magnet']:
                    # Only delete torrents if we don't want to keep seeding
                    if lazylibrarian.KEEP_SEEDING:
                        logger.warn('%s is seeding %s %s' % (book['Source'], book['NZBmode'], book['NZBtitle']))
                        to_delete = False
                    else:
                        # ask downloader to delete the torrent, but not the files
                        # we may delete them later, depending on other settings
                        if book['DownloadID'] != "unknown":
                            logger.debug('Removing %s from %s' % (book['NZBtitle'], book['Source'].lower()))
                            delete_task(book['Source'], book['DownloadID'], False)
                        else:
                            logger.warn("Unable to remove %s from %s, no DownloadID" %
                                        (book['NZBtitle'], book['Source'].lower()))

                if to_delete:
                    # only delete the files if not in download root dir and if DESTINATION_COPY not set
                    if not lazylibrarian.DESTINATION_COPY and (pp_path != processpath):
                        if os.path.isdir(pp_path):
                            # calibre might have already deleted it?
                            try:
                                shutil.rmtree(pp_path)
                            except Exception as why:
                                logger.debug("Unable to remove %s, %s" % (pp_path, str(why)))

                logger.info('Successfully processed: %s' % global_name)
                ppcount = ppcount + 1
                notify_download("%s from %s at %s" % (global_name, book['NZBprov'], now()))
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s.fail' % pp_path)
                controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Failed", "NZBDate": now()}
                myDB.upsert("wanted", newValueDict, controlValueDict)
                # if it's a book, reset status so we try for a different version
                # if it's a magazine, user can select a different one from pastissues table
                if bookname:
                    myDB.action('UPDATE books SET status = "Wanted" WHERE BookID=?', [book['BookID']])

                # at this point, as it failed we should move it or it will get postprocessed
                # again (and fail again)
                try:
                    os.rename(pp_path, pp_path + '.fail')
                except Exception as e:
                    logger.debug("Unable to rename %s, %s" % (pp_path, str(e)))

        # second pass: import anything still in the download dir that carries an "LL.(bookid)" tag
        downloads = os.listdir(processpath)  # check in case we processed/deleted some above
        for directory in downloads:
            dname, extn = os.path.splitext(directory)
            if "LL.(" in dname and extn not in skip_extns:
                bookID = str(directory).split("LL.(")[1].split(")")[0]
                logger.debug("Book with id: " + str(bookID) + " found in download directory")
                pp_path = os.path.join(processpath, directory)

                # a tagged single file is imported from the download root itself
                if os.path.isfile(pp_path):
                    pp_path = os.path.join(processpath)

                if (os.path.isdir(pp_path)):
                    if import_book(pp_path, bookID):
                        ppcount = ppcount + 1

        if ppcount == 0:
            logger.info('No snatched books/mags have been found')
        else:
            logger.info('%s book%s/mag%s processed.' % (ppcount, plural(ppcount), plural(ppcount)))

        # Now check for any that are still marked snatched...
        if lazylibrarian.TASK_AGE:
            snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')
            for snatch in snatched:
                # FUTURE: we could check percentage downloaded or eta?
                # if percentage is increasing, it's just slow
                try:
                    when_snatched = time.strptime(snatch['NZBdate'], '%Y-%m-%d %H:%M:%S')
                    when_snatched = time.mktime(when_snatched)
                    diff = time.time() - when_snatched  # time difference in seconds
                except Exception:
                    # unparseable or missing date: treat as just snatched
                    diff = 0
                hours = int(diff / 3600)
                if hours >= lazylibrarian.TASK_AGE:
                    logger.warn('%s was sent to %s %s hours ago, deleting failed task' %
                                (snatch['NZBtitle'], snatch['Source'].lower(), hours))
                    # change status to "Failed", and ask downloader to delete task and files
                    if snatch['BookID'] != 'unknown':
                        myDB.action('UPDATE wanted SET Status="Failed" WHERE BookID=?', [snatch['BookID']])
                        myDB.action('UPDATE books SET status = "Wanted" WHERE BookID=?', [snatch['BookID']])
                        delete_task(snatch['Source'], snatch['DownloadID'], True)
        if reset:
            scheduleJob(action='Restart', target='processDir')

    except Exception:
        # top-level boundary for the scheduled job: log the traceback, never propagate
        logger.error('Unhandled exception in processDir: %s' % traceback.format_exc())
Ejemplo n.º 48
0
def searchbook(books=None):
    """Search the configured Newznab providers for wanted books.

    One search term (the cleaned author name) is built per wanted book.  For
    each book, the first provider result whose cleaned title has a fuzzy
    partial ratio above 80 against the cleaned book name is recorded in the
    'wanted' table and, unless the book is already marked Snatched, sent to
    DownloadMethod().  Only that first matching result is used per book.

    Args:
        books: optional list of dicts with a 'bookid' key; only those books
            (if still Wanted) are searched.  When None, a backlog search of
            every Wanted book is done and the provider cache and throttling
            timeouts are cleared first.
    """
    # rename this thread
    threading.currentThread().name = "SEARCHBOOKS"
    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select('SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"')

        # Clear cache
        if os.path.exists(".ProviderCache"):
            for f in os.listdir(".ProviderCache"):
                os.unlink(os.path.join(".ProviderCache", f))

        # Clearing throttling timeouts
        t = SimpleCache.ThrottlingProcessor()
        t.lastRequestTime.clear()
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            searchbooks.extend(myDB.select(
                'SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"',
                [book['bookid']]))

    dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
           '"': '', ',': '', '*': '', ':': '', ';': ''}
    dicSearchFormatting = {'.': ' +', ' + ': ' '}

    for row in searchbooks:
        bookid = row[0]

        # SEARCH TERM just using the author name
        author = formatter.latinToAscii(formatter.replace_all(row[1], dic))
        author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting))
        searchterm1 = re.sub(r'[\.\-\/]', ' ', author)
        searchterm1 = re.sub(r'\(.*?\)', '', searchterm1)
        searchterm1 = re.sub(r"\s\s+", " ", searchterm1)  # strip any double white space
        # encode once, after all text processing is done
        searchterm1 = searchterm1.encode('utf-8')
        searchlist.append({"bookid": bookid, "bookName": row[2], "authorName": row[1],
                           "searchterm": searchterm1.strip()})

    if not lazylibrarian.SAB_HOST and not lazylibrarian.BLACKHOLE:
        logger.info('No download method is set, use SABnzbd or blackhole')

    if not lazylibrarian.NEWZNAB and not lazylibrarian.NEWZNAB2:
        logger.info('No providers are set. use NEWZNAB.')

    # Substitutions applied to both the book name and each result title before comparing.
    # NOTE(review): the '\\s\\s' key is a literal backslash-s pair unless
    # formatter.replace_all treats keys as regular expressions - confirm
    dictrepl = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': '', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '',
                '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '',
                '8': '', '9': '', '\'': '', ':': '', '!': '', '-': '', '\\s\\s': ' ',
                ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ',
                ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    for book in searchlist:
        resultlist = []
        if lazylibrarian.NEWZNAB:
            logger.debug('Searching NZB\'s at provider %s ...' % lazylibrarian.NEWZNAB_HOST)
            resultlist = providers.NewzNab(book, "1")

        if lazylibrarian.NEWZNAB2:
            logger.debug('Searching NZB\'s at provider %s ...' % lazylibrarian.NEWZNAB_HOST2)
            resultlist += providers.NewzNab(book, "2")

        if not resultlist:
            logger.debug("Adding book %s to queue." % book['searchterm'])
            continue

        bookName = book['bookName']
        bookID = book['bookid']
        bookName = re.sub(r'[\.\-\/]', ' ', bookName)
        bookName = re.sub(r'\(.*?\)', '', bookName)
        bookName = formatter.latinToAscii(formatter.replace_all(bookName.lower(), dictrepl)).strip()
        logger.debug(u'bookName %s' % bookName)
        addedCounter = 0

        for nzb in resultlist:
            nzbTitle = formatter.latinToAscii(
                formatter.replace_all(str(nzb['nzbtitle']).lower(), dictrepl)).strip()
            logger.debug(u'nzbName %s' % nzbTitle)
            ratio = fuzz.partial_ratio(bookName, nzbTitle)
            logger.debug("NZB Match %: " + str(ratio))
            if ratio > 80:
                logger.debug(u'FOUND %s' % nzbTitle.lower())
                addedCounter = addedCounter + 1
                bookid = nzb['bookid']
                # stored title carries the " LL.(bookid)" tag
                nzbTitle = (book["authorName"] + ' - ' + book['bookName'] + ' LL.(' + bookID + ')').strip()
                nzburl = nzb['nzburl']
                nzbprov = nzb['nzbprov']

                controlValueDict = {"NZBurl": nzburl}
                newValueDict = {
                    "NZBprov": nzbprov,
                    "BookID": bookid,
                    "NZBdate": formatter.today(),
                    "NZBtitle": nzbTitle,
                    "Status": "Skipped"
                }
                myDB.upsert("wanted", newValueDict, controlValueDict)

                snatchedbooks = myDB.action('SELECT * from books WHERE BookID=? and Status="Snatched"',
                                            [bookid]).fetchone()
                if not snatchedbooks:
                    DownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
                # only the first acceptable result is used for each book
                break

        if addedCounter == 0:
            logger.info("No nzb's found for " + (book["authorName"] + ' ' + bookName).strip() +
                        ". Adding book to queue.")
Ejemplo n.º 49
0
def searchbook(books=None):
    """Search the configured NZB providers for wanted books.

    Two search terms are queued per wanted book: "author title ebook-type",
    and a '+'-prefixed "author ebook-type" term.  For every provider result
    whose cleaned title contains every word of the cleaned book name
    (case-insensitive), the result is recorded in the 'wanted' table and,
    unless the book is already marked Snatched, sent to DownloadMethod().

    Args:
        books: optional list of dicts with a 'bookid' key; only those books
            (if still Wanted) are searched.  When None, every Wanted book is
            searched.
    """
    # rename this thread
    threading.currentThread().name = "SEARCHBOOKS"
    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        searchbooks = myDB.select('SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"')
    else:
        searchbooks = []
        for book in books:
            searchbooks.extend(myDB.select(
                'SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"',
                [book['bookid']]))

    dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
           '"': '', ',': '', '*': '', ':': ''}
    dicSearchFormatting = {' ': ' +', '.': ' +', ' + ': ' '}
    dicSearchFormatting1 = {' + ': ' '}

    for row in searchbooks:
        bookid = row[0]
        author = formatter.latinToAscii(formatter.replace_all(row[1], dic))
        title = formatter.latinToAscii(formatter.replace_all(row[2], dic))

        # OLD SEARCH TERM: author, title and ebook type
        searchterm = author + ' ' + title + ' ' + lazylibrarian.EBOOK_TYPE
        searchterm = re.sub(r'[\.\-\/]', ' ', searchterm)
        searchterm = re.sub(r"\s\s+", " ", searchterm)  # strip any double white space
        # encode once, after all text processing is done
        searchterm = searchterm.encode('utf-8')
        searchlist.append({"bookid": bookid, "bookName": row[2], "authorName": row[1],
                           "searchterm": searchterm.strip()})

        # TRY A SECOND SEARCH TERM just using author name and book type
        author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting))
        author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting1))
        searchterm1 = '+' + author + ' +' + lazylibrarian.EBOOK_TYPE
        searchterm1 = re.sub(r'[\.\-\/]', ' ', searchterm1)
        searchterm1 = re.sub(r'\(.*?\)', '', searchterm1)
        searchterm1 = re.sub(r"\s\s+", " ", searchterm1)  # strip any double white space
        searchterm1 = searchterm1.encode('utf-8')
        searchlist.append({"bookid": bookid, "bookName": row[2], "authorName": row[1],
                           "searchterm": searchterm1.strip()})

    if not lazylibrarian.SAB_HOST and not lazylibrarian.BLACKHOLE:
        logger.info('No download method is set, use SABnzbd or blackhole')

    if not lazylibrarian.NEWZNAB and not lazylibrarian.NZBMATRIX:
        logger.info('No providers are set. use NEWZNAB or NZBMATRIX')

    # Substitutions applied to both the book name and each result title before comparing.
    # NOTE(review): the '\\s\\s' key is a literal backslash-s pair unless
    # formatter.replace_all treats keys as regular expressions - confirm
    dictrepl = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': '', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '',
                '0': '', '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '',
                '8': '', '9': '', '\'': '', ':': '', '\\s\\s': ' '}

    for book in searchlist:
        resultlist = []
        if lazylibrarian.NEWZNAB and not resultlist:
            logger.debug('Searching NZB\'s at provider %s ...' % lazylibrarian.NEWZNAB_HOST)
            resultlist = providers.NewzNab(book)

        # NZBMatrix is only consulted when Newznab returned nothing
        if lazylibrarian.NZBMATRIX and not resultlist:
            logger.debug('Searching NZB at provider NZBMatrix ...')
            resultlist = providers.NZBMatrix(book)

        if not resultlist:
            logger.debug("Adding book %s to queue." % book['searchterm'])
            continue

        bookName = book['bookName']
        bookName = re.sub(r'[\.\-\/]', ' ', bookName)
        bookName = re.sub(r'\(.*?\)', '', bookName)
        bookName = formatter.latinToAscii(formatter.replace_all(bookName, dictrepl)).strip()
        logger.debug(u'bookName %s' % bookName)
        addedCounter = 0
        bookNameList = bookName.split()

        for nzb in resultlist:
            nzbTitle = formatter.latinToAscii(formatter.replace_all(nzb['nzbtitle'], dictrepl)).strip()
            logger.debug(u'nzbName %s' % nzbTitle)

            # accept the result only if every word of the book name appears in its title
            lowered = nzbTitle.lower()
            if all(word.lower() in lowered for word in bookNameList):
                logger.debug(u'FOUND %s' % nzbTitle.lower())
                addedCounter = addedCounter + 1
                bookid = nzb['bookid']
                nzbTitle = (book["authorName"] + ' ' + bookName).strip()
                nzburl = nzb['nzburl']
                nzbprov = nzb['nzbprov']

                controlValueDict = {"NZBurl": nzburl}
                newValueDict = {
                    "NZBprov": nzbprov,
                    "BookID": bookid,
                    "NZBdate": formatter.today(),
                    "NZBtitle": nzbTitle,
                    "Status": "Skipped"
                }
                myDB.upsert("wanted", newValueDict, controlValueDict)

                snatchedbooks = myDB.action('SELECT * from books WHERE BookID=? and Status="Snatched"',
                                            [bookid]).fetchone()
                if not snatchedbooks:
                    DownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
                # pause between accepted results
                time.sleep(1)

        if addedCounter == 0:
            logger.info("No nzb's found for " + (book["authorName"] + ' ' + bookName).strip() +
                        ". Adding book to queue.")
Ejemplo n.º 50
0
def processDir():
    """Post-process snatched downloads found in the download directory.

    For every row of the ``wanted`` table whose status is "Snatched", look
    for an entry named after the row's ``NZBtitle`` in
    ``lazylibrarian.DOWNLOAD_DIR``.  When found, the destination path is
    built from the book (or, failing that, magazine) record, the download is
    handed to ``processDestination`` and the ``wanted`` / ``magazines`` /
    ``issues`` tables are updated.  Afterwards every download entry whose
    name contains ``LL.(<bookid>)`` is passed to ``import_book``.

    Returns:
        False if the download directory cannot be listed, otherwise None.
    """
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    # TODO - try exception on os.listdir - it throws debug level
    # exception if dir doesn't exist - bloody hard to catch
    try:
        downloads = os.listdir(processpath)
    except OSError:
        logger.error('Could not access [%s] directory ' % processpath)
        return False

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    # NOTE(review): os.listdir() never returns None, and select() presumably
    # returns a list as well, so these two guards look purely defensive.
    if snatched is None:
        logger.info('No books are snatched. Nothing to process.')
    elif downloads is None:
        logger.info('No downloads are found. Nothing to process.')
    else:
        ppcount = 0
        for book in snatched:
            if book['NZBtitle'] in downloads:
                pp_path = os.path.join(processpath, book['NZBtitle'])
                logger.debug('Found book/mag folder %s.' % pp_path)

                # Bound parameters instead of string interpolation, so ids and
                # titles containing quote characters cannot break the query.
                data = myDB.select('SELECT * from books WHERE BookID=?', [book['BookID']])
                if data:
                    authorname = data[0]['AuthorName']
                    bookname = data[0]['BookName']

                    # Default destination path, should be allowed change per config file.
                    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace('$Title', bookname)
                    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace('$Title', bookname)
                    # dest_path = authorname+'/'+bookname
                    # global_name = bookname + ' - ' + authorname
                    dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING)
                else:
                    # Not a book: for magazines the wanted row's BookID holds the title.
                    data = myDB.select('SELECT * from magazines WHERE Title=?', [book['BookID']])
                    if data:
                        # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                        # files are downloading, there will be an error in post-processing, trying to go to the
                        # same directory.
                        mostrecentissue = data[0]['IssueDate']  # keep this for processing issues arriving out of order
                        dest_path = lazylibrarian.MAG_DEST_FOLDER.replace('$IssueDate', book['AuxInfo']).replace('$Title', book['BookID'])
                        # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                        if lazylibrarian.MAG_RELATIVE:
                            if dest_path[0] not in '._':
                                dest_path = '_' + dest_path
                            dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING)
                        else:
                            dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                        # None marks "this is a magazine" for the bookkeeping below
                        authorname = None
                        bookname = None
                        global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace('$Title', book['BookID'])
                        # global_name = book['AuxInfo']+' - '+title
                    else:
                        # Reached when the id matches neither a book nor a magazine row.
                        logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                        continue
            else:
                logger.debug("Snatched NZB %s is not in download directory" % (book['NZBtitle']))
                continue

            # Strip characters that are awkward in file system paths.
            dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
            dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
            try:
                # best effort only: a failure here is logged and otherwise ignored
                os.chmod(dest_path, 0o777)
            except Exception:
                logger.debug("Could not chmod post-process directory: " + str(dest_path))

            processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name, book['BookID'])

            if processBook:

                ppcount = ppcount + 1

                # update nzbs
                controlValueDict = {"NZBurl": book['NZBurl']}
                newValueDict = {"Status": "Processed", "NZBDate": formatter.today()}  # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)

                if bookname is not None:  # it's a book, if None it's a magazine
                    processExtras(myDB, dest_path, global_name, data)
                else:
                    # update mags
                    controlValueDict = {"Title": book['BookID']}
                    if mostrecentissue > book['AuxInfo']:  # check this in case processing issues arriving out of order
                        newValueDict = {"LastAcquired": formatter.today(), "IssueStatus": "Open"}
                    else:
                        newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": formatter.today(), "IssueStatus": "Open"}
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # dest_path is where we put the magazine after processing, but we don't have the full filename
                    # so look for any "book" in that directory
                    dest_file = book_file(dest_path)
                    controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                    newValueDict = {"IssueAcquired": formatter.today(), "IssueFile": dest_file}
                    myDB.upsert("issues", newValueDict, controlValueDict)

                logger.info('Successfully processed: %s' % global_name)
                notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now())
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s' % pp_path)
        #
        # TODO Seems to be duplication here. Can we just scan once for snatched books
        # instead of scan for snatched and then scan for directories with "LL.(bookID)" in?
        # Should there be any directories with "LL.(bookID)" that aren't in snatched?
        # Maybe this was put in for manually downloaded books?
        #
        downloads = os.listdir(processpath)  # check in case we processed/deleted some above
        for directory in downloads:
            if "LL.(" in directory:
                # the book id is the text between "LL.(" and the next ")"
                bookID = str(directory).split("LL.(")[1].split(")")[0]
                logger.debug("Book with id: " + str(bookID) + " is in downloads")
                pp_path = os.path.join(processpath, directory)

                if os.path.isfile(pp_path):
                    # a loose file: import from the download directory itself
                    pp_path = os.path.join(processpath)

                if (os.path.isdir(pp_path)):
                    logger.debug('Found LL folder %s.' % pp_path)
                if import_book(pp_path, bookID):
                    ppcount = ppcount + 1
        if ppcount:
            logger.info('%s books/mags have been processed.' % ppcount)
        else:
            logger.info('No snatched books/mags have been found')
Ejemplo n.º 51
0
    def find_book(self, bookid=None, queue=None):
        """Fetch one Google Books volume by id and store it as a Wanted book.

        Requests the volume from the Google Books API, extracts its metadata,
        resolves the author id through GoodReads and upserts a row into the
        ``books`` table with status "Wanted".  Cover, series and work page are
        then topped up through the ``bookwork`` helpers.

        Args:
            bookid: Google Books volume id; also used as the BookID key.
            queue: not used by this method.

        Returns:
            None.  Returns early (storing nothing) when the request yields no
            result, the volume has no author, or no author id can be found.
        """
        threading.currentThread().name = "GB-ADD-BOOK"
        myDB = database.DBConnection()
        if not lazylibrarian.GB_API:
            logger.warn('No GoogleBooks API key, check config')
        URL = 'https://www.googleapis.com/books/v1/volumes/' + \
            str(bookid) + "?key=" + lazylibrarian.GB_API
        jsonresults, in_cache = self.get_request(URL)

        if jsonresults is None:
            # The title is not known yet at this point, so report the id.
            logger.debug('No results found for %s' % bookid)
            return

        volume = jsonresults['volumeInfo']

        bookname = volume['title']
        bookname = bookname.replace(':', '').replace('"', '').replace("'", "")
        bookname = unidecode(u'%s' % bookname)
        bookname = bookname.strip()  # strip whitespace

        try:
            authorname = volume['authors'][0]
        except (KeyError, IndexError):
            # missing key or empty author list
            logger.debug(
                'Book %s does not contain author field, skipping' %
                bookname)
            return

        try:
            # warn if language is in ignore list, but user said they wanted
            # this book
            booklang = volume['language']
            valid_langs = ([valid_lang.strip()
                           for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')])
            if booklang not in valid_langs:
                logger.debug(
                    'Book %s language does not match preference' %
                    bookname)
        except KeyError:
            logger.debug('Book does not have language field')
            booklang = "Unknown"

        bookpub = volume.get('publisher')

        # Series details are parsed out of the subtitle; both stay None when
        # there is no subtitle or it does not have the expected shape
        # (presumably "(<series> Series #<number>)" - confirm against real data).
        series = None
        seriesNum = None
        booksub = volume.get('subtitle')
        if booksub is not None:
            try:
                series = booksub.split('(')[1].split(' Series ')[0]
            except IndexError:
                series = None
            try:
                seriesNum = booksub.split('(')[1].split(' Series ')[1].split(')')[0]
                if seriesNum[0] == '#':
                    seriesNum = seriesNum[1:]
            except IndexError:
                seriesNum = None

        bookdate = volume.get('publishedDate', '0000-00-00')

        try:
            bookimg = volume['imageLinks']['thumbnail']
        except KeyError:
            bookimg = 'images/nocover.png'

        bookrate = float(volume.get('averageRating', 0))
        bookpages = volume.get('pageCount', 0)

        try:
            bookgenre = volume['categories'][0]
        except (KeyError, IndexError):
            bookgenre = None

        bookdesc = volume.get('description')

        # Only the first identifier is inspected, and only ISBN-10 is kept.
        try:
            identifier = volume['industryIdentifiers'][0]
            if identifier['type'] == 'ISBN_10':
                bookisbn = identifier['identifier']
            else:
                bookisbn = None
        except (KeyError, IndexError):
            bookisbn = None

        booklink = volume['canonicalVolumeLink']

        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if not author:
            # Without an author id the book row cannot be built.
            logger.warn('No author id found for %s, unable to add book %s' %
                        (authorname, bookname))
            return
        AuthorID = author['authorid']

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorName": authorname,
            "AuthorID": AuthorID,
            "AuthorLink": "",
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": bookgenre,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": booklang,
            "Status": "Wanted",
            "BookAdded": formatter.today(),
            "Series": series,
            "SeriesNum": seriesNum
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.debug("%s added to the books database" % bookname)

        if 'nocover' in bookimg or 'nophoto' in bookimg:
            # try to get a cover from librarything
            workcover = bookwork.getBookCover(bookid)
            if workcover:
                logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)

            # NOTE(review): this branch only runs for placeholder images when
            # no replacement cover was found - confirm it was not meant to
            # pair with the outer "if" instead.
            elif bookimg.startswith('http'):
                link = bookwork.cache_cover(bookid, bookimg)
                if link is not None:
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"BookImg": link}
                    myDB.upsert("books", newValueDict, controlValueDict)

        if seriesNum is None:
            # try to get series info from librarything
            series, seriesNum = bookwork.getWorkSeries(bookid)
            if seriesNum:
                logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                controlValueDict = {"BookID": bookid}
                newValueDict = {
                    "Series": series,
                    "SeriesNum": seriesNum
                }
                myDB.upsert("books", newValueDict, controlValueDict)

        worklink = bookwork.getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
Ejemplo n.º 52
0
def _parse_capabilities(provider, data):
    """Fill the search and category entries of provider from a caps document.

    provider is the provider dict to update in place; data is the parsed
    root element of the provider's "t=caps" XML reply.
    """
    #############################################################################
    # book search isn't mentioned in the caps xml returned by
    # nzbplanet,jackett,oznzb,usenet-crawler, so we can't use it as a test
    # but the newznab+ ones usually support t=book and categories in 7000 range
    # whereas nZEDb ones don't support t=book and use categories in 8000 range
    # also some providers give searchtype but no supportedparams, so we still
    # can't tell what queries will be accepted
    # also category names can be lowercase or Mixed, magazine subcat name isn't
    # consistent, and subcat can be just subcat or category/subcat subcat > lang
    # eg "Magazines" "Mags" or "Books/Magazines" "Mags > French"
    # Load all languages for now as we don't know which the user might want
    #############################################################################
    #
    #  set some defaults
    #
    provider['GENERALSEARCH'] = ''
    provider['EXTENDED'] = '1'
    provider['BOOKCAT'] = ''
    provider['MAGCAT'] = ''
    provider['BOOKSEARCH'] = ''
    provider['MAGSEARCH'] = ''
    #
    search = data.find('searching/search')
    if search is not None and search.attrib.get('available') == 'yes':
        provider['GENERALSEARCH'] = 'search'

    for cat in data.getiterator('category'):
        if 'name' not in cat.attrib or cat.attrib['name'].lower() != 'books':
            continue

        bookcat = cat.attrib['id']  # keep main bookcat for later
        provider['BOOKCAT'] = bookcat
        provider['MAGCAT'] = ''

        # 7000 looks like newznab+ (should support book-search), anything
        # else looks like nZEDb (probably no book-search) ...
        provider['BOOKSEARCH'] = 'book' if bookcat == '7000' else ''
        # ... but an explicit book-search entry overrides that guess
        search = data.find('searching/book-search')
        if search is not None and 'available' in search.attrib:
            if search.attrib['available'] == 'yes':
                provider['BOOKSEARCH'] = 'book'
            else:
                provider['BOOKSEARCH'] = ''

        for subcat in cat.getiterator('subcat'):
            # a subcat without a name attribute simply matches nothing
            subname = subcat.attrib.get('name', '').lower()
            if 'ebook' in subname:
                provider['BOOKCAT'] = "%s,%s" % (provider['BOOKCAT'],
                                                 subcat.attrib['id'])
            if 'magazines' in subname or 'mags' in subname:
                if provider['MAGCAT']:
                    provider['MAGCAT'] = "%s,%s" % (provider['MAGCAT'],
                                                    subcat.attrib['id'])
                else:
                    provider['MAGCAT'] = subcat.attrib['id']

        # if no specific magazine subcategory, use books
        if not provider['MAGCAT']:
            provider['MAGCAT'] = bookcat


def get_capabilities(provider):
    """
    query provider for caps if none loaded yet, or if config entry is too old and not set manually.

    provider is a dict read through the keys UPDATED, MANUAL, HOST and API.
    When a fresh caps document is fetched and parsed, the GENERALSEARCH,
    EXTENDED, BOOKCAT, MAGCAT, BOOKSEARCH, MAGSEARCH and UPDATED entries are
    rewritten in place.  On any failure the dict is left as it was.

    Returns the same provider dict.
    """
    match = False
    if len(provider['UPDATED']) == 10:  # any stored values?
        match = True
        if (formatter.age(provider['UPDATED']) >
                lazylibrarian.CACHE_AGE) and not provider['MANUAL']:
            logger.debug('Stored capabilities for %s are too old' %
                         provider['HOST'])
            match = False

    if match:
        logger.debug('Using stored capabilities for %s' % provider['HOST'])
        return provider

    host = provider['HOST']
    if str(host)[:4] != "http":
        host = 'http://' + host
    URL = host + '/api?t=caps&apikey=' + provider['API']
    logger.debug('Requesting capabilities for %s' % URL)

    request = urllib2.Request(URL)
    if lazylibrarian.PROXY_HOST:
        request.set_proxy(lazylibrarian.PROXY_HOST,
                          lazylibrarian.PROXY_TYPE)
    request.add_header('User-Agent', common.USER_AGENT)
    resp = ""
    try:
        resp = urllib2.urlopen(request, timeout=30)  # don't get stuck
    except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as e:
        logger.debug("Error getting capabilities: %s" % e)
        resp = ""
    if not resp:
        return provider

    if not str(resp.getcode()).startswith("2"):  # anything but 200 OK etc
        logger.warn(u"Unable to get capabilities for %s: Got %s" %
                    (URL, resp.getcode()))
        return provider

    logger.debug(u"Got capabilities for %s" % request.get_full_url())
    try:
        source_xml = resp.read()  # .decode('utf-8')
        data = ElementTree.fromstring(source_xml)
    except Exception:
        logger.debug(u"Error getting xml from %s" % URL)
        data = None

    # data is None after a read/parse failure; len() of an element is its
    # number of children, so an empty document is rejected as well.
    if data is None or not len(data):
        logger.warn(
            u"Unable to get capabilities for %s: No data returned"
            % URL)
        return provider

    logger.debug(u"Parsing xml for capabilities of %s" % URL)
    _parse_capabilities(provider, data)
    logger.debug("Categories: Books %s : Mags %s" %
                 (provider['BOOKCAT'], provider['MAGCAT']))
    provider['UPDATED'] = formatter.today()
    return provider
Ejemplo n.º 53
0
    def get_author_books(self, authorid=None, authorname=None, refresh=False):
        """Load an author's books from the Google Books API into the database.

        Pages through the "inauthor:" search results 40 at a time, skips
        entries without an author, without a language or with a language
        outside ``lazylibrarian.IMP_PREFLANG``, and upserts the rest into the
        ``books`` table (rows already marked "Ignored" are left alone).  The
        author row is set to "Loading" at the start and to "Active", with
        refreshed totals, at the end.

        Args:
            authorid: key of the author row; stored on every book.
            authorname: author name used for the search and stored on every
                book.
            refresh: only selects which summary line is logged at the end.

        Returns:
            A list that is never filled by this method (always empty).
        """
        books_dict = []
        api_value = 'inauthor:'
        set_url = self.url + urllib.quote(api_value + '"' + authorname + '"')

        api_hits = 0
        logger.info('[%s] Now processing books with Google Books API' %
                    authorname)

        # Author is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)

        startindex = 0
        resultcount = 0
        removedResults = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0

        valid_langs = [lang.strip()
                       for lang in lazylibrarian.IMP_PREFLANG.split(',')]

        try:
            while True:

                self.params['startIndex'] = startindex
                URL = set_url + '&' + urllib.urlencode(self.params)

                try:
                    jsonresults = json.JSONDecoder().decode(
                        urllib2.urlopen(URL, timeout=30).read())
                except HTTPError as err:
                    logger.error(
                        'Google API returned HTTP Error - probably time/rate limiting - [%s]'
                        % err.msg)
                    # no page was fetched, so there is nothing to iterate over
                    break

                api_hits = api_hits + 1
                number_results = jsonresults['totalItems']
                logger.debug('[%s] Searching url: %s' % (authorname, URL))
                if number_results == 0:
                    logger.info('Found no results for %s with value: %s' %
                                (api_value, self.name))
                    break

                startindex = startindex + 40

                for item in jsonresults['items']:

                    total_count = total_count + 1

                    # skip if no author, no author is no book.
                    info = item.get('volumeInfo', {})
                    if not info.get('authors'):
                        logger.debug('Skipped a result without authorfield.')
                        continue

                    # skip if language is missing or in ignore list
                    if 'language' not in info:
                        ignored = ignored + 1
                        logger.debug(
                            'Skipped a result where no language is found')
                        continue
                    booklang = info['language']
                    if booklang not in valid_langs:
                        logger.debug('Skipped a book with language %s' %
                                     booklang)
                        ignored = ignored + 1
                        continue

                    bookpub = info.get('publisher')
                    booksub = info.get('subtitle')
                    bookdate = info.get('publishedDate', '0000-00-00')

                    try:
                        bookimg = info['imageLinks']['thumbnail']
                    except KeyError:
                        bookimg = 'images/nocover.png'

                    bookrate = float(info.get('averageRating', 0))
                    bookpages = info.get('pageCount', 0)

                    try:
                        bookgenre = info['categories'][0]
                    except (KeyError, IndexError):
                        bookgenre = None

                    bookdesc = info.get('description')

                    # only the first identifier is inspected, and only
                    # ISBN-10 is kept
                    try:
                        identifier = info['industryIdentifiers'][0]
                        if identifier['type'] == 'ISBN_10':
                            bookisbn = identifier['identifier']
                        else:
                            bookisbn = None
                    except (KeyError, IndexError):
                        bookisbn = None

                    # A KeyError on any of these three is caught by the outer
                    # handler and ends the whole import.
                    bookid = item['id']
                    bookname = info['title']
                    booklink = info['canonicalVolumeLink']

                    # keep the status of a book we already know about
                    find_book_status = myDB.select(
                        "SELECT * FROM books WHERE BookID = ?", [bookid])
                    if find_book_status:
                        for resulted in find_book_status:
                            book_status = resulted['Status']
                    else:
                        book_status = "Skipped"

                    # remove books with bad characters in title
                    if re.match(r'[^\w-]', bookname):
                        removedResults = removedResults + 1
                        continue

                    if book_status == "Ignored":
                        book_ignore_count = book_ignore_count + 1
                        continue

                    controlValueDict = {"BookID": bookid}
                    newValueDict = {
                        "AuthorName": authorname,
                        "AuthorID": authorid,
                        "AuthorLink": "",
                        "BookName": bookname,
                        "BookSub": booksub,
                        "BookDesc": bookdesc,
                        "BookIsbn": bookisbn,
                        "BookPub": bookpub,
                        "BookGenre": bookgenre,
                        "BookImg": bookimg,
                        "BookLink": booklink,
                        "BookRate": bookrate,
                        "BookPages": bookpages,
                        "BookDate": bookdate,
                        "BookLang": booklang,
                        "Status": book_status,
                        "BookAdded": formatter.today()
                    }
                    resultcount = resultcount + 1

                    myDB.upsert("books", newValueDict, controlValueDict)
                    logger.debug(u"book found " + bookname + " " +
                                 bookdate)
                    if not find_book_status:
                        logger.info("[%s] Added book: %s" %
                                    (authorname, bookname))
                        added_count = added_count + 1
                    else:
                        updated_count = updated_count + 1
                        logger.info("[%s] Updated book: %s" %
                                    (authorname, bookname))

                # Stop paging once every result has been requested.  This
                # check belongs after the item loop: inside it, the last page
                # was abandoned after its first item.
                if startindex >= number_results:
                    break

        except KeyError:
            # e.g. a reply without 'items' or 'totalItems': stop paging
            pass

        logger.info(
            '[%s] The Google Books API was hit %s times to populate book list'
            % (authorname, str(api_hits)))

        lastbook = myDB.action(
            "SELECT BookName, BookLink, BookDate from books WHERE AuthorID=? AND Status != 'Ignored' order by BookDate DESC",
            [authorid]).fetchone()
        unignoredbooks = myDB.select(
            "SELECT COUNT(BookName) as unignored FROM books WHERE AuthorID=? AND Status != 'Ignored'",
            [authorid])
        bookCount = myDB.select(
            "SELECT COUNT(BookName) as counter FROM books WHERE AuthorID=?",
            [authorid])

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "TotalBooks": bookCount[0]['counter'],
            "UnignoredBooks": unignoredbooks[0]['unignored']
        }
        # fetchone() gives None when the author has no unignored book; the
        # "last book" columns are then left untouched.
        if lastbook:
            newValueDict["LastBook"] = lastbook['BookName']
            newValueDict["LastLink"] = lastbook['BookLink']
            newValueDict["LastDate"] = lastbook['BookDate']
        myDB.upsert("authors", newValueDict, controlValueDict)

        logger.debug("Found %s total books for author" % total_count)
        logger.debug("Removed %s bad language results for author" % ignored)
        logger.debug("Removed %s bad character results for author" %
                     removedResults)
        logger.debug("Ignored %s books by author marked as Ignored" %
                     book_ignore_count)
        logger.debug("Imported/Updated %s books for author" % resultcount)

        if refresh:
            logger.info(
                "[%s] Book processing complete: Added %s books / Updated %s books"
                % (authorname, str(added_count), str(updated_count)))
        else:
            logger.info(
                "[%s] Book processing complete: Added %s books to the database"
                % (authorname, str(added_count)))
        return books_dict
def searchbook(books=None):
    """Search the configured usenet providers for wanted books.

    Args:
        books: optional iterable of dicts carrying a ``bookid`` key. When
            ``None``, every book whose Status is "Wanted" is searched;
            otherwise only the listed books that are still "Wanted".

    Each provider result is upserted into the ``wanted`` table with Status
    "Skipped" and passed to ``DownloadMethod`` unless the book is already
    marked "Snatched". Nothing is returned.
    """
    # rename this thread
    threading.currentThread().name = "SEARCHBOOKS"
    myDB = database.DBConnection()

    if books is None:
        # If the caller did not pass any books, search for all wanted books
        wanted_rows = myDB.select('SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"')
    else:
        # Otherwise only the requested books that are still wanted
        wanted_rows = []
        for book in books:
            wanted_rows.extend(myDB.select(
                'SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"',
                [book['bookid']]))

    # Characters stripped/replaced before building a search term (loop invariant)
    dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':''}

    searchlist = []
    for row in wanted_rows:
        bookid = row[0]
        # Strip illegal chars, then convert author and title to ASCII
        author = formatter.latinToAscii(formatter.replace_all(row[1], dic))
        title = formatter.latinToAscii(formatter.replace_all(row[2], dic))
        # Build the search term; dots, dashes and slashes become spaces
        searchterm = re.sub(r'[\.\-\/]', ' ', author + ' ' + title).encode('utf-8')
        searchlist.append({"bookid": bookid, "searchterm": searchterm})

    # These two checks only log; the search below still runs.
    if not lazylibrarian.SAB_HOST and not lazylibrarian.BLACKHOLE:
        logger.info('No downloadmethod is set, use SABnzbd or blackhole')

    if not lazylibrarian.NEWZNAB:
        logger.info('No providers are set.')

    # Contact the usenet providers in order; stop at the first one with results
    for book in searchlist:
        resultlist = []
        if lazylibrarian.NEWZNAB:
            logger.info('Searching NZB\'s at provider %s ...' % lazylibrarian.NEWZNAB_HOST)
            resultlist = providers.NewzNab(book)

        if lazylibrarian.NZBMATRIX and not resultlist:
            logger.info('Searching NZB at provider NZBMatrix ...')
            resultlist = providers.NZBMatrix(book)

        if not resultlist:
            logger.info("Search didn't have results. Adding book %s to queue." % book['searchterm'])
            continue

        for nzb in resultlist:
            bookid = nzb['bookid']
            nzbtitle = nzb['nzbtitle']
            nzburl = nzb['nzburl']
            nzbprov = nzb['nzbprov']

            controlValueDict = {"NZBurl": nzburl}
            newValueDict = {
                "NZBprov": nzbprov,
                "BookID": bookid,
                "NZBdate": formatter.today(),
                "NZBtitle": nzbtitle,
                "Status": "Skipped"
                }
            myDB.upsert("wanted", newValueDict, controlValueDict)

            # Only hand the result to the downloader if the book has not been snatched yet
            snatchedbooks = myDB.action('SELECT * from books WHERE BookID=? and Status="Snatched"', [bookid]).fetchone()
            if not snatchedbooks:
                DownloadMethod(bookid, nzbprov, nzbtitle, nzburl)
            # pause one second after every result, snatched or not
            time.sleep(1)
Ejemplo n.º 55
0
    def get_author_books(self, authorid=None, authorname=None, refresh=False):
        """Import an author's book list from the GoodReads API into the database.

        Marks the author as "Loading", walks the GoodReads author/list pages
        one at a time, filters each book by language, bad characters and
        duplicates, upserts accepted books into the ``books`` table, then
        marks the author "Active" and records a row in the ``stats`` table.

        Args:
            authorid: GoodReads author id; used to build the request URL and
                as the AuthorID key in the database.
            authorname: author name used in log messages and the stats row.
            refresh: when True the author list pages are requested with
                ``useCache=False`` and the final log line also reports the
                number of updated books.

        Returns:
            ``books_dict``, a list this method creates empty and never adds to.
        """
        api_hits = 0
        gr_lang_hits = 0
        lt_lang_hits = 0
        gb_lang_change = 0
        cache_hits = 0
        not_cached = 0

        # Initialise every counter up front: the summary and stats code at the
        # end of this method reads them even when the author has no books.
        removedResults = 0
        duplicates = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0

        URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(
            self.params)

        # Author is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)
        books_dict = []
        try:
            rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
        except Exception as e:
            logger.error("Error fetching author books: %s" % e)
            return books_dict
        if rootxml is None:
            logger.debug("Error requesting author books")
            return books_dict
        if not in_cache:
            api_hits = api_hits + 1
        resultxml = rootxml.getiterator('book')

        valid_langs = ([
            valid_lang.strip()
            for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')
        ])

        if not len(resultxml):
            logger.warn('[%s] No books found for author with ID: %s' %
                        (authorname, authorid))
        else:
            logger.debug("[%s] Now processing books with GoodReads API" %
                         authorname)
            logger.debug(u"url " + URL)

            authorNameResult = rootxml.find('./author/name').text
            logger.debug(u"author name " + authorNameResult)
            loopCount = 1

            while resultxml is not None:
                for book in resultxml:
                    total_count = total_count + 1

                    if (book.find('publication_year').text is None):
                        pubyear = "0000"
                    else:
                        pubyear = book.find('publication_year').text

                    try:
                        bookimg = book.find('image_url').text
                        if ('nocover' in bookimg):
                            bookimg = 'images/nocover.png'
                    except (KeyError, AttributeError):
                        bookimg = 'images/nocover.png'

                    # PAB this next section tries to get the book language using the isbn13 to
                    # look it up. If no isbn13 we skip the book entirely, rather than including
                    # it with an "Unknown" language. Changed this so we can still include the
                    # book with language set to "Unknown". There is a setting in config.ini to
                    # allow or skip books with "Unknown" language if you really don't want to
                    # include them.
                    # Not all GR books have isbn13 filled in, but all have a GR bookid, which
                    # we've already got, so use that.
                    # Also, with GR API rules we can only call the API once per second, which
                    # slows us down a lot when all we want is to get the language. We sleep for
                    # one second per book that GR knows about for each author you have in your
                    # library. The libraryThing API has the same 1 second restriction, and is
                    # limited to 1000 hits per day, but has fewer books with unknown language.
                    # To get around this and speed up the process, see if we already have a
                    # book in the database with a similar start to the ISBN. The way ISBNs
                    # work, digits 3-5 of a 13 char ISBN or digits 0-2 of a 10 digit ISBN
                    # indicate the region/language so if two books have the same 3 digit isbn
                    # code, they _should_ be the same language.
                    # I ran a simple python script on my library of 1500 books, and these codes
                    # were 100% correct on matching book languages, no mis-matches. It did
                    # result in a small number of books with "unknown" language being wrongly
                    # matched but most "unknown" were matched to the correct language.
                    # We could look up ISBNs we already know about in the database, but this
                    # only holds books in the languages we want to keep, which reduces the
                    # number of cache hits, so we create a new database table, holding ALL
                    # results including the ISBNs for languages we don't want and books we
                    # reject.
                    # The new table is created (if not exists) in init.py so by the time we get
                    # here there is an existing table.
                    # If we haven't an already matching partial ISBN, look up language code
                    # from libraryThing
                    # "http://www.librarything.com/api/thingLang.php?isbn=1234567890"
                    # If you find a matching language, add it to the database. If "unknown" or
                    # "invalid", try GR as maybe GR can provide a match.
                    # If both LT and GR return unknown, add isbn to db as "unknown". No point
                    # in repeatedly asking LT for a code it's told you it doesn't know.
                    # As an extra option, if language includes "All" in config.ini, we can skip
                    # this whole section and process everything much faster by not querying for
                    # language at all.
                    # It does mean we include a lot of unwanted foreign translations in the
                    # database, but it's _much_ faster.

                    bookLanguage = "Unknown"
                    find_field = "id"
                    isbn = ""
                    isbnhead = ""
                    if "All" not in valid_langs:  # do we care about language
                        if (book.find('isbn').text is not None):
                            find_field = "isbn"
                            isbn = book.find('isbn').text
                            isbnhead = isbn[0:3]
                        elif (book.find('isbn13').text is not None):
                            find_field = "isbn13"
                            isbn = book.find('isbn13').text
                            isbnhead = isbn[3:6]

                        if (find_field != 'id'):  # isbn or isbn13 found
                            match = myDB.action(
                                'SELECT lang FROM languages where isbn = ?',
                                [isbnhead]).fetchone()
                            if (match):
                                bookLanguage = match['lang']
                                cache_hits = cache_hits + 1
                                logger.debug(
                                    "Found cached language [%s] for %s [%s]" %
                                    (bookLanguage, find_field, isbnhead))
                            else:
                                # no match in cache, try searching librarything for a language code using the isbn
                                # if no language found, librarything return value is "invalid" or "unknown"
                                # returns plain text, not xml
                                BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + isbn
                                try:
                                    librarything_wait()
                                    resp = urllib2.urlopen(BOOK_URL,
                                                           timeout=30).read()
                                    lt_lang_hits = lt_lang_hits + 1
                                    logger.debug(
                                        "LibraryThing reports language [%s] for %s"
                                        % (resp, isbnhead))

                                    if ('invalid' in resp
                                            or 'Unknown' in resp):
                                        find_field = "id"  # reset the field to force search on goodreads
                                    else:
                                        bookLanguage = resp  # found a language code
                                        myDB.action(
                                            'insert into languages values (?, ?)',
                                            [isbnhead, bookLanguage])
                                        logger.debug(u"LT language %s: %s" %
                                                     (isbnhead, bookLanguage))
                                except Exception as e:
                                    logger.error(
                                        "Error finding LT language result for [%s], %s"
                                        % (isbn, e))
                                    find_field = "id"  # reset the field to search on goodreads

                        if (find_field == 'id'):
                            # [or bookLanguage == "Unknown"] no earlier match, we'll have to search the goodreads api
                            try:
                                if (book.find(find_field).text is not None):
                                    BOOK_URL = 'http://www.goodreads.com/book/show?id=' + \
                                        book.find(find_field).text + '&' + urllib.urlencode(self.params)
                                    logger.debug(u"Book URL: " + BOOK_URL)

                                    try:
                                        # GR allows one API call per second
                                        time_now = int(time.time())
                                        if time_now <= lazylibrarian.LAST_GOODREADS:
                                            time.sleep(1)

                                        BOOK_rootxml, in_cache = get_xml_request(
                                            BOOK_URL)
                                        if BOOK_rootxml is None:
                                            logger.debug(
                                                'Error requesting book language code'
                                            )
                                            bookLanguage = ""
                                        else:
                                            if not in_cache:
                                                # only update last_goodreads if the result wasn't found in the cache
                                                lazylibrarian.LAST_GOODREADS = time_now
                                            bookLanguage = BOOK_rootxml.find(
                                                './book/language_code').text
                                    except Exception as e:
                                        logger.error(
                                            "Error finding book results: %s" %
                                            e)
                                    if not in_cache:
                                        gr_lang_hits = gr_lang_hits + 1
                                    if not bookLanguage:
                                        bookLanguage = "Unknown"

                                    if (isbnhead != ""):
                                        # we have an isbn prefix, so remember the language for it
                                        myDB.action(
                                            'insert into languages values (?, ?)',
                                            [isbnhead, bookLanguage])
                                        logger.debug(
                                            "GoodReads reports language [%s] for %s"
                                            % (bookLanguage, isbnhead))
                                    else:
                                        # GR didn't give an isbn so we can't cache it, just use language for this book
                                        not_cached = not_cached + 1

                                    logger.debug(u"GR language: " +
                                                 bookLanguage)
                                else:
                                    logger.debug(
                                        "No %s provided for [%s]" %
                                        (find_field, book.find('title').text))

                            except Exception as e:
                                logger.debug(u"An error has occured: %s" % e)

                        if bookLanguage not in valid_langs:
                            logger.debug('Skipped a book with language %s' %
                                         bookLanguage)
                            ignored = ignored + 1
                            continue

                    bookname = book.find('title').text
                    bookid = book.find('id').text
                    bookdesc = book.find('description').text
                    bookisbn = book.find('isbn').text
                    bookpub = book.find('publisher').text
                    booklink = book.find('link').text
                    bookrate = float(book.find('average_rating').text)
                    bookpages = book.find('num_pages').text
                    bookname = unaccented(bookname)
                    # "Title: Subtitle" is split on the first ': '
                    if ': ' in bookname:
                        parts = bookname.split(': ', 1)
                        bookname = parts[0]
                        booksub = parts[1]
                    else:
                        booksub = ''
                    dic = {':': '', '"': '', '\'': ''}
                    bookname = replace_all(bookname, dic)
                    bookname = bookname.strip()  # strip whitespace
                    booksub = replace_all(booksub, dic)
                    booksub = booksub.strip()  # strip whitespace
                    if booksub:
                        series, seriesNum = bookSeries(booksub)
                    else:
                        series, seriesNum = bookSeries(bookname)

                    # GoodReads sometimes has multiple bookids for the same book (same author/title, different editions)
                    # and sometimes uses the same bookid if the book is the same but the title is slightly different
                    # We use bookid, then reject if another author/title has a different bookid so we just keep one...
                    find_book_status = myDB.select(
                        'SELECT * FROM books WHERE BookID = ?', [bookid])
                    if find_book_status:
                        for resulted in find_book_status:
                            book_status = resulted['Status']
                            locked = resulted['Manual']
                    else:
                        book_status = lazylibrarian.NEWBOOK_STATUS
                        locked = False

                    # reject books with bad characters in title
                    if re.match(r'[^\w-]', bookname):
                        logger.debug(u"removed result [" + bookname +
                                     "] for bad characters")
                        removedResults = removedResults + 1
                        continue

                    if not bookname:
                        logger.debug(
                            'Rejecting bookid %s for %s, no bookname' %
                            (bookid, authorNameResult))
                        removedResults = removedResults + 1
                        continue

                    # reject when the same author/title is already stored under another bookid
                    title_clash = False
                    find_books = myDB.select(
                        'SELECT * FROM books WHERE BookName = ? and AuthorName = ?',
                        [bookname, authorNameResult])
                    for find_book in find_books:
                        if find_book['BookID'] != bookid:
                            # we have a book with this author/title already
                            logger.debug(
                                'Rejecting bookid %s for [%s][%s] already got %s'
                                % (find_book['BookID'],
                                   authorNameResult, bookname, bookid))
                            duplicates = duplicates + 1
                            title_clash = True
                            break
                    if title_clash:
                        continue

                    # NOTE(review): any BookID already in the database is rejected here, so
                    # the "Updated"/"Ignored"/locked handling below only ever sees new books.
                    find_books = myDB.select(
                        'SELECT * FROM books WHERE BookID = ?', [bookid])
                    if find_books:
                        # we have a book with this bookid already; skip just this
                        # book and carry on with the rest of the page
                        logger.debug(
                            'Rejecting bookid %s for [%s][%s] already got this bookid in database'
                            % (bookid, authorNameResult, bookname))
                        duplicates = duplicates + 1
                        continue

                    if book_status == "Ignored":
                        book_ignore_count = book_ignore_count + 1
                        continue

                    if not locked:
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {
                            "AuthorName": authorNameResult,
                            "AuthorID": authorid,
                            "AuthorLink": None,
                            "BookName": bookname,
                            "BookSub": booksub,
                            "BookDesc": bookdesc,
                            "BookIsbn": bookisbn,
                            "BookPub": bookpub,
                            "BookGenre": None,
                            "BookImg": bookimg,
                            "BookLink": booklink,
                            "BookRate": bookrate,
                            "BookPages": bookpages,
                            "BookDate": pubyear,
                            "BookLang": bookLanguage,
                            "Status": book_status,
                            "BookAdded": today(),
                            "Series": series,
                            "SeriesNum": seriesNum
                        }

                        myDB.upsert("books", newValueDict,
                                    controlValueDict)
                        logger.debug(u"Book found: " +
                                     book.find('title').text + " " +
                                     pubyear)

                    if 'nocover' in bookimg or 'nophoto' in bookimg:
                        # try to get a cover from librarything
                        workcover = getBookCover(bookid)
                        if workcover:
                            logger.debug(
                                u'Updated cover for %s to %s' %
                                (bookname, workcover))
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"BookImg": workcover}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)

                    elif bookimg and bookimg.startswith('http'):
                        link = cache_cover(bookid, bookimg)
                        if link is not None:
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"BookImg": link}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)

                    if seriesNum is None:
                        # try to get series info from librarything
                        series, seriesNum = getWorkSeries(bookid)
                        if seriesNum:
                            logger.debug(u'Updated series: %s [%s]' %
                                         (series, seriesNum))
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {
                                "Series": series,
                                "SeriesNum": seriesNum
                            }
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)

                    worklink = getWorkPage(bookid)
                    if worklink:
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"WorkPage": worklink}
                        myDB.upsert("books", newValueDict,
                                    controlValueDict)

                    if not find_book_status:
                        logger.debug(u"[%s] Added book: %s" %
                                     (authorname, bookname))
                        added_count = added_count + 1
                    else:
                        logger.debug(u"[%s] Updated book: %s" %
                                     (authorname, bookname))
                        updated_count = updated_count + 1

                # fetch the next page of results; an empty or failed page ends the loop
                loopCount = loopCount + 1
                URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + \
                      urllib.urlencode(self.params) + '&page=' + str(loopCount)
                resultxml = None
                try:
                    rootxml, in_cache = get_xml_request(URL,
                                                        useCache=not refresh)
                    if rootxml is None:
                        logger.debug('Error requesting next page of results')
                    else:
                        resultxml = rootxml.getiterator('book')
                        if not in_cache:
                            api_hits = api_hits + 1
                except Exception as e:
                    resultxml = None
                    logger.error("Error finding next page of results: %s" % e)

                if resultxml is not None:
                    if all(False for book in
                           resultxml):  # returns True if iterator is empty
                        resultxml = None

        lastbook = myDB.action(
            'SELECT BookName, BookLink, BookDate from books WHERE AuthorID=? '
            'AND Status != "Ignored" order by BookDate DESC',
            [authorid]).fetchone()
        if lastbook:
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
        else:
            lastbookname = None
            lastbooklink = None
            lastbookdate = None

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate
        }
        myDB.upsert("authors", newValueDict, controlValueDict)

        # This is here because GoodReads sometimes has several entries with the same BookID!
        modified_count = added_count + updated_count

        logger.debug("Found %s total book%s for author" %
                     (total_count, plural(total_count)))
        logger.debug("Removed %s bad language result%s for author" %
                     (ignored, plural(ignored)))
        logger.debug(
            "Removed %s bad character or no-name result%s for author" %
            (removedResults, plural(removedResults)))
        logger.debug("Removed %s duplicate result%s for author" %
                     (duplicates, plural(duplicates)))
        logger.debug("Ignored %s book%s by author marked as Ignored" %
                     (book_ignore_count, plural(book_ignore_count)))
        logger.debug("Imported/Updated %s book%s for author" %
                     (modified_count, plural(modified_count)))

        myDB.action(
            'insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
            [authorname, api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
             cache_hits, ignored, removedResults, not_cached, duplicates])

        if refresh:
            logger.info(
                "[%s] Book processing complete: Added %s book%s / Updated %s book%s"
                % (authorname, added_count, plural(added_count), updated_count,
                   plural(updated_count)))
        else:
            logger.info(
                "[%s] Book processing complete: Added %s book%s to the database"
                % (authorname, added_count, plural(added_count)))

        return books_dict
Ejemplo n.º 56
0
								"AuthorID":     authorid,
								"AuthorLink":   None,
								"BookName":     bookname,
								"BookSub":      None,
								"BookDesc":     bookdesc,
								"BookIsbn":     bookisbn,
								"BookPub":      bookpub,
								"BookGenre":    None,
								"BookImg":      bookimg,
								"BookLink":     booklink,
								"BookRate":     bookrate,
								"BookPages":    bookpages,
								"BookDate":     pubyear,
								"BookLang":     bookLanguage,
								"Status":       book_status,
								"BookAdded":    formatter.today(),
                                                                "Series":       series,
                                                                "SeriesOrder":  seriesOrder
							}

							resultsCount = resultsCount + 1

							myDB.upsert("books", newValueDict, controlValueDict)
							logger.debug(u"book found " + book.find('title').text + " " + pubyear)
							if not find_book_status:
								logger.info("[%s] Added book: %s" % (authorname, bookname))
								added_count = added_count + 1
							else:
								logger.info("[%s] Updated book: %s" % (authorname, bookname))
								updated_count = updated_count + 1
						else:
Ejemplo n.º 57
0
    def find_book(self, bookid=None, queue=None):
        """Fetch a single book from GoodReads by id and store it as "Wanted".

        Requests the GoodReads book/show record, looks up the author id via
        ``GoodReads(name).find_author_id()``, upserts the book into the
        ``books`` table, then tries to add a cover, series information and a
        work page link.

        Args:
            bookid: GoodReads book id; used in the request URL and as the
                BookID key in the database.
            queue: accepted but not used by this method.

        Returns:
            None. Returns early, without touching the database, when the
            request fails or no author id can be found for the book's author.
        """
        myDB = database.DBConnection()

        URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(self.params)

        try:
            rootxml, in_cache = get_xml_request(URL)
            if rootxml is None:
                logger.debug("Error requesting book")
                return
        except Exception as e:
            logger.error("Error finding book: %s" % e)
            return

        bookLanguage = rootxml.find('./book/language_code').text
        bookname = rootxml.find('./book/title').text

        if not bookLanguage:
            bookLanguage = "Unknown"

        # PAB user has said they want this book, don't block for bad language, just warn
        valid_langs = ([valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')])
        if bookLanguage not in valid_langs:
            logger.debug('Book %s language does not match preference' % bookname)

        if (rootxml.find('./book/publication_year').text is None):
            bookdate = "0000"
        else:
            bookdate = rootxml.find('./book/publication_year').text

        # NOTE(review): get_author_books reads 'image_url' from the list feed; confirm
        # 'img_url' is the right tag here. A missing element falls back to nocover.
        try:
            bookimg = rootxml.find('./book/img_url').text
            if 'assets/nocover' in bookimg:
                bookimg = 'images/nocover.png'
        except (KeyError, AttributeError):
            bookimg = 'images/nocover.png'

        authorname = rootxml.find('./book/authors/author/name').text
        bookdesc = rootxml.find('./book/description').text
        bookisbn = rootxml.find('./book/isbn').text
        bookpub = rootxml.find('./book/publisher').text
        booklink = rootxml.find('./book/link').text
        bookrate = float(rootxml.find('./book/average_rating').text)
        bookpages = rootxml.find('./book/num_pages').text

        name = authorname
        GR = GoodReads(name)
        author = GR.find_author_id()
        if not author:
            # Without an author id the row below cannot be built
            logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
            return
        AuthorID = author['authorid']

        # "Title: Subtitle" is split on the first ': '
        booksub = ''
        bookname = unaccented(bookname)
        if ': ' in bookname:
            parts = bookname.split(': ', 1)
            bookname = parts[0]
            booksub = parts[1]

        dic = {':': '', '"': '', '\'': ''}
        bookname = replace_all(bookname, dic)
        bookname = bookname.strip()  # strip whitespace
        booksub = replace_all(booksub, dic)
        booksub = booksub.strip()  # strip whitespace
        if booksub:
            series, seriesNum = bookSeries(booksub)
        else:
            series, seriesNum = bookSeries(bookname)

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorName": authorname,
            "AuthorID": AuthorID,
            "AuthorLink": None,
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": None,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": bookLanguage,
            "Status": "Wanted",
            "BookAdded": today(),
            "Series": series,
            "SeriesNum": seriesNum
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.debug("%s added to the books database" % bookname)

        if 'nocover' in bookimg or 'nophoto' in bookimg:
            # try to get a cover from librarything
            workcover = getBookCover(bookid)
            if workcover:
                logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)

        elif bookimg and bookimg.startswith('http'):
            link = cache_cover(bookid, bookimg)
            if link is not None:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": link}
                myDB.upsert("books", newValueDict, controlValueDict)

        if seriesNum is None:
            #  try to get series info from librarything
            series, seriesNum = getWorkSeries(bookid)
            if seriesNum:
                logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                controlValueDict = {"BookID": bookid}
                newValueDict = {
                    "Series": series,
                    "SeriesNum": seriesNum
                }
                myDB.upsert("books", newValueDict, controlValueDict)

        worklink = getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
Ejemplo n.º 58
0
    def find_book(self, bookid=None, queue=None):
        """Fetch a single Google Books volume and upsert it as a "Wanted" book.

        The volume is requested from the Google Books ``volumes`` endpoint
        using ``lazylibrarian.GB_API`` as the API key, its metadata is
        normalised, the author is resolved to an id via ``GoodReads``, and the
        result is written to the ``books`` table keyed on ``BookID``.

        Args:
            bookid: Google Books volume id; it is passed through ``str()`` and
                appended to the request URL.
            queue: Not used by this method; kept so existing callers that pass
                it keep working.

        Returns:
            None. The book is skipped (nothing is written) when the volume
            lists no author or when no author id can be resolved for it.

        Raises:
            KeyError: if the response has no ``volumeInfo`` or no ``title``.
            Errors raised by ``urllib2.urlopen`` or by JSON decoding are not
            caught here and propagate to the caller.
        """
        threading.currentThread().name = "GB-ADD-BOOK"
        myDB = database.DBConnection()

        URL = 'https://www.googleapis.com/books/v1/volumes/' + str(bookid) + "?key=" + lazylibrarian.GB_API
        response = urllib2.urlopen(URL, timeout=30)
        try:
            jsonresults = json.JSONDecoder().decode(response.read())
        finally:
            # Always release the HTTP connection, even if read/decode fails.
            response.close()

        volume = jsonresults['volumeInfo']

        # Darkie67: replace German umlauts / sharp s with ASCII digraphs and
        # filter out ":" from the title.
        replacements = (
            (u'\xf6', u'oe'),
            (u'\xe4', u'ae'),
            (u'\xdf', u'ss'),
            (u'\xc4', u'Ae'),
            (u'\xdc', u'Ue'),
            (u'\xd6', u'Oe'),
            (':', ''),
            (u'\xfc', u'ue'),
        )
        bookname = volume['title']
        for old, new in replacements:
            bookname = bookname.replace(old, new)

        try:
            authorname = volume['authors'][0]
        except (KeyError, IndexError):
            # Without an author there is nothing to link the book to, and the
            # GoodReads lookup below cannot be performed.
            logger.debug('Book %s does not contain author field' % bookname)
            return

        try:
            # Only report a language mismatch; the book is still added because
            # it was requested explicitly by id.
            booklang = volume['language']
            valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
            if booklang not in valid_langs:
                logger.debug('Book %s language does not match preference' % bookname)
        except KeyError:
            logger.debug('Book does not have language field')
            booklang = "Unknown"

        # Optional fields: fall back to the same defaults as before when the
        # API response omits them.
        bookpub = volume.get('publisher')
        booksub = volume.get('subtitle')
        bookdate = volume.get('publishedDate', '0000-00-00')
        bookimg = volume.get('imageLinks', {}).get('thumbnail', 'images/nocover.png')
        bookrate = float(volume.get('averageRating', 0))
        bookpages = volume.get('pageCount', 0)
        bookdesc = volume.get('description')
        booklink = volume.get('canonicalVolumeLink')

        try:
            bookgenre = volume['categories'][0]
        except (KeyError, IndexError):
            bookgenre = None

        # Only the first identifier is inspected, and only ISBN-10 is accepted.
        bookisbn = None
        try:
            identifier = volume['industryIdentifiers'][0]
            if identifier['type'] == 'ISBN_10':
                bookisbn = identifier['identifier']
        except (KeyError, IndexError):
            bookisbn = None

        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if not author:
            # Previously this path crashed with a NameError on AuthorID.
            logger.debug('No AuthorID found for %s, unable to add book %s' % (authorname, bookname))
            return
        AuthorID = author['authorid']

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorName":   authorname,
            "AuthorID":     AuthorID,
            "AuthorLink":   "",
            "BookName":     bookname,
            "BookSub":      booksub,
            "BookDesc":     bookdesc,
            "BookIsbn":     bookisbn,
            "BookPub":      bookpub,
            "BookGenre":    bookgenre,
            "BookImg":      bookimg,
            "BookLink":     booklink,
            "BookRate":     bookrate,
            "BookPages":    bookpages,
            "BookDate":     bookdate,
            "BookLang":     booklang,
            "Status":       "Wanted",
            "BookAdded":    formatter.today()
            }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.info("%s added to the books database" % bookname)