Ejemplo n.º 1
0
def setBookAuthors(book):
    myDB = database.DBConnection()
    newauthors = 0
    newrefs = 0
    try:
        authorlist = getBookAuthors(book['bookid'])
        for author in authorlist:
            authtype = author['type']
            if authtype in ['primary author', 'main author', 'secondary author']:
                if author['role'] in ['Author', '—'] and author['work'] == 'all editions':
                    name = formatAuthorName(unaccented(author['name']))
                    exists = myDB.match('select authorid from authors where authorname=?', (name,))
                    if exists:
                        authorid = exists['authorid']
                    else:
                        # try to add new author to database by name
                        name, authorid, new = lazylibrarian.importer.addAuthorNameToDB(name, False, False)
                        if new and authorid:
                            newauthors += 1
                    if authorid:
                        # suppress duplicates in bookauthors
                        myDB.action('INSERT into bookauthors (AuthorID, BookID) VALUES (?, ?)',
                                    (authorid, book['bookid']), suppress='UNIQUE')
                        newrefs += 1
    except Exception as e:
        logger.debug("Error parsing authorlist for %s: %s" % (book['bookname'], str(e)))
    return newauthors, newrefs
Ejemplo n.º 2
0
def setBookAuthors(book):
    myDB = database.DBConnection()
    newauthors = 0
    newrefs = 0
    try:
        authorlist = getBookAuthors(book['bookid'])
        for author in authorlist:
            role = ''
            if 'id' in author:
                # it's a goodreads data source
                authorname = author['name']
                exists = myDB.match(
                    'select authorid from authors where authorid=?',
                    (author['id'], ))
                if 'role' in author:
                    role = author['role']
            else:
                # it's a librarything data source
                authorname = formatAuthorName(author['name'])
                exists = myDB.match(
                    'select authorid from authors where authorname=?',
                    (authorname, ))
                if 'type' in author:
                    authtype = author['type']
                    if authtype in [
                            'primary author', 'main author', 'secondary author'
                    ]:
                        role = authtype
                    elif author['role'] in [
                            'Author', '—'
                    ] and author['work'] == 'all editions':
                        role = 'Author'
            if exists:
                authorid = exists['authorid']
            else:
                # try to add new author to database by name
                authorname, authorid, new = lazylibrarian.importer.addAuthorNameToDB(
                    authorname, False, False)
                if new and authorid:
                    newauthors += 1
            if authorid:
                myDB.action(
                    'INSERT into bookauthors (AuthorID, BookID, Role) VALUES (?, ?, ?)',
                    (authorid, book['bookid'], role),
                    suppress='UNIQUE')
                newrefs += 1
    except Exception as e:
        logger.error("Error parsing authorlist for %s: %s %s" %
                     (book['bookname'], type(e).__name__, str(e)))
    return newauthors, newrefs
Ejemplo n.º 3
0
    def _findAuthor(self, **kwargs):
        if 'name' not in kwargs:
            self.data = 'Missing parameter: name'
            return

        authorname = formatAuthorName(kwargs['name'])
        if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
            GB = GoogleBooks(authorname)
            queue = Queue.Queue()
            search_api = threading.Thread(target=GB.find_results, name='API-GBRESULTS', args=[authorname, queue])
            search_api.start()
        else:  # lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
            GR = GoodReads(authorname)
            queue = Queue.Queue()
            search_api = threading.Thread(target=GR.find_results, name='API-GRRESULTS', args=[authorname, queue])
            search_api.start()

        search_api.join()
        self.data = queue.get()
Ejemplo n.º 4
0
def setBookAuthors(book):
    myDB = database.DBConnection()
    newauthors = 0
    newrefs = 0
    try:
        authorlist = getBookAuthors(book['bookid'])
        for author in authorlist:
            role = ''
            if 'id' in author:
                # it's a goodreads data source
                authorname = author['name']
                exists = myDB.match('select authorid from authors where authorid=?', (author['id'],))
                if 'role' in author:
                    role = author['role']
            else:
                # it's a librarything data source
                authorname = formatAuthorName(author['name'])
                exists = myDB.match('select authorid from authors where authorname=?', (authorname,))
                if 'type' in author:
                    authtype = author['type']
                    if authtype in ['primary author', 'main author', 'secondary author']:
                        role = authtype
                    elif author['role'] in ['Author', '—'] and author['work'] == 'all editions':
                        role = 'Author'
            if exists:
                authorid = exists['authorid']
            else:
                # try to add new author to database by name
                authorname, authorid, new = lazylibrarian.importer.addAuthorNameToDB(authorname, False, False)
                if new and authorid:
                    newauthors += 1
            if authorid:
                myDB.action('INSERT into bookauthors (AuthorID, BookID, Role) VALUES (?, ?, ?)',
                            (authorid, book['bookid'], role), suppress='UNIQUE')
                newrefs += 1
    except Exception as e:
        logger.error("Error parsing authorlist for %s: %s %s" % (book['bookname'], type(e).__name__, str(e)))
    return newauthors, newrefs
Ejemplo n.º 5
0
    def find_author_id(self, refresh=False):
        author = self.name
        author = formatAuthorName(unaccented(author))
        URL = 'https://www.goodreads.com/api/author_url/' + urllib.quote(author) + '?' + urllib.urlencode(self.params)

        # googlebooks gives us author names with long form unicode characters
        if isinstance(author, str) and hasattr(author, "decode"):
            author = author.decode('utf-8')  # make unicode
        author = unicodedata.normalize('NFC', author)  # normalize to short form

        logger.debug("Searching for author with name: %s" % author)

        authorlist = []
        try:
            rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
        except Exception as e:
            logger.error("%s finding authorid: %s, %s" % (type(e).__name__, URL, str(e)))
            return authorlist
        if rootxml is None:
            logger.debug("Error requesting authorid")
            return authorlist

        resultxml = rootxml.getiterator('author')

        if resultxml is None:
            logger.warn('No authors found with name: %s' % author)
        else:
            # In spite of how this looks, goodreads only returns one result, even if there are multiple matches
            # we just have to hope we get the right one. eg search for "James Lovelock" returns "James E. Lovelock"
            # who only has one book listed under googlebooks, the rest are under "James Lovelock"
            # goodreads has all his books under "James E. Lovelock". Can't come up with a good solution yet.
            # For now we'll have to let the user handle this by selecting/adding the author manually
            for author in resultxml:
                authorid = author.attrib.get("id")
                authorlist = self.get_author_info(authorid)
        return authorlist
Ejemplo n.º 6
0
def search_wishlist():
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            threading.currentThread().name = "SEARCHWISHLIST"

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverWishLists()
        new_books = 0
        if not wishproviders:
            logger.debug('No wishlists are set')
            scheduleJob(action='Stop', target='search_wishlist')
            return  # No point in continuing

        # for each item in resultlist, add to database if necessary, and mark as wanted
        logger.debug('Processing %s item%s in wishlists' % (len(resultlist), plural(len(resultlist))))
        for book in resultlist:
            # we get rss_author, rss_title, maybe rss_isbn, rss_bookid (goodreads bookid)
            # we can just use bookid if goodreads, or try isbn and name matching on author/title if not
            # eg NYTimes wishlist
            if 'E' in book['types']:
                ebook_status = "Wanted"
            else:
                ebook_status = "Skipped"
            if 'A' in book['types']:
                audio_status = "Wanted"
            else:
                audio_status = "Skipped"
            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and book['rss_bookid']:
                cmd = 'select books.Status as Status,AudioStatus,authors.Status as AuthorStatus,'
                cmd += 'AuthorName,BookName,Requester,AudioRequester from books,authors '
                cmd += 'where books.AuthorID = authors.AuthorID and bookid=?'
                bookmatch = myDB.match(cmd, (book['rss_bookid'],))
                if bookmatch:
                    cmd = 'SELECT SeriesName,Status from series,member '
                    cmd += 'where series.SeriesID=member.SeriesID and member.BookID=?'
                    series = myDB.select(cmd, (book['rss_bookid'],))
                    reject_series = None
                    for ser in series:
                        if ser['Status'] in ['Paused', 'Ignored']:
                            reject_series = {"Name": ser['SeriesName'], "Status": ser['Status']}
                            break
                    bookname = bookmatch['BookName']
                    if bookmatch['Status'] in ['Open', 'Wanted', 'Have']:
                        logger.info('Found book %s, already marked %s' % (bookname, bookmatch['Status']))
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {"Requester": bookmatch["Requester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": book['rss_bookid']}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    elif bookmatch['AuthorStatus'] in ['Paused', 'Ignored']:
                        logger.info('Found book %s, but author is %s' % (bookname, bookmatch['AuthorStatus']))
                    elif reject_series:
                        logger.info('Found book %s, but series "%s" is %s' %
                                    (bookname, reject_series['Name'], reject_series['Status']))
                    elif ebook_status == "Wanted":  # skipped/ignored
                        logger.info('Found book %s, marking as "Wanted"' % bookname)
                        controlValueDict = {"BookID": book['rss_bookid']}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {"Requester": bookmatch["Requester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": book['rss_bookid']}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    if bookmatch['AudioStatus'] in ['Open', 'Wanted', 'Have']:
                        logger.info('Found audiobook %s, already marked %s' % (bookname, bookmatch['AudioStatus']))
                        if bookmatch["AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["AudioRequester"]:
                                newValueDict = {"AudioRequester": bookmatch["AudioRequester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": book['rss_bookid']}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    elif bookmatch['AuthorStatus'] in ['Paused', 'Ignored']:
                        logger.info('Found book %s, but author is %s' % (bookname, bookmatch['AuthorStatus']))
                    elif reject_series:
                        logger.info('Found book %s, but series "%s" is %s' %
                                    (bookname, reject_series['Name'], reject_series['Status']))
                    elif audio_status == "Wanted":  # skipped/ignored
                        logger.info('Found audiobook %s, marking as "Wanted"' % bookname)
                        controlValueDict = {"BookID": book['rss_bookid']}
                        newValueDict = {"AudioStatus": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch["AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["AudioRequester"]:
                                newValueDict = {"AudioRequester": bookmatch["AudioRequester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": book['rss_bookid']}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                else:
                    import_book(book['rss_bookid'], ebook_status, audio_status)
                    new_books += 1
                    newValueDict = {"Requester": book["dispname"] + ' '}
                    controlValueDict = {"BookID": book['rss_bookid']}
                    myDB.upsert("books", newValueDict, controlValueDict)
                    newValueDict = {"AudioRequester": book["dispname"] + ' '}
                    controlValueDict = {"BookID": book['rss_bookid']}
                    myDB.upsert("books", newValueDict, controlValueDict)
            else:
                item = {}
                results = None
                item['Title'] = book['rss_title']
                if book['rss_bookid']:
                    item['BookID'] = book['rss_bookid']
                if book['rss_isbn']:
                    item['ISBN'] = book['rss_isbn']
                bookmatch = finditem(item, book['rss_author'])
                if bookmatch:  # it's already in the database
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    auth_res = myDB.match('SELECT Status from authors WHERE authorname=?', (authorname,))
                    if auth_res:
                        auth_status = auth_res['Status']
                    else:
                        auth_status = 'Unknown'
                    cmd = 'SELECT SeriesName,Status from series,member '
                    cmd += 'where series.SeriesID=member.SeriesID and member.BookID=?'
                    series = myDB.select(cmd, (book['rss_bookid'],))
                    reject_series = None
                    for ser in series:
                        if ser['Status'] in ['Paused', 'Ignored']:
                            reject_series = {"Name": ser['SeriesName'], "Status": ser['Status']}
                            break
                    if bookmatch['Status'] in ['Open', 'Wanted', 'Have']:
                        logger.info(
                            'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookmatch['Status']))
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {"Requester": bookmatch["Requester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    elif auth_status in ['Paused', 'Ignored']:
                        logger.info('Found book %s, but author is "%s"' % (bookname, auth_status))
                    elif reject_series:
                        logger.info('Found book %s, but series "%s" is %s' %
                                    (bookname, reject_series['Name'], reject_series['Status']))
                    elif ebook_status == 'Wanted':  # skipped/ignored
                        logger.info('Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {"Requester": bookmatch["Requester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    if bookmatch['AudioStatus'] in ['Open', 'Wanted', 'Have']:
                        logger.info(
                            'Found audiobook %s by %s, already marked as "%s"' %
                            (bookname, authorname, bookmatch['AudioStatus']))
                        if bookmatch["AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["AudioRequester"]:
                                newValueDict = {"AudioRequester": bookmatch["AudioRequester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    elif auth_status in ['Paused', 'Ignored']:
                        logger.info('Found book %s, but author is "%s"' % (bookname, auth_status))
                    elif reject_series:
                        logger.info('Found book %s, but series "%s" is %s' %
                                    (bookname, reject_series['Name'], reject_series['Status']))
                    elif audio_status == 'Wanted':  # skipped/ignored
                        logger.info('Found audiobook %s by %s, marking as "Wanted"' % (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"AudioStatus": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch["AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["AudioRequester"]:
                                newValueDict = {"AudioRequester": bookmatch["AudioRequester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict, controlValueDict)
                else:  # not in database yet
                    if book['rss_isbn']:
                        results = search_for(book['rss_isbn'])
                    if results:
                        result = results[0]  # type: dict
                        if result['isbn_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                            logger.info("Found (%s%%) %s: %s" %
                                        (result['isbn_fuzz'], result['authorname'], result['bookname']))
                            import_book(result['bookid'], ebook_status, audio_status)
                            new_books += 1
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": result['bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            bookmatch = True
                    if not results:
                        searchterm = "%s <ll> %s" % (item['Title'], formatAuthorName(book['rss_author']))
                        results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]  # type: dict
                        if result['author_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90) \
                                and result['book_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                            logger.info("Found (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                      result['authorname'], result['bookname']))
                            import_book(result['bookid'], ebook_status, audio_status)
                            new_books += 1
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": result['bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            bookmatch = True

                    if not bookmatch:
                        msg = "Skipping book %s by %s" % (item['Title'], book['rss_author'])
                        if not results:
                            msg += ', No results returned'
                            logger.warn(msg)
                        else:
                            msg += ', No match found'
                            logger.warn(msg)
                            result = results[0]  # type: dict
                            msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                        result['authorname'], result['bookname'])
                        logger.warn(msg)
        if new_books:
            logger.info("Wishlist marked %s book%s as Wanted" % (new_books, plural(new_books)))

    except Exception:
        logger.error('Unhandled exception in search_wishlist: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
Ejemplo n.º 7
0
def search_rss_book(books=None, library=None):
    """
    books is a list of new books to add, or None for backlog search
    library is "eBook" or "AudioBook" or None to search all book types
    """
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if books is None:
                threading.currentThread().name = "SEARCHALLRSS"
            else:
                threading.currentThread().name = "SEARCHRSS"

        if not (lazylibrarian.USE_RSS()):
            logger.warn('RSS search is disabled')
            scheduleJob(action='Stop', target='search_rss_book')
            return

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverWishLists()
        new_books = 0
        if not wishproviders:
            logger.debug('No wishlists are set')
        else:
            # for each item in resultlist, add to database if necessary, and mark as wanted
            logger.debug('Processing %s item%s in wishlists' % (len(resultlist), plural(len(resultlist))))
            for book in resultlist:
                # we get rss_author, rss_title, maybe rss_isbn, rss_bookid (goodreads bookid)
                # we can just use bookid if goodreads, or try isbn and name matching on author/title if googlebooks
                # not sure if anyone would use a goodreads wishlist if not using goodreads interface...
                if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and book['rss_bookid']:
                    bookmatch = myDB.match('select Status,BookName from books where bookid=?', (book['rss_bookid'],))
                    if bookmatch:
                        bookstatus = bookmatch['Status']
                        bookname = bookmatch['BookName']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(u'Found book %s, already marked as "%s"' % (bookname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(u'Found book %s, marking as "Wanted"' % bookname)
                            controlValueDict = {"BookID": book['rss_bookid']}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            new_books += 1
                    else:
                        import_book(book['rss_bookid'])
                        new_books += 1
                else:
                    item = {}
                    headers = []
                    item['Title'] = book['rss_title']
                    if book['rss_bookid']:
                        item['BookID'] = book['rss_bookid']
                        headers.append('BookID')
                    if book['rss_isbn']:
                        item['ISBN'] = book['rss_isbn']
                        headers.append('ISBN')
                    bookmatch = finditem(item, book['rss_author'], headers)
                    if bookmatch:  # it's already in the database
                        authorname = bookmatch['AuthorName']
                        bookname = bookmatch['BookName']
                        bookid = bookmatch['BookID']
                        bookstatus = bookmatch['Status']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(
                                u'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            new_books += 1
                    else:  # not in database yet
                        results = ''
                        if book['rss_isbn']:
                            results = search_for(book['rss_isbn'])
                        if results:
                            result = results[0]  # type: dict
                            if result['isbn_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                                logger.info("Found (%s%%) %s: %s" %
                                            (result['isbn_fuzz'], result['authorname'], result['bookname']))
                                import_book(result['bookid'])
                                new_books += 1
                                bookmatch = True
                        if not results:
                            searchterm = "%s <ll> %s" % (item['Title'], formatAuthorName(book['rss_author']))
                            results = search_for(unaccented(searchterm))
                        if results:
                            result = results[0]  # type: dict
                            if result['author_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90) \
                                    and result['book_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                                logger.info("Found (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                          result['authorname'], result['bookname']))
                                import_book(result['bookid'])
                                new_books += 1
                                bookmatch = True

                    if not bookmatch:
                        msg = "Skipping book %s by %s" % (item['Title'], book['rss_author'])
                        if not results:
                            msg += ', No results returned'
                            logger.warn(msg)
                        else:
                            msg += ', No match found'
                            logger.warn(msg)
                            msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                        result['authorname'], result['bookname'])
                            logger.warn(msg)
        if new_books:
            logger.info("Wishlist marked %s book%s as Wanted" % (new_books, plural(new_books)))

        searchbooks = []
        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded, books.Status, AudioStatus '
            cmd += 'from books,authors WHERE (books.Status="Wanted" OR AudioStatus="Wanted") '
            cmd += 'and books.AuthorID = authors.AuthorID order by BookAdded desc'
            results = myDB.select(cmd)
            for terms in results:
                searchbooks.append(terms)
        else:
            # The user has added a new book
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub, books.Status, AudioStatus '
                cmd += 'from books,authors WHERE BookID=? AND books.AuthorID = authors.AuthorID'
                results = myDB.select(cmd, (book['bookid'],))
                for terms in results:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            logger.debug("SearchRSS - No books to search for")
            return

        resultlist, nproviders = IterateOverRSSSites()
        if not nproviders and not wishproviders:
            logger.warn('No rss providers are available')
            return  # No point in continuing

        logger.info('RSS Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks))))

        searchlist = []
        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName']
            if searchbook['BookSub']:
                searchterm = searchterm + ': ' + searchbook['BookSub']

            if library is None or library == 'eBook':
                if searchbook['Status'] == "Wanted":
                    searchlist.append(
                        {"bookid": searchbook['BookID'],
                         "bookName": searchbook['BookName'],
                         "bookSub": searchbook['BookSub'],
                         "authorName": searchbook['AuthorName'],
                         "library": "eBook",
                         "searchterm": searchterm})

            if library is None or library == 'AudioBook':
                if searchbook['AudioStatus'] == "Wanted":
                    searchlist.append(
                        {"bookid": searchbook['BookID'],
                         "bookName": searchbook['BookName'],
                         "bookSub": searchbook['BookSub'],
                         "authorName": searchbook['AuthorName'],
                         "library": "AudioBook",
                         "searchterm": searchterm})

        rss_count = 0
        for book in searchlist:
            if book['library'] == 'AudioBook':
                searchtype = 'audio'
            else:
                searchtype = 'book'
            found = processResultList(resultlist, book, searchtype, 'rss')

            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in book['bookName']:  # anything to shorten?
                searchtype = 'short' + searchtype
                found = processResultList(resultlist, book, searchtype, 'rss')

            if not found:
                logger.info("NZB Searches for %s %s returned no results." % (book['library'], book['searchterm']))
            if found > True:
                rss_count += 1

        logger.info("RSS Search for Wanted items complete, found %s book%s" % (rss_count, plural(rss_count)))

    except Exception:
        logger.error('Unhandled exception in search_rss_book: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
Ejemplo n.º 8
0
def import_CSV(search_dir=None, library='eBook'):
    """ Find a csv file in the search_dir and process all the books in it,
        adding authors to the database if not found
        and marking the books as "Wanted"
        Optionally delete the file on successful completion
    """
    # noinspection PyBroadException
    try:
        if not search_dir:
            msg = "Alternate Directory not configured"
            logger.warn(msg)
            return msg
        elif not os.path.isdir(search_dir):
            msg = "Alternate Directory [%s] not found" % search_dir
            logger.warn(msg)
            return msg

        csvFile = csv_file(search_dir, library=library)

        headers = None

        myDB = database.DBConnection()
        bookcount = 0
        authcount = 0
        skipcount = 0
        total = 0
        existing = 0

        if not csvFile:
            msg = "No %s CSV file found in %s" % (library, search_dir)
            logger.warn(msg)
            return msg
        else:
            logger.debug('Reading file %s' % csvFile)
            csvreader = reader(open(csvFile, 'rU'))
            for row in csvreader:
                if csvreader.line_num == 1:
                    # If we are on the first line, create the headers list from the first row
                    headers = row
                    if 'Author' not in headers or 'Title' not in headers:
                        msg = 'Invalid CSV file found %s' % csvFile
                        logger.warn(msg)
                        return msg
                else:
                    total += 1
                    item = dict(list(zip(headers, row)))
                    authorname = formatAuthorName(item['Author'])
                    title = makeUnicode(item['Title'])

                    authmatch = myDB.match('SELECT * FROM authors where AuthorName=?', (authorname,))

                    if authmatch:
                        logger.debug("CSV: Author %s found in database" % authorname)
                    else:
                        logger.debug("CSV: Author %s not found" % authorname)
                        newauthor, authorid, new = addAuthorNameToDB(author=authorname,
                                                                     addbooks=lazylibrarian.CONFIG['NEWAUTHOR_BOOKS'])
                        if len(newauthor) and newauthor != authorname:
                            logger.debug("Preferred authorname changed from [%s] to [%s]" % (authorname, newauthor))
                            authorname = newauthor
                        if new:
                            authcount += 1

                    bookmatch = finditem(item, authorname, library=library)
                    result = ''
                    imported = ''
                    if bookmatch:
                        authorname = bookmatch['AuthorName']
                        bookname = bookmatch['BookName']
                        bookid = bookmatch['BookID']
                        if library == 'eBook':
                            bookstatus = bookmatch['Status']
                        else:
                            bookstatus = bookmatch['AudioStatus']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            existing += 1
                            logger.info('Found %s %s by %s, already marked as "%s"' %
                                        (library, bookname, authorname, bookstatus))
                        else:  # skipped/ignored
                            logger.info('Found %s %s by %s, marking as "Wanted"' % (library, bookname, authorname))
                            controlValueDict = {"BookID": bookid}
                            if library == 'eBook':
                                newValueDict = {"Status": "Wanted"}
                            else:
                                newValueDict = {"AudioStatus": "Wanted"}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            bookcount += 1
                    else:
                        searchterm = "%s <ll> %s" % (title, authorname)
                        results = search_for(unaccented(searchterm))
                        if results:
                            result = results[0]
                            if result['author_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO'] \
                                    and result['book_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO']:
                                bookmatch = True
                        if not bookmatch:  # no match on full searchterm, try splitting out subtitle
                            newtitle, _ = split_title(authorname, title)
                            if newtitle != title:
                                title = newtitle
                                searchterm = "%s <ll> %s" % (title, authorname)
                                results = search_for(unaccented(searchterm))
                                if results:
                                    result = results[0]
                                    if result['author_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO'] \
                                            and result['book_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO']:
                                        bookmatch = True
                        if bookmatch:
                            logger.info("Found (%s%% %s%%) %s: %s for %s: %s" %
                                        (result['author_fuzz'], result['book_fuzz'],
                                         result['authorname'], result['bookname'],
                                         authorname, title))
                            if library == 'eBook':
                                import_book(result['bookid'], ebook="Wanted", wait=True)
                            else:
                                import_book(result['bookid'], audio="Wanted", wait=True)
                            imported = myDB.match('select * from books where BookID=?', (result['bookid'],))
                            if imported:
                                bookcount += 1
                            else:
                                bookmatch = False

                    if not bookmatch:
                        msg = "Skipping book %s by %s" % (title, authorname)
                        if not result:
                            msg += ', No results found'
                            logger.warn(msg)
                        elif not imported:
                            msg += ', Failed to import %s' % result['bookid']
                            logger.warn(msg)
                        else:
                            msg += ', No match found'
                            logger.warn(msg)
                            msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                        result['authorname'], result['bookname'])
                            logger.warn(msg)
                        skipcount += 1

            msg = "Found %i %s%s in csv file, %i already existing or wanted" % (total, library,
                                                                                plural(total), existing)
            logger.info(msg)
            msg = "Added %i new author%s, marked %i %s%s as 'Wanted', %i %s%s not found" % \
                  (authcount, plural(authcount), bookcount, library, plural(bookcount),
                   skipcount, plural(skipcount), library)
            logger.info(msg)
            if lazylibrarian.CONFIG['DELETE_CSV']:
                if skipcount == 0:
                    logger.info("Deleting %s on successful completion" % csvFile)
                    try:
                        os.remove(csvFile)
                    except OSError as why:
                        logger.warn('Unable to delete %s: %s' % (csvFile, why.strerror))
                else:
                    logger.warn("Not deleting %s as not all books found" % csvFile)
                    if os.path.isdir(csvFile + '.fail'):
                        try:
                            shutil.rmtree(csvFile + '.fail')
                        except Exception as why:
                            logger.warn("Unable to remove %s, %s %s" % (csvFile + '.fail',
                                                                        type(why).__name__, str(why)))
                    try:
                        _ = safe_move(csvFile, csvFile + '.fail')
                    except Exception as e:
                        logger.error("Unable to rename %s, %s %s" %
                                     (csvFile, type(e).__name__, str(e)))
                        if not os.access(csvFile, os.R_OK):
                            logger.error("%s is not readable" % csvFile)
                        if not os.access(csvFile, os.W_OK):
                            logger.error("%s is not writeable" % csvFile)
                        parent = os.path.dirname(csvFile)
                        try:
                            with open(os.path.join(parent, 'll_temp'), 'w') as f:
                                f.write('test')
                            os.remove(os.path.join(parent, 'll_temp'))
                        except Exception as why:
                            logger.error("Directory %s is not writeable: %s" % (parent, why))
            return msg
    except Exception:
        msg = 'Unhandled exception in importCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
Ejemplo n.º 9
0
def addAuthorNameToDB(author=None, refresh=False, addbooks=True):
    # get authors name in a consistent format, look them up in the database
    # if not in database, try to import them.
    # return authorname,new where new=False if author already in db, new=True if added
    # authorname returned is our preferred name, or empty string if not found or unable to add

    new = False
    if not author or len(author) < 2:
        logger.debug('Invalid Author Name [%s]' % author)
        return "", "", False

    author = formatAuthorName(author)
    myDB = database.DBConnection()
    # Check if the author exists, and import the author if not,
    check_exist_author = myDB.match(
        'SELECT AuthorID FROM authors where AuthorName=?',
        (author.replace('"', '""'), ))

    if not check_exist_author and lazylibrarian.CONFIG['ADD_AUTHOR']:
        logger.debug('Author %s not found in database, trying to add' % author)
        # no match for supplied author, but we're allowed to add new ones
        GR = GoodReads(author)
        try:
            author_gr = GR.find_author_id()
        except Exception as e:
            logger.warn("%s finding author id for [%s] %s" %
                        (type(e).__name__, author, str(e)))
            return "", "", False

        # only try to add if GR data matches found author data
        if author_gr:
            authorname = author_gr['authorname']
            # authorid = author_gr['authorid']
            # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
            match_auth = author.replace('.', ' ')
            match_auth = ' '.join(match_auth.split())

            match_name = authorname.replace('.', ' ')
            match_name = ' '.join(match_name.split())

            match_name = unaccented(match_name)
            match_auth = unaccented(match_auth)

            # allow a degree of fuzziness to cater for different accented character handling.
            # some author names have accents,
            # filename may have the accented or un-accented version of the character
            # The currently non-configurable value of fuzziness might need to go in config
            # We stored GoodReads unmodified author name in
            # author_gr, so store in LL db under that
            # fuzz.ratio doesn't lowercase for us
            match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
            if match_fuzz < 90:
                logger.debug(
                    "Failed to match author [%s] to authorname [%s] fuzz [%d]"
                    % (author, match_name, match_fuzz))

            # To save loading hundreds of books by unknown authors at GR or GB, ignore unknown
            if (author != "Unknown") and (match_fuzz >= 90):
                # use "intact" name for author that we stored in
                # GR author_dict, not one of the various mangled versions
                # otherwise the books appear to be by a different author!
                author = author_gr['authorname']
                authorid = author_gr['authorid']
                # this new authorname may already be in the
                # database, so check again
                check_exist_author = myDB.match(
                    'SELECT AuthorID FROM authors where AuthorID=?',
                    (authorid, ))
                if check_exist_author:
                    logger.debug('Found goodreads authorname %s in database' %
                                 author)
                else:
                    logger.info("Adding new author [%s]" % author)
                    try:
                        addAuthorToDB(authorname=author,
                                      refresh=refresh,
                                      authorid=authorid,
                                      addbooks=addbooks)
                        check_exist_author = myDB.match(
                            'SELECT AuthorID FROM authors where AuthorID=?',
                            (authorid, ))
                        if check_exist_author:
                            new = True
                    except Exception as e:
                        logger.debug('Failed to add author [%s] to db: %s %s' %
                                     (author, type(e).__name__, str(e)))
    # check author exists in db, either newly loaded or already there
    if not check_exist_author:
        logger.debug("Failed to match author [%s] in database" % author)
        return "", "", False
    if isinstance(author, str) and hasattr(author, "decode"):
        author = author.decode(lazylibrarian.SYS_ENCODING)
    return author, check_exist_author['AuthorID'], new
Ejemplo n.º 10
0
def GEN(book=None, prov=None, test=False):
    errmsg = ''
    provider = "libgen.io"
    if prov is None:
        prov = 'GEN'
    host = lazylibrarian.CONFIG[prov + '_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    search = lazylibrarian.CONFIG[prov + '_SEARCH']
    if not search or not search.endswith('.php'):
        search = 'search.php'
    if 'index.php' not in search and 'search.php' not in search:
        search = 'search.php'
    if search[0] == '/':
        search = search[1:]

    sterm = makeUnicode(book['searchterm'])

    page = 1
    results = []
    next_page = True

    while next_page:
        if 'index.php' in search:
            params = {
                "s": book['searchterm'],
                "f_lang": "All",
                "f_columns": 0,
                "f_ext": "All"
            }
        else:
            params = {
                "view": "simple",
                "open": 0,
                "phrase": 0,
                "column": "def",
                "res": 100,
                "req": book['searchterm']
            }

        if page > 1:
            params['page'] = page

        providerurl = url_fix(host + "/%s" % search)
        searchURL = providerurl + "?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" %
                             (provider, sterm))
                success = True
            elif '111' in result:
                # looks like libgen has ip based access limits
                logger.error(
                    'Access forbidden. Please wait a while before trying %s again.'
                    % provider)
                errmsg = result
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching page data from %s: %s' %
                             (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' %
                         (searchURL, provider))
            try:
                soup = BeautifulSoup(result, 'html5lib')
                rows = []

                try:
                    table = soup.find_all('table')[
                        -1]  # un-named table, last one in page
                    if table:
                        rows = table.find_all('tr')
                except IndexError:  # no results table in result page
                    rows = []

                if len(rows) > 1:  # skip table headers
                    rows = rows[1:]

                for row in rows:
                    author = ''
                    title = ''
                    size = ''
                    extn = ''
                    link = ''
                    td = row.find_all('td')

                    if 'index.php' in search and len(td) > 3:
                        try:
                            author = formatAuthorName(td[0].text)
                            title = td[2].text
                            newsoup = BeautifulSoup(str(td[4]), 'html5lib')
                            data = newsoup.find('a')
                            if data:
                                link = data.get('href')
                                extn = td[4].text.split('(')[0].strip()
                                size = td[4].text.split('(')[1].split(')')[0]
                                size = size.upper()
                        except IndexError as e:
                            logger.debug(
                                'Error parsing libgen index.php results: %s' %
                                str(e))

                    elif 'search.php' in search and len(td) > 8:
                        try:
                            author = formatAuthorName(td[1].text)
                            title = td[2].text
                            size = td[7].text.upper()
                            extn = td[8].text
                            link = ''
                            newsoup = BeautifulSoup(str(td[2]), 'html5lib')
                            for res in newsoup.find_all('a'):
                                output = res.get('href')
                                if 'md5' in output:
                                    link = output
                                    break
                        except IndexError as e:
                            logger.debug(
                                'Error parsing libgen search.php results; %s' %
                                str(e))

                    size = size_in_bytes(size)

                    if link and title:
                        if author:
                            title = author.strip() + ' ' + title.strip()
                        if extn:
                            title = title + '.' + extn

                        if link.startswith('http'):
                            url = redirect_url(host, link)
                        else:
                            if "/index.php?" in link:
                                link = 'md5' + link.split('md5')[1]

                            if "/ads.php?" in link:
                                url = url_fix(host + "/" + link)
                            else:
                                url = url_fix(host + "/ads.php?" + link)

                        bookresult, success = fetchURL(url)
                        if not success:
                            logger.debug(
                                'Error fetching link data from %s: %s' %
                                (provider, bookresult))
                            logger.debug(url)
                            url = None
                        else:
                            url = None
                            try:
                                new_soup = BeautifulSoup(
                                    bookresult, 'html5lib')
                                for link in new_soup.find_all('a'):
                                    output = link.get('href')
                                    if output:
                                        if output.startswith(
                                                'http'
                                        ) and '/get.php' in output:
                                            url = output
                                            break
                                        elif '/get.php' in output:
                                            url = '/get.php' + output.split(
                                                '/get.php')[1]
                                            break
                                        elif '/download/book' in output:
                                            url = '/download/book' + output.split(
                                                '/download/book')[1]
                                            break

                                if url and not url.startswith('http'):
                                    url = url_fix(host + url)
                                else:
                                    url = redirect_url(host, url)
                            except Exception as e:
                                logger.error(
                                    '%s parsing bookresult for %s: %s' %
                                    (type(e).__name__, link, str(e)))
                                url = None

                        if url:
                            results.append({
                                'bookid':
                                book['bookid'],
                                'tor_prov':
                                provider + '/' + search,
                                'tor_title':
                                title,
                                'tor_url':
                                url,
                                'tor_size':
                                str(size),
                                'tor_type':
                                'direct',
                                'priority':
                                lazylibrarian.CONFIG[prov + '_DLPRIORITY']
                            })
                            logger.debug('Found %s, Size %s' % (title, size))
                        next_page = True

            except Exception as e:
                logger.error("An error occurred in the %s parser: %s" %
                             (provider, str(e)))
                logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn(
                'Maximum results page search reached, still more results available'
            )
            next_page = False

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
Ejemplo n.º 11
0
def search_wishlist():
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            threading.currentThread().name = "SEARCHWISHLIST"

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverWishLists()
        new_books = 0
        if not wishproviders:
            logger.debug('No wishlists are set')
            scheduleJob(action='Stop', target='search_wishlist')
            return  # No point in continuing

        # for each item in resultlist, add to database if necessary, and mark as wanted
        logger.debug('Processing %s item%s in wishlists' %
                     (len(resultlist), plural(len(resultlist))))
        for book in resultlist:
            # we get rss_author, rss_title, maybe rss_isbn, rss_bookid (goodreads bookid)
            # we can just use bookid if goodreads, or try isbn and name matching on author/title if not
            # eg NYTimes wishlist
            if 'E' in book['types']:
                ebook_status = "Wanted"
            else:
                ebook_status = "Skipped"
            if 'A' in book['types']:
                audio_status = "Wanted"
            else:
                audio_status = "Skipped"
            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and book[
                    'rss_bookid']:
                cmd = 'select Status,AudioStatus,BookName,Requester,AudioRequester from books where bookid=?'
                bookmatch = myDB.match(cmd, (book['rss_bookid'], ))
                if bookmatch:
                    bookname = bookmatch['BookName']
                    if bookmatch['Status'] in ['Open', 'Wanted', 'Have']:
                        logger.info('Found book %s, already marked as "%s"' %
                                    (bookname, bookmatch['Status']))
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {
                                    "Requester":
                                    bookmatch["Requester"] + book["dispname"] +
                                    ' '
                                }
                                controlValueDict = {
                                    "BookID": book['rss_bookid']
                                }
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                        else:
                            newValueDict = {
                                "Requester": book["dispname"] + ' '
                            }
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                    elif ebook_status == "Wanted":  # skipped/ignored
                        logger.info('Found book %s, marking as "Wanted"' %
                                    bookname)
                        controlValueDict = {"BookID": book['rss_bookid']}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {
                                    "Requester":
                                    bookmatch["Requester"] + book["dispname"] +
                                    ' '
                                }
                                controlValueDict = {
                                    "BookID": book['rss_bookid']
                                }
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                        else:
                            newValueDict = {
                                "Requester": book["dispname"] + ' '
                            }
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                    if bookmatch['AudioStatus'] in ['Open', 'Wanted', 'Have']:
                        logger.info(
                            'Found audiobook %s, already marked as "%s"' %
                            (bookname, bookmatch['AudioStatus']))
                        if bookmatch[
                                "AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch[
                                    "AudioRequester"]:
                                newValueDict = {
                                    "AudioRequester":
                                    bookmatch["AudioRequester"] +
                                    book["dispname"] + ' '
                                }
                                controlValueDict = {
                                    "BookID": book['rss_bookid']
                                }
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                        else:
                            newValueDict = {
                                "AudioRequester": book["dispname"] + ' '
                            }
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                    elif audio_status == "Wanted":  # skipped/ignored
                        logger.info('Found audiobook %s, marking as "Wanted"' %
                                    bookname)
                        controlValueDict = {"BookID": book['rss_bookid']}
                        newValueDict = {"AudioStatus": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch[
                                "AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch[
                                    "AudioRequester"]:
                                newValueDict = {
                                    "AudioRequester":
                                    bookmatch["AudioRequester"] +
                                    book["dispname"] + ' '
                                }
                                controlValueDict = {
                                    "BookID": book['rss_bookid']
                                }
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                        else:
                            newValueDict = {
                                "AudioRequester": book["dispname"] + ' '
                            }
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                else:
                    import_book(book['rss_bookid'], ebook_status, audio_status)
                    new_books += 1
                    newValueDict = {"Requester": book["dispname"] + ' '}
                    controlValueDict = {"BookID": book['rss_bookid']}
                    myDB.upsert("books", newValueDict, controlValueDict)
                    newValueDict = {"AudioRequester": book["dispname"] + ' '}
                    controlValueDict = {"BookID": book['rss_bookid']}
                    myDB.upsert("books", newValueDict, controlValueDict)
            else:
                item = {}
                results = None
                item['Title'] = book['rss_title']
                if book['rss_bookid']:
                    item['BookID'] = book['rss_bookid']
                if book['rss_isbn']:
                    item['ISBN'] = book['rss_isbn']
                bookmatch = finditem(item, book['rss_author'])
                if bookmatch:  # it's already in the database
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    if bookmatch['Status'] in ['Open', 'Wanted', 'Have']:
                        logger.info(
                            'Found book %s by %s, already marked as "%s"' %
                            (bookname, authorname, bookmatch['Status']))
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {
                                    "Requester":
                                    bookmatch["Requester"] + book["dispname"] +
                                    ' '
                                }
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                        else:
                            newValueDict = {
                                "Requester": book["dispname"] + ' '
                            }
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                    elif ebook_status == 'Wanted':  # skipped/ignored
                        logger.info(
                            'Found book %s by %s, marking as "Wanted"' %
                            (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {
                                    "Requester":
                                    bookmatch["Requester"] + book["dispname"] +
                                    ' '
                                }
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                        else:
                            newValueDict = {
                                "Requester": book["dispname"] + ' '
                            }
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                    if bookmatch['AudioStatus'] in ['Open', 'Wanted', 'Have']:
                        logger.info(
                            'Found audiobook %s by %s, already marked as "%s"'
                            % (bookname, authorname, bookmatch['AudioStatus']))
                        if bookmatch[
                                "AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch[
                                    "AudioRequester"]:
                                newValueDict = {
                                    "AudioRequester":
                                    bookmatch["AudioRequester"] +
                                    book["dispname"] + ' '
                                }
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                        else:
                            newValueDict = {
                                "AudioRequester": book["dispname"] + ' '
                            }
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                    elif audio_status == 'Wanted':  # skipped/ignored
                        logger.info(
                            'Found audiobook %s by %s, marking as "Wanted"' %
                            (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"AudioStatus": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch[
                                "AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch[
                                    "AudioRequester"]:
                                newValueDict = {
                                    "AudioRequester":
                                    bookmatch["AudioRequester"] +
                                    book["dispname"] + ' '
                                }
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                        else:
                            newValueDict = {
                                "AudioRequester": book["dispname"] + ' '
                            }
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                else:  # not in database yet
                    if book['rss_isbn']:
                        results = search_for(book['rss_isbn'])
                    if results:
                        result = results[0]  # type: dict
                        if result['isbn_fuzz'] > check_int(
                                lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                            logger.info(
                                "Found (%s%%) %s: %s" %
                                (result['isbn_fuzz'], result['authorname'],
                                 result['bookname']))
                            import_book(result['bookid'], ebook_status,
                                        audio_status)
                            new_books += 1
                            newValueDict = {
                                "Requester": book["dispname"] + ' '
                            }
                            controlValueDict = {"BookID": result['bookid']}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                            newValueDict = {
                                "AudioRequester": book["dispname"] + ' '
                            }
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                            bookmatch = True
                    if not results:
                        searchterm = "%s <ll> %s" % (item['Title'],
                                                     formatAuthorName(
                                                         book['rss_author']))
                        results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]  # type: dict
                        if result['author_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90) \
                                and result['book_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                            logger.info(
                                "Found (%s%% %s%%) %s: %s" %
                                (result['author_fuzz'], result['book_fuzz'],
                                 result['authorname'], result['bookname']))
                            import_book(result['bookid'], ebook_status,
                                        audio_status)
                            new_books += 1
                            newValueDict = {
                                "Requester": book["dispname"] + ' '
                            }
                            controlValueDict = {"BookID": result['bookid']}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                            newValueDict = {
                                "AudioRequester": book["dispname"] + ' '
                            }
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                            bookmatch = True

                    if not bookmatch:
                        msg = "Skipping book %s by %s" % (item['Title'],
                                                          book['rss_author'])
                        if not results:
                            msg += ', No results returned'
                            logger.warn(msg)
                        else:
                            msg += ', No match found'
                            logger.warn(msg)
                            result = results[0]  # type: dict
                            msg = "Closest match (%s%% %s%%) %s: %s" % (
                                result['author_fuzz'], result['book_fuzz'],
                                result['authorname'], result['bookname'])
                        logger.warn(msg)
        if new_books:
            logger.info("Wishlist marked %s book%s as Wanted" %
                        (new_books, plural(new_books)))

    except Exception:
        logger.error('Unhandled exception in search_wishlist: %s' %
                     traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
Ejemplo n.º 12
0
def import_CSV(search_dir=None):
    """ Find a csv file in the search_dir and process all the books in it,
        adding authors to the database if not found
        and marking the books as "Wanted"
        Optionally delete the file on successful completion
    """
    # noinspection PyBroadException
    try:
        if not search_dir:
            msg = "Alternate Directory not configured"
            logger.warn(msg)
            return msg
        elif not os.path.isdir(search_dir):
            msg = "Alternate Directory [%s] not found" % search_dir
            logger.warn(msg)
            return msg

        csvFile = csv_file(search_dir)

        headers = None

        myDB = database.DBConnection()
        bookcount = 0
        authcount = 0
        skipcount = 0
        total = 0
        existing = 0

        if not csvFile:
            msg = "No CSV file found in %s" % search_dir
            logger.warn(msg)
            return msg
        else:
            logger.debug('Reading file %s' % csvFile)
            csvreader = reader(open(csvFile, 'rU'))
            for row in csvreader:
                if csvreader.line_num == 1:
                    # If we are on the first line, create the headers list from the first row
                    headers = row
                    if 'Author' not in headers or 'Title' not in headers:
                        msg = 'Invalid CSV file found %s' % csvFile
                        logger.warn(msg)
                        return msg
                else:
                    total += 1
                    item = dict(list(zip(headers, row)))
                    authorname = formatAuthorName(item['Author'])
                    title = makeUnicode(item['Title'])

                    authmatch = myDB.match(
                        'SELECT * FROM authors where AuthorName=?',
                        (authorname, ))

                    if authmatch:
                        logger.debug("CSV: Author %s found in database" %
                                     authorname)
                    else:
                        logger.debug("CSV: Author %s not found" % authorname)
                        newauthor, authorid, new = addAuthorNameToDB(
                            author=authorname,
                            addbooks=lazylibrarian.CONFIG['NEWAUTHOR_BOOKS'])
                        if len(newauthor) and newauthor != authorname:
                            logger.debug(
                                "Preferred authorname changed from [%s] to [%s]"
                                % (authorname, newauthor))
                            authorname = newauthor
                        if new:
                            authcount += 1

                    bookmatch = finditem(item, authorname)
                    result = ''
                    imported = ''
                    if bookmatch:
                        authorname = bookmatch['AuthorName']
                        bookname = bookmatch['BookName']
                        bookid = bookmatch['BookID']
                        bookstatus = bookmatch['Status']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            existing += 1
                            logger.info(
                                'Found book %s by %s, already marked as "%s"' %
                                (bookname, authorname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(
                                'Found book %s by %s, marking as "Wanted"' %
                                (bookname, authorname))
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                            bookcount += 1
                    else:
                        searchterm = "%s <ll> %s" % (title, authorname)
                        results = search_for(unaccented(searchterm))
                        if results:
                            result = results[0]
                            if result['author_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO'] \
                                    and result['book_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO']:
                                bookmatch = True
                        if not bookmatch:  # no match on full searchterm, try splitting out subtitle
                            newtitle, _ = split_title(authorname, title)
                            if newtitle != title:
                                title = newtitle
                                searchterm = "%s <ll> %s" % (title, authorname)
                                results = search_for(unaccented(searchterm))
                                if results:
                                    result = results[0]
                                    if result['author_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO'] \
                                            and result['book_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO']:
                                        bookmatch = True
                        if bookmatch:
                            logger.info(
                                "Found (%s%% %s%%) %s: %s for %s: %s" %
                                (result['author_fuzz'], result['book_fuzz'],
                                 result['authorname'], result['bookname'],
                                 authorname, title))
                            import_book(result['bookid'], wait=True)
                            imported = myDB.match(
                                'select * from books where BookID=?',
                                (result['bookid'], ))
                            if imported:
                                bookcount += 1
                            else:
                                bookmatch = False

                    if not bookmatch:
                        msg = "Skipping book %s by %s" % (title, authorname)
                        if not result:
                            msg += ', No results found'
                            logger.warn(msg)
                        elif not imported:
                            msg += ', Failed to import %s' % result['bookid']
                            logger.warn(msg)
                        else:
                            msg += ', No match found'
                            logger.warn(msg)
                            msg = "Closest match (%s%% %s%%) %s: %s" % (
                                result['author_fuzz'], result['book_fuzz'],
                                result['authorname'], result['bookname'])
                            logger.warn(msg)
                        skipcount += 1

            msg = "Found %i book%s in csv file, %i already existing or wanted" % (
                total, plural(total), existing)
            logger.info(msg)
            msg = "Added %i new author%s, marked %i book%s as 'Wanted', %i book%s not found" % \
                  (authcount, plural(authcount), bookcount, plural(bookcount), skipcount, plural(skipcount))
            logger.info(msg)
            if lazylibrarian.CONFIG['DELETE_CSV']:
                if skipcount == 0:
                    logger.info("Deleting %s on successful completion" %
                                csvFile)
                    try:
                        os.remove(csvFile)
                    except OSError as why:
                        logger.warn('Unable to delete %s: %s' %
                                    (csvFile, why.strerror))
                else:
                    logger.warn("Not deleting %s as not all books found" %
                                csvFile)
            return msg
    except Exception:
        msg = 'Unhandled exception in importCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
Ejemplo n.º 13
0
def addAuthorNameToDB(author=None, refresh=False, addbooks=True):
    # get authors name in a consistent format, look them up in the database
    # if not in database, try to import them.
    # return authorname,authorid,new where new=False if author already in db, new=True if added
    # authorname returned is our preferred name, or empty string if not found or unable to add

    new = False
    if not author or len(author) < 2:
        logger.debug('Invalid Author Name [%s]' % author)
        return "", "", False

    author = formatAuthorName(author)
    myDB = database.DBConnection()

    # Check if the author exists, and import the author if not,
    check_exist_author = myDB.match('SELECT AuthorID FROM authors where AuthorName=?', (author,))

    # If no exact match, look for a close fuzzy match to handle misspellings, accents
    if not check_exist_author:
        match_name = author.lower()
        res = myDB.action('select AuthorID,AuthorName from authors')
        for item in res:
            aname = item['AuthorName']
            if aname:
                match_fuzz = fuzz.ratio(aname.lower(), match_name)
                if match_fuzz >= 95:
                    logger.debug("Fuzzy match [%s] %s%% for [%s]" % (item['AuthorName'], match_fuzz, author))
                    check_exist_author = item
                    author = item['AuthorName']
                    break

    if not check_exist_author and lazylibrarian.CONFIG['ADD_AUTHOR']:
        logger.debug('Author %s not found in database, trying to add' % author)
        # no match for supplied author, but we're allowed to add new ones
        GR = GoodReads(author)
        try:
            author_gr = GR.find_author_id()
        except Exception as e:
            logger.warn("%s finding author id for [%s] %s" % (type(e).__name__, author, str(e)))
            return "", "", False

        # only try to add if GR data matches found author data
        if author_gr:
            authorname = author_gr['authorname']
            # authorid = author_gr['authorid']
            # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
            match_auth = author.replace('.', ' ')
            match_auth = ' '.join(match_auth.split())

            match_name = authorname.replace('.', ' ')
            match_name = ' '.join(match_name.split())

            match_name = unaccented(match_name)
            match_auth = unaccented(match_auth)

            # allow a degree of fuzziness to cater for different accented character handling.
            # some author names have accents,
            # filename may have the accented or un-accented version of the character
            # The currently non-configurable value of fuzziness might need to go in config
            # We stored GoodReads unmodified author name in
            # author_gr, so store in LL db under that
            # fuzz.ratio doesn't lowercase for us
            match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
            if match_fuzz < 90:
                logger.debug("Failed to match author [%s] to authorname [%s] fuzz [%d]" %
                             (author, match_name, match_fuzz))

            # To save loading hundreds of books by unknown authors at GR or GB, ignore unknown
            if (author != "Unknown") and (match_fuzz >= 90):
                # use "intact" name for author that we stored in
                # GR author_dict, not one of the various mangled versions
                # otherwise the books appear to be by a different author!
                author = author_gr['authorname']
                authorid = author_gr['authorid']
                # this new authorname may already be in the
                # database, so check again
                check_exist_author = myDB.match('SELECT AuthorID FROM authors where AuthorID=?', (authorid,))
                if check_exist_author:
                    logger.debug('Found goodreads authorname %s in database' % author)
                else:
                    logger.info("Adding new author [%s]" % author)
                    try:
                        addAuthorToDB(authorname=author, refresh=refresh, authorid=authorid, addbooks=addbooks)
                        check_exist_author = myDB.match('SELECT AuthorID FROM authors where AuthorID=?', (authorid,))
                        if check_exist_author:
                            new = True
                    except Exception as e:
                        logger.error('Failed to add author [%s] to db: %s %s' % (author, type(e).__name__, str(e)))
    # check author exists in db, either newly loaded or already there
    if not check_exist_author:
        logger.debug("Failed to match author [%s] in database" % author)
        return "", "", False
    author = makeUnicode(author)
    return author, check_exist_author['AuthorID'], new
Ejemplo n.º 14
0
def GEN(book=None, prov=None):
    errmsg = ''
    provider = "libgen.io"
    if prov is None:
        prov = 'GEN'
    host = lazylibrarian.CONFIG[prov + '_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    search = lazylibrarian.CONFIG[prov + '_SEARCH']
    if not search or not search.endswith('.php'):
        search = 'search.php'
    if 'index.php' not in search and 'search.php' not in search:
        search = 'search.php'
    if search[0] == '/':
        search = search[1:]

    page = 1
    results = []
    next_page = True

    while next_page:
        if 'index.php' in search:
            params = {
                "s": book['searchterm'],
                "f_lang": "All",
                "f_columns": 0,
                "f_ext": "All"
            }
        else:
            params = {
                "view": "simple",
                "open": 0,
                "phrase": 0,
                "column": "def",
                "res": 100,
                "req": book['searchterm']
            }

        if page > 1:
            params['page'] = page

        providerurl = url_fix(host + "/%s" % search)
        searchURL = providerurl + "?%s" % urllib.urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug(u"No results found from %s for %s" %
                             (provider, book['searchterm']))
            elif '111' in result:
                # looks like libgen has ip based access limits
                logger.error(
                    'Access forbidden. Please wait a while before trying %s again.'
                    % provider)
                errmsg = result
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching page data from %s: %s' %
                             (provider, result))
                errmsg = result
            result = False

        if result:
            logger.debug(u'Parsing results from <a href="%s">%s</a>' %
                         (searchURL, provider))
            try:
                soup = BeautifulSoup(result)
                try:
                    table = soup.findAll('table')[2]  # un-named table
                    if table:
                        rows = table.findAll('tr')
                except IndexError:  # no results table in result page
                    rows = []

                if 'search.php' in search and len(rows) > 1:
                    rows = rows[1:]

                for row in rows:
                    author = ''
                    title = ''
                    size = ''
                    extn = ''
                    link = ''
                    td = row.findAll('td')
                    if 'index.php' in search and len(td) > 3:
                        try:
                            res = str(
                                BeautifulStoneSoup(
                                    td[0].text,
                                    convertEntities=BeautifulStoneSoup.
                                    HTML_ENTITIES))
                            author = formatAuthorName(res)
                            title = str(
                                BeautifulStoneSoup(
                                    td[2].text,
                                    convertEntities=BeautifulStoneSoup.
                                    HTML_ENTITIES))
                            temp = str(td[4])
                            temp = temp.split('onmouseout')[1]
                            extn = temp.split('">')[1].split('(')[0]
                            size = temp.split('">')[1].split('(')[1].split(
                                ')')[0]
                            size = size.upper()
                            link = temp.split('href=')[1].split('"')[1]
                        except IndexError as e:
                            logger.debug(
                                'Error parsing libgen index.php results: %s' %
                                str(e))

                    elif 'search.php' in search and len(td) > 8:
                        try:
                            res = str(
                                BeautifulStoneSoup(
                                    td[1].text,
                                    convertEntities=BeautifulStoneSoup.
                                    HTML_ENTITIES))
                            author = formatAuthorName(res)
                            title = str(
                                td[2]).split('>')[2].split('<')[0].strip()
                            title = str(
                                BeautifulStoneSoup(
                                    title,
                                    convertEntities=BeautifulStoneSoup.
                                    HTML_ENTITIES))
                            link = str(td[2]).split('href="')[1].split(
                                '?')[1].split('"')[0]
                            size = unaccented(td[7].text).upper()
                            extn = td[8].text
                        except IndexError as e:
                            logger.debug(
                                'Error parsing libgen search.php results; %s' %
                                str(e))

                    if not size:
                        size = 0
                    else:
                        try:
                            mult = 1
                            if 'K' in size:
                                size = size.split('K')[0]
                                mult = 1024
                            elif 'M' in size:
                                size = size.split('M')[0]
                                mult = 1024 * 1024
                            elif 'G' in size:
                                size = size.split('G')[0]
                                mult = 1024 * 1024 * 1024
                            size = int(float(size) * mult)
                        except (ValueError, IndexError):
                            size = 0

                    if link and title:
                        if author:
                            title = author.strip() + ' ' + title.strip()
                        if extn:
                            title = title + '.' + extn

                        if not link.startswith('http'):
                            if "/ads.php?" in link:
                                url = url_fix(host + link)
                            else:
                                url = url_fix(host + "/ads.php?" + link)
                        else:
                            url = redirect_url(host, link)

                        bookresult, success = fetchURL(url)
                        if not success:
                            # may return 404 if no results, not really an error
                            if '404' in bookresult:
                                logger.debug(
                                    u"No results found from %s for %s" %
                                    (provider, book['searchterm']))
                            else:
                                logger.debug(url)
                                logger.debug(
                                    'Error fetching link data from %s: %s' %
                                    (provider, bookresult))
                                errmsg = bookresult
                            bookresult = False

                        if bookresult:
                            url = None
                            try:
                                new_soup = BeautifulSoup(bookresult)
                                for link in new_soup.findAll('a'):
                                    output = link.get('href')
                                    if output:
                                        if output.startswith(
                                                'http'
                                        ) and '/get.php' in output:
                                            url = output
                                            break
                                        elif '/get.php' in output:
                                            url = '/get.php' + output.split(
                                                '/get.php')[1]
                                            break
                                        elif '/download/book' in output:
                                            url = '/download/book' + output.split(
                                                '/download/book')[1]
                                            break

                                if url and not url.startswith('http'):
                                    url = url_fix(host + url)
                                else:
                                    url = redirect_url(host, url)
                            except Exception as e:
                                logger.debug(
                                    'Error parsing bookresult for %s: %s' %
                                    (link, str(e)))
                                url = None

                        if url:
                            results.append({
                                'bookid':
                                book['bookid'],
                                'tor_prov':
                                provider + '/' + search,
                                'tor_title':
                                title,
                                'tor_url':
                                url,
                                'tor_size':
                                str(size),
                                'tor_type':
                                'direct',
                                'priority':
                                lazylibrarian.CONFIG[prov + '_DLPRIORITY']
                            })
                            logger.debug('Found %s, Size %s' % (title, size))
                        next_page = True

            except Exception as e:
                logger.error(u"An error occurred in the %s parser: %s" %
                             (provider, str(e)))
                logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn(
                'Maximum results page search reached, still more results available'
            )
            next_page = False

    logger.debug(
        u"Found %i result%s from %s for %s" %
        (len(results), plural(len(results)), provider, book['searchterm']))
    return results, errmsg
Ejemplo n.º 15
0
def GEN(book=None, prov=None, test=False):
    errmsg = ''
    provider = "libgen.io"
    if not prov:
        prov = 'GEN'
    host = lazylibrarian.CONFIG[prov + '_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    search = lazylibrarian.CONFIG[prov + '_SEARCH']
    if not search or not search.endswith('.php'):
        search = 'search.php'
    if 'index.php' not in search and 'search.php' not in search:
        search = 'search.php'
    if search[0] == '/':
        search = search[1:]

    sterm = makeUnicode(book['searchterm'])

    page = 1
    results = []
    next_page = True

    while next_page:
        if 'index.php' in search:
            params = {
                "s": book['searchterm'],
                "f_lang": "All",
                "f_columns": 0,
                "f_ext": "All"
            }
        else:
            params = {
                "view": "simple",
                "open": 0,
                "phrase": 0,
                "column": "def",
                "res": 100,
                "req": book['searchterm']
            }

        if page > 1:
            params['page'] = page

        providerurl = url_fix(host + "/%s" % search)
        searchURL = providerurl + "?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            elif '111' in result:
                # looks like libgen has ip based access limits
                logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                errmsg = result
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching page data from %s: %s' % (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            try:
                soup = BeautifulSoup(result, 'html5lib')
                rows = []

                try:
                    table = soup.find_all('table', rules='rows')[-1]  # the last table with rules=rows
                    if table:
                        rows = table.find_all('tr')

                except IndexError:  # no results table in result page
                    rows = []

                if len(rows) > 1:  # skip table headers
                    rows = rows[1:]

                for row in rows:
                    author = ''
                    title = ''
                    size = ''
                    extn = ''
                    link = ''
                    td = row.find_all('td')

                    if 'index.php' in search and len(td) > 3:
                        # Foreign fiction
                        try:
                            author = formatAuthorName(td[0].text)
                            title = td[2].text
                            newsoup = BeautifulSoup(str(td[4]), 'html5lib')
                            data = newsoup.find('a')
                            if data:
                                link = data.get('href')
                                extn = td[4].text.split('(')[0].strip()
                                size = td[4].text.split('(')[1].split(')')[0]
                                size = size.upper()
                        except IndexError as e:
                            logger.debug('Error parsing libgen index.php results: %s' % str(e))

                    elif 'search.php' in search and len(td) > 8:
                        # Non-fiction
                        try:
                            author = formatAuthorName(td[1].text)
                            title = td[2].text
                            size = td[7].text.upper()
                            extn = td[8].text
                            link = ''
                            newsoup = BeautifulSoup(str(td[2]), 'html5lib')
                            for res in newsoup.find_all('a'):
                                output = res.get('href')
                                if 'md5' in output:
                                    link = output
                                    break
                        except IndexError as e:
                            logger.debug('Error parsing libgen search.php results; %s' % str(e))

                    size = size_in_bytes(size)

                    if link and title:
                        if author:
                            title = author.strip() + ' ' + title.strip()
                        if extn:
                            title = title + '.' + extn

                        if link.startswith('http'):
                            url = redirect_url(host, link)
                        else:
                            if "/index.php?" in link:
                                link = 'md5' + link.split('md5')[1]

                            if "/ads.php?" in link:
                                url = url_fix(host + "/" + link)
                            else:
                                url = url_fix(host + "/ads.php?" + link)

                        bookresult, success = fetchURL(url)
                        if not success:
                            logger.debug('Error fetching link data from %s: %s' % (provider, bookresult))
                            logger.debug(url)
                            url = None
                        else:
                            url = None
                            try:
                                new_soup = BeautifulSoup(bookresult, 'html5lib')
                                for link in new_soup.find_all('a'):
                                    output = link.get('href')
                                    if output:
                                        if output.startswith('http') and '/get.php' in output:
                                            url = output
                                            break
                                        elif '/get.php' in output:
                                            url = '/get.php' + output.split('/get.php')[1]
                                            break
                                        elif '/download/book' in output:
                                            url = '/download/book' + output.split('/download/book')[1]
                                            break

                                if url and not url.startswith('http'):
                                    url = url_fix(host + url)
                                else:
                                    url = redirect_url(host, url)
                            except Exception as e:
                                logger.error('%s parsing bookresult for %s: %s' % (type(e).__name__, link, str(e)))
                                url = None

                        if url:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider + '/' + search,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'direct',
                                'priority': lazylibrarian.CONFIG[prov + '_DLPRIORITY']
                            })
                            logger.debug('Found %s, Size %s' % (title, size))
                        next_page = True

            except Exception as e:
                logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
Ejemplo n.º 16
0
def import_CSV(search_dir=None):
    """ Find a csv file in the search_dir and process all the books in it,
        adding authors to the database if not found
        and marking the books as "Wanted"
    """
    try:
        if not search_dir:
            msg = "Alternate Directory not configured"
            logger.warn(msg)
            return msg
        elif not os.path.isdir(search_dir):
            msg = "Alternate Directory [%s] not found" % search_dir
            logger.warn(msg)
            return msg

        csvFile = csv_file(search_dir)

        headers = None
        content = {}

        if not csvFile:
            msg = "No CSV file found in %s" % search_dir
            logger.warn(msg)
            return msg
        else:
            logger.debug(u'Reading file %s' % csvFile)
            reader = csv.reader(open(csvFile))
            for row in reader:
                if reader.line_num == 1:
                    # If we are on the first line, create the headers list from the first row
                    headers = row
                else:
                    # Otherwise, the key in the content dictionary is the first item in the
                    # row and we can create the sub-dictionary by using the zip() function.
                    # we include the key in the dictionary as our exported csv files use
                    # bookid as the key
                    content[row[0]] = dict(zip(headers, row))

            # We can now get to the content by using the resulting dictionary, so to see
            # the list of lines, we can do: print content.keys()  to get a list of keys
            # To see the list of fields available for each book:  print headers

            if 'Author' not in headers or 'Title' not in headers:
                msg = 'Invalid CSV file found %s' % csvFile
                logger.warn(msg)
                return msg

            myDB = database.DBConnection()
            bookcount = 0
            authcount = 0
            skipcount = 0
            logger.debug(u"CSV: Found %s book%s in csv file" % (len(content.keys()), plural(len(content.keys()))))
            for item in content.keys():
                authorname = content[item]['Author']
                if isinstance(authorname, str) and hasattr(authorname, "decode"):
                    authorname = authorname.decode(lazylibrarian.SYS_ENCODING)
                authorname = formatAuthorName(authorname)
                title = content[item]['Title']
                if isinstance(title, str) and hasattr(title, "decode"):
                    title = title.decode(lazylibrarian.SYS_ENCODING)

                authmatch = myDB.match('SELECT * FROM authors where AuthorName=?', (authorname,))

                if authmatch:
                    logger.debug(u"CSV: Author %s found in database" % authorname)
                else:
                    logger.debug(u"CSV: Author %s not found" % authorname)
                    newauthor, authorid, new = addAuthorNameToDB(author=authorname,
                                                                 addbooks=lazylibrarian.CONFIG['NEWAUTHOR_BOOKS'])
                    if len(newauthor) and newauthor != authorname:
                        logger.debug("Preferred authorname changed from [%s] to [%s]" % (authorname, newauthor))
                        authorname = newauthor
                    if new:
                        authcount += 1

                bookmatch = finditem(content[item], authorname, headers)
                result = ''
                if bookmatch:
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    bookstatus = bookmatch['Status']
                    if bookstatus in ['Open', 'Wanted', 'Have']:
                        logger.info(u'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus))
                    else:  # skipped/ignored
                        logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        bookcount += 1
                else:
                    searchterm = "%s <ll> %s" % (title, authorname)
                    results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]
                        if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                                and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                            logger.info("Found (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                      result['authorname'], result['bookname']))
                            import_book(result['bookid'])
                            bookcount += 1
                            bookmatch = True

                if not bookmatch:
                    msg = "Skipping book %s by %s" % (title, authorname)
                    if not result:
                        msg += ', No results returned'
                        logger.warn(msg)
                    else:
                        msg += ', No match found'
                        logger.warn(msg)
                        msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                    result['authorname'], result['bookname'])
                        logger.warn(msg)
                    skipcount += 1
            msg = "Added %i new author%s, marked %i book%s as 'Wanted', %i book%s not found" % \
                  (authcount, plural(authcount), bookcount, plural(bookcount), skipcount, plural(skipcount))
            logger.info(msg)
            return msg
    except Exception:
        msg = 'Unhandled exception in importCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
Ejemplo n.º 17
0
def search_wishlist():
    if not (lazylibrarian.USE_RSS()):
        logger.warn('RSS search is disabled')
        scheduleJob(action='Stop', target='search_wishlist')
        return
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            threading.currentThread().name = "SEARCHWISHLIST"

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverWishLists()
        new_books = 0
        if not wishproviders:
            logger.debug('No wishlists are set')
            scheduleJob(action='Stop', target='search_wishlist')
            return  # No point in continuing

        # for each item in resultlist, add to database if necessary, and mark as wanted
        logger.debug('Processing %s item%s in wishlists' % (len(resultlist), plural(len(resultlist))))
        for book in resultlist:
            # we get rss_author, rss_title, maybe rss_isbn, rss_bookid (goodreads bookid)
            # we can just use bookid if goodreads, or try isbn and name matching on author/title if googlebooks
            # not sure if anyone would use a goodreads wishlist if not using goodreads interface...
            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and book['rss_bookid']:
                bookmatch = myDB.match('select Status,BookName from books where bookid=?', (book['rss_bookid'],))
                if bookmatch:
                    bookstatus = bookmatch['Status']
                    bookname = bookmatch['BookName']
                    if bookstatus in ['Open', 'Wanted', 'Have']:
                        logger.info('Found book %s, already marked as "%s"' % (bookname, bookstatus))
                    else:  # skipped/ignored
                        logger.info('Found book %s, marking as "Wanted"' % bookname)
                        controlValueDict = {"BookID": book['rss_bookid']}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                else:
                    import_book(book['rss_bookid'])
                    new_books += 1
            else:
                item = {}
                results = None
                item['Title'] = book['rss_title']
                if book['rss_bookid']:
                    item['BookID'] = book['rss_bookid']
                if book['rss_isbn']:
                    item['ISBN'] = book['rss_isbn']
                bookmatch = finditem(item, book['rss_author'])
                if bookmatch:  # it's already in the database
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    bookstatus = bookmatch['Status']
                    if bookstatus in ['Open', 'Wanted', 'Have']:
                        logger.info(
                            'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus))
                    else:  # skipped/ignored
                        logger.info('Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                else:  # not in database yet
                    if book['rss_isbn']:
                        results = search_for(book['rss_isbn'])
                    if results:
                        result = results[0]  # type: dict
                        if result['isbn_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                            logger.info("Found (%s%%) %s: %s" %
                                        (result['isbn_fuzz'], result['authorname'], result['bookname']))
                            import_book(result['bookid'])
                            new_books += 1
                            bookmatch = True
                    if not results:
                        searchterm = "%s <ll> %s" % (item['Title'], formatAuthorName(book['rss_author']))
                        results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]  # type: dict
                        if result['author_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90) \
                                and result['book_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                            logger.info("Found (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                      result['authorname'], result['bookname']))
                            import_book(result['bookid'])
                            new_books += 1
                            bookmatch = True

                    if not bookmatch:
                        msg = "Skipping book %s by %s" % (item['Title'], book['rss_author'])
                        if not results:
                            msg += ', No results returned'
                            logger.warn(msg)
                        else:
                            msg += ', No match found'
                            logger.warn(msg)
                            result = results[0]  # type: dict
                            msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                        result['authorname'], result['bookname'])
                        logger.warn(msg)
        if new_books:
            logger.info("Wishlist marked %s book%s as Wanted" % (new_books, plural(new_books)))

    except Exception:
        logger.error('Unhandled exception in search_wishlist: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"