Example #1
def setAllBookSeries():
    """ Try to set series details for all books """
    myDB = database.DBConnection()
    books = myDB.select('select BookID,WorkID,BookName from books where Manual is not "1"')
    counter = 0
    if books:
        logger.info('Checking series for %s book%s' % (len(books), plural(len(books))))
        for book in books:
            if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
                workid = book['WorkID']
                if not workid:
                    logger.debug("No workid for book %s: %s" % (book['BookID'], book['BookName']))
            else:
                workid = book['BookID']
                if not workid:
                    logger.debug("No bookid for book: %s" % book['BookName'])
            if workid:
                serieslist = getWorkSeries(workid)
                if serieslist:
                    counter += 1
                    setSeries(serieslist, book['BookID'])
    deleteEmptySeries()
    msg = 'Updated %s book%s' % (counter, plural(counter))
    logger.info('Series check complete: ' + msg)
    return msg
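Every example in this listing builds its log messages with a plural() helper that isn't included in the snippets. A minimal sketch of the behaviour the format strings imply, assuming the helper simply appends an "s" for counts other than one (the real LazyLibrarian helper may differ):

def plural(count):
    # Return 's' when the count isn't exactly one, so '%s book%s' % (n, plural(n))
    # renders as "1 book" or "3 books".
    return '' if count == 1 else 's'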
Example #2
def setWorkPages():
    """ Set the workpage link for any books that don't already have one """

    myDB = database.DBConnection()
    cmd = 'select BookID,AuthorName,BookName from books,authors where length(WorkPage) < 4'
    cmd += ' and books.AuthorID = authors.AuthorID'
    books = myDB.select(cmd)
    if books:
        logger.debug('Setting WorkPage for %s book%s' % (len(books), plural(len(books))))
        counter = 0
        for book in books:
            bookid = book['BookID']
            worklink = getWorkPage(bookid)
            if worklink:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"WorkPage": worklink}
                myDB.upsert("books", newValueDict, controlValueDict)
                counter += 1
            else:
                logger.debug('No WorkPage found for %s: %s' % (book['AuthorName'], book['BookName']))
        msg = 'Updated %s page%s' % (counter, plural(counter))
        logger.debug("setWorkPages complete: " + msg)
    else:
        msg = 'No missing WorkPages'
        logger.debug(msg)
    return msg
Example #3
def showJobs():
    result = []
    result.append("Cache %i hit%s, %i miss" % (int(lazylibrarian.CACHE_HIT),
                  plural(int(lazylibrarian.CACHE_HIT)), int(lazylibrarian.CACHE_MISS)))
    myDB = database.DBConnection()
    snatched = myDB.match("SELECT count('Status') as counter from wanted WHERE Status = 'Snatched'")
    wanted = myDB.match("SELECT count('Status') as counter FROM books WHERE Status = 'Wanted'")
    result.append("%i item%s marked as Snatched" % (snatched['counter'], plural(snatched['counter'])))
    result.append("%i item%s marked as Wanted" % (wanted['counter'], plural(wanted['counter'])))
    for job in lazylibrarian.SCHED.get_jobs():
        job = str(job)
        if "search_magazines" in job:
            jobname = "Magazine search"
        elif "checkForUpdates" in job:
            jobname = "Check LazyLibrarian version"
        elif "search_tor_book" in job:
            jobname = "TOR book search"
        elif "search_nzb_book" in job:
            jobname = "NZB book search"
        elif "search_rss_book" in job:
            jobname = "RSS book search"
        elif "processDir" in job:
            jobname = "Process downloads"
        else:
            jobname = job.split(' ')[0].split('.')[2]

        jobinterval = job.split('[')[1].split(']')[0]
        jobtime = job.split('at: ')[1].split('.')[0]
        jobtime = next_run(jobtime)
        jobinfo = "%s: Next run in %s" % (jobname, jobtime)
        result.append(jobinfo)
    return result
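showJobs() depends on a next_run() helper that turns the scheduler's "next run at: ..." timestamp into a human-readable countdown. A rough sketch, assuming the timestamp extracted by split('at: ')[1].split('.')[0] looks like "2018-01-01 12:00:00"; the real helper may format the interval differently:

from datetime import datetime

def next_run(when_run):
    # Hypothetical countdown formatter: parse the scheduler timestamp and report
    # how long until that moment, clamped at zero if it has already passed.
    when = datetime.strptime(when_run, "%Y-%m-%d %H:%M:%S")
    seconds = max(int((when - datetime.now()).total_seconds()), 0)
    hours, remainder = divmod(seconds, 3600)
    return "%s hours %s minutes" % (hours, remainder // 60)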
Example #4
def getAuthorImages():
    """ Try to get an author image for all authors without one"""
    myDB = database.DBConnection()
    cmd = 'select AuthorID, AuthorName from authors where (AuthorImg like "%nophoto%" or AuthorImg is null)'
    cmd += ' and Manual is not "1"'
    authors = myDB.select(cmd)
    if authors:
        logger.info('Checking images for %s author%s' % (len(authors), plural(len(authors))))
        counter = 0
        for author in authors:
            authorid = author['AuthorID']
            imagelink = getAuthorImage(authorid)
            newValueDict = {}
            if not imagelink:
                logger.debug('No image found for %s' % author['AuthorName'])
                newValueDict = {"AuthorImg": 'images/nophoto.png'}
            elif 'nophoto' not in imagelink:
                logger.debug('Updating %s image to %s' % (author['AuthorName'], imagelink))
                newValueDict = {"AuthorImg": imagelink}

            if newValueDict:
                counter += 1
                controlValueDict = {"AuthorID": authorid}
                myDB.upsert("authors", newValueDict, controlValueDict)

        msg = 'Updated %s image%s' % (counter, plural(counter))
        logger.info('Author Image check complete: ' + msg)
    else:
        msg = 'No missing author images'
        logger.debug(msg)
    return msg
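Several examples (#2, #4, #7, #10 and others) write changes through DBConnection.upsert, passing a controlValueDict that identifies the row and a newValueDict with the columns to change. That method isn't shown here; a simplified sketch of the update-or-insert it presumably performs, written against a plain sqlite3 connection (the real implementation may differ in quoting, threading and commit handling):

def upsert(conn, table, new_values, control_values):
    # conn is assumed to be a sqlite3.Connection.
    # Update the row matched by control_values; insert a new row if nothing matched.
    sets = ', '.join('%s=?' % col for col in new_values)
    where = ' AND '.join('%s=?' % col for col in control_values)
    cur = conn.execute('UPDATE %s SET %s WHERE %s' % (table, sets, where),
                       list(new_values.values()) + list(control_values.values()))
    if cur.rowcount == 0:
        cols = list(control_values) + list(new_values)
        vals = list(control_values.values()) + list(new_values.values())
        placeholders = ', '.join('?' for _ in cols)
        conn.execute('INSERT INTO %s (%s) VALUES (%s)' % (table, ', '.join(cols), placeholders), vals)
    conn.commit()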
Example #5
def export_CSV(search_dir=None, status="Wanted"):
    """ Write a csv file to the search_dir containing all books marked as "Wanted" """

    if not search_dir or not os.path.isdir(search_dir):
        logger.warn("Please check Alternate Directory setting")
        return False

    csvFile = os.path.join(search_dir, "%s - %s.csv" % (status, now().replace(':', '-')))

    myDB = database.DBConnection()

    find_status = myDB.select('SELECT * FROM books WHERE Status = "%s"' % status)

    if not find_status:
        logger.warn(u"No books marked as %s" % status)
    else:
        count = 0
        with open(csvFile, 'wb') as csvfile:
            csvwrite = csv.writer(csvfile, delimiter=',',
                                  quotechar='"', quoting=csv.QUOTE_MINIMAL)

            # write headers, change AuthorName BookName BookIsbn to match import csv names (Author, Title, ISBN10)
            csvwrite.writerow(['BookID', 'Author', 'Title', 'ISBN', 'AuthorID'])

            for resulted in find_status:
                logger.debug(u"Exported CSV for book %s" % resulted['BookName'])
                row = ([resulted['BookID'], resulted['AuthorName'], resulted['BookName'],
                        resulted['BookIsbn'], resulted['AuthorID']])
                csvwrite.writerow([("%s" % s).encode(lazylibrarian.SYS_ENCODING) for s in row])
                count = count + 1
        logger.info(u"CSV exported %s book%s to %s" % (count, plural(count), csvFile))
Example #6
def export_CSV(search_dir=None, status="Wanted", library='eBook'):
    """ Write a csv file to the search_dir containing all books marked as "Wanted" """
    # noinspection PyBroadException
    try:
        if not search_dir:
            msg = "Alternate Directory not configured"
            logger.warn(msg)
            return msg
        elif not os.path.isdir(search_dir):
            msg = "Alternate Directory [%s] not found" % search_dir
            logger.warn(msg)
            return msg
        elif not os.access(search_dir, os.W_OK | os.X_OK):
            msg = "Alternate Directory [%s] not writable" % search_dir
            logger.warn(msg)
            return msg

        csvFile = os.path.join(search_dir, "%s %s - %s.csv" % (status, library, now().replace(':', '-')))

        myDB = database.DBConnection()

        cmd = 'SELECT BookID,AuthorName,BookName,BookIsbn,books.AuthorID FROM books,authors '
        if library == 'eBook':
            cmd += 'WHERE books.Status=? and books.AuthorID = authors.AuthorID'
        else:
            cmd += 'WHERE AudioStatus=? and books.AuthorID = authors.AuthorID'
        find_status = myDB.select(cmd, (status,))

        if not find_status:
            msg = "No %s marked as %s" % (library, status)
            logger.warn(msg)
        else:
            count = 0
            if PY2:
                fmode = 'wb'
            else:
                fmode = 'w'
            with open(csvFile, fmode) as csvfile:
                csvwrite = writer(csvfile, delimiter=',',
                                  quotechar='"', quoting=QUOTE_MINIMAL)

                # write headers, change AuthorName BookName BookIsbn to match import csv names
                csvwrite.writerow(['BookID', 'Author', 'Title', 'ISBN', 'AuthorID'])

                for resulted in find_status:
                    logger.debug("Exported CSV for %s %s" % (library, resulted['BookName']))
                    row = ([resulted['BookID'], resulted['AuthorName'], resulted['BookName'],
                            resulted['BookIsbn'], resulted['AuthorID']])
                    if PY2:
                        csvwrite.writerow([("%s" % s).encode(lazylibrarian.SYS_ENCODING) for s in row])
                    else:
                        csvwrite.writerow([("%s" % s) for s in row])
                    count += 1
            msg = "CSV exported %s %s%s to %s" % (count, library, plural(count), csvFile)
            logger.info(msg)
        return msg
    except Exception:
        msg = 'Unhandled exception in exportCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
Example #7
def getAuthorImages():
    """ Try to get an author image for all authors without one"""
    myDB = database.DBConnection()
    authors = myDB.select('select AuthorID from authors where AuthorImg like "%nophoto%"')
    if authors:
        logger.info('Checking images for %s author%s' % (len(authors), plural(len(authors))))
        counter = 0
        for author in authors:
            authorid = author['AuthorID']
            imagelink = getAuthorImage(authorid)
            if imagelink and not "nophoto" in imagelink:
                controlValueDict = {"AuthorID": authorid}
                newValueDict = {"AuthorImg": imagelink}
                myDB.upsert("authors", newValueDict, controlValueDict)
                counter += 1
        logger.info('Author Image check completed, updated %s image%s' % (counter, plural(counter)))
    else:
        logger.debug('No missing images')
Example #8
def GOODREADS(host=None, feednr=None, priority=0, dispname=None, test=False):
    """
    Goodreads RSS query function. Returns all results in a list; can handle multiple wishlists,
    but expects Goodreads format (looks for Goodreads category names).
    """
    results = []
    basehost = host
    if not str(host)[:4] == "http":
        host = 'http://' + host

    URL = host

    result, success = fetchURL(URL)

    if test:
        return success

    if success:
        data = feedparser.parse(result)
    else:
        logger.error('Error fetching data from %s: %s' % (host, result))
        BlockProvider(basehost, result)
        return []

    if data:
        logger.debug('Parsing results from %s' % URL)
        provider = data['feed']['link']
        if not dispname:
            dispname = provider
        logger.debug("RSS %s returned %i result%s" % (provider, len(data.entries), plural(len(data.entries))))
        for post in data.entries:
            title = ''
            book_id = ''
            author_name = ''
            isbn = ''
            if 'title' in post:
                title = post.title
            if 'book_id' in post:
                book_id = post.book_id
            if 'author_name' in post:
                author_name = post.author_name
            if 'isbn' in post:
                isbn = post.isbn
            if title and author_name:
                results.append({
                    'rss_prov': provider,
                    'rss_feed': feednr,
                    'rss_title': title,
                    'rss_author': author_name,
                    'rss_bookid': book_id,
                    'rss_isbn': isbn,
                    'priority': priority,
                    'dispname': dispname
                })
    else:
        logger.debug('No data returned from %s' % host)
    return results
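A hypothetical call to the function above; the shelf URL, feed number and display name are placeholders rather than values taken from LazyLibrarian's configuration:

# Fetch a Goodreads "to-read" shelf RSS feed and list what came back.
entries = GOODREADS(host='https://www.goodreads.com/review/list_rss/12345?shelf=to-read',
                    feednr=1, priority=0, dispname='Goodreads to-read')
for entry in entries:
    print('%s - %s (isbn %s)' % (entry['rss_author'], entry['rss_title'], entry['rss_isbn'] or 'unknown'))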
Example #9
    def _Magazine(self, **kwargs):
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        if 'magid' not in kwargs:
            self.data = self._error_with_message('No Magazine Provided')
            return
        links = []
        entries = []
        title = ''
        cmd = "SELECT Title,IssueID,IssueDate,IssueAcquired,IssueFile from issues "
        cmd += "WHERE Title='%s' order by IssueDate DESC"
        results = myDB.select(cmd % kwargs['magid'])
        page = results[index:(index + self.PAGE_SIZE)]
        for issue in page:
            title = makeUnicode(issue['Title'])
            entry = {'title': escape('%s (%s)' % (title, issue['IssueDate'])),
                     'id': escape('issue:%s' % issue['IssueID']),
                     'updated': opdstime(issue['IssueAcquired']),
                     'content': escape('%s - %s' % (title, issue['IssueDate'])),
                     'href': '%s?cmd=Serve&amp;issueid=%s' % (self.opdsroot, quote_plus(issue['IssueID'])),
                     'kind': 'acquisition',
                     'rel': 'file',
                     'type': mimeType(issue['IssueFile'])}
            if lazylibrarian.CONFIG['OPDS_METAINFO']:
                fname = os.path.splitext(issue['IssueFile'])[0]
                res = cache_img('magazine', issue['IssueID'], fname + '.jpg')
                entry['image'] = self.searchroot + '/' + res[0]
            entries.append(entry)

        feed = {}
        title = '%s (%s)' % (escape(title), len(entries))
        feed['title'] = 'LazyLibrarian OPDS - %s' % title
        feed['id'] = 'magazine:%s' % escape(kwargs['magid'])
        feed['updated'] = now()
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=Magazine&amp;magid=%s' % (self.opdsroot, quote_plus(kwargs['magid'])),
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=Magazine&amp;magid=%s&amp;index=%s' % (self.opdsroot, quote_plus(kwargs['magid']),
                                                                            index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=Magazine&amp;magid=%s&amp;index=%s' % (self.opdsroot, quote_plus(kwargs['magid']),
                                                                            index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s issue%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
Example #10
def getBookCovers():
    """ Try to get a cover image for all books """

    myDB = database.DBConnection()
    books = myDB.select('select BookID,BookImg from books where BookImg like "%nocover%"')
    if books:
        logger.info('Checking covers for %s book%s' % (len(books), plural(len(books))))
        counter = 0
        for book in books:
            bookid = book['BookID']
            coverlink = getBookCover(bookid)
            if coverlink and "nocover" not in coverlink:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": coverlink}
                myDB.upsert("books", newValueDict, controlValueDict)
                counter += 1
        logger.info('Cover check complete, updated %s cover%s' % (counter, plural(counter)))
    else:
        logger.debug('No missing book covers')
Example #11
    def _RecentAudio(self, **kwargs):
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        feed = {'title': 'LazyLibrarian OPDS - Recent AudioBooks', 'id': 'Recent AudioBooks', 'updated': now()}
        links = []
        entries = []
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=RecentAudio' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        links.append(getLink(href='%s/opensearchbooks.xml' % self.searchroot,
                             ftype='application/opensearchdescription+xml', rel='search', title='Search Books'))

        cmd = "select BookName,BookID,AudioLibrary,BookDate,BookImg,BookDesc,BookAdded,AuthorID from books WHERE "
        if 'query' in kwargs:
            cmd += "BookName LIKE '%" + kwargs['query'] + "%' AND "
        cmd += "AudioStatus='Open' order by AudioLibrary DESC"
        results = myDB.select(cmd)
        page = results[index:(index + self.PAGE_SIZE)]
        for book in page:
            title = makeUnicode(book['BookName'])
            entry = {'title': escape(title),
                     'id': escape('audio:%s' % book['BookID']),
                     'updated': opdstime(book['AudioLibrary']),
                     'href': '%s?cmd=Serve&amp;audioid=%s' % (self.opdsroot, quote_plus(book['BookID'])),
                     'kind': 'acquisition',
                     'rel': 'file',
                     'type': mimeType("we_send.zip")}
            if lazylibrarian.CONFIG['OPDS_METAINFO']:
                author = myDB.match("SELECT AuthorName from authors WHERE AuthorID='%s'" % book['AuthorID'])
                author = makeUnicode(author['AuthorName'])
                entry['image'] = self.searchroot + '/' + book['BookImg']
                entry['content'] = escape('%s - %s' % (title, book['BookDesc']))
                entry['author'] = escape('%s' % author)
            else:
                entry['content'] = escape('%s (%s)' % (title, book['BookAdded']))
            entries.append(entry)

        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=RecentAudio&amp;index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=RecentAudio&amp;index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s result%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
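The LIKE clause in _RecentAudio splices kwargs['query'] straight into the SQL string. Examples #6 and #22 show that myDB.select also accepts bound parameters, so the same query can be built with a placeholder instead; a hedged sketch (recent_audio_query is a hypothetical helper, not part of LazyLibrarian):

def recent_audio_query(myDB, query=None):
    # Build the RecentAudio query with a bound LIKE parameter rather than
    # concatenating user input into the SQL text.
    cmd = "select BookName,BookID,AudioLibrary,BookDate,BookImg,BookDesc,BookAdded,AuthorID from books "
    cmd += "WHERE AudioStatus='Open' "
    args = ()
    if query:
        cmd += "AND BookName LIKE ? "
        args = ('%' + query + '%',)
    cmd += "order by AudioLibrary DESC"
    return myDB.select(cmd, args) if args else myDB.select(cmd)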
Example #12
    def _Authors(self, **kwargs):
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        feed = {'title': 'LazyLibrarian OPDS - Authors', 'id': 'Authors', 'updated': now()}
        links = []
        entries = []
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=Authors' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        links.append(getLink(href='%s/opensearchauthors.xml' % self.searchroot,
                             ftype='application/opensearchdescription+xml', rel='search', title='Search Authors'))
        cmd = "SELECT AuthorName,AuthorID,HaveBooks,TotalBooks,DateAdded from Authors WHERE "
        if 'query' in kwargs:
            cmd += "AuthorName LIKE '%" + kwargs['query'] + "%' AND "
        cmd += "CAST(HaveBooks AS INTEGER) > 0 order by AuthorName"
        results = myDB.select(cmd)
        page = results[index:(index + self.PAGE_SIZE)]
        for author in page:
            totalbooks = check_int(author['TotalBooks'], 0)
            havebooks = check_int(author['HaveBooks'], 0)
            lastupdated = author['DateAdded']
            name = makeUnicode(author['AuthorName'])
            entry = {
                    'title': escape('%s (%s/%s)' % (name, havebooks, totalbooks)),
                    'id': escape('author:%s' % author['AuthorID']),
                    'updated': opdstime(lastupdated),
                    'content': escape('%s (%s)' % (name, havebooks)),
                    'href': '%s?cmd=Author&amp;authorid=%s' % (self.opdsroot, author['AuthorID']),
                    'author': escape('%s' % name),
                    'kind': 'navigation',
                    'rel': 'subsection',
                }
            # removed authorimg as it stops navigation ??
            # if lazylibrarian.CONFIG['OPDS_METAINFO']:
            #    entry['image'] = self.searchroot + '/' + author['AuthorImg']
            entries.append(entry)

        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=Authors&amp;index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=Authors&amp;index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s author%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
Example #13
    def _Magazines(self, **kwargs):
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        feed = {'title': 'LazyLibrarian OPDS - Magazines', 'id': 'Magazines', 'updated': now()}
        links = []
        entries = []
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=Magazines' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        links.append(getLink(href='%s/opensearchmagazines.xml' % self.searchroot,
                             ftype='application/opensearchdescription+xml', rel='search', title='Search Magazines'))
        cmd = 'select magazines.*,(select count(*) as counter from issues where magazines.title = issues.title)'
        cmd += ' as Iss_Cnt from magazines '
        if 'query' in kwargs:
            cmd += "WHERE magazines.title LIKE '%" + kwargs['query'] + "%' "
        cmd += 'order by magazines.title'
        results = myDB.select(cmd)
        page = results[index:(index + self.PAGE_SIZE)]
        for mag in page:
            if mag['Iss_Cnt'] > 0:
                title = makeUnicode(mag['Title'])
                entry = {
                    'title': escape('%s (%s)' % (title, mag['Iss_Cnt'])),
                    'id': escape('magazine:%s' % title),
                    'updated': opdstime(mag['LastAcquired']),
                    'content': escape('%s' % title),
                    'href': '%s?cmd=Magazine&amp;magid=%s' % (self.opdsroot, quote_plus(title)),
                    'kind': 'navigation',
                    'rel': 'subsection',
                }
                if lazylibrarian.CONFIG['OPDS_METAINFO']:
                    res = cache_img('magazine', md5_utf8(mag['LatestCover']), mag['LatestCover'], refresh=True)
                    entry['image'] = self.searchroot + '/' + res[0]
                entries.append(entry)

        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=Magazines&amp;index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=Magazines&amp;index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s magazine%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
Example #14
    def _RecentMags(self, **kwargs):
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        feed = {'title': 'LazyLibrarian OPDS - Recent Magazines', 'id': 'Recent Magazines', 'updated': now()}
        links = []
        entries = []
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=RecentMags' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        links.append(getLink(href='%s/opensearchmagazines.xml' % self.searchroot,
                             ftype='application/opensearchdescription+xml', rel='search', title='Search Magazines'))
        cmd = "select Title,IssueID,IssueAcquired,IssueDate,IssueFile from issues "
        cmd += "where IssueFile != '' "
        if 'query' in kwargs:
            cmd += "AND Title LIKE '%" + kwargs['query'] + "%' "
        cmd += "order by IssueAcquired DESC"
        results = myDB.select(cmd)
        page = results[index:(index + self.PAGE_SIZE)]
        for mag in page:
            title = makeUnicode(mag['Title'])
            entry = {'title': escape('%s' % mag['IssueDate']),
                     'id': escape('issue:%s' % mag['IssueID']),
                     'updated': opdstime(mag['IssueAcquired']),
                     'content': escape('%s - %s' % (title, mag['IssueDate'])),
                     'href': '%s?cmd=Serve&amp;issueid=%s' % (self.opdsroot, quote_plus(mag['IssueID'])),
                     'kind': 'acquisition',
                     'rel': 'file',
                     'author': escape(title),
                     'type': mimeType(mag['IssueFile'])}
            if lazylibrarian.CONFIG['OPDS_METAINFO']:
                fname = os.path.splitext(mag['IssueFile'])[0]
                res = cache_img('magazine', mag['IssueID'], fname + '.jpg')
                entry['image'] = self.searchroot + '/' + res[0]
            entries.append(entry)

        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=RecentMags&amp;index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=RecentMags&amp;index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s issue%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
Example #15
def createMagCovers(refresh=False):
    if not lazylibrarian.CONFIG['IMP_MAGCOVER']:
        logger.info('Cover creation is disabled in config')
        return
    myDB = database.DBConnection()
    #  <> '' ignores empty string or NULL
    issues = myDB.select("SELECT IssueFile from issues WHERE IssueFile <> ''")
    if refresh:
        logger.info("Creating covers for %s issue%s" % (len(issues), plural(len(issues))))
    else:
        logger.info("Checking covers for %s issue%s" % (len(issues), plural(len(issues))))
    cnt = 0
    for item in issues:
        try:
            createMagCover(item['IssueFile'], refresh=refresh)
            cnt += 1
        except Exception as why:
            logger.warn('Unable to create cover for %s, %s %s' % (item['IssueFile'], type(why).__name__, str(why)))
    logger.info("Cover creation completed")
    if refresh:
        return "Created covers for %s issue%s" % (cnt, plural(cnt))
    return "Checked covers for %s issue%s" % (cnt, plural(cnt))
Example #16
def dump_table(table, savedir=None, status=None):
    myDB = database.DBConnection()
    # noinspection PyBroadException
    try:
        columns = myDB.select('PRAGMA table_info(%s)' % table)
        if not columns:  # no such table
            logger.warn("No such table [%s]" % table)
            return 0

        if not os.path.isdir(savedir):
            savedir = lazylibrarian.DATADIR

        headers = ''
        for item in columns:
            if headers:
                headers += ','
            headers += item[1]
        if status:
            cmd = 'SELECT %s from %s WHERE status="%s"' % (headers, table, status)
        else:
            cmd = 'SELECT %s from %s' % (headers, table)
        data = myDB.select(cmd)
        count = 0
        if data is not None:
            label = table
            if status:
                label += '_%s' % status
            csvFile = os.path.join(savedir, "%s.csv" % label)

            if PY2:
                fmode = 'wb'
            else:
                fmode = 'w'
            with open(csvFile, fmode) as csvfile:
                csvwrite = writer(csvfile, delimiter=',', quotechar='"', quoting=QUOTE_MINIMAL)
                headers = headers.split(',')
                csvwrite.writerow(headers)
                for item in data:
                    if PY2:
                        csvwrite.writerow([makeBytestr(s) if s else '' for s in item])
                    else:
                        csvwrite.writerow([str(s) if s else '' for s in item])
                    count += 1
            msg = "Exported %s item%s to %s" % (count, plural(count), csvFile)
            logger.info(msg)
        return count

    except Exception:
        msg = 'Unhandled exception in dump_table: %s' % traceback.format_exc()
        logger.error(msg)
        return 0
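A hypothetical call to dump_table; the save directory is a placeholder:

# Export the Wanted rows of the books table to /tmp/books_Wanted.csv
exported = dump_table('books', savedir='/tmp', status='Wanted')
print('Exported %d row%s' % (exported, plural(exported)))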
Example #17
def sync_to_gr():
    msg = ''
    try:
        threading.currentThread().name = 'GRSync'
        if lazylibrarian.CONFIG['GR_WANTED']:
            to_read_shelf, ll_wanted = grsync('Wanted', lazylibrarian.CONFIG['GR_WANTED'])
            msg += "%s change%s to %s shelf\n" % (to_read_shelf, plural(to_read_shelf),
                                                  lazylibrarian.CONFIG['GR_WANTED'])
            msg += "%s change%s to Wanted from GoodReads\n" % (ll_wanted, plural(ll_wanted))
        else:
            msg += "Sync Wanted books is disabled\n"
        if lazylibrarian.CONFIG['GR_OWNED']:
            to_owned_shelf, ll_have = grsync('Open', lazylibrarian.CONFIG['GR_OWNED'])
            msg += "%s change%s to %s shelf\n" % (to_owned_shelf, plural(to_owned_shelf),
                                                  lazylibrarian.CONFIG['GR_OWNED'])
            msg += "%s change%s to Owned from GoodReads\n" % (ll_have, plural(ll_have))
        else:
            msg += "Sync Owned books is disabled\n"
        logger.info(msg.strip('\n').replace('\n', ', '))
    except Exception as e:
        logger.error("Exception in sync_to_gr: %s %s" % (type(e).__name__, str(e)))
    finally:
        threading.currentThread().name = 'WEBSERVER'
        return msg
Example #18
def dbUpdate(refresh=False):
    try:
        myDB = database.DBConnection()

        activeauthors = myDB.select('SELECT AuthorName from authors WHERE Status="Active" \
                                    or Status="Loading" order by DateAdded ASC')
        logger.info('Starting update for %i active author%s' % (len(activeauthors), plural(len(activeauthors))))

        for author in activeauthors:
            authorname = author[0]
            importer.addAuthorToDB(authorname, refresh=refresh)

        logger.info('Active author update complete')
    except Exception:
        logger.error('Unhandled exception in dbUpdate: %s' % traceback.format_exc())
Example #19
def setWorkPages():
    """ Set the workpage link for any books that don't already have one """

    myDB = database.DBConnection()

    books = myDB.select('select BookID,AuthorName,BookName from books where length(WorkPage) < 4')
    if books:
        logger.debug('Setting WorkPage for %s book%s' % (len(books), plural(len(books))))
        for book in books:
            bookid = book['BookID']
            worklink = getWorkPage(bookid)
            if worklink:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"WorkPage": worklink}
                myDB.upsert("books", newValueDict, controlValueDict)
            else:
                logger.debug('No WorkPage found for %s: %s' % (book['AuthorName'], book['BookName']))
        logger.debug('setWorkPages completed')
Example #20
def LIME(book=None, test=False):
    errmsg = ''
    provider = "Limetorrent"
    host = lazylibrarian.CONFIG['LIME_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    params = {"q": book['searchterm']}
    providerurl = url_fix(host + "/searchrss/other")
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = item['description']
                        seeders = int(
                            seeders.split('Seeds:')[1].split(',')[0].strip())
                    except (IndexError, ValueError):
                        seeders = 0

                    size = item['size']
                    try:
                        size = int(size)
                    except ValueError:
                        size = 0

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['url']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < int(seeders):
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['LIME_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # may have ip based access limits
                        logger.error(
                            'Access forbidden. Please wait a while before trying %s again.'
                            % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" %
                                     (provider, str(e)))
                        logger.debug('%s: %s' %
                                     (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
Example #21
def NewzNabPlus(book=None, provider=None, searchType=None, searchMode=None, test=False):
    """
    Generic NewzNabPlus query function.
    Takes host + api key + search type and returns the result set,
    regardless of which NewzNab+ based site is being queried.
    ref http://usenetreviewz.com/nzb-sites/
    """

    host = provider['HOST']
    api_key = provider['API']
    logger.debug('[NewzNabPlus] searchType [%s] with Host [%s] mode [%s] using api [%s] for item [%s]' % (
        searchType, host, searchMode, api_key, str(book)))

    results = []

    params = ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode)

    if params:
        if not str(host)[:4] == "http":
            host = 'http://' + host
        if host[-1:] == '/':
            host = host[:-1]
        URL = host + '/api?' + urllib.urlencode(params)

        sterm = makeUnicode(book['searchterm'])

        rootxml = None
        logger.debug("[NewzNabPlus] URL = %s" % URL)
        result, success = fetchURL(URL)

        if test:
            if result.startswith('<') and result.endswith('/>') and "error code" in result:
                result = result[1:-2]
                success = False
            if not success:
                logger.debug(result)
            return success

        if success:
            try:
                rootxml = ElementTree.fromstring(result)
            except Exception as e:
                logger.error('Error parsing data from %s: %s %s' % (host, type(e).__name__, str(e)))
                rootxml = None
        else:
            if not result or result == "''":
                result = "Got an empty response"
            logger.error('Error reading data from %s: %s' % (host, result))
            # maybe the host doesn't support the search type
            cancelled = cancelSearchType(searchType, result, provider)
            if not cancelled:  # it was some other problem
                BlockProvider(provider['HOST'], result)

        if rootxml is not None:
            # to debug because of api
            logger.debug('Parsing results from <a href="%s">%s</a>' % (URL, host))

            if rootxml.tag == 'error':
                errormsg = rootxml.get('description', default='unknown error')
                logger.error("%s - %s" % (host, errormsg))
                # maybe the host doesn't support the search type
                cancelled = cancelSearchType(searchType, errormsg, provider)
                if not cancelled:  # it was some other problem
                    BlockProvider(provider['HOST'], errormsg)
            else:
                resultxml = rootxml.getiterator('item')
                nzbcount = 0
                maxage = check_int(lazylibrarian.CONFIG['USENET_RETENTION'], 0)
                for nzb in resultxml:
                    try:
                        thisnzb = ReturnResultsFieldsBySearchType(book, nzb, host, searchMode, provider['DLPRIORITY'])
                        if not maxage:
                            nzbcount += 1
                            results.append(thisnzb)
                        else:
                            # example nzbdate format: Mon, 27 May 2013 02:12:09 +0200
                            nzbdate = thisnzb['nzbdate']
                            try:
                                parts = nzbdate.split(' ')
                                nzbdate = ' '.join(parts[:5])  # strip the +0200
                                dt = datetime.datetime.strptime(nzbdate, "%a, %d %b %Y %H:%M:%S").timetuple()
                                nzbage = age('%04d-%02d-%02d' % (dt.tm_year, dt.tm_mon, dt.tm_mday))
                            except Exception as e:
                                logger.debug('Unable to get age from [%s] %s %s' %
                                             (thisnzb['nzbdate'], type(e).__name__, str(e)))
                                nzbage = 0
                            if nzbage <= maxage:
                                nzbcount += 1
                                results.append(thisnzb)
                            else:
                                logger.debug('%s is too old (%s day%s)' % (thisnzb['nzbtitle'], nzbage, plural(nzbage)))

                    except IndexError:
                        logger.debug('No results from %s for %s' % (host, sterm))
                logger.debug('Found %s nzb at %s for: %s' % (nzbcount, host, sterm))
        else:
            logger.debug('No data returned from %s for %s' % (host, sterm))
    return results
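The retention check above compares age('YYYY-MM-DD') against USENET_RETENTION, which is configured in days. The age() helper isn't included in these examples; a minimal sketch of the assumed behaviour (days elapsed since the given date):

from datetime import date

def age(histdate):
    # Days elapsed since a 'YYYY-MM-DD' date string; sketch of the assumed helper.
    year, month, day = (int(part) for part in histdate.split('-'))
    return (date.today() - date(year, month, day)).days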
Example #22
    def _Members(self, **kwargs):
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        if 'seriesid' not in kwargs:
            self.data = self._error_with_message('No Series Provided')
            return
        links = []
        entries = []
        series = myDB.match("SELECT SeriesName from Series WHERE SeriesID=?", (kwargs['seriesid'],))
        cmd = "SELECT BookName,BookDate,BookAdded,BookDesc,BookImg,BookFile,AudioFile,books.BookID,SeriesNum "
        cmd += "from books,member where (Status='Open' or AudioStatus='Open') and SeriesID=? "
        cmd += "and books.bookid = member.bookid order by CAST(SeriesNum AS INTEGER)"
        results = myDB.select(cmd, (kwargs['seriesid'],))
        cmd = 'SELECT AuthorName from authors,books WHERE authors.authorid = books.authorid AND '
        cmd += 'books.bookid=?'
        res = myDB.match(cmd, (results[0]['BookID'],))
        author = res['AuthorName']
        page = results[index:(index + self.PAGE_SIZE)]
        for book in page:
            mime_type = None
            if book['BookFile']:
                mime_type = mimeType(book['BookFile'])
            elif book['AudioFile']:
                mime_type = mimeType(book['AudioFile'])
            if mime_type:
                if book['SeriesNum']:
                    snum = ' (%s)' % book['SeriesNum']
                else:
                    snum = ''
                entry = {'title': escape('%s%s' % (book['BookName'], snum)),
                         'id': escape('book:%s' % book['BookID']),
                         'updated': opdstime(book['BookAdded']),
                         'href': '%s?cmd=Serve&amp;bookid=%s' % (self.opdsroot, book['BookID']),
                         'kind': 'acquisition',
                         'rel': 'file',
                         'author': escape("%s" % author),
                         'type': mime_type}

                if lazylibrarian.CONFIG['OPDS_METAINFO']:
                    entry['image'] = self.searchroot + '/' + book['BookImg']
                    entry['content'] = escape('%s (%s %s) %s' % (book['BookName'], series['SeriesName'],
                                                                 book['SeriesNum'], book['BookDesc']))
                else:
                    entry['content'] = escape('%s (%s %s) %s' % (book['BookName'], series['SeriesName'],
                                                                 book['SeriesNum'], book['BookAdded']))
                entries.append(entry)

        feed = {}
        seriesname = '%s (%s) %s' % (escape(series['SeriesName']), len(entries), author)
        feed['title'] = 'LazyLibrarian OPDS - %s' % seriesname
        feed['id'] = 'series:%s' % escape(kwargs['seriesid'])
        feed['updated'] = now()
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=Series' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=Members&amp;seriesid=%s&amp;index=%s' % (self.opdsroot, kwargs['seriesid'],
                                                                              index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=Members&amp;seriesid=%s&amp;index=%s' % (self.opdsroot, kwargs['seriesid'],
                                                                              index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s book%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
Example #23
def processDir(reset=False):

    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "POSTPROCESS"

    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(
            lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' %
                     (processpath, why.strerror))
        return

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if len(snatched) == 0:
        logger.info('Nothing marked as snatched.')
        scheduleJob(action='Stop', target='processDir')
        return

    if len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
        return

    logger.info("Checking %s download%s for %s snatched file%s" %
                (len(downloads), plural(
                    len(downloads)), len(snatched), plural(len(snatched))))
    ppcount = 0
    for book in snatched:
        matches = []
        for fname in downloads:
            if not fname.endswith('.fail'):  # has this failed before?
                # this is to get round differences in torrent filenames.
                # Torrents aren't always returned with the name we searched for
                # there might be a better way...
                if isinstance(fname, str):
                    matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                else:
                    matchname = fname
                if ' LL.(' in matchname:
                    matchname = matchname.split(' LL.(')[0]
                matchtitle = book['NZBtitle']
                match = 0
                if matchtitle:
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                if match >= lazylibrarian.DLOAD_RATIO:
                    fname = matchname
                    if os.path.isfile(os.path.join(processpath, fname)):
                        # not a directory, handle single file downloads here. Book/mag file in download root.
                        # move the file into its own subdirectory so we don't move/delete things that aren't ours
                        if is_valid_booktype(fname, booktype="book") \
                                or is_valid_booktype(fname, booktype="mag"):
                            fname = os.path.splitext(fname)[0]
                            dirname = os.path.join(processpath, fname)
                            if not os.path.exists(dirname):
                                try:
                                    os.makedirs(dirname)
                                except OSError as why:
                                    logger.debug(
                                        'Failed to create directory %s, %s' %
                                        (dirname, why.strerror))
                            if os.path.exists(dirname):
                                # move the book and any related files too
                                # ie other book formats, or opf, jpg with same title
                                # can't move metadata.opf or cover.jpg or similar
                                # as can't be sure they are ours
                                # not sure if we need a new listdir here, or whether we can use the old one
                                list_dir = os.listdir(processpath)
                                for ourfile in list_dir:
                                    if ourfile.startswith(fname):
                                        if is_valid_booktype(ourfile, booktype="book") \
                                            or is_valid_booktype(ourfile, booktype="mag") \
                                                or os.path.splitext(ourfile)[1].lower() in ['.opf', '.jpg']:
                                            try:
                                                shutil.move(
                                                    os.path.join(
                                                        processpath, ourfile),
                                                    os.path.join(
                                                        dirname, ourfile))
                                            except Exception as why:
                                                logger.debug(
                                                    "Failed to move file %s to %s, %s"
                                                    % (ourfile, dirname,
                                                       str(why)))

                    if os.path.isdir(os.path.join(processpath, fname)):
                        pp_path = os.path.join(processpath, fname)
                        logger.debug('Found folder (%s%%) %s for %s' %
                                     (match, pp_path, book['NZBtitle']))
                        matches.append([match, pp_path, book])
                else:
                    logger.debug('No match (%s%%) %s for %s' %
                                 (match, matchname, matchtitle))
            else:
                logger.debug('Skipping %s' % fname)

        if matches:
            highest = max(matches, key=lambda x: x[0])
            match = highest[0]
            pp_path = highest[1]
            book = highest[2]
            logger.debug(u'Best match (%s%%): %s for %s' %
                         (match, pp_path, book['NZBtitle']))

            data = myDB.match('SELECT * from books WHERE BookID="%s"' %
                              book['BookID'])
            if data:
                logger.debug(u'Processing book %s' % book['BookID'])
                authorname = data['AuthorName']
                bookname = data['BookName']
                if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                    logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                    lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace(
                        '/', '\\')
                # Default destination path; should be configurable via the config file.
                dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace(
                    '$Author', authorname).replace('$Title', bookname)
                global_name = lazylibrarian.EBOOK_DEST_FILE.replace(
                    '$Author', authorname).replace('$Title', bookname)
                global_name = unaccented(global_name)
                # dest_path = authorname+'/'+bookname
                # global_name = bookname + ' - ' + authorname
                # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                dic = {
                    '<': '',
                    '>': '',
                    '...': '',
                    ' & ': ' ',
                    ' = ': ' ',
                    '?': '',
                    '$': 's',
                    ' + ': ' ',
                    '"': '',
                    ',': '',
                    '*': '',
                    ':': '',
                    ';': '',
                    '\'': ''
                }
                dest_path = unaccented_str(replace_all(dest_path, dic))
                dest_path = os.path.join(lazylibrarian.DESTINATION_DIR,
                                         dest_path).encode(
                                             lazylibrarian.SYS_ENCODING)
            else:
                data = myDB.match('SELECT * from magazines WHERE Title="%s"' %
                                  book['BookID'])
                if data:
                    logger.debug(u'Processing magazine %s' % book['BookID'])
                    # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                    # files are downloading, there will be an error in post-processing, trying to go to the
                    # same directory.
                    # keep for processing issues arriving out of order
                    mostrecentissue = data['IssueDate']
                    # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                    dic = {
                        '<': '',
                        '>': '',
                        '...': '',
                        ' & ': ' ',
                        ' = ': ' ',
                        '?': '',
                        '$': 's',
                        ' + ': ' ',
                        '"': '',
                        ',': '',
                        '*': '',
                        ':': '',
                        ';': '',
                        '\'': ''
                    }
                    mag_name = unaccented_str(replace_all(book['BookID'], dic))
                    # book auxinfo is a cleaned date, eg 2015-01-01
                    dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                        '$IssueDate',
                        book['AuxInfo']).replace('$Title', mag_name)
                    # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                    if lazylibrarian.MAG_RELATIVE:
                        if dest_path[0] not in '._':
                            dest_path = '_' + dest_path
                        dest_path = os.path.join(
                            lazylibrarian.DESTINATION_DIR,
                            dest_path).encode(lazylibrarian.SYS_ENCODING)
                    else:
                        dest_path = dest_path.encode(
                            lazylibrarian.SYS_ENCODING)
                    authorname = None
                    bookname = None
                    global_name = lazylibrarian.MAG_DEST_FILE.replace(
                        '$IssueDate',
                        book['AuxInfo']).replace('$Title', mag_name)
                    global_name = unaccented(global_name)
                    # global_name = book['AuxInfo']+' - '+title
                else:
                    logger.debug(
                        "Snatched magazine %s is not in download directory" %
                        (book['BookID']))
                    continue
        else:
            logger.debug("Snatched %s %s is not in download directory" %
                         (book['NZBmode'], book['NZBtitle']))
            continue

        processBook = processDestination(pp_path, dest_path, authorname,
                                         bookname, global_name,
                                         book['NZBmode'])

        if processBook:
            logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
            # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {
                "Status": "Processed",
                "NZBDate": now()
            }  # say when we processed it
            myDB.upsert("wanted", newValueDict, controlValueDict)

            if bookname is not None:  # it's a book, if None it's a magazine
                if len(lazylibrarian.IMP_CALIBREDB):
                    logger.debug(
                        'Calibre should have created the extras for us')
                else:
                    processExtras(myDB, dest_path, global_name, data)
            else:
                # update mags
                controlValueDict = {"Title": book['BookID']}
                if mostrecentissue:
                    if mostrecentissue.isdigit() and str(
                            book['AuxInfo']).isdigit():
                        older = int(mostrecentissue) > int(
                            book['AuxInfo'])  # issuenumber
                    else:
                        older = mostrecentissue > book['AuxInfo']  # YYYY-MM-DD
                else:
                    older = False
                if older:  # check this in case processing issues arriving out of order
                    newValueDict = {
                        "LastAcquired": today(),
                        "IssueStatus": "Open"
                    }
                else:
                    newValueDict = {
                        "IssueDate": book['AuxInfo'],
                        "LastAcquired": today(),
                        "IssueStatus": "Open"
                    }
                myDB.upsert("magazines", newValueDict, controlValueDict)
                # dest_path is where we put the magazine after processing, but we don't have the full filename
                # so look for any "book" in that directory
                dest_file = book_file(dest_path, booktype='mag')
                controlValueDict = {
                    "Title": book['BookID'],
                    "IssueDate": book['AuxInfo']
                }
                newValueDict = {
                    "IssueAcquired": today(),
                    "IssueFile": dest_file,
                    "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                }
                myDB.upsert("issues", newValueDict, controlValueDict)

                # create a thumbnail cover for the new issue
                create_cover(dest_file)

            logger.info('Successfully processed: %s' % global_name)
            ppcount = ppcount + 1
            notify_download("%s from %s at %s" %
                            (global_name, book['NZBprov'], now()))
        else:
            logger.error('Postprocessing for %s has failed.' % global_name)
            logger.error('Warning - Residual files remain in %s.fail' %
                         pp_path)
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {"Status": "Failed", "NZBDate": now()}
            myDB.upsert("wanted", newValueDict, controlValueDict)
            # if it's a book, reset status so we try for a different version
            # if it's a magazine, user can select a different one from pastissues table
            if bookname is not None:
                myDB.action(
                    'UPDATE books SET status = "Wanted" WHERE BookID=?',
                    (book['BookID'],))

            # at this point, as it failed we should move it or it will get postprocessed
            # again (and fail again)
            try:
                os.rename(pp_path, pp_path + '.fail')
            except Exception as e:
                logger.debug("Unable to rename %s, %s" % (pp_path, str(e)))

    downloads = os.listdir(
        processpath)  # check in case we processed/deleted some above
    for directory in downloads:
        if "LL.(" in directory and not directory.endswith('.fail'):
            bookID = str(directory).split("LL.(")[1].split(")")[0]
            logger.debug("Book with id: " + str(bookID) + " is in downloads")
            pp_path = os.path.join(processpath, directory)

            if os.path.isfile(pp_path):
                pp_path = os.path.join(processpath)

            if os.path.isdir(pp_path):
                logger.debug('Found LL folder %s.' % pp_path)
            if import_book(pp_path, bookID):
                ppcount = ppcount + 1

    if ppcount == 0:
        logger.info('No snatched books/mags have been found')
    else:
        logger.info('%s book%s/mag%s processed.' %
                    (ppcount, plural(ppcount), plural(ppcount)))

    if reset:
        scheduleJob(action='Restart', target='processDir')
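
The character-substitution dictionaries above are fed to the replace_all() helper so the magazine title is safe to use as a folder and file name. A minimal sketch of what that helper does, assuming plain sequential substitution rather than the project's exact implementation:

def replace_all(text, dic):
    """ Replace every key of dic that appears in text with its value (sketch) """
    for key, value in dic.items():
        text = text.replace(key, value)
    return text

# e.g. replace_all('Girls & Gadgets: Weekly?', {' & ': ' ', ':': '', '?': ''})
# returns 'Girls Gadgets Weekly'
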
Example #24
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if mags is None:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if mags is None:  # backlog search
            searchmags = myDB.select(
                'SELECT Title, Regex, LastAcquired, IssueDate from magazines WHERE Status="Active"')
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select(
                    'SELECT Title, Regex, LastAcquired, IssueDate from magazines WHERE Title=? AND Status="Active"',
                    (magazine['bookid'],))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        logger.debug("Removing old magazine search results")
        myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' %
                    (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']

            if not searchterm:
                dic = {
                    '...': '',
                    ' & ': ' ',
                    ' = ': ' ',
                    '?': '',
                    '$': 's',
                    ' + ': ' ',
                    '"': '',
                    ',': '',
                    '*': ''
                }
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless it's not a latin-1 encodable name
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)

                searchterm = re.sub(r'[.\-/]', ' ', searchterm)
                searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm})

        if not searchlist:
            logger.warn(
                'There is nothing to search for.  Mark some magazines as active.'
            )

        for book in searchlist:

            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    logger.warn(
                        'No nzb providers are available. Check config and blocklist'
                    )

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(
                    book, 'mag')
                if not nproviders:
                    logger.warn(
                        'No direct providers are available. Check config and blocklist'
                    )

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                            'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(
                    book, 'mag')
                if not nproviders:
                    logger.warn(
                        'No torrent providers are available. Check config and blocklist'
                    )

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                            'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders = IterateOverRSSSites()
                if not nproviders:
                    logger.warn(
                        'No rss providers are available. Check config and blocklist'
                    )

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        resultlist.append({
                            'bookid': book['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': item[
                                'tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                            'nzbsize': item['tor_size'],
                            'nzbmode': item['tor_type']
                        })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # strip quotes from the title
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(
                        nzbsize_temp, 1000
                    )  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    results = myDB.match(
                        'SELECT * from magazines WHERE Title=?', (bookid, ))
                    if not results:
                        logger.debug(
                            'Magazine [%s] does not match search term [%s].' %
                            (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(
                            lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(
                                lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" %
                                             nzbtitle)
                                rejected = True

                        if not rejected:
                            dic = {
                                '.': ' ',
                                '-': ' ',
                                '/': ' ',
                                '+': ' ',
                                '_': ' ',
                                '(': '',
                                ')': ''
                            }
                            nzbtitle_formatted = replace_all(nzbtitle,
                                                             dic).strip()
                            # Need to make sure that substrings of magazine titles don't get found
                            # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this
                            # remove extra spaces if they're in a row
                            if nzbtitle_formatted and nzbtitle_formatted[
                                    0] == '[' and nzbtitle_formatted[-1] == ']':
                                nzbtitle_formatted = nzbtitle_formatted[1:-1]
                            nzbtitle_exploded_temp = " ".join(
                                nzbtitle_formatted.split())
                            nzbtitle_exploded = nzbtitle_exploded_temp.split(
                                ' ')

                            if ' ' in bookid:
                                bookid_exploded = bookid.split(' ')
                            else:
                                bookid_exploded = [bookid]

                            # check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015

                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz
                                mag_title_match = fuzz.token_set_ratio(
                                    unaccented(bookid),
                                    unaccented(nzbtitle_formatted))

                                if mag_title_match < check_int(
                                        lazylibrarian.CONFIG['MATCH_RATIO'],
                                        90):
                                    logger.debug(
                                        u"Magazine token set Match failed: " +
                                        str(mag_title_match) + "% for " +
                                        nzbtitle_formatted)
                                    rejected = True
                                else:
                                    logger.debug(u"Magazine matched: " +
                                                 str(mag_title_match) + "% " +
                                                 bookid + " for " +
                                                 nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" %
                                             len(nzbtitle_exploded))
                                rejected = True

                        if not rejected:
                            blocked = myDB.match(
                                'SELECT * from wanted WHERE NZBurl=? and Status="Failed"',
                                (nzburl, ))
                            if blocked:
                                logger.debug(
                                    "Rejecting %s, blacklisted at %s" %
                                    (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(
                                str(results['Reject']).lower())
                            reject_list += getList(
                                lazylibrarian.CONFIG['REJECT_MAGS'])
                            lower_title = unaccented(
                                nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Reject: %s' %
                                                 str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" %
                                                 (nzbtitle_formatted, word))
                                    break

                        regex_pass = 0
                        if not rejected:
                            # Magazine names have many different styles of date
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            # MonthName DD YYYY or MonthName DD, YYYY
                            # YYYY MM or YYYY MM DD
                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            # nn YYYY issue number without "Nr" before it
                            # issue and year as a single 6 digit string eg 222015
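                            # Hypothetical examples (not from any real feed) of what the checks below yield:
                            #   "The MagPI 14 July 2015"   -> 2015-07-14
                            #   "Linux Format March 2016"  -> 2016-03-01
                            #   "Wired 2016 03"            -> 2016-03-01
                            #   "Custom PC Issue 151"      -> issue number 151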
                            newdatish = "none"
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            pos = 0
                            while pos < len(nzbtitle_exploded):
                                year = check_year(nzbtitle_exploded[pos])
                                if year and pos:
                                    month = month2num(nzbtitle_exploded[pos -
                                                                        1])
                                    if month:
                                        if pos - 1:
                                            day = check_int(
                                                nzbtitle_exploded[pos - 2], 1)
                                            if day > 31:  # probably issue number nn
                                                day = 1
                                        else:
                                            day = 1
                                        newdatish = "%04d-%02d-%02d" % (
                                            year, month, day)
                                        try:
                                            _ = datetime.date(year, month, day)
                                            regex_pass = 1
                                            break
                                        except ValueError:
                                            regex_pass = 0
                                pos += 1

                            # MonthName DD YYYY or MonthName DD, YYYY
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and (pos - 1):
                                        month = month2num(
                                            nzbtitle_exploded[pos - 2])
                                        if month:
                                            day = check_int(
                                                nzbtitle_exploded[
                                                    pos - 1].rstrip(','), 1)
                                            try:
                                                _ = datetime.date(
                                                    year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (
                                                    year, month, day)
                                                regex_pass = 2
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # YYYY MM or YYYY MM DD
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and pos + 1 < len(
                                            nzbtitle_exploded):
                                        month = check_int(
                                            nzbtitle_exploded[pos + 1], 0)
                                        if month:
                                            if pos + 2 < len(
                                                    nzbtitle_exploded):
                                                day = check_int(
                                                    nzbtitle_exploded[pos + 2],
                                                    1)
                                            else:
                                                day = 1
                                            try:
                                                _ = datetime.date(
                                                    year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (
                                                    year, month, day)
                                                regex_pass = 3
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    if nzbtitle_exploded[pos].lower() in [
                                            "issue", "no", "nr", "vol"
                                    ]:
                                        if pos + 1 < len(nzbtitle_exploded):
                                            issue = check_int(
                                                nzbtitle_exploded[pos + 1], 0)
                                            if issue:
                                                newdatish = str(
                                                    issue)  # 4 == 04 == 004
                                                if pos + 2 < len(
                                                        nzbtitle_exploded):
                                                    year = check_year(
                                                        nzbtitle_exploded[pos +
                                                                          2])
                                                    if year and year < int(
                                                            datetime.date.
                                                            today().year):
                                                        newdatish = '0'  # it's old
                                                    regex_pass = 4  # Issue/No/Nr/Vol nn, YYYY
                                                else:
                                                    regex_pass = 5  # Issue/No/Nr/Vol nn
                                                break
                                    pos += 1

                            # nn YYYY issue number without "Nr" before it
                            if not regex_pass:
                                pos = 1
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year:
                                        issue = check_int(
                                            nzbtitle_exploded[pos - 1], 0)
                                        if issue:
                                            newdatish = str(
                                                issue)  # 4 == 04 == 004
                                            regex_pass = 6
                                            if year < int(datetime.date.today(
                                            ).year):
                                                newdatish = '0'  # it's old
                                            break
                                    pos += 1

                            # issue and year as a single 6 digit string eg 222015
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    issue = nzbtitle_exploded[pos]
                                    if issue.isdigit() and len(issue) == 6:
                                        year = int(issue[2:])
                                        issue = int(issue[:2])
                                        newdatish = str(
                                            issue)  # 4 == 04 == 004
                                        regex_pass = 7
                                        if year < int(
                                                datetime.date.today().year):
                                            newdatish = '0'  # it's old
                                        break
                                    pos += 1

                            if not regex_pass:
                                logger.debug(
                                    'Magazine %s not in a recognised date format.'
                                    % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                regex_pass = 99

                        if rejected:
                            rejects += 1
                        else:
                            if lazylibrarian.LOGLEVEL > 2:
                                logger.debug("regex %s [%s] %s" %
                                             (regex_pass, nzbtitle_formatted,
                                              newdatish))
                            # wanted issues go into wanted table marked "Wanted"
                            #  the rest into pastissues table marked "Skipped"
                            insert_table = "pastissues"
                            insert_status = "Skipped"

                            control_date = results['IssueDate']
                            if control_date is None:  # we haven't got any copies of this magazine yet
                                # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                                # could perhaps calc differently for weekly, biweekly etc
                                # or for magazines with only an issue number, use zero
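                                # e.g. with MAG_AGE = 31 (hypothetical value) and today being 2015-08-01,
                                # control_date becomes 2015-07-01, so only issues dated after that are grabbed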

                                if str(newdatish).isdigit():
                                    logger.debug(
                                        'Magazine comparing issue numbers (%s)'
                                        % newdatish)
                                    control_date = 0
                                elif re.match(r'\d+-\d\d-\d\d', str(newdatish)):
                                    start_time = time.time()
                                    start_time -= int(
                                        lazylibrarian.CONFIG['MAG_AGE']
                                    ) * 24 * 60 * 60  # number of seconds in days
                                    if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                        start_time = 0
                                    control_date = time.strftime(
                                        "%Y-%m-%d", time.localtime(start_time))
                                    logger.debug(
                                        'Magazine date comparing to %s' %
                                        control_date)
                                else:
                                    logger.debug(
                                        'Magazine unable to find comparison type [%s]'
                                        % newdatish)
                                    control_date = 0

                            if str(control_date).isdigit() and str(
                                    newdatish).isdigit():
                                # for issue numbers, check if later than last one we have
                                comp_date = int(newdatish) - int(control_date)
                                newdatish = "%s" % newdatish
                                newdatish = newdatish.zfill(
                                    4)  # pad so we sort correctly
                            elif re.match(r'\d+-\d\d-\d\d', str(control_date)) and \
                                    re.match(r'\d+-\d\d-\d\d', str(newdatish)):
                                # only grab a copy if it's newer than the most recent we have,
                                # or newer than a month ago if we have none
                                comp_date = datecompare(
                                    newdatish, control_date)
                            else:
                                # invalid comparison of date and issue number
                                if re.match(r'\d+-\d\d-\d\d', str(control_date)):
                                    logger.debug(
                                        'Magazine %s failed: Expecting a date'
                                        % nzbtitle_formatted)
                                else:
                                    logger.debug(
                                        'Magazine %s failed: Expecting issue number'
                                        % nzbtitle_formatted)
                                bad_date += 1
                                newdatish = "1970-01-01"  # this is our fake date for ones we can't decipher
                                comp_date = 0

                            if comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + newdatish
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug(
                                        'This issue of %s is new, downloading'
                                        % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' %
                                                 len(issues))
                                    if lazylibrarian.LOGLEVEL > 2:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    insert_status = "Wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug(
                                        'This issue of %s is already flagged for download'
                                        % issue)
                            else:
                                if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                    logger.debug(
                                        'This issue of %s is old; skipping.' %
                                        nzbtitle_formatted)
                                    old_date += 1

                            # store only the _new_ matching results
                            #  Don't add a new entry if this issue has been found on an earlier search
                            #  and status has been user-set ( we only delete the "Skipped" ones )
                            #  In "wanted" table it might be already snatched/downloading/processing

                            mag_entry = myDB.match(
                                'SELECT * from %s WHERE NZBtitle=? and NZBprov=?'
                                % insert_table, (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug(
                                        '%s is already in %s marked %s' %
                                        (nzbtitle, insert_table,
                                         insert_status))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": newdatish,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict,
                                            controlValueDict)
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Added %s to %s marked %s' %
                                                 (nzbtitle, insert_table,
                                                  insert_status))

                msg = 'Found %i result%s for %s. %i new,' % (
                    total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (
                    old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch = TORDownloadMethod(magazine['bookid'],
                                                   magazine['nzbtitle'],
                                                   magazine['nzburl'],
                                                   'magazine')
                    else:
                        snatch = NZBDownloadMethod(magazine['bookid'],
                                                   magazine['nzbtitle'],
                                                   magazine['nzburl'],
                                                   'magazine')
                    if snatch:
                        logger.info(
                            'Downloading %s from %s' %
                            (magazine['nzbtitle'], magazine["nzbprov"]))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']),
                                       magazine["nzbprov"], now()))
                        custom_notify_snatch(magazine['bookid'])
                        scheduleJob(action='Start', target='processDir')

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' %
                     traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
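
The title matching in search_magazines relies on fuzzywuzzy's token_set_ratio so a magazine name cannot match a longer title that merely contains it (the Maxim USA / Maximum PC USA case noted in the comments). A small, hypothetical sketch of that check, using the same 90% fallback threshold the code above uses for MATCH_RATIO:

from fuzzywuzzy import fuzz

def title_matches(wanted, candidate, ratio=90):
    """ True if candidate looks like an issue of the wanted magazine (sketch) """
    return fuzz.token_set_ratio(wanted, candidate) >= ratio

# title_matches('The MagPi', 'The MagPi July 2015')        -> True (scores 100)
# title_matches('Maxim USA', 'Maximum PC USA July 2015')   -> False (scores well below 90)
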
Example #25
0
def LIME(book=None, test=False):
    errmsg = ''
    provider = "Limetorrent"
    host = lazylibrarian.CONFIG['LIME_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    params = {
        "q": book['searchterm']
    }
    providerurl = url_fix(host + "/searchrss/other")
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
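    # note: the "- 1" above plus the strict "minimumseeders < seeders" test below means we only
    # accept torrents with at least NUMBEROFSEEDERS seeders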
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = item['description']
                        seeders = int(seeders.split('Seeds:')[1].split(' ,')[0].replace(',', '').strip())
                    except (IndexError, ValueError):
                        seeders = 0

                    size = item['size']
                    try:
                        size = int(size)
                    except ValueError:
                        size = 0

                    try:
                        pubdate = item['published']
                    except KeyError:
                        pubdate = None

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['url']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        res = {
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['LIME_DLPRIORITY']
                        }
                        if pubdate:
                            res['tor_date'] = pubdate
                        results.append(res)
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # may have ip based access limits
                        logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
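
Limetorrents reports the seeder count inside the item description rather than as a separate RSS element, which is why the parser above splits the text apart. A self-contained sketch of that extraction, assuming the same 'Seeds: n ,' layout the code above expects:

def extract_seeders(description):
    """ Pull the seeder count out of a 'Seeds: nnn , ...' string; 0 if absent (sketch) """
    try:
        return int(description.split('Seeds:')[1].split(' ,')[0].replace(',', '').strip())
    except (AttributeError, IndexError, ValueError):
        return 0

# extract_seeders('Seeds: 1,234 , Leechers: 56')  -> 1234
# extract_seeders('no seeder info')               -> 0
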
Example #26
0
    def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped",
                         audiostatus="Skipped", entrystatus='Active', refresh=False):
        # noinspection PyBroadException
        try:
            logger.debug('[%s] Now processing books with Google Books API' % authorname)
            # Google doesn't like accents in author names
            set_url = self.url + quote('inauthor:"%s"' % unaccented_str(authorname))

            api_hits = 0
            gr_lang_hits = 0
            lt_lang_hits = 0
            gb_lang_change = 0
            cache_hits = 0
            not_cached = 0
            startindex = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            locked_count = 0
            book_ignore_count = 0
            total_count = 0
            number_results = 1

            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            # mark the author as Loading while we fetch their books
            myDB = database.DBConnection()
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            try:
                while startindex < number_results:

                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urlencode(self.params)

                    try:
                        jsonresults, in_cache = gb_json_request(URL, useCache=not refresh)
                        if not jsonresults:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                    except Exception as err:
                        if hasattr(err, 'reason'):
                            errmsg = err.reason
                        else:
                            errmsg = str(err)
                        logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                        break

                    if number_results == 0:
                        logger.warn('Found no results for %s' % authorname)
                        break
                    else:
                        logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname))

                    startindex += 40

                    for item in jsonresults['items']:

                        total_count += 1
                        book = bookdict(item)
                        # skip any result without an author; a book with no author is no use to us
                        if not book['author']:
                            logger.debug('Skipped a result without authorfield.')
                            continue

                        isbnhead = ""
                        if len(book['isbn']) == 10:
                            isbnhead = book['isbn'][0:3]
                        elif len(book['isbn']) == 13:
                            isbnhead = book['isbn'][3:6]
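                        # e.g. (hypothetical) ISBN 9782070360024 gives isbnhead '207'; the prefix lookups
                        # below match on its leading digits (ISBN group 2 is the French-language group)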

                        booklang = book['lang']
                        # do we care about language?
                        if "All" not in valid_langs:
                            if book['isbn']:
                                # seems google lies to us, sometimes tells us books are in english when they are not
                                if booklang == "Unknown" or booklang == "en":
                                    googlelang = booklang
                                    match = False
                                    lang = myDB.match('SELECT lang FROM languages where isbn=?', (isbnhead,))
                                    if lang:
                                        booklang = lang['lang']
                                        cache_hits += 1
                                        logger.debug("Found cached language [%s] for [%s]" % (booklang, isbnhead))
                                        match = True
                                    if not match:  # no match in cache, try lookup dict
                                        if isbnhead:
                                            if len(book['isbn']) == 13 and book['isbn'].startswith('979'):
                                                for lang in lazylibrarian.isbn_979_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_979_dict[lang]
                                                        logger.debug("ISBN979 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            elif (len(book['isbn']) == 10) or \
                                                    (len(book['isbn']) == 13 and book['isbn'].startswith('978')):
                                                for lang in lazylibrarian.isbn_978_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_978_dict[lang]
                                                        logger.debug("ISBN979 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            if match:
                                                controlValueDict = {"isbn": isbnhead}
                                                newValueDict = {"lang": booklang}
                                                myDB.upsert("languages", newValueDict, controlValueDict)

                                    if not match:
                                        booklang = thingLang(book['isbn'])
                                        lt_lang_hits += 1
                                        if booklang:
                                            match = True
                                            myDB.action('insert into languages values (?, ?)', (isbnhead, booklang))

                                    if match:
                                        # We found a better language match
                                        if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]:
                                            # these are all english, may need to expand this list
                                            logger.debug("%s Google thinks [%s], we think [%s]" %
                                                         (book['name'], googlelang, booklang))
                                            gb_lang_change += 1
                                    else:  # No match anywhere, accept google language
                                        booklang = googlelang

                            # skip if language is in ignore list
                            if booklang not in valid_langs:
                                logger.debug('Skipped [%s] with language %s' % (book['name'], booklang))
                                ignored += 1
                                continue

                        ignorable = ['future', 'date', 'isbn']
                        if lazylibrarian.CONFIG['NO_LANG']:
                            ignorable.append('lang')
                        rejected = None
                        check_status = False
                        existing_book = None
                        bookname = book['name']
                        bookid = item['id']
                        if not bookname:
                            logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorname))
                            rejected = 'name', 'No bookname'
                        else:
                            bookname = replace_all(unaccented(bookname), {':': '.', '"': '', '\'': ''}).strip()
                            if re.match(r'[^\w-]', bookname):  # remove books with bad characters in title
                                logger.debug("[%s] removed book for bad characters" % bookname)
                                rejected = 'chars', 'Bad characters in bookname'

                        if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                            # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                            if book['date'] > today()[:len(book['date'])]:
                                logger.debug('Rejecting %s, future publication date %s' % (bookname, book['date']))
                                rejected = 'future', 'Future publication date [%s]' % book['date']

                        if not rejected and lazylibrarian.CONFIG['NO_PUBDATE']:
                            if not book['date']:
                                logger.debug('Rejecting %s, no publication date' % bookname)
                                rejected = 'date', 'No publication date'

                        if not rejected and lazylibrarian.CONFIG['NO_ISBN']:
                            if not isbnhead:
                                logger.debug('Rejecting %s, no isbn' % bookname)
                                rejected = 'isbn', 'No ISBN'

                        if not rejected:
                            cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                            cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                            match = myDB.match(cmd, (bookname, authorname))
                            if match:
                                if match['BookID'] != bookid:  # we have a different book with this author/title already
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                                 (match['BookID'], authorname, bookname, bookid))
                                    rejected = 'bookid', 'Got under different bookid %s' % bookid
                                    duplicates += 1

                        cmd = 'SELECT AuthorName,BookName,AudioStatus,books.Status FROM books,authors'
                        cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                        match = myDB.match(cmd, (bookid,))
                        if match:  # we have a book with this bookid already
                            if bookname != match['BookName'] or authorname != match['AuthorName']:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                             (bookid, authorname, bookname, match['AuthorName'], match['BookName']))
                            else:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                             (bookid, authorname, bookname))
                                check_status = True
                            duplicates += 1
                            rejected = 'got', 'Already got this book in database'

                            # Make sure we don't reject books we have got
                            if match['Status'] in ['Open', 'Have'] or match['AudioStatus'] in ['Open', 'Have']:
                                rejected = None

                        if rejected and rejected[0] not in ignorable:
                            removedResults += 1
                        if check_status or rejected is None or (
                                lazylibrarian.CONFIG['IMP_IGNORE'] and rejected[0] in ignorable):  # dates, isbn

                            cmd = 'SELECT Status,AudioStatus,BookFile,AudioFile,Manual,BookAdded,BookName '
                            cmd += 'FROM books WHERE BookID=?'
                            existing = myDB.match(cmd, (bookid,))
                            if existing:
                                book_status = existing['Status']
                                audio_status = existing['AudioStatus']
                                if lazylibrarian.CONFIG['FOUND_STATUS'] == 'Open':
                                    if book_status == 'Have' and existing['BookFile']:
                                        book_status = 'Open'
                                    if audio_status == 'Have' and existing['AudioFile']:
                                        audio_status = 'Open'
                                locked = existing['Manual']
                                added = existing['BookAdded']
                                if locked is None:
                                    locked = False
                                elif locked.isdigit():
                                    locked = bool(int(locked))
                            else:
                                book_status = bookstatus  # new_book status, or new_author status
                                audio_status = audiostatus
                                added = today()
                                locked = False

                            if rejected:
                                reason = rejected[1]
                                if rejected[0] in ignorable:
                                    book_status = 'Ignored'
                                    audio_status = 'Ignored'
                                    book_ignore_count += 1
                            else:
                                reason = ''

                            if locked:
                                locked_count += 1
                            else:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorID": authorid,
                                    "BookName": bookname,
                                    "BookSub": book['sub'],
                                    "BookDesc": book['desc'],
                                    "BookIsbn": book['isbn'],
                                    "BookPub": book['pub'],
                                    "BookGenre": book['genre'],
                                    "BookImg": book['img'],
                                    "BookLink": book['link'],
                                    "BookRate": float(book['rate']),
                                    "BookPages": book['pages'],
                                    "BookDate": book['date'],
                                    "BookLang": booklang,
                                    "Status": book_status,
                                    "AudioStatus": audio_status,
                                    "BookAdded": added,
                                    "WorkID": '',
                                    "ScanResult": reason
                                }

                                myDB.upsert("books", newValueDict, controlValueDict)
                                logger.debug("Book found: " + bookname + " " + book['date'])
                                if 'nocover' in book['img'] or 'nophoto' in book['img']:
                                    # try to get a cover from another source
                                    workcover, source = getBookCover(bookid)
                                    if workcover:
                                        logger.debug('Updated cover for %s using %s' % (bookname, source))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": workcover}
                                        myDB.upsert("books", newValueDict, controlValueDict)

                                elif book['img'] and book['img'].startswith('http'):
                                    link, success, _ = cache_img("book", bookid, book['img'], refresh=refresh)
                                    if success:
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": link}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                    else:
                                        logger.debug('Failed to cache image for %s' % book['img'])

                                serieslist = []
                                if book['series']:
                                    serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))]
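                                # the entries appear to be (seriesid, seriesnum, seriesname) tuples;
                                # the first field is left blank here because only the name/number
                                # parsed from the book data are known at this point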
                                if lazylibrarian.CONFIG['ADD_SERIES']:
                                    newserieslist = getWorkSeries(bookid)
                                    if newserieslist:
                                        serieslist = newserieslist
                                        logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
                                    setSeries(serieslist, bookid)

                                new_status = setStatus(bookid, serieslist, bookstatus)

                                if new_status != book_status:
                                    book_status = new_status

                                worklink = getWorkPage(bookid)
                                if worklink:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"WorkPage": worklink}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                                if not existing_book:
                                    logger.debug("[%s] Added book: %s [%s] status %s" %
                                                 (authorname, bookname, booklang, book_status))
                                    added_count += 1
                                else:
                                    logger.debug("[%s] Updated book: %s [%s] status %s" %
                                                 (authorname, bookname, booklang, book_status))
                                    updated_count += 1
            except KeyError:
                pass

            deleteEmptySeries()
            logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' %
                         (authorname, api_hits, plural(api_hits)))
            cmd = 'SELECT BookName, BookLink, BookDate, BookImg, BookID from books WHERE AuthorID=?'
            cmd += ' AND Status != "Ignored" order by BookDate DESC'
            lastbook = myDB.match(cmd, (authorid,))

            if lastbook:  # maybe there are no books [remaining] for this author
                lastbookname = lastbook['BookName']
                lastbooklink = lastbook['BookLink']
                lastbookdate = lastbook['BookDate']
                lastbookid = lastbook['BookID']
                lastbookimg = lastbook['BookImg']
            else:
                lastbookname = ""
                lastbooklink = ""
                lastbookdate = ""
                lastbookid = ""
                lastbookimg = ""

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {
                "Status": entrystatus,
                "LastBook": lastbookname,
                "LastLink": lastbooklink,
                "LastDate": lastbookdate,
                "LastBookID": lastbookid,
                "LastBookImg": lastbookimg
            }

            myDB.upsert("authors", newValueDict, controlValueDict)
            resultcount = added_count + updated_count
            logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
            logger.debug("Found %s locked book%s" % (locked_count, plural(locked_count)))
            logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
            logger.debug("Removed %s incorrect/incomplete result%s" % (removedResults, plural(removedResults)))
            logger.debug("Removed %s duplicate result%s" % (duplicates, plural(duplicates)))
            logger.debug("Ignored %s book%s" % (book_ignore_count, plural(book_ignore_count)))
            logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount)))

            myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                        (authorname, api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                         cache_hits, ignored, removedResults, not_cached, duplicates))

            if refresh:
                logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                            (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
            else:
                logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                            (authorname, added_count, plural(added_count)))

        except Exception:
            logger.error('Unhandled exception in GB.get_author_books: %s' % traceback.format_exc())
Example #27
0
    def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped",
                         entrystatus='Active', refresh=False):
        # noinspection PyBroadException
        try:
            logger.debug('[%s] Now processing books with Google Books API' % authorname)
            # Google doesn't like accents in author names
            set_url = self.url + quote('inauthor:"%s"' % unaccented_str(authorname))

            api_hits = 0
            gr_lang_hits = 0
            lt_lang_hits = 0
            gb_lang_change = 0
            cache_hits = 0
            not_cached = 0
            startindex = 0
            resultcount = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            book_ignore_count = 0
            total_count = 0
            number_results = 1

            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            # Author is loading
            myDB = database.DBConnection()
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            try:
                while startindex < number_results:

                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urlencode(self.params)

                    try:
                        jsonresults, in_cache = get_json_request(URL, useCache=not refresh)
                        if jsonresults is None:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                    except Exception as err:
                        if hasattr(err, 'reason'):
                            errmsg = err.reason
                        else:
                            errmsg = str(err)
                        logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                        break

                    if number_results == 0:
                        logger.warn('Found no results for %s' % authorname)
                        break
                    else:
                        logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname))

                    startindex += 40
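                    # Google Books returns at most 40 volumes per request; self.params is assumed
                    # to ask for maxResults=40, so stepping startIndex by 40 pages through
                    # totalItems until the while condition above is no longer met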

                    for item in jsonresults['items']:

                        total_count += 1

                        # skip if there is no author; a result without an author is not a usable book
                        try:
                            _ = item['volumeInfo']['authors'][0]
                        except KeyError:
                            logger.debug('Skipped a result with no author field.')
                            continue

                        try:
                            if item['volumeInfo']['industryIdentifiers'][0]['type'] == 'ISBN_10':
                                bookisbn = item['volumeInfo'][
                                    'industryIdentifiers'][0]['identifier']
                            else:
                                bookisbn = ""
                        except KeyError:
                            bookisbn = ""

                        isbnhead = ""
                        if len(bookisbn) == 10:
                            isbnhead = bookisbn[0:3]
                        elif len(bookisbn) == 13:
                            isbnhead = bookisbn[3:6]
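                        # e.g. an ISBN-10 of '0552124354' gives isbnhead '055', and an ISBN-13 such
                        # as '9780552124355' also gives '055' (the digits just after the 978/979
                        # prefix); isbnhead is used below for the cached-language lookup and the
                        # ISBN group tables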

                        try:
                            booklang = item['volumeInfo']['language']
                        except KeyError:
                            booklang = "Unknown"

                        # do we care about language?
                        if "All" not in valid_langs:
                            if bookisbn != "":
                                # Google sometimes reports a book as English when it is not, so double-check
                                if booklang == "Unknown" or booklang == "en":
                                    googlelang = booklang
                                    match = False
                                    lang = myDB.match('SELECT lang FROM languages where isbn=?', (isbnhead,))
                                    if lang:
                                        booklang = lang['lang']
                                        cache_hits += 1
                                        logger.debug("Found cached language [%s] for [%s]" % (booklang, isbnhead))
                                        match = True
                                    if not match:  # no match in cache, try lookup dict
                                        if isbnhead:
                                            if len(bookisbn) == 13 and bookisbn.startswith('979'):
                                                for lang in lazylibrarian.isbn_979_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_979_dict[lang]
                                                        logger.debug("ISBN979 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            elif (len(bookisbn) == 10) or \
                                                    (len(bookisbn) == 13 and bookisbn.startswith('978')):
                                                for lang in lazylibrarian.isbn_978_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_978_dict[lang]
                                                        logger.debug("ISBN978 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            if match:
                                                myDB.action('insert into languages values (?, ?)',
                                                            (isbnhead, booklang))
                                                logger.debug("GB language: " + booklang)

                                    if not match:
                                        # try searching librarything for a language code using the isbn
                                        # if no language found, librarything return value is "invalid" or "unknown"
                                        # librarything returns plain text, not xml
                                        BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + bookisbn
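                                        # thingLang appears to return a bare language code such as
                                        # 'eng' on success, hence the plain-text checks below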
                                        proxies = proxyList()
                                        try:
                                            librarything_wait()
                                            timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
                                            r = requests.get(BOOK_URL, timeout=timeout, proxies=proxies)
                                            resp = r.text
                                            lt_lang_hits += 1
                                            logger.debug(
                                                "LibraryThing reports language [%s] for %s" % (resp, isbnhead))
                                            if resp != 'invalid' and resp != 'unknown':
                                                booklang = resp  # found a language code
                                                match = True
                                                myDB.action('insert into languages values (?, ?)',
                                                            (isbnhead, booklang))
                                                logger.debug("LT language: " + booklang)
                                        except Exception as e:
                                            booklang = ""
                                            logger.error("%s finding language: %s" % (type(e).__name__, str(e)))

                                    if match:
                                        # We found a better language match
                                        if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]:
                                            # these are all english, may need to expand this list
                                            booknamealt = item['volumeInfo']['title']
                                            logger.debug("%s Google thinks [%s], we think [%s]" %
                                                         (booknamealt, googlelang, booklang))
                                            gb_lang_change += 1
                                    else:  # No match anywhere, accept google language
                                        booklang = googlelang

                            # skip if the language is not one of the wanted languages
                            if booklang not in valid_langs:
                                booknamealt = item['volumeInfo']['title']
                                logger.debug(
                                    'Skipped [%s] with language %s' %
                                    (booknamealt, booklang))
                                ignored += 1
                                continue

                        try:
                            bookpub = item['volumeInfo']['publisher']
                        except KeyError:
                            bookpub = ""

                        try:
                            booksub = item['volumeInfo']['subtitle']
                        except KeyError:
                            booksub = ""

                        if not booksub:
                            series = ""
                            seriesNum = ""
                        else:
                            try:
                                series = booksub.split('(')[1].split(' Series ')[0]
                            except IndexError:
                                series = ""
                            if series.endswith(')'):
                                series = series[:-1]
                            try:
                                seriesNum = booksub.split('(')[1].split(' Series ')[1].split(')')[0]
                                if seriesNum[0] == '#':
                                    seriesNum = seriesNum[1:]
                            except IndexError:
                                seriesNum = ""

                            if not seriesNum and '#' in series:
                                words = series.rsplit('#', 1)
                                series = words[0].strip()
                                seriesNum = words[1].strip()
                            if not seriesNum and ' ' in series:
                                words = series.rsplit(' ', 1)
                                # has to be unicode for isnumeric()
                                if (u"%s" % words[1]).isnumeric():
                                    series = words[0]
                                    seriesNum = words[1]
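                            # e.g. a subtitle of 'A Novel (The Culling Series #3)' is parsed above
                            # into series 'The Culling' and seriesNum '3'; a subtitle like
                            # '(Discworld #2)' falls through to the '#' fallback, giving 'Discworld' / '2'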

                        try:
                            bookdate = item['volumeInfo']['publishedDate']
                        except KeyError:
                            bookdate = '0000-00-00'

                        try:
                            bookimg = item['volumeInfo']['imageLinks']['thumbnail']
                        except KeyError:
                            bookimg = 'images/nocover.png'

                        try:
                            bookrate = item['volumeInfo']['averageRating']
                        except KeyError:
                            bookrate = 0

                        try:
                            bookpages = item['volumeInfo']['pageCount']
                        except KeyError:
                            bookpages = 0

                        try:
                            bookgenre = item['volumeInfo']['categories'][0]
                        except KeyError:
                            bookgenre = ""

                        try:
                            bookdesc = item['volumeInfo']['description']
                        except KeyError:
                            bookdesc = ""

                        rejected = False
                        check_status = False

                        bookname = item['volumeInfo']['title']
                        # fetch the volume id up front so the rejection messages below can use it
                        bookid = item['id']

                        if not bookname:
                            logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorname))
                            removedResults += 1
                            rejected = True
                        else:
                            bookname = replace_all(unaccented(bookname), {':': '.', '"': '', '\'': ''}).strip()
                            booklink = item['volumeInfo']['canonicalVolumeLink']
                            bookrate = float(bookrate)

                            # GoodReads sometimes has multiple bookids for the same book (same author/title, different
                            # editions) and sometimes uses the same bookid if the book is the same but the title is
                            # slightly different. Not sure if googlebooks does too, but we only want one...
                            existing_book = myDB.match('SELECT Status,Manual,BookAdded FROM books WHERE BookID=?',
                                                       (bookid,))
                            if existing_book:
                                book_status = existing_book['Status']
                                locked = existing_book['Manual']
                                added = existing_book['BookAdded']
                                if locked is None:
                                    locked = False
                                elif locked.isdigit():
                                    locked = bool(int(locked))
                            else:
                                book_status = bookstatus  # new_book status, or new_author status
                                added = today()
                                locked = False

                        if not rejected and re.match('[^\w-]', bookname):  # remove books with bad characters in title
                            logger.debug("[%s] removed book for bad characters" % bookname)
                            removedResults += 1
                            rejected = True

                        if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                            # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                            if bookdate > today()[:len(bookdate)]:
                                logger.debug('Rejecting %s, future publication date %s' % (bookname, bookdate))
                                removedResults += 1
                                rejected = True
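                            # today() is assumed to return 'yyyy-mm-dd', so a partial bookdate such
                            # as '2025' or '2025-06' is compared against the same-length prefix of
                            # today's date, rejecting publication dates still in the future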

                        if not rejected:
                            cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                            cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                            match = myDB.match(cmd, (bookname.replace('"', '""'), authorname.replace('"', '""')))
                            if match:
                                if match['BookID'] != bookid:  # we have a different book with this author/title already
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                                 (match['BookID'], authorname, bookname, bookid))
                                    rejected = True
                                    duplicates += 1

                        if not rejected:
                            cmd = 'SELECT AuthorName,BookName FROM books,authors'
                            cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                            match = myDB.match(cmd, (bookid,))
                            if match:  # we have a book with this bookid already
                                if bookname != match['BookName'] or authorname != match['AuthorName']:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                                 (bookid, authorname, bookname, match['AuthorName'], match['BookName']))
                                else:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                                 (bookid, authorname, bookname))
                                    check_status = True
                                duplicates += 1
                                rejected = True

                        if check_status or not rejected:
                            if book_status != "Ignored" and not locked:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorID": authorid,
                                    "BookName": bookname,
                                    "BookSub": booksub,
                                    "BookDesc": bookdesc,
                                    "BookIsbn": bookisbn,
                                    "BookPub": bookpub,
                                    "BookGenre": bookgenre,
                                    "BookImg": bookimg,
                                    "BookLink": booklink,
                                    "BookRate": bookrate,
                                    "BookPages": bookpages,
                                    "BookDate": bookdate,
                                    "BookLang": booklang,
                                    "Status": book_status,
                                    "AudioStatus": lazylibrarian.CONFIG['NEWAUDIO_STATUS'],
                                    "BookAdded": added
                                }
                                resultcount += 1

                                myDB.upsert("books", newValueDict, controlValueDict)
                                logger.debug("Book found: " + bookname + " " + bookdate)
                                updated = False
                                if 'nocover' in bookimg or 'nophoto' in bookimg:
                                    # try to get a cover from librarything
                                    workcover = getBookCover(bookid)
                                    if workcover:
                                        logger.debug('Updated cover for %s to %s' % (bookname, workcover))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": workcover}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True

                                elif bookimg and bookimg.startswith('http'):
                                    link, success = cache_img("book", bookid, bookimg, refresh=refresh)
                                    if success:
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": link}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True
                                    else:
                                        logger.debug('Failed to cache image for %s' % bookimg)

                                seriesdict = {}
                                if lazylibrarian.CONFIG['ADD_SERIES']:  # prefer series info from librarything
                                    seriesdict = getWorkSeries(bookid)
                                    if seriesdict:
                                        logger.debug('Updated series: %s [%s]' % (bookid, seriesdict))
                                        updated = True
                                    # librarything doesn't have series info. Any in the title?
                                    elif series:
                                        seriesdict = {cleanName(unaccented(series)): seriesNum}
                                    setSeries(seriesdict, bookid)

                                new_status = setStatus(bookid, seriesdict, bookstatus)

                                if new_status != book_status:
                                    book_status = new_status
                                    updated = True

                                worklink = getWorkPage(bookid)
                                if worklink:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"WorkPage": worklink}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                                if not existing_book:
                                    logger.debug("[%s] Added book: %s [%s] status %s" %
                                                 (authorname, bookname, booklang, book_status))
                                    added_count += 1
                                elif updated:
                                    logger.debug("[%s] Updated book: %s [%s] status %s" %
                                                 (authorname, bookname, booklang, book_status))
                                    updated_count += 1
                            else:
                                book_ignore_count += 1
            except KeyError:
                pass

            deleteEmptySeries()
            logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' %
                         (authorname, api_hits, plural(api_hits)))
            cmd = 'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID=?'
            cmd += ' AND Status != "Ignored" order by BookDate DESC'
            lastbook = myDB.match(cmd, (authorid,))

            if lastbook:  # maybe there are no books [remaining] for this author
                lastbookname = lastbook['BookName']
                lastbooklink = lastbook['BookLink']
                lastbookdate = lastbook['BookDate']
                lastbookimg = lastbook['BookImg']
            else:
                lastbookname = ""
                lastbooklink = ""
                lastbookdate = ""
                lastbookimg = ""

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {
                "Status": entrystatus,
                "LastBook": lastbookname,
                "LastLink": lastbooklink,
                "LastDate": lastbookdate,
                "LastBookImg": lastbookimg
            }

            myDB.upsert("authors", newValueDict, controlValueDict)

            logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s for author" % (ignored, plural(ignored)))
            logger.debug("Removed %s bad character or no-name result%s for author" %
                         (removedResults, plural(removedResults)))
            logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates)))
            logger.debug("Found %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count)))
            logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount)))

            myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                        (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                         cache_hits, ignored, removedResults, not_cached, duplicates))

            if refresh:
                logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                            (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
            else:
                logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                            (authorname, added_count, plural(added_count)))

        except Exception:
            logger.error('Unhandled exception in GB.get_author_books: %s' % traceback.format_exc())
Example #28
0
def TDL(book=None):

    provider = "torrentdownloads"
    host = lazylibrarian.TDL_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host)

    params = {"type": "search", "cid": "2", "search": book['searchterm']}
    searchURL = providerurl + "/rss.xml?%s" % urllib.urlencode(params)

    try:
        request = urllib2.Request(searchURL)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST,
                              lazylibrarian.PROXY_TYPE)
        request.add_header('User-Agent', USER_AGENT)
        data = urllib2.urlopen(request, timeout=90)
    except socket.timeout:
        logger.debug('Timeout fetching data from %s' % provider)
        data = False
    except (urllib2.HTTPError, urllib2.URLError, ssl.SSLError) as e:
        # may return 404 if no results, not really an error
        if hasattr(e, 'code') and e.code == 404:
            logger.debug(searchURL)
            logger.debug(u"No results found from %s for %s" %
                         (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            if hasattr(e, 'reason'):
                errmsg = e.reason
            else:
                errmsg = str(e)
            logger.debug('Error fetching data from %s: %s' %
                         (provider, errmsg))
        data = False

    results = []

    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
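    # NUMBEROFSEEDERS - 1 so that the strict 'minimumseeders < seeders' checks below
    # accept results with at least NUMBEROFSEEDERS seeders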
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = item['title']
                    seeders = int(item['seeders'])
                    link = item['link']
                    size = int(item['size'])
                    url = None

                    if link and minimumseeders < seeders:
                        # no point requesting the magnet link if not enough seeders
                        request = urllib2.Request(link)
                        if lazylibrarian.PROXY_HOST:
                            request.set_proxy(lazylibrarian.PROXY_HOST,
                                              lazylibrarian.PROXY_TYPE)
                        request.add_header('User-Agent', USER_AGENT)

                        conn = urllib2.urlopen(request, timeout=90)
                        result = conn.read()
                        url = None
                        new_soup = BeautifulSoup(result)
                        for link in new_soup.findAll('a'):
                            output = link.get('href')
                            if output and output.startswith('magnet'):
                                url = output
                                break

                    if minimumseeders < int(seeders):
                        if not url or not title:
                            logger.debug('Missing url or title')
                        else:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                            })
                            logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error(u"An error occurred in the %s parser: %s" %
                                 (provider, str(e)))

    logger.debug(
        u"Found %i result%s from %s for %s" %
        (len(results), plural(len(results)), provider, book['searchterm']))
    return results
Example #29
0
def GEN(book=None):

    provider = "libgen"
    host = lazylibrarian.GEN_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    searchURL = url_fix(
        host +
        "/search.php?view=simple&open=0&phrase=0&column=def&res=100&req=" +
        book['searchterm'])

    result, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in result:
            logger.debug(u"No results found from %s for %s" %
                         (provider, book['searchterm']))
        elif '111' in result:
            # looks like libgen has ip based access limits
            logger.error(
                'Access forbidden. Please wait a while before trying %s again.'
                % provider)
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' %
                         (provider, result))
        result = False

    results = []

    if result:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        soup = BeautifulSoup(result)
        try:
            table = soup.findAll('table')[2]
            rows = table.findAll('tr')
        except Exception:  # no results = no table in result page
            rows = []

        c1 = []
        c2 = []
        c7 = []
        c8 = []

        if len(rows) > 1:
            for row in rows[1:]:
                if len(row.findAll('td')) > 8:
                    c1.append(row.findAll('td')[1])
                    c2.append(row.findAll('td')[2])
                    c7.append(row.findAll('td')[7])
                    c8.append(row.findAll('td')[8])

        for col1, col2, col7, col8 in zip(c1, c2, c7, c8):
            try:
                author = unaccented(col1.text)
                title = unaccented(
                    str(col2).split('>')[2].split('<')[0].strip())
                link = str(col2).split('href="')[1].split('?')[1].split('"')[0]
                size = unaccented(col7.text).upper()
                extn = col8.text

                try:
                    mult = 1
                    if 'K' in size:
                        size = size.split('K')[0]
                        mult = 1024
                    elif 'M' in size:
                        size = size.split('M')[0]
                        mult = 1024 * 1024
                    size = int(float(size) * mult)
                except (ValueError, IndexError):
                    size = 0
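                # e.g. a size column of '1.5 MB' parses to int(1.5 * 1024 * 1024) = 1572864 bytes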

                if link and title:
                    if author:
                        title = author.strip() + ' ' + title.strip()
                    if extn:
                        title = title + '.' + extn

                    bookURL = url_fix(host + "/ads.php?" + link)
                    bookresult, success = fetchURL(bookURL)
                    if not success:
                        # may return 404 if no results, not really an error
                        if '404' in bookresult:
                            logger.debug(u"No results found from %s for %s" %
                                         (provider, book['searchterm']))
                            bookresult = False
                        else:
                            logger.debug(bookURL)
                            logger.debug('Error fetching data from %s: %s' %
                                         (provider, bookresult))
                        bookresult = False
                    if bookresult:
                        url = None
                        new_soup = BeautifulSoup(bookresult)
                        for link in new_soup.findAll('a'):
                            output = link.get('href')
                            if output and output.startswith('/get.php'):
                                url = output
                                break

                        if url:
                            url = url_fix(host + url)
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                            })
                            logger.debug('Found %s, Size %s' % (title, size))

            except Exception as e:
                logger.error(u"An error occurred in the %s parser: %s" %
                             (provider, str(e)))

    logger.debug(
        u"Found %i result%s from %s for %s" %
        (len(results), plural(len(results)), provider, book['searchterm']))
    return results
Example #30
0
def LIME(book=None):

    provider = "Limetorrent"
    host = lazylibrarian.LIME_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    searchURL = url_fix(host + "/searchrss/other/?q=" + book['searchterm'])

    try:
        request = urllib2.Request(searchURL)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST,
                              lazylibrarian.PROXY_TYPE)
        request.add_header('User-Agent', USER_AGENT)
        data = urllib2.urlopen(request, timeout=90)
    except socket.timeout:
        logger.debug('Timeout fetching data from %s' % provider)
        data = False
    except (urllib2.HTTPError, urllib2.URLError, ssl.SSLError) as e:
        # may return 404 if no results, not really an error
        if hasattr(e, 'code') and e.code == 404:
            logger.debug(u"No results found from %s for %s" %
                         (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            if hasattr(e, 'reason'):
                errmsg = e.reason
            else:
                errmsg = str(e)
            logger.debug('Error fetching data from %s: %s' %
                         (provider, errmsg))
        data = False

    results = []

    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = item['description']
                        seeders = int(
                            seeders.split('Seeds:')[1].split(',')[0].strip())
                    except (IndexError, ValueError):
                        seeders = 0

                    size = item['size']
                    try:
                        size = int(size)
                    except ValueError:
                        size = 0

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['url']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # may have ip based access limits
                        logger.error(
                            'Access forbidden. Please wait a while before trying %s again.'
                            % provider)
                    else:
                        logger.error(
                            u"An error occurred in the %s parser: %s" %
                            (provider, str(e)))

    logger.debug(
        u"Found %i result%s from %s for %s" %
        (len(results), plural(len(results)), provider, book['searchterm']))
    return results
Example #31
0
def ZOO(book=None):

    provider = "zooqle"
    host = lazylibrarian.ZOO_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/search?q=" + book['searchterm'])

    params = {"category": "books", "fmt": "rss"}
    searchURL = providerurl + "&%s" % urllib.urlencode(params)

    try:
        request = urllib2.Request(searchURL)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST,
                              lazylibrarian.PROXY_TYPE)
        request.add_header('User-Agent', USER_AGENT)
        data = urllib2.urlopen(request, timeout=90)
    except socket.timeout:
        logger.debug('Timeout fetching data from %s' % provider)
        data = False
    except (urllib2.HTTPError, urllib2.URLError, ssl.SSLError) as e:
        # may return 404 if no results, not really an error
        if hasattr(e, 'code') and e.code == 404:
            logger.debug(u"No results found from %s for %s" %
                         (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            if hasattr(e, 'reason'):
                errmsg = e.reason
            else:
                errmsg = str(e)
            logger.debug('Error fetching data from %s: %s' %
                         (provider, errmsg))
        data = False

    results = []

    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    seeders = int(item['torrent_seeds'])
                    link = item['links'][1]['href']
                    size = int(item['links'][1]['length'])
                    magnet = item['torrent_magneturi']

                    url = None
                    if link:
                        url = link
                    if magnet:  # if both, prefer magnet over torrent
                        url = magnet

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # looks like zooqle has ip based access limits
                        logger.error(
                            'Access forbidden. Please wait a while before trying %s again.'
                            % provider)
                    else:
                        logger.error(
                            u"An error occurred in the %s parser: %s" %
                            (provider, str(e)))

    logger.debug(
        u"Found %i result%s from %s for %s" %
        (len(results), plural(len(results)), provider, book['searchterm']))
    return results
Example #32
0
def oldKAT(book=None):

    provider = "KAT"
    host = lazylibrarian.KAT_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/usearch/" + book['searchterm'])
    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1

    params = {
        "category": "books",
        "field": "seeders",
        "sorder": "desc",
        "rss": "1"
    }
    searchURL = providerurl + "/?%s" % urllib.urlencode(params)

    try:
        request = urllib2.Request(searchURL)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST,
                              lazylibrarian.PROXY_TYPE)
        request.add_header('User-Agent', USER_AGENT)
        data = urllib2.urlopen(request, timeout=90)
    except socket.timeout:
        logger.debug('Timeout fetching data from %s' % provider)
        data = False
    except (urllib2.HTTPError, urllib2.URLError, ssl.SSLError) as e:
        # seems KAT returns 404 if no results, not really an error
        if hasattr(e, 'code') and e.code == 404:
            logger.debug(u"No results found from %s for %s" %
                         (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            if hasattr(e, 'reason'):
                errmsg = e.reason
            else:
                errmsg = str(e)
            logger.debug('Error fetching data from %s: %s' %
                         (provider, errmsg))
        data = False

    results = []

    if data:
        logger.debug(u'Parsing results from <a href="%s">KAT</a>' % searchURL)
        d = feedparser.parse(data)
        if len(d.entries):
            logger.debug(
                u"Found %i result%s from %s for %s, checking seeders" %
                (len(d.entries), plural(len(
                    d.entries)), provider, book['searchterm']))
            for item in d.entries:
                try:
                    title = item['title']

                    seeders = item['torrent_seeds']
                    url = item['links'][1]['href']
                    size = int(item['links'][1]['length'])

                    if minimumseeders < int(seeders):
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': "KAT",
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                        })

                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug(
                            'Found %s but %s seeder%s' %
                            (title, int(seeders), plural(int(seeders))))

                except Exception as e:
                    logger.error(
                        u"An unknown error occurred in the KAT parser: %s" %
                        str(e))

    logger.debug(
        u"Found %i result%s from %s for %s" %
        (len(results), plural(len(results)), provider, book['searchterm']))
    return results
Example #33
0
def KAT(book=None):

    provider = "KAT"
    host = lazylibrarian.KAT_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/usearch/" + book['searchterm'])

    params = {"category": "books", "field": "seeders", "sorder": "desc"}
    searchURL = providerurl + "/?%s" % urllib.urlencode(params)

    result, success = fetchURL(searchURL)
    if not success:
        # seems KAT returns 404 if no results, not really an error
        if '404' in result:
            logger.debug(u"No results found from %s for %s" %
                         (provider, book['searchterm']))
            result = False
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' %
                         (provider, result))
        result = False

    results = []

    if result:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
        soup = BeautifulSoup(result)

        try:
            table = soup.findAll('table')[1]
            rows = table.findAll('tr')
        except Exception:  # no results = no table in result page
            rows = []

        c0 = []
        c1 = []
        c3 = []

        if len(rows) > 1:
            for row in rows[1:]:
                if len(row.findAll('td')) > 3:
                    c0.append(row.findAll('td')[0])
                    c1.append(row.findAll('td')[1])
                    c3.append(row.findAll('td')[3])

        for col0, col1, col3 in zip(c0, c1, c3):
            try:
                title = unaccented(
                    str(col0).split('cellMainLink">')[1].split('<')[0])
                # kat can return magnet or torrent or both. If both, prefer magnet...
                try:
                    url = 'magnet' + str(col0).split('href="magnet')[1].split(
                        '"')[0]
                except IndexError:
                    url = 'http' + str(col0).split('href="http')[1].split(
                        '.torrent?')[0] + '.torrent'

                try:
                    size = str(col1.text).replace('&nbsp;', '').upper()
                    mult = 1
                    if 'K' in size:
                        size = size.split('K')[0]
                        mult = 1024
                    elif 'M' in size:
                        size = size.split('M')[0]
                        mult = 1024 * 1024
                    size = int(float(size) * mult)
                except (ValueError, IndexError):
                    size = 0
                try:
                    seeders = int(col3.text)
                except ValueError:
                    seeders = 0

                if not url or not title:
                    logger.debug('Missing url or title')
                elif minimumseeders < seeders:
                    results.append({
                        'bookid': book['bookid'],
                        'tor_prov': provider,
                        'tor_title': title,
                        'tor_url': url,
                        'tor_size': str(size),
                    })
                    logger.debug('Found %s. Size: %s' % (title, size))
                else:
                    logger.debug('Found %s but %s seeder%s' %
                                 (title, seeders, plural(seeders)))
            except Exception as e:
                logger.error(u"An error occurred in the %s parser: %s" %
                             (provider, str(e)))

    logger.debug(
        u"Found %i result%s from %s for %s" %
        (len(results), plural(len(results)), provider, book['searchterm']))
    return results
Example #34
0
def KAT(book=None):

    provider = "KAT"
    host = lazylibrarian.KAT_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/usearch/" + book['searchterm'])

    params = {
        "category": "books",
        "field": "seeders",
        "sorder": "desc"
    }
    searchURL = providerurl + "/?%s" % urllib.urlencode(params)

    result, success = fetchURL(searchURL)
    if not success:
        # seems KAT returns 404 if no results, not really an error
        if '404' in result:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
            result = False
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, result))
        result = False

    results = []

    if result:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
        soup = BeautifulSoup(result)

        try:
            table = soup.findAll('table')[1]
            rows = table.findAll('tr')
        except Exception:  # no results = no table in result page
            rows = []

        c0 = []
        c1 = []
        c3 = []

        if len(rows) > 1:
            for row in rows[1:]:
                if len(row.findAll('td')) > 3:
                    c0.append(row.findAll('td')[0])
                    c1.append(row.findAll('td')[1])
                    c3.append(row.findAll('td')[3])

        for col0, col1, col3 in zip(c0, c1, c3):
            try:
                title = unaccented(str(col0).split('cellMainLink">')[1].split('<')[0])
                # kat can return magnet or torrent or both. If both, prefer magnet...
                try:
                    url = 'magnet' + str(col0).split('href="magnet')[1].split('"')[0]
                except IndexError:
                    url = 'http' + str(col0).split('href="http')[1].split('.torrent?')[0] + '.torrent'

                try:
                    size = str(col1.text).replace('&nbsp;', '').upper()
                    mult = 1
                    if 'K' in size:
                        size = size.split('K')[0]
                        mult = 1024
                    elif 'M' in size:
                        size = size.split('M')[0]
                        mult = 1024 * 1024
                    size = int(float(size) * mult)
                except (ValueError, IndexError):
                    size = 0
                try:
                    seeders = int(col3.text)
                except ValueError:
                    seeders = 0

                if not url or not title:
                    logger.debug('Missing url or title')
                elif minimumseeders < seeders:
                    results.append({
                        'bookid': book['bookid'],
                        'tor_prov': provider,
                        'tor_title': title,
                        'tor_url': url,
                        'tor_size': str(size),
                    })
                    logger.debug('Found %s. Size: %s' % (title, size))
                else:
                    logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
            except Exception as e:
                logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results
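Like the other torrent providers here, KAT only keeps a result when it has at least the configured number of seeders: minimumseeders is NUMBEROFSEEDERS - 1 and the test is minimumseeders < seeders. A minimal sketch of that filter with an illustrative helper name:

def enough_seeders(seeders, number_of_seeders):
    """True when a result has at least the configured number of seeders,
    matching the providers above (minimumseeders = NUMBEROFSEEDERS - 1)."""
    minimumseeders = int(number_of_seeders) - 1
    return minimumseeders < int(seeders)

# e.g. with NUMBEROFSEEDERS = 2: enough_seeders(2, 2) is True, enough_seeders(1, 2) is False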
Example #35
0
    def get_shelf_list(self):
        global consumer, client, token, user_id
        if not lazylibrarian.CONFIG['GR_API'] or not lazylibrarian.CONFIG['GR_SECRET'] or not \
                lazylibrarian.CONFIG['GR_OAUTH_TOKEN'] or not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
            logger.warn("Goodreads get shelf error: Please authorise first")
            return []
        else:
            #
            # loop over each page of shelves
            #     loop over each shelf
            #         add shelf to list
            #
            consumer = oauth.Consumer(key=str(lazylibrarian.CONFIG['GR_API']),
                                      secret=str(
                                          lazylibrarian.CONFIG['GR_SECRET']))
            token = oauth.Token(lazylibrarian.CONFIG['GR_OAUTH_TOKEN'],
                                lazylibrarian.CONFIG['GR_OAUTH_SECRET'])
            client = oauth.Client(consumer, token)
            user_id = self.getUserId()

            current_page = 0
            shelves = []
            page_shelves = 1
            while page_shelves:
                current_page = current_page + 1
                page_shelves = 0
                shelf_template = Template(
                    '${base}/shelf/list.xml?user_id=${user_id}&key=${key}&page=${page}'
                )
                body = urllib.urlencode({})
                headers = {'Content-Type': 'application/x-www-form-urlencoded'}
                request_url = shelf_template.substitute(
                    base='https://www.goodreads.com',
                    user_id=user_id,
                    page=current_page,
                    key=lazylibrarian.CONFIG['GR_API'])
                time_now = int(time.time())
                if time_now <= lazylibrarian.LAST_GOODREADS:
                    time.sleep(1)
                    lazylibrarian.LAST_GOODREADS = time_now
                try:
                    response, content = client.request(request_url, 'GET',
                                                       body, headers)
                except Exception as e:
                    logger.error("Exception in client.request: %s %s" %
                                 (type(e).__name__, str(e)))
                    return shelves

                if response['status'] != '200':
                    raise Exception('Failure status: %s for page %s' %
                                    (response['status'], current_page))
                xmldoc = xml.dom.minidom.parseString(content)

                shelf_list = xmldoc.getElementsByTagName('shelves')[0]
                for item in shelf_list.getElementsByTagName('user_shelf'):
                    shelf_name = item.getElementsByTagName(
                        'name')[0].firstChild.nodeValue
                    shelf_count = item.getElementsByTagName(
                        'book_count')[0].firstChild.nodeValue
                    shelf_exclusive = item.getElementsByTagName(
                        'exclusive_flag')[0].firstChild.nodeValue
                    shelves.append({
                        'name': shelf_name,
                        'books': shelf_count,
                        'exclusive': shelf_exclusive
                    })
                    page_shelves += 1

                    if lazylibrarian.LOGLEVEL > 2:
                        logger.debug(
                            'Shelf %s : %s: Exclusive %s' %
                            (shelf_name, shelf_count, shelf_exclusive))

                if lazylibrarian.LOGLEVEL > 2:
                    logger.debug('Found %s shelves on page %s' %
                                 (page_shelves, current_page))

            logger.debug(
                'Found %s shelves on %s page%s' %
                (len(shelves), current_page - 1, plural(current_page - 1)))
            # print shelves
            return shelves
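get_shelf_list paces its Goodreads calls so that no more than one request per second is issued, using LAST_GOODREADS as the timestamp of the previous call. A minimal sketch of that throttle, under the assumption that the timestamp is refreshed on every call (the code above only refreshes it when it sleeps):

import time

_last_goodreads = 0  # stand-in for lazylibrarian.LAST_GOODREADS

def wait_for_goodreads():
    """Sleep for a second if the previous Goodreads request happened within
    the current second, then record this call's timestamp."""
    global _last_goodreads
    time_now = int(time.time())
    if time_now <= _last_goodreads:
        time.sleep(1)
    _last_goodreads = time_now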
Example #36
0
def ZOO(book=None):

    provider = "zooqle"
    host = lazylibrarian.ZOO_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/search?q=" + book['searchterm'])

    params = {
        "category": "books",
        "fmt": "rss"
    }
    searchURL = providerurl + "&%s" % urllib.urlencode(params)

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
        data = False  # prevent the failed fetch text from being parsed below

    results = []

    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    seeders = int(item['torrent_seeds'])
                    link = item['links'][1]['href']
                    size = int(item['links'][1]['length'])
                    magnet = item['torrent_magneturi']

                    url = None
                    if link:
                        url = link
                    if magnet:  # if both, prefer magnet over torrent
                        url = magnet

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # looks like zooqle has ip based access limits
                        logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                    else:
                        logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results
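When a zooqle feed entry carries both a .torrent link and a magnet URI, the parser above prefers the magnet. A minimal sketch of that choice for a feedparser entry; the second links element is the enclosure, as read above:

def pick_download_url(item):
    """Prefer the magnet URI over the .torrent enclosure when both are
    present, as in the ZOO parser above. Returns None if neither exists."""
    links = item.get('links', [])
    link = links[1].get('href') if len(links) > 1 else None
    magnet = item.get('torrent_magneturi')
    if magnet:  # if both, prefer magnet over torrent
        return magnet
    return link or None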
Example #37
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if mags is None:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if mags is None:  # backlog search
            searchmags = myDB.select(
                'SELECT Title, Regex, DateType, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"'
            )
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select(
                    'SELECT Title,Regex,DateType,LastAcquired,IssueDate from magazines \
                                          WHERE Title=? AND Status="Active"',
                    (magazine['bookid'], ))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        # logger.debug("Removing old magazine search results")
        # myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' %
                    (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']
            datetype = searchmag['DateType']
            if not datetype:
                datetype = ''

            if not searchterm:
                dic = {
                    '...': '',
                    ' & ': ' ',
                    ' = ': ' ',
                    '?': '',
                    '$': 's',
                    ' + ': ' ',
                    '"': '',
                    ',': '',
                    '*': ''
                }
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)

                searchterm = re.sub('[.\-/]', ' ', searchterm)
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({
                "bookid": bookid,
                "searchterm": searchterm,
                "datetype": datetype
            })

        if not searchlist:
            logger.warn(
                'There is nothing to search for.  Mark some magazines as active.'
            )

        for book in searchlist:

            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No nzb providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(
                    book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG,
                                 0) + 1200 < timenow:
                        logger.warn(
                            'No direct providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_DIRECT_MSG = timenow

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                            'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(
                    book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No tor providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_TOR_MSG = timenow

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                            'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders = IterateOverRSSSites()
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No rss providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_RSS_MSG = timenow

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        resultlist.append({
                            'bookid': book['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': item[
                                'tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                            'nzbsize': item['tor_size'],
                            'nzbmode': item['tor_type']
                        })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace(
                        "'", "")  # suppress " in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(
                        nzbsize_temp, 1000
                    )  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    # Need to make sure that substrings of magazine titles don't get found
                    # (e.g. Maxim USA will find Maximum PC USA) so split into "words"
                    dic = {
                        '.': ' ',
                        '-': ' ',
                        '/': ' ',
                        '+': ' ',
                        '_': ' ',
                        '(': '',
                        ')': '',
                        '[': ' ',
                        ']': ' ',
                        '#': '# '
                    }
                    nzbtitle_formatted = replace_all(nzbtitle, dic).strip()
                    # remove extra spaces if they're in a row
                    nzbtitle_formatted = " ".join(nzbtitle_formatted.split())
                    nzbtitle_exploded = nzbtitle_formatted.split(' ')

                    results = myDB.match(
                        'SELECT * from magazines WHERE Title=?', (bookid, ))
                    if not results:
                        logger.debug(
                            'Magazine [%s] does not match search term [%s].' %
                            (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(
                            lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(
                                lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" %
                                             nzbtitle)
                                rejected = True

                        if not rejected:
                            if ' ' in bookid:
                                bookid_exploded = bookid.split(' ')
                            else:
                                bookid_exploded = [bookid]

                            # Check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015

                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = []
                                for word in nzbtitle_exploded:
                                    word = unaccented(word).lower()
                                    if word:
                                        wlist.append(word)
                                for word in bookid_exploded:
                                    word = unaccented(word).lower()
                                    if word and word not in wlist:
                                        logger.debug(
                                            "Rejecting %s, missing %s" %
                                            (nzbtitle, word))
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug(
                                        "Magazine title match failed " +
                                        bookid + " for " + nzbtitle_formatted)
                                else:
                                    logger.debug("Magazine title matched " +
                                                 bookid + " for " +
                                                 nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" %
                                             len(nzbtitle_exploded))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG[
                                'BLACKLIST_FAILED']:
                            blocked = myDB.match(
                                'SELECT * from wanted WHERE NZBurl=? and Status="Failed"',
                                (nzburl, ))
                            if blocked:
                                logger.debug(
                                    "Rejecting %s, blacklisted at %s" %
                                    (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG[
                                'BLACKLIST_PROCESSED']:
                            blocked = myDB.match(
                                'SELECT * from wanted WHERE NZBurl=?',
                                (nzburl, ))
                            if blocked:
                                logger.debug(
                                    "Rejecting %s, blacklisted at %s" %
                                    (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(
                                str(results['Reject']).lower())
                            reject_list += getList(
                                lazylibrarian.CONFIG['REJECT_MAGS'])
                            lower_title = unaccented(
                                nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Reject: %s' %
                                                 str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" %
                                                 (nzbtitle_formatted, word))
                                    break

                        if rejected:
                            rejects += 1
                        else:
                            regex_pass, issuedate, year = get_issue_date(
                                nzbtitle_exploded)
                            if regex_pass:
                                logger.debug('Issue %s (regex %s) for %s ' %
                                             (issuedate, regex_pass,
                                              nzbtitle_formatted))
                                datetype_ok = True
                                datetype = book['datetype']
                                if datetype:
                                    # check all wanted parts are in the regex result
                                    # Day Month Year Vol Iss (MM needs two months)

                                    if 'M' in datetype and regex_pass not in [
                                            1, 2, 3, 4, 5, 6, 7, 12
                                    ]:
                                        datetype_ok = False
                                    elif 'D' in datetype and regex_pass not in [
                                            3, 5, 6
                                    ]:
                                        datetype_ok = False
                                    elif 'MM' in datetype and regex_pass not in [
                                            1
                                    ]:  # bi monthly
                                        datetype_ok = False
                                    elif 'V' in datetype and 'I' in datetype and regex_pass not in [
                                            8, 9, 17, 18
                                    ]:
                                        datetype_ok = False
                                    elif 'V' in datetype and regex_pass not in [
                                            2, 10, 11, 12, 13, 14, 17, 18
                                    ]:
                                        datetype_ok = False
                                    elif 'I' in datetype and regex_pass not in [
                                            2, 10, 11, 12, 13, 14, 16, 17, 18
                                    ]:
                                        datetype_ok = False
                                    elif 'Y' in datetype and regex_pass not in [
                                            1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 13,
                                            15, 16, 18
                                    ]:
                                        datetype_ok = False
                            else:
                                datetype_ok = False
                                logger.debug(
                                    'Magazine %s not in a recognised date format.'
                                    % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                issuedate = "1970-01-01"  # provide a fake date for bad-date issues

                            # wanted issues go into wanted table marked "Wanted"
                            #  the rest into pastissues table marked "Skipped" or "Have"
                            insert_table = "pastissues"
                            comp_date = 0
                            if datetype_ok:
                                control_date = results['IssueDate']
                                logger.debug("Control date: [%s]" %
                                             control_date)
                                if not control_date:  # we haven't got any copies of this magazine yet
                                    # get a rough time just over MAX_AGE days ago to compare to, in format yyyy-mm-dd
                                    # could perhaps calc differently for weekly, biweekly etc
                                    # For magazines with only an issue number use zero as we can't tell age

                                    if str(issuedate).isdigit():
                                        logger.debug(
                                            'Magazine comparing issue numbers (%s)'
                                            % issuedate)
                                        control_date = 0
                                    elif re.match('\d+-\d\d-\d\d',
                                                  str(issuedate)):
                                        start_time = time.time()
                                        start_time -= int(
                                            lazylibrarian.CONFIG['MAG_AGE']
                                        ) * 24 * 60 * 60  # number of seconds in days
                                        if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                            start_time = 0
                                        control_date = time.strftime(
                                            "%Y-%m-%d",
                                            time.localtime(start_time))
                                        logger.debug(
                                            'Magazine date comparing to %s' %
                                            control_date)
                                    else:
                                        logger.debug(
                                            'Magazine unable to find comparison type [%s]'
                                            % issuedate)
                                        control_date = 0

                                if str(control_date).isdigit() and str(
                                        issuedate).isdigit():
                                    # for issue numbers, check if later than last one we have
                                    if regex_pass in [10, 12, 13] and year:
                                        issuedate = "%s%04d" % (year,
                                                                int(issuedate))
                                    else:
                                        issuedate = str(issuedate).zfill(4)
                                    if not control_date:
                                        comp_date = 1
                                    else:
                                        comp_date = int(issuedate) - int(
                                            control_date)
                                elif re.match('\d+-\d\d-\d\d', str(control_date)) and \
                                        re.match('\d+-\d\d-\d\d', str(issuedate)):
                                    # only grab a copy if it's newer than the most recent we have,
                                    # or newer than a month ago if we have none
                                    comp_date = datecompare(
                                        issuedate, control_date)
                                else:
                                    # invalid comparison of date and issue number
                                    comp_date = 0
                                    if re.match('\d+-\d\d-\d\d',
                                                str(control_date)):
                                        if regex_pass > 9 and year:
                                            # we assumed it was an issue number, but it could be a date
                                            year = check_int(year, 0)
                                            if regex_pass in [10, 12, 13]:
                                                issuedate = int(issuedate[:4])
                                            issuenum = check_int(issuedate, 0)
                                            if year and 1 <= issuenum <= 12:
                                                issuedate = "%04d-%02d-01" % (
                                                    year, issuenum)
                                                comp_date = datecompare(
                                                    issuedate, control_date)
                                        if not comp_date:
                                            logger.debug(
                                                'Magazine %s failed: Expecting a date'
                                                % nzbtitle_formatted)
                                    else:
                                        logger.debug(
                                            'Magazine %s failed: Expecting issue number'
                                            % nzbtitle_formatted)
                                    if not comp_date:
                                        bad_date += 1
                                        issuedate = "1970-01-01"

                            if issuedate == "1970-01-01":
                                logger.debug(
                                    'This issue of %s is of unknown age; skipping.'
                                    % nzbtitle_formatted)
                            elif not datetype_ok:
                                logger.debug(
                                    'This issue of %s not in a wanted date format.'
                                    % nzbtitle_formatted)
                            elif comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + issuedate
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug(
                                        'This issue of %s is new, downloading'
                                        % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' %
                                                 len(issues))
                                    if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug(
                                        'This issue of %s is already flagged for download'
                                        % issue)
                            else:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug(
                                        'This issue of %s is old; skipping.' %
                                        nzbtitle_formatted)
                                old_date += 1

                            # store only the _new_ matching results
                            #  Don't add a new entry if this issue has been found on an earlier search
                            #  and status has been user-set ( we only delete the "Skipped" ones )
                            #  In "wanted" table it might be already snatched/downloading/processing

                            mag_entry = myDB.match(
                                'SELECT Status from %s WHERE NZBtitle=? and NZBprov=?'
                                % insert_table, (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug(
                                        '%s is already in %s marked %s' %
                                        (nzbtitle, insert_table,
                                         mag_entry['Status']))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                if insert_table == 'pastissues':
                                    # try to mark ones we've already got
                                    match = myDB.match(
                                        "SELECT * from issues WHERE Title=? AND IssueDate=?",
                                        (bookid, issuedate))
                                    if match:
                                        insert_status = "Have"
                                    else:
                                        insert_status = "Skipped"
                                else:
                                    insert_status = "Wanted"
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": issuedate,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict,
                                            controlValueDict)
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Added %s to %s marked %s' %
                                                 (nzbtitle, insert_table,
                                                  insert_status))

                msg = 'Found %i result%s for %s. %i new,' % (
                    total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (
                    old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch, res = TORDownloadMethod(
                            magazine['bookid'], magazine['nzbtitle'],
                            magazine['nzburl'], 'Magazine')
                    elif magazine['nzbmode'] == 'direct':
                        snatch, res = DirectDownloadMethod(
                            magazine['bookid'], magazine['nzbtitle'],
                            magazine['nzburl'], 'Magazine')
                    elif magazine['nzbmode'] == 'nzb':
                        snatch, res = NZBDownloadMethod(
                            magazine['bookid'], magazine['nzbtitle'],
                            magazine['nzburl'], 'Magazine')
                    else:
                        res = 'Unhandled NZBmode [%s] for %s' % (
                            magazine['nzbmode'], magazine["nzburl"])
                        logger.error(res)
                        snatch = 0

                    if snatch:
                        logger.info(
                            'Downloading %s from %s' %
                            (magazine['nzbtitle'], magazine["nzbprov"]))
                        custom_notify_snatch(
                            "%s %s" % (magazine['bookid'], magazine['nzburl']))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']),
                                       magazine["nzbprov"], now()))
                        scheduleJob(action='Start', target='processDir')
                    else:
                        myDB.action(
                            'UPDATE wanted SET status="Failed",DLResult=? WHERE NZBurl=?',
                            (res, magazine["nzburl"]))

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' %
                     traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
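One of the checks above rejects a release unless every word of the magazine title appears in the release title, and the release title must contain extra words so it can still carry a date or issue number (so Maxim USA does not match Maximum PC USA). A minimal standalone sketch of that word check, using plain lower-casing instead of unaccented():

def title_words_match(mag_title, release_title):
    """True when every word of the magazine title appears in the release
    title and the release title has extra words for a date or issue number,
    mirroring the check in search_magazines above."""
    mag_words = [w.lower() for w in mag_title.split() if w]
    rel_words = [w.lower() for w in release_title.split() if w]
    if len(rel_words) <= len(mag_words):
        return False  # too short to include a date or issue number
    return all(word in rel_words for word in mag_words)

# e.g. title_words_match('Maxim USA', 'Maxim USA July 2015') is True
#      title_words_match('Maxim USA', 'Maximum PC USA July 2015') is False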
Example #38
0
def LIME(book=None):

    provider = "Limetorrent"
    host = lazylibrarian.LIME_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    searchURL = url_fix(host + "/searchrss/other/?q=" + book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
        data = False  # prevent the failed fetch text from being parsed below

    results = []

    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = item['description']
                        seeders = int(seeders.split('Seeds:')[1].split(',')[0].strip())
                    except (IndexError, ValueError) as e:
                        seeders = 0

                    size = item['size']
                    try:
                        size = int(size)
                    except ValueError:
                        size = 0

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['url']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # may have ip based access limits
                        logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                    else:
                        logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results
Example #39
0
    def _RecentBooks(self, **kwargs):
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        feed = {'title': 'LazyLibrarian OPDS - Recent Books', 'id': 'Recent Books', 'updated': now()}
        links = []
        entries = []
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=RecentBooks' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        links.append(getLink(href='%s/opensearchbooks.xml' % self.searchroot,
                             ftype='application/opensearchdescription+xml', rel='search', title='Search Books'))
        cmd = "select BookName,BookID,BookLibrary,BookDate,BookImg,BookDesc,BookAdded,BookFile,AuthorID "
        cmd += "from books where Status='Open' "
        if 'query' in kwargs:
            cmd += "AND BookName LIKE '%" + kwargs['query'] + "%' "
        cmd += "order by BookLibrary DESC"
        results = myDB.select(cmd)
        page = results[index:(index + self.PAGE_SIZE)]
        for book in page:
            mime_type = None
            if book['BookFile']:
                mime_type = mimeType(book['BookFile'])
            elif book['AudioFile']:
                mime_type = mimeType(book['AudioFile'])
            if mime_type:
                title = makeUnicode(book['BookName'])
                entry = {'title': escape(title),
                         'id': escape('book:%s' % book['BookID']),
                         'updated': opdstime(book['BookLibrary']),
                         'href': '%s?cmd=Serve&amp;bookid=%s' % (self.opdsroot, quote_plus(book['BookID'])),
                         'kind': 'acquisition',
                         'rel': 'file',
                         'type': mime_type}
                if lazylibrarian.CONFIG['OPDS_METAINFO']:
                    author = myDB.match("SELECT AuthorName from authors WHERE AuthorID='%s'" % book['AuthorID'])
                    author = makeUnicode(author['AuthorName'])
                    entry['image'] = self.searchroot + '/' + book['BookImg']
                    entry['content'] = escape('%s - %s' % (title, book['BookDesc']))
                    entry['author'] = escape('%s' % author)
                else:
                    entry['content'] = escape('%s (%s)' % (title, book['BookAdded']))
                entries.append(entry)

            """
                <link type="application/epub+zip" rel="http://opds-spec.org/acquisition"
                title="EPUB (no images)" length="18552" href="//www.gutenberg.org/ebooks/57490.epub.noimages"/>
                <link type="application/x-mobipocket-ebook" rel="http://opds-spec.org/acquisition"
                title="Kindle (no images)" length="110360" href="//www.gutenberg.org/ebooks/57490.kindle.noimages"/>
            """

        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=RecentBooks&amp;index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=RecentBooks&amp;index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s book%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
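_RecentBooks serves one PAGE_SIZE window of rows at a time and only emits next/previous links when there are rows beyond the current window. A minimal sketch of that paging arithmetic with illustrative names:

def page_window(results, index, page_size):
    """Return the slice to serve plus the next/previous offsets (None when
    there is no such page), mirroring the index handling in _RecentBooks."""
    page = results[index:index + page_size]
    next_index = index + page_size if len(results) > index + page_size else None
    prev_index = index - page_size if index >= page_size else None
    return page, next_index, prev_index

# e.g. page_window(list(range(75)), 30, 30) serves rows 30-59 with next=60 and previous=0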
Example #40
0
def TDL(book=None):

    provider = "torrentdownloads"
    host = lazylibrarian.TDL_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host)

    params = {
        "type": "search",
        "cid": "2",
        "search": book['searchterm']
    }
    searchURL = providerurl + "/rss.xml?%s" % urllib.urlencode(params)

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
        data = False  # prevent the failed fetch text from being parsed below

    results = []

    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = item['title']
                    seeders = int(item['seeders'])
                    link = item['link']
                    size = int(item['size'])
                    url = None

                    if link and minimumseeders < seeders:
                        # no point requesting the magnet link if not enough seeders
                        # TDL gives us a relative link
                        result, success = fetchURL(providerurl+link)
                        url = None
                        if success:
                            new_soup = BeautifulSoup(result)
                            for link in new_soup.findAll('a'):
                                output = link.get('href')
                                if output and output.startswith('magnet'):
                                    url = output
                                    break

                    if minimumseeders < int(seeders):
                        if not url or not title:
                            logger.debug('Missing url or title')
                        else:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                            })
                            logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results
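TDL's RSS feed only provides a relative details link, so the parser above fetches that page and pulls the first magnet href out of it. A minimal sketch of that step, assuming requests in place of LazyLibrarian's fetchURL:

import requests
from bs4 import BeautifulSoup

def first_magnet_link(details_url):
    """Fetch a torrent details page and return its first magnet link, or
    None on failure, mirroring the TDL parser above."""
    try:
        resp = requests.get(details_url, timeout=30)
        resp.raise_for_status()
    except requests.RequestException:
        return None
    soup = BeautifulSoup(resp.text, 'html.parser')
    for anchor in soup.findAll('a'):
        href = anchor.get('href')
        if href and href.startswith('magnet'):
            return href
    return None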
Example #41
0
def search_wishlist():
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            threading.currentThread().name = "SEARCHWISHLIST"

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverWishLists()
        new_books = 0
        if not wishproviders:
            logger.debug('No wishlists are set')
            scheduleJob(action='Stop', target='search_wishlist')
            return  # No point in continuing

        # for each item in resultlist, add to database if necessary, and mark as wanted
        logger.debug('Processing %s item%s in wishlists' % (len(resultlist), plural(len(resultlist))))
        for book in resultlist:
            # we get rss_author, rss_title, maybe rss_isbn, rss_bookid (goodreads bookid)
            # we can just use bookid if goodreads, or try isbn and name matching on author/title if not
            # eg NYTimes wishlist
            if 'E' in book['types']:
                ebook_status = "Wanted"
            else:
                ebook_status = "Skipped"
            if 'A' in book['types']:
                audio_status = "Wanted"
            else:
                audio_status = "Skipped"
            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and book['rss_bookid']:
                cmd = 'select books.Status as Status,AudioStatus,authors.Status as AuthorStatus,'
                cmd += 'AuthorName,BookName,Requester,AudioRequester from books,authors '
                cmd += 'where books.AuthorID = authors.AuthorID and bookid=?'
                bookmatch = myDB.match(cmd, (book['rss_bookid'],))
                if bookmatch:
                    cmd = 'SELECT SeriesName,Status from series,member '
                    cmd += 'where series.SeriesID=member.SeriesID and member.BookID=?'
                    series = myDB.select(cmd, (book['rss_bookid'],))
                    reject_series = None
                    for ser in series:
                        if ser['Status'] in ['Paused', 'Ignored']:
                            reject_series = {"Name": ser['SeriesName'], "Status": ser['Status']}
                            break
                    bookname = bookmatch['BookName']
                    if bookmatch['Status'] in ['Open', 'Wanted', 'Have']:
                        logger.info('Found book %s, already marked %s' % (bookname, bookmatch['Status']))
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {"Requester": bookmatch["Requester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": book['rss_bookid']}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    elif bookmatch['AuthorStatus'] in ['Paused', 'Ignored']:
                        logger.info('Found book %s, but author is %s' % (bookname, bookmatch['AuthorStatus']))
                    elif reject_series:
                        logger.info('Found book %s, but series "%s" is %s' %
                                    (bookname, reject_series['Name'], reject_series['Status']))
                    elif ebook_status == "Wanted":  # skipped/ignored
                        logger.info('Found book %s, marking as "Wanted"' % bookname)
                        controlValueDict = {"BookID": book['rss_bookid']}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {"Requester": bookmatch["Requester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": book['rss_bookid']}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    if bookmatch['AudioStatus'] in ['Open', 'Wanted', 'Have']:
                        logger.info('Found audiobook %s, already marked %s' % (bookname, bookmatch['AudioStatus']))
                        if bookmatch["AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["AudioRequester"]:
                                newValueDict = {"AudioRequester": bookmatch["AudioRequester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": book['rss_bookid']}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    elif bookmatch['AuthorStatus'] in ['Paused', 'Ignored']:
                        logger.info('Found book %s, but author is %s' % (bookname, bookmatch['AuthorStatus']))
                    elif reject_series:
                        logger.info('Found book %s, but series "%s" is %s' %
                                    (bookname, reject_series['Name'], reject_series['Status']))
                    elif audio_status == "Wanted":  # skipped/ignored
                        logger.info('Found audiobook %s, marking as "Wanted"' % bookname)
                        controlValueDict = {"BookID": book['rss_bookid']}
                        newValueDict = {"AudioStatus": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch["AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["AudioRequester"]:
                                newValueDict = {"AudioRequester": bookmatch["AudioRequester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": book['rss_bookid']}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                else:
                    import_book(book['rss_bookid'], ebook_status, audio_status)
                    new_books += 1
                    newValueDict = {"Requester": book["dispname"] + ' '}
                    controlValueDict = {"BookID": book['rss_bookid']}
                    myDB.upsert("books", newValueDict, controlValueDict)
                    newValueDict = {"AudioRequester": book["dispname"] + ' '}
                    controlValueDict = {"BookID": book['rss_bookid']}
                    myDB.upsert("books", newValueDict, controlValueDict)
            else:
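                # hedged note: match the feed entry against the database using its title, BookID and/or ISBN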
                item = {}
                results = None
                item['Title'] = book['rss_title']
                if book['rss_bookid']:
                    item['BookID'] = book['rss_bookid']
                if book['rss_isbn']:
                    item['ISBN'] = book['rss_isbn']
                bookmatch = finditem(item, book['rss_author'])
                if bookmatch:  # it's already in the database
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    auth_res = myDB.match('SELECT Status from authors WHERE authorname=?', (authorname,))
                    if auth_res:
                        auth_status = auth_res['Status']
                    else:
                        auth_status = 'Unknown'
                    cmd = 'SELECT SeriesName,Status from series,member '
                    cmd += 'where series.SeriesID=member.SeriesID and member.BookID=?'
                    series = myDB.select(cmd, (bookid,))
                    reject_series = None
                    for ser in series:
                        if ser['Status'] in ['Paused', 'Ignored']:
                            reject_series = {"Name": ser['SeriesName'], "Status": ser['Status']}
                            break
                    if bookmatch['Status'] in ['Open', 'Wanted', 'Have']:
                        logger.info(
                            'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookmatch['Status']))
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {"Requester": bookmatch["Requester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    elif auth_status in ['Paused', 'Ignored']:
                        logger.info('Found book %s, but author is "%s"' % (bookname, auth_status))
                    elif reject_series:
                        logger.info('Found book %s, but series "%s" is %s' %
                                    (bookname, reject_series['Name'], reject_series['Status']))
                    elif ebook_status == 'Wanted':  # skipped/ignored
                        logger.info('Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {"Requester": bookmatch["Requester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    if bookmatch['AudioStatus'] in ['Open', 'Wanted', 'Have']:
                        logger.info(
                            'Found audiobook %s by %s, already marked as "%s"' %
                            (bookname, authorname, bookmatch['AudioStatus']))
                        if bookmatch["AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["AudioRequester"]:
                                newValueDict = {"AudioRequester": bookmatch["AudioRequester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    elif auth_status in ['Paused', 'Ignored']:
                        logger.info('Found book %s, but author is "%s"' % (bookname, auth_status))
                    elif reject_series:
                        logger.info('Found book %s, but series "%s" is %s' %
                                    (bookname, reject_series['Name'], reject_series['Status']))
                    elif audio_status == 'Wanted':  # skipped/ignored
                        logger.info('Found audiobook %s by %s, marking as "Wanted"' % (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"AudioStatus": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch["AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["AudioRequester"]:
                                newValueDict = {"AudioRequester": bookmatch["AudioRequester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict, controlValueDict)
                else:  # not in database yet
                    if book['rss_isbn']:
                        results = search_for(book['rss_isbn'])
                    if results:
                        result = results[0]  # type: dict
                        if result['isbn_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                            logger.info("Found (%s%%) %s: %s" %
                                        (result['isbn_fuzz'], result['authorname'], result['bookname']))
                            import_book(result['bookid'], ebook_status, audio_status)
                            new_books += 1
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": result['bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            bookmatch = True
                    if not results:
                        searchterm = "%s <ll> %s" % (item['Title'], formatAuthorName(book['rss_author']))
                        results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]  # type: dict
                        if result['author_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90) \
                                and result['book_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                            logger.info("Found (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                      result['authorname'], result['bookname']))
                            import_book(result['bookid'], ebook_status, audio_status)
                            new_books += 1
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": result['bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            bookmatch = True

                    if not bookmatch:
                        msg = "Skipping book %s by %s" % (item['Title'], book['rss_author'])
                        if not results:
                            msg += ', No results returned'
                            logger.warn(msg)
                        else:
                            msg += ', No match found'
                            logger.warn(msg)
                            result = results[0]  # type: dict
                            msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                        result['authorname'], result['bookname'])
                            logger.warn(msg)
        if new_books:
            logger.info("Wishlist marked %s book%s as Wanted" % (new_books, plural(new_books)))

    except Exception:
        logger.error('Unhandled exception in search_wishlist: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
Example #42
0
def ZOO(book=None, test=False):
    errmsg = ''
    provider = "zooqle"
    host = lazylibrarian.CONFIG['ZOO_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/search")

    params = {"q": book['searchterm'], "category": "books", "fmt": "rss"}
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
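    # subtract one so the strict "minimumseeders < seeders" test below accepts the configured minimum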
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    seeders = int(item['torrent_seeds'])
                    link = item['links'][1]['href']
                    size = int(item['links'][1]['length'])
                    magnet = item['torrent_magneturi']

                    url = None
                    mode = 'torrent'
                    if link:
                        url = link
                    if magnet and (not url or lazylibrarian.CONFIG['PREFER_MAGNET']):
                        url = magnet
                        mode = 'magnet'

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < int(seeders):
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': mode,
                            'priority': lazylibrarian.CONFIG['ZOO_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # looks like zooqle has ip based access limits
                        logger.error(
                            'Access forbidden. Please wait a while before trying %s again.'
                            % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" %
                                     (provider, str(e)))
                        logger.debug('%s: %s' %
                                     (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
Example #43
0
    def _Author(self, **kwargs):
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        if 'authorid' not in kwargs:
            self.data = self._error_with_message('No Author Provided')
            return
        links = []
        entries = []
        links.append(getLink(href='%s/opensearchbooks.xml' % self.searchroot,
                             ftype='application/opensearchdescription+xml', rel='search', title='Search Books'))
        author = myDB.match("SELECT AuthorName from authors WHERE AuthorID=?", (kwargs['authorid'],))
        if not author:
            self.data = self._error_with_message('Unknown Author')
            return
        author = makeUnicode(author['AuthorName'])
        cmd = "SELECT BookName,BookDate,BookID,BookAdded,BookDesc,BookImg,BookFile,AudioFile from books WHERE "
        if 'query' in kwargs:
            cmd += "BookName LIKE '%" + kwargs['query'] + "%' AND "
        cmd += "(Status='Open' or AudioStatus='Open') and AuthorID=? order by BookDate DESC"
        results = myDB.select(cmd, (kwargs['authorid'],))
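        # serve one page of PAGE_SIZE entries starting at the requested index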
        page = results[index:(index + self.PAGE_SIZE)]

        for book in page:
            mime_type = None
            if book['BookFile']:
                mime_type = mimeType(book['BookFile'])
            elif book['AudioFile']:
                mime_type = mimeType(book['AudioFile'])

            if mime_type:
                entry = {'title': escape('%s (%s)' % (book['BookName'], book['BookDate'])),
                         'id': escape('book:%s' % book['BookID']),
                         'updated': opdstime(book['BookAdded']),
                         'href': '%s?cmd=Serve&amp;bookid=%s' % (self.opdsroot, book['BookID']),
                         'kind': 'acquisition',
                         'rel': 'file',
                         'type': mime_type}
                if lazylibrarian.CONFIG['OPDS_METAINFO']:
                    entry['image'] = self.searchroot + '/' + book['BookImg']
                    entry['content'] = escape('%s - %s' % (book['BookName'], book['BookDesc']))
                    entry['author'] = escape('%s' % author)
                else:
                    entry['content'] = escape('%s (%s)' % (book['BookName'], book['BookAdded']))
                entries.append(entry)

        feed = {}
        authorname = '%s (%s)' % (escape(author), len(entries))
        feed['title'] = 'LazyLibrarian OPDS - %s' % authorname
        feed['id'] = 'author:%s' % escape(kwargs['authorid'])
        feed['updated'] = now()
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=Authors' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=Author&amp;authorid=%s&amp;index=%s' % (self.opdsroot,
                                                                             quote_plus(kwargs['authorid']),
                                                                             index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=Author&amp;authorid=%s&amp;index=%s' % (self.opdsroot,
                                                                             quote_plus(kwargs['authorid']),
                                                                             index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))
        feed['links'] = links
        feed['entries'] = entries
        self.data = feed
        logger.debug("Returning %s book%s" % (len(entries), plural(len(entries))))
        return
Example #44
0
def search_nzb_book(books=None, reset=False):
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "SEARCHNZB"

    if not lazylibrarian.USE_NZB():
        logger.warn('No NEWZNAB/TORZNAB providers set, check config')
        return
    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select(
            'SELECT BookID, AuthorName, BookName, BookSub, BookAdded from books WHERE Status="Wanted" order by BookAdded desc'
        )
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            searchbook = myDB.select(
                'SELECT BookID, AuthorName, BookName, BookSub from books '
                'WHERE BookID="%s" AND Status="Wanted"' % book['bookid'])
            for terms in searchbook:
                searchbooks.append(terms)

    if len(searchbooks) == 0:
        logger.debug("NZB search requested for no books or invalid BookID")
        return
    else:
        logger.info('NZB Searching for %i book%s' %
                    (len(searchbooks), plural(len(searchbooks))))

    for searchbook in searchbooks:
        # searchterm is only used for display purposes
        searchterm = searchbook['AuthorName'] + ' "' + searchbook['BookName']
        if searchbook['BookSub']:
            searchterm = searchterm + ': ' + searchbook['BookSub']
        searchterm = searchterm + '"'

        searchlist.append({
            "bookid": searchbook['BookID'],
            "bookName": searchbook['BookName'],
            "bookSub": searchbook['BookSub'],
            "authorName": searchbook['AuthorName'],
            "searchterm": searchterm
        })

    if not lazylibrarian.SAB_HOST and not lazylibrarian.NZB_DOWNLOADER_BLACKHOLE and not lazylibrarian.NZBGET_HOST:
        logger.warn(
            'No download method is set, use SABnzbd/NZBGet or blackhole, check config'
        )

    nzb_count = 0
    for book in searchlist:
        # first attempt, try author/title in category "book"
        resultlist, nproviders = providers.IterateOverNewzNabSites(
            book, 'book')

        if not nproviders:
            logger.warn(
                'No NewzNab or TorzNab providers are set, check config')
            return  # no point in continuing

        found = processResultList(resultlist, book, "book")

        # if you can't find the book, try author/title without any "(extended details, series etc)"
        if not found and '(' in book['bookName']:
            resultlist, nproviders = providers.IterateOverNewzNabSites(
                book, 'shortbook')
            found = processResultList(resultlist, book, "shortbook")

        # if you can't find the book under "books", you might find under general search
        if not found:
            resultlist, nproviders = providers.IterateOverNewzNabSites(
                book, 'general')
            found = processResultList(resultlist, book, "general")

        if not found:
            logger.debug("NZB Searches for %s returned no results." %
                         book['searchterm'])
        else:
            nzb_count = nzb_count + 1  # we found it

    logger.info("NZBSearch for Wanted items complete, found %s book%s" %
                (nzb_count, plural(nzb_count)))

    if reset:
        scheduleJob(action='Restart', target='search_nzb_book')
Example #45
0
def TDL(book=None, test=False):
    errmsg = ''
    provider = "torrentdownloads"
    host = lazylibrarian.CONFIG['TDL_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host)

    params = {"type": "search", "cid": "2", "search": book['searchterm']}
    searchURL = providerurl + "/rss.xml?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = item['title']
                    seeders = int(item['seeders'])
                    link = item['link']
                    size = int(item['size'])
                    url = None

                    if link and minimumseeders < int(seeders):
                        # no point requesting the magnet link if not enough seeders
                        # TDL gives us a relative link
                        result, success = fetchURL(providerurl + link)
                        if success:
                            new_soup = BeautifulSoup(result, 'html5lib')
                            for link in new_soup.find_all('a'):
                                output = link.get('href')
                                if output and output.startswith('magnet'):
                                    url = output
                                    break

                        if not url or not title:
                            logger.debug('Missing url or title')
                        else:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'magnet',
                                'priority': lazylibrarian.CONFIG['TDL_DLPRIORITY']
                            })
                            logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" %
                                 (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
Example #46
0
def TPB(book=None):

    provider = "TPB"
    host = lazylibrarian.TPB_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/s/?q=" + book['searchterm'])

    params = {"category": "601", "page": "0", "orderby": "99"}
    searchURL = providerurl + "&%s" % urllib.urlencode(params)

    result, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in result:
            logger.debug(u"No results found from %s for %s" %
                         (provider, book['searchterm']))
            result = False
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' %
                         (provider, result))
        result = False

    results = []

    if result:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
        soup = BeautifulSoup(result)
        try:
            table = soup.findAll('table')[0]
            rows = table.findAll('tr')
        except Exception:  # no results = no table in result page
            rows = []

        c1 = []
        c2 = []
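        # c1 collects the name/link cell, c2 the seeder-count cell from each result row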

        if len(rows) > 1:
            for row in rows[1:]:
                if len(row.findAll('td')) > 2:
                    c1.append(row.findAll('td')[1])
                    c2.append(row.findAll('td')[2])

        for col1, col2 in zip(c1, c2):
            try:
                title = unaccented(
                    str(col1).split('title=')[1].split('>')[1].split('<')[0])
                magnet = str(col1).split('href="')[1].split('"')[0]
                size = unaccented(col1.text.split(', Size ')[1].split('iB')[0])
                mult = 1
                try:
                    if 'K' in size:
                        size = size.split('K')[0]
                        mult = 1024
                    elif 'M' in size:
                        size = size.split('M')[0]
                        mult = 1024 * 1024
                    size = int(float(size) * mult)
                except (ValueError, IndexError):
                    size = 0
                try:
                    seeders = int(col2.text)
                except ValueError:
                    seeders = 0

                if magnet and minimumseeders < seeders:
                    # no point in asking for magnet link if not enough seeders
                    magurl = '%s/%s' % (host, magnet)
                    result, success = fetchURL(magurl)
                    if not success:
                        logger.debug('Error fetching url %s, %s' %
                                     (magurl, result))
                    else:
                        magnet = None
                        new_soup = BeautifulSoup(result)
                        for link in new_soup.findAll('a'):
                            output = link.get('href')
                            if output and output.startswith('magnet'):
                                magnet = output
                                break
                    if not magnet or not title:
                        logger.debug('Missing magnet or title')
                    else:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': magnet,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                else:
                    logger.debug('Found %s but %s seeder%s' %
                                 (title, seeders, plural(seeders)))
            except Exception as e:
                logger.error(u"An error occurred in the %s parser: %s" %
                             (provider, str(e)))

    logger.debug(
        u"Found %i result%s from %s for %s" %
        (len(results), plural(len(results)), provider, book['searchterm']))
    return results
Example #47
0
    def find_results(self, searchterm=None, queue=None):
        """ GoogleBooks performs much better if we search for author OR title
            not both at once, so if searchterm is not isbn, two searches needed.
            Lazylibrarian searches use <ll> to separate title from author in searchterm
            If this token isn't present, it's an isbn or searchterm as supplied by user
        """
        try:
            myDB = database.DBConnection()
            resultlist = []
            # See if we should check ISBN field, otherwise ignore it
            api_strings = ['inauthor:', 'intitle:']
            if is_valid_isbn(searchterm):
                api_strings = ['isbn:']

            api_hits = 0

            ignored = 0
            total_count = 0
            no_author_count = 0
            title = ''
            authorname = ''

            if ' <ll> ' in searchterm:  # special token separates title from author
                title, authorname = searchterm.split(' <ll> ')

            fullterm = searchterm.replace(' <ll> ', ' ')
            logger.debug('Now searching Google Books API with searchterm: %s' % fullterm)

            for api_value in api_strings:
                set_url = self.url
                if api_value == "isbn:":
                    set_url = set_url + quote(api_value + searchterm)
                elif api_value == 'intitle:':
                    searchterm = fullterm
                    if title:  # just search for title
                        # noinspection PyUnresolvedReferences
                        title = title.split(' (')[0]  # without any series info
                        searchterm = title
                    searchterm = searchterm.replace("'", "").replace('"', '').strip()  # and no quotes
                    if PY2:
                        searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
                    set_url = set_url + quote(api_value + '"' + searchterm + '"')
                elif api_value == 'inauthor:':
                    searchterm = fullterm
                    if authorname:
                        searchterm = authorname  # just search for author
                    searchterm = searchterm.strip()
                    if PY2:
                        searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
                    set_url = set_url + quote_plus(api_value + '"' + searchterm + '"')

                startindex = 0
                resultcount = 0
                ignored = 0
                number_results = 1
                total_count = 0
                no_author_count = 0
                try:
                    while startindex < number_results:
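                        # page through the API results until every reported item has been fetched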

                        self.params['startIndex'] = startindex
                        URL = set_url + '&' + urlencode(self.params)

                        try:
                            jsonresults, in_cache = gb_json_request(URL)
                            if not jsonresults:
                                number_results = 0
                            else:
                                if not in_cache:
                                    api_hits += 1
                                number_results = jsonresults['totalItems']
                                logger.debug('Searching url: ' + URL)
                            if number_results == 0:
                                logger.warn('Found no results for %s with value: %s' % (api_value, searchterm))
                                break
                        except Exception as err:
                            if hasattr(err, 'reason'):
                                errmsg = err.reason
                            else:
                                errmsg = str(err)
                            logger.warn(
                                'Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                            break

                        startindex += 40

                        for item in jsonresults['items']:
                            total_count += 1

                            book = bookdict(item)
                            if not book['author']:
                                logger.debug('Skipped a result without authorfield.')
                                no_author_count += 1
                                continue

                            if not book['name']:
                                logger.debug('Skipped a result without title.')
                                continue

                            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
                            if "All" not in valid_langs:  # don't care about languages, accept all
                                try:
                                    # skip if language is not in valid list -
                                    booklang = book['lang']
                                    if booklang not in valid_langs:
                                        logger.debug(
                                            'Skipped %s with language %s' % (book['name'], booklang))
                                        ignored += 1
                                        continue
                                except KeyError:
                                    ignored += 1
                                    logger.debug('Skipped %s where no language is found' % book['name'])
                                    continue

                            if authorname:
                                author_fuzz = fuzz.ratio(book['author'], authorname)
                            else:
                                author_fuzz = fuzz.ratio(book['author'], fullterm)

                            if title:
                                if title.endswith(')'):
                                    title = title.rsplit('(', 1)[0]
                                book_fuzz = fuzz.token_set_ratio(book['name'], title)
                                # lose a point for each extra word in the fuzzy matches so we get the closest match
                                words = len(getList(book['name']))
                                words -= len(getList(title))
                                book_fuzz -= abs(words)
                            else:
                                book_fuzz = fuzz.token_set_ratio(book['name'], fullterm)

                            isbn_fuzz = 0
                            if is_valid_isbn(fullterm):
                                isbn_fuzz = 100

                            highest_fuzz = max((author_fuzz + book_fuzz) / 2, isbn_fuzz)

                            dic = {':': '.', '"': '', '\'': ''}
                            bookname = replace_all(book['name'], dic)

                            bookname = unaccented(bookname)
                            bookname = bookname.strip()  # strip whitespace

                            AuthorID = ''
                            if book['author']:
                                match = myDB.match(
                                    'SELECT AuthorID FROM authors WHERE AuthorName=?', (book['author'],))
                                if match:
                                    AuthorID = match['AuthorID']

                            resultlist.append({
                                'authorname': book['author'],
                                'authorid': AuthorID,
                                'bookid': item['id'],
                                'bookname': bookname,
                                'booksub': book['sub'],
                                'bookisbn': book['isbn'],
                                'bookpub': book['pub'],
                                'bookdate': book['date'],
                                'booklang': book['lang'],
                                'booklink': book['link'],
                                'bookrate': float(book['rate']),
                                'bookrate_count': book['rate_count'],
                                'bookimg': book['img'],
                                'bookpages': book['pages'],
                                'bookgenre': book['genre'],
                                'bookdesc': book['desc'],
                                'author_fuzz': author_fuzz,
                                'book_fuzz': book_fuzz,
                                'isbn_fuzz': isbn_fuzz,
                                'highest_fuzz': highest_fuzz,
                                'num_reviews': book['ratings']
                            })

                            resultcount += 1

                except KeyError:
                    break

                logger.debug("Returning %s result%s for (%s) with keyword: %s" %
                             (resultcount, plural(resultcount), api_value, searchterm))

            logger.debug("Found %s result%s" % (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
            logger.debug("Removed %s book%s with no author" % (no_author_count, plural(no_author_count)))
            logger.debug('The Google Books API was hit %s time%s for searchterm: %s' %
                         (api_hits, plural(api_hits), fullterm))
            queue.put(resultlist)

        except Exception:
            logger.error('Unhandled exception in GB.find_results: %s' % traceback.format_exc())
Example #48
0
def EXTRA(book=None):

    provider = "Extratorrent"
    host = lazylibrarian.EXTRA_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/rss")

    params = {"type": "search", "s_cat": "2", "search": book['searchterm']}
    searchURL = providerurl + "/?%s" % urllib.urlencode(params)

    try:
        request = urllib2.Request(searchURL)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST,
                              lazylibrarian.PROXY_TYPE)
        request.add_header('User-Agent', USER_AGENT)
        data = urllib2.urlopen(request, timeout=90)
    except (socket.timeout) as e:
        logger.debug('Timeout fetching data from %s' % provider)
        data = False
    except (urllib2.HTTPError, urllib2.URLError, ssl.SSLError) as e:
        # may return 404 if no results, not really an error
        if hasattr(e, 'code') and e.code == 404:
            logger.debug(u"No results found from %s for %s" %
                         (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            if hasattr(e, 'reason'):
                errmsg = e.reason
            else:
                errmsg = str(e)
            logger.debug('Error fetching data from %s: %s' %
                         (provider, errmsg))
        data = False

    results = []

    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])

                    try:
                        seeders = int(item['seeders'])
                    except ValueError:
                        seeders = 0

                    try:
                        size = int(item['size'])
                    except ValueError:
                        size = 0

                    url = None
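                    # pick the .torrent download link from the feed entry's links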
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['href']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error(u"An error occurred in the %s parser: %s" %
                                 (provider, str(e)))

    logger.debug(
        u"Found %i result%s from %s for %s" %
        (len(results), plural(len(results)), provider, book['searchterm']))
    return results
Example #49
0
def TDL(book=None, test=False):
    errmsg = ''
    provider = "torrentdownloads"
    host = lazylibrarian.CONFIG['TDL_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host)

    params = {
        "type": "search",
        "cid": "2",
        "search": book['searchterm']
    }
    searchURL = providerurl + "/rss.xml?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = item['title']
                    seeders = int(item['seeders'].replace(',', ''))
                    link = item['link']
                    size = int(item['size'])
                    url = None

                    try:
                        pubdate = item['published']
                    except KeyError:
                        pubdate = None

                    if link and minimumseeders < seeders:
                        # no point requesting the magnet link if not enough seeders
                        # TDL gives us a relative link
                        result, success = fetchURL(providerurl+link)
                        if success:
                            new_soup = BeautifulSoup(result, 'html5lib')
                            for link in new_soup.find_all('a'):
                                output = link.get('href')
                                if output and output.startswith('magnet'):
                                    url = output
                                    break

                        if not url or not title:
                            logger.debug('Missing url or title')
                        else:
                            res = {
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'magnet',
                                'priority': lazylibrarian.CONFIG['TDL_DLPRIORITY']
                            }
                            if pubdate:
                                res['tor_date'] = pubdate
                            logger.debug('Found %s. Size: %s' % (title, size))
                            results.append(res)
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
Example #50
0
def LibraryScan(startdir=None):
    """ Scan a directory tree adding new books into database
        Return how many books you added """
    if not startdir:
        if not lazylibrarian.DESTINATION_DIR:
            return 0
        else:
            startdir = lazylibrarian.DESTINATION_DIR

    if not os.path.isdir(startdir):
        logger.warn('Cannot find directory: %s. Not scanning' % startdir)
        return 0

    myDB = database.DBConnection()

    # keep statistics of full library scans
    if startdir == lazylibrarian.DESTINATION_DIR:
        myDB.action('DELETE from stats')

    logger.info('Scanning ebook directory: %s' % startdir)

    new_book_count = 0
    file_count = 0
    author = ""

    if lazylibrarian.FULL_SCAN and startdir == lazylibrarian.DESTINATION_DIR:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"'
        )
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            bookID = book['BookID']
            bookfile = book['BookFile']

            if not (bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status="%s" where BookID="%s"' %
                            (status, bookID))
                myDB.action('update books set BookFile="" where BookID="%s"' %
                            bookID)
                logger.warn('Book %s - %s updated as not found on disk' %
                            (bookAuthor, bookName))

    # to save repeat-scans of the same directory if it contains multiple formats of the same book,
    # keep track of which directories we've already looked at
    processed_subdirectories = []

    matchString = ''
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + '\\' + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ''
    count = -1
    booktype_list = getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + '|' + book_type
    matchString = matchString.replace(
        "\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(startdir):
        for directory in d[:]:
            # prevent magazine being scanned
            if directory.startswith("_") or directory.startswith("."):
                d.remove(directory)

        for files in f:
            file_count += 1

            if isinstance(r, str):
                r = r.decode(lazylibrarian.SYS_ENCODING)

            subdirectory = r.replace(startdir, '')
            # Added new code to skip if we've done this directory before.
            # Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (
                    subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
                # If this is a book, try to get author/title/isbn/language
                # if epub or mobi, read metadata from the book
                # If metadata.opf exists, use that allowing it to override
                # embedded metadata. User may have edited metadata.opf
                # to merge author aliases together
                # If all else fails, try pattern match for author/title
                # and look up isbn/lang from LT or GR later
                match = 0
                if is_valid_booktype(files):

                    logger.debug("[%s] Now scanning subdirectory %s" %
                                 (startdir, subdirectory))

                    language = "Unknown"
                    isbn = ""
                    book = ""
                    author = ""
                    extn = os.path.splitext(files)[1]

                    # if it's an epub or a mobi we can try to read metadata from it
                    if (extn == ".epub") or (extn == ".mobi"):
                        book_filename = os.path.join(
                            r.encode(lazylibrarian.SYS_ENCODING),
                            files.encode(lazylibrarian.SYS_ENCODING))

                        try:
                            res = get_book_info(book_filename)
                        except:
                            res = {}
                        if 'title' in res and 'creator' in res:  # this is the minimum we need
                            match = 1
                            book = res['title']
                            author = res['creator']
                            if 'language' in res:
                                language = res['language']
                            if 'identifier' in res:
                                isbn = res['identifier']
                            if 'type' in res:
                                extn = res['type']

                            logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                         (isbn, language, author, book, extn))
                        else:

                            logger.debug("Book meta incomplete in %s" %
                                         book_filename)

                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know
                    # LL preferred authorname/bookname at this point.
                    # Allow metadata in file to override book contents as may be users pref

                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if 'title' in res and 'creator' in res:  # this is the minimum we need
                        match = 1
                        book = res['title']
                        author = res['creator']
                        if 'language' in res:
                            language = res['language']
                        if 'identifier' in res:
                            isbn = res['identifier']
                        logger.debug("file meta [%s] [%s] [%s] [%s]" %
                                     (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)

                    if not match:  # no author/book from metadata file, and not embedded either
                        match = pattern.match(files)
                        if match:
                            author = match.group("author")
                            book = match.group("book")
                        else:
                            logger.debug("Pattern match failed [%s]" % files)

                    if match:
                        # flag that we found a book in this subdirectory
                        processed_subdirectories.append(subdirectory)

                        # If we have a valid looking isbn, and language != "Unknown", add it to cache
                        if language != "Unknown" and is_valid_isbn(isbn):
                            logger.debug("Found Language [%s] ISBN [%s]" %
                                         (language, isbn))
                            # we need to add it to language cache if not already
                            # there, is_valid_isbn has checked length is 10 or 13
                            if len(isbn) == 10:
                                isbnhead = isbn[0:3]
                            else:
                                isbnhead = isbn[3:6]
                            match = myDB.action(
                                'SELECT lang FROM languages where isbn = "%s"'
                                % (isbnhead)).fetchone()
                            if not match:
                                myDB.action(
                                    'insert into languages values ("%s", "%s")'
                                    % (isbnhead, language))
                                logger.debug("Cached Lang [%s] ISBN [%s]" %
                                             (language, isbnhead))
                            else:
                                logger.debug(
                                    "Already cached Lang [%s] ISBN [%s]" %
                                    (language, isbnhead))

                        # get authors name in a consistent format
                        if "," in author:  # "surname, forename"
                            words = author.split(',')
                            author = words[1].strip() + ' ' + words[0].strip(
                            )  # "forename surname"
                        if author[1] == ' ':
                            author = author.replace(' ', '.')
                            author = author.replace('..', '.')

                        # Check if the author exists, and import the author if not,
                        # before starting any complicated book-name matching to save repeating the search
                        #
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName="%s"' %
                            author).fetchone()
                        if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                            # no match for supplied author, but we're allowed to
                            # add new ones

                            GR = GoodReads(author)
                            try:
                                author_gr = GR.find_author_id()
                            except:
                                logger.warn(
                                    "Error finding author id for [%s]" %
                                    author)
                                continue

                            # only try to add if GR data matches found author data
                            if author_gr:
                                authorname = author_gr['authorname']

                                # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                                match_auth = author.replace('.', '_')
                                match_auth = match_auth.replace(' ', '_')
                                match_auth = match_auth.replace('__', '_')
                                match_name = authorname.replace('.', '_')
                                match_name = match_name.replace(' ', '_')
                                match_name = match_name.replace('__', '_')
                                match_name = unaccented(match_name)
                                match_auth = unaccented(match_auth)
                                # allow a degree of fuzziness to cater for different accented character handling.
                                # some author names have accents,
                                # filename may have the accented or un-accented version of the character
                                # The currently non-configurable value of fuzziness might need to go in config
                                # We stored GoodReads unmodified author name in
                                # author_gr, so store in LL db under that
                                # fuzz.ratio doesn't lowercase for us
                                match_fuzz = fuzz.ratio(
                                    match_auth.lower(), match_name.lower())
                                if match_fuzz < 90:
                                    logger.debug(
                                        "Failed to match author [%s] fuzz [%d]"
                                        % (author, match_fuzz))
                                    logger.debug(
                                        "Failed to match author [%s] to authorname [%s]"
                                        % (match_auth, match_name))

                                # To save loading hundreds of books by unknown
                                # authors at GR or GB, ignore if author "Unknown"
                                if (author != "Unknown") and (match_fuzz >=
                                                              90):
                                    # use "intact" name for author that we stored in
                                    # GR author_dict, not one of the various mangled versions
                                    # otherwise the books appear to be by a different author!
                                    author = author_gr['authorname']
                                    # this new authorname may already be in the
                                    # database, so check again
                                    check_exist_author = myDB.action(
                                        'SELECT * FROM authors where AuthorName="%s"'
                                        % author).fetchone()
                                    if not check_exist_author:
                                        logger.info("Adding new author [%s]" %
                                                    author)
                                        try:
                                            addAuthorToDB(author)
                                            check_exist_author = myDB.action(
                                                'SELECT * FROM authors where AuthorName="%s"'
                                                % author).fetchone()
                                        except Exception:
                                            continue

                        # check author exists in db, either newly loaded or already there
                        if not check_exist_author:
                            logger.debug(
                                "Failed to match author [%s] in database" %
                                author)
                        else:
                            # author exists, check if this book by this author is in our database
                            # metadata might have quotes in book name
                            book = book.replace('"', '').replace("'", "")
                            bookid = find_book_in_db(myDB, author, book)

                            if bookid:
                                # check if book is already marked as "Open" (if so,
                                # we already had it)

                                check_status = myDB.action(
                                    'SELECT Status from books where BookID="%s"'
                                    % bookid).fetchone()
                                if check_status['Status'] != 'Open':
                                    # update status as we've got this book

                                    myDB.action(
                                        'UPDATE books set Status="Open" where BookID="%s"'
                                        % bookid)

                                    book_filename = os.path.join(r, files)

                                    # update book location so we can check if it
                                    # gets removed, or allow click-to-open

                                    myDB.action(
                                        'UPDATE books set BookFile="%s" where BookID="%s"'
                                        % (book_filename, bookid))

                                    # update cover file to cover.jpg in book folder (if exists)
                                    bookdir = book_filename.rsplit(os.sep, 1)[0]
                                    coverimg = os.path.join(bookdir, 'cover.jpg')
                                    cachedir = os.path.join(str(lazylibrarian.PROG_DIR),
                                                            'data', 'images', 'cache')
                                    cacheimg = os.path.join(cachedir, bookid + '.jpg')
                                    if os.path.isfile(coverimg):
                                        copyfile(coverimg, cacheimg)

                                    new_book_count += 1
                            else:
                                logger.debug(
                                    "Failed to match book [%s] by [%s] in database"
                                    % (book, author))

    logger.info("%s new/modified book%s found and added to the database" %
                (new_book_count, plural(new_book_count)))
    logger.info("%s file%s processed" % (file_count, plural(file_count)))

    # show statistics of full library scans
    if startdir == lazylibrarian.DESTINATION_DIR:
        stats = myDB.action(
            "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
                sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats"
        ).fetchone()
        if stats['sum(GR_book_hits)'] is not None:
            # only show stats if new books added
            if lazylibrarian.BOOK_API == "GoogleBooks":
                logger.debug("GoogleBooks was hit %s time%s for books" %
                             (stats['sum(GR_book_hits)'],
                              plural(stats['sum(GR_book_hits)'])))
                logger.debug("GoogleBooks language was changed %s time%s" %
                             (stats['sum(GB_lang_change)'],
                              plural(stats['sum(GB_lang_change)'])))
            if lazylibrarian.BOOK_API == "GoodReads":
                logger.debug("GoodReads was hit %s time%s for books" %
                             (stats['sum(GR_book_hits)'],
                              plural(stats['sum(GR_book_hits)'])))
                logger.debug("GoodReads was hit %s time%s for languages" %
                             (stats['sum(GR_lang_hits)'],
                              plural(stats['sum(GR_lang_hits)'])))
            logger.debug("LibraryThing was hit %s time%s for languages" %
                         (stats['sum(LT_lang_hits)'],
                          plural(stats['sum(LT_lang_hits)'])))
            logger.debug(
                "Language cache was hit %s time%s" %
                (stats['sum(cache_hits)'], plural(stats['sum(cache_hits)'])))
            logger.debug(
                "Unwanted language removed %s book%s" %
                (stats['sum(bad_lang)'], plural(stats['sum(bad_lang)'])))
            logger.debug(
                "Unwanted characters removed %s book%s" %
                (stats['sum(bad_char)'], plural(stats['sum(bad_char)'])))
            logger.debug(
                "Unable to cache %s book%s with missing ISBN" %
                (stats['sum(uncached)'], plural(stats['sum(uncached)'])))
            logger.debug(
                "Found %s duplicate book%s" %
                (stats['sum(duplicates)'], plural(stats['sum(duplicates)'])))
            logger.debug(
                "Cache %s hit%s, %s miss" %
                (lazylibrarian.CACHE_HIT, plural(
                    lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS))
            cachesize = myDB.action(
                "select count('ISBN') as counter from languages").fetchone()
            logger.debug("ISBN Language cache holds %s entries" %
                         cachesize['counter'])
            nolang = len(
                myDB.select(
                    'select BookID from Books where status="Open" and BookLang="Unknown"'
                ))
            if nolang:
                logger.warn(
                    "Found %s book%s in your library with unknown language" %
                    (nolang, plural(nolang)))

        authors = myDB.select('select AuthorID from authors')
        # Update bookcounts for all authors, not just new ones - refresh may have located
        # new books for existing authors especially if switched provider gb/gr
    else:
        # single author/book import
        authors = myDB.select(
            'select AuthorID from authors where AuthorName = "%s"' % author)

    logger.debug('Updating bookcounts for %i author%s' %
                 (len(authors), plural(len(authors))))
    for author in authors:
        update_totals(author['AuthorID'])

    images = myDB.select(
        'select bookid, bookimg, bookname from books where bookimg like "http%"'
    )
    if len(images):
        logger.info("Caching cover%s for %i book%s" %
                    (plural(len(images)), len(images), plural(len(images))))
        for item in images:
            bookid = item['bookid']
            bookimg = item['bookimg']
            bookname = item['bookname']
            newimg = cache_cover(bookid, bookimg)
            if newimg is not None:
                myDB.action('update books set BookImg="%s" where BookID="%s"' %
                            (newimg, bookid))

    images = myDB.select(
        'select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"'
    )
    if len(images):
        logger.info("Caching image%s for %i author%s" %
                    (plural(len(images)), len(images), plural(len(images))))
        for item in images:
            authorid = item['authorid']
            authorimg = item['authorimg']
            authorname = item['authorname']
            newimg = cache_cover(authorid, authorimg)
            if newimg is not None:
                myDB.action(
                    'update authors set AuthorImg="%s" where AuthorID="%s"' %
                    (newimg, authorid))
    setWorkPages()
    logger.info('Library scan complete')
    return new_book_count
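
The author-matching block above collapses spacing and punctuation differences ("J.R.R. Tolkien" vs "J R R Tolkien"), strips accents, and only accepts the GoodReads name when a fuzzy ratio reaches 90. Below is a minimal standalone sketch of that idea; it uses difflib from the standard library in place of the fuzz.ratio call (LazyLibrarian bundles fuzzywuzzy), so exact scores will differ slightly, and the names and threshold are illustrative only.

import unicodedata
from difflib import SequenceMatcher


def _normalise(name):
    # strip accents and unify separators: "J.R.R. Tolkien" -> "j_r_r_tolkien"
    name = unicodedata.normalize('NFKD', name)
    name = ''.join(c for c in name if not unicodedata.combining(c))
    name = name.replace('.', '_').replace(' ', '_')
    while '__' in name:
        name = name.replace('__', '_')
    return name.strip('_').lower()


def authors_match(found_name, api_name, threshold=90):
    # same shape as the check above: normalise both names, compare, apply a cutoff
    ratio = 100 * SequenceMatcher(None, _normalise(found_name),
                                  _normalise(api_name)).ratio()
    return ratio >= threshold


if __name__ == '__main__':
    print(authors_match("J R R Tolkien", "J.R.R. Tolkien"))  # True
    print(authors_match("Terry Pratchett", "Stephen King"))  # False
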
Example #51
0
def EXTRA(book=None):

    provider = "Extratorrent"
    host = lazylibrarian.EXTRA_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/rss")

    params = {
        "type": "search",
        "s_cat": "2",
        "search": book['searchterm']
    }
    searchURL = providerurl + "/?%s" % urllib.urlencode(params)

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        else:
            logger.debug('Error fetching data from %s: %s' % (provider, data))
        data = False

    results = []

    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])

                    try:
                        seeders = int(item['seeders'])
                    except ValueError:
                        seeders = 0

                    try:
                        size = int(item['size'])
                    except ValueError:
                        size = 0

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['href']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results
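
The Extratorrent example above relies on two small decisions: pick the enclosure whose MIME type contains 'x-bittorrent', and drop results whose seeder count is not above the configured minimum. Here is a self-contained sketch of both checks, written against plain dicts shaped like feedparser entries; the field names are assumptions for illustration, not a LazyLibrarian API.

def pick_torrent_url(entry):
    # return the first link whose MIME type looks like a .torrent enclosure
    for link in entry.get('links', []):
        if 'x-bittorrent' in link.get('type', ''):
            return link.get('href')
    return None


def keep_result(entry, minimumseeders):
    # mirror the "minimumseeders < seeders" test used by the provider above
    try:
        seeders = int(entry.get('seeders', 0))
    except (TypeError, ValueError):
        seeders = 0
    return pick_torrent_url(entry) is not None and seeders > minimumseeders


if __name__ == '__main__':
    entry = {'title': 'Some Book',
             'seeders': '5',
             'links': [{'type': 'text/html', 'href': 'http://example.com/page'},
                       {'type': 'application/x-bittorrent',
                        'href': 'http://example.com/file.torrent'}]}
    print(keep_result(entry, minimumseeders=0))  # True
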
Example #52
0
def RSS(host=None, feednr=None, priority=0):
    """
    Generic RSS query function, just return all the results from the RSS feed in a list
    """
    results = []

    if not str(host)[:4] == "http":
        host = 'http://' + host

    URL = host

    result, success = fetchURL(URL)
    if success:
        data = feedparser.parse(result)
    else:
        logger.error('Error fetching data from %s: %s' % (host, result))
        BlockProvider(host, result)
        data = None

    if data:
        # to debug because of api
        logger.debug('Parsing results from %s' % URL)
        provider = data['feed']['link']
        logger.debug("RSS %s returned %i result%s" %
                     (provider, len(data.entries), plural(len(data.entries))))
        for post in data.entries:
            title = None
            magnet = None
            size = None
            torrent = None
            nzb = None
            url = None
            tortype = 'torrent'

            if 'title' in post:
                title = post.title
            if 'links' in post:
                for f in post.links:
                    if 'x-bittorrent' in f['type']:
                        size = f['length']
                        torrent = f['href']
                        break
                    if 'x-nzb' in f['type']:
                        size = f['length']
                        nzb = f['href']
                        break

            if 'torrent_magneturi' in post:
                magnet = post.torrent_magneturi

            if torrent:
                url = torrent
                tortype = 'torrent'

            if magnet:
                if not url or (url and lazylibrarian.CONFIG['PREFER_MAGNET']):
                    url = magnet
                    tortype = 'magnet'

            if nzb:  # prefer nzb over torrent/magnet
                url = nzb
                tortype = 'nzb'

            if not url:
                if 'link' in post:
                    url = post.link

            tor_date = 'Fri, 01 Jan 1970 00:00:00 +0100'
            if 'newznab_attr' in post:
                if post.newznab_attr['name'] == 'usenetdate':
                    tor_date = post.newznab_attr['value']

            if not size:
                size = 1000
            if title and url:
                results.append({
                    'tor_prov': provider,
                    'tor_title': title,
                    'tor_url': url,
                    'tor_size': str(size),
                    'tor_date': tor_date,
                    'tor_feed': feednr,
                    'tor_type': tortype,
                    'priority': priority
                })
    else:
        logger.debug('No data returned from %s' % host)
    return results
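
The RSS reader above picks which link to hand to the downloader in a fixed order: an nzb always wins, a magnet is used when there is no .torrent or when PREFER_MAGNET is set, and the .torrent link is the fallback. That priority can be isolated into one small pure function; this is only a sketch of the same ordering, not part of LazyLibrarian itself, and it omits the final fallback to post.link.

def choose_download(torrent=None, magnet=None, nzb=None, prefer_magnet=False):
    # returns (url, type) using the same precedence as the loop above
    url, tortype = None, 'torrent'
    if torrent:
        url, tortype = torrent, 'torrent'
    if magnet and (not url or prefer_magnet):
        url, tortype = magnet, 'magnet'
    if nzb:  # nzb is preferred over torrent/magnet
        url, tortype = nzb, 'nzb'
    return url, tortype


if __name__ == '__main__':
    print(choose_download(torrent='a.torrent', magnet='magnet:?xt=...'))
    # ('a.torrent', 'torrent')
    print(choose_download(torrent='a.torrent', magnet='magnet:?xt=...',
                          prefer_magnet=True))
    # ('magnet:?xt=...', 'magnet')
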
Example #53
0
def TPB(book=None):

    provider = "TPB"
    host = lazylibrarian.TPB_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/s/?q=" + book['searchterm'])

    params = {
        "category": "601",
        "page": "0",
        "orderby": "99"
    }
    searchURL = providerurl + "&%s" % urllib.urlencode(params)

    result, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in result:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, result))
        result = False

    results = []

    if result:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
        soup = BeautifulSoup(result)
        try:
            table = soup.findAll('table')[0]
            rows = table.findAll('tr')
        except Exception:   # no results = no table in result page
            rows = []

        c1 = []
        c2 = []

        if len(rows) > 1:
            for row in rows[1:]:
                if len(row.findAll('td')) > 2:
                    c1.append(row.findAll('td')[1])
                    c2.append(row.findAll('td')[2])

        for col1, col2 in zip(c1, c2):
            try:
                title = unaccented(str(col1).split('title=')[1].split('>')[1].split('<')[0])
                magnet = str(col1).split('href="')[1].split('"')[0]
                size = unaccented(col1.text.split(', Size ')[1].split('iB')[0])
                mult = 1
                try:
                    if 'K' in size:
                        size = size.split('K')[0]
                        mult = 1024
                    elif 'M' in size:
                        size = size.split('M')[0]
                        mult = 1024 * 1024
                    size = int(float(size) * mult)
                except (ValueError, IndexError):
                    size = 0
                try:
                    seeders = int(col2.text)
                except ValueError:
                    seeders = 0

                if magnet and minimumseeders < seeders:
                    # no point in asking for the magnet link if not enough seeders
                    magurl = '%s/%s' % (host, magnet)
                    result, success = fetchURL(magurl)
                    magnet = None  # only keep a real magnet uri found on the detail page
                    if not success:
                        logger.debug('Error fetching url %s, %s' % (magurl, result))
                    else:
                        new_soup = BeautifulSoup(result)
                        for link in new_soup.findAll('a'):
                            output = link.get('href')
                            if output and output.startswith('magnet'):
                                magnet = output
                                break
                    if not magnet or not title:
                        logger.debug('Missing magnet or title')
                    else:
                        # seeders were already checked above
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': magnet,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                else:
                    logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
            except Exception as e:
                logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results
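
Both the TPB parser above and the libgen parsers further down turn a human-readable size such as '1.2 M' or '950 K' into a byte count with the same split-and-multiply pattern. A standalone helper version of that conversion is sketched below, with GiB handling added as in Example #56; it is an illustration, not LazyLibrarian code.

def size_to_bytes(size_str):
    # '1.2 M' -> 1258291, '950 K' -> 972800; returns 0 if it cannot be parsed
    size_str = str(size_str).strip().upper()
    mult = 1
    try:
        if 'K' in size_str:
            size_str, mult = size_str.split('K')[0], 1024
        elif 'M' in size_str:
            size_str, mult = size_str.split('M')[0], 1024 ** 2
        elif 'G' in size_str:
            size_str, mult = size_str.split('G')[0], 1024 ** 3
        return int(float(size_str) * mult)
    except (ValueError, IndexError):
        return 0


if __name__ == '__main__':
    print(size_to_bytes('1.2 M'))    # 1258291
    print(size_to_bytes('950 K'))    # 972800
    print(size_to_bytes('unknown'))  # 0
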
Example #54
0
def NewzNabPlus(book=None, provider=None, searchType=None, searchMode=None):
    """
    Generic NewzNabplus query function
    takes in host+key+type and returns the result set regardless of who
    based on site running NewzNab+
    ref http://usenetreviewz.com/nzb-sites/
    """

    host = provider['HOST']
    api_key = provider['API']
    logger.debug(
        '[NewzNabPlus] searchType [%s] with Host [%s] mode [%s] using api [%s] for item [%s]'
        % (searchType, host, searchMode, api_key, str(book)))

    results = []

    params = ReturnSearchTypeStructure(provider, api_key, book, searchType,
                                       searchMode)

    if params:
        if not str(host)[:4] == "http":
            host = 'http://' + host
        if host[-1:] == '/':
            host = host[:-1]
        URL = host + '/api?' + urllib.urlencode(params)

        sterm = book['searchterm']
        if isinstance(sterm, str) and hasattr(sterm, "decode"):
            sterm = sterm.decode('utf-8')

        rootxml = None
        logger.debug("[NewzNabPlus] URL = %s" % URL)
        result, success = fetchURL(URL)
        if success:
            try:
                rootxml = ElementTree.fromstring(result)
            except Exception as e:
                logger.error('Error parsing data from %s: %s %s' %
                             (host, type(e).__name__, str(e)))
                rootxml = None
        else:
            if not result or result == "''":
                result = "Got an empty response"
            logger.error('Error reading data from %s: %s' % (host, result))
            BlockProvider(host, result)

        if rootxml is not None:
            # to debug because of api
            logger.debug('Parsing results from <a href="%s">%s</a>' %
                         (URL, host))

            if rootxml.tag == 'error':
                errormsg = rootxml.get('description', default='unknown error')
                logger.error("%s - %s" % (host, errormsg))
                # maybe the host doesn't support the search type
                match = False
                if (provider['BOOKSEARCH'] and searchType in ["book", "shortbook"]) or \
                        (provider['AUDIOSEARCH'] and searchType in ["audio", "shortaudio"]):
                    errorlist = [
                        'no such function', 'unknown parameter',
                        'unknown function', 'bad request',
                        'incorrect parameter', 'does not support'
                    ]
                    for item in errorlist:
                        if item in errormsg.lower():
                            match = True
                    if match:
                        count = 0
                        if searchType in ["book", "shortbook"]:
                            msg = 'BOOKSEARCH'
                        elif searchType in ["audio", "shortaudio"]:
                            msg = 'AUDIOSEARCH'
                        else:
                            msg = ''
                        if not msg:
                            logger.error(
                                'Error trying to disable searchtype [%s] for %s'
                                % (searchType, host))
                        else:
                            while count < len(lazylibrarian.NEWZNAB_PROV):
                                if lazylibrarian.NEWZNAB_PROV[count]['HOST'] == provider['HOST']:
                                    if str(provider['MANUAL']) == 'False':
                                        logger.error("Disabled %s=%s for %s" %
                                                     (msg, provider[msg], provider['HOST']))
                                        lazylibrarian.NEWZNAB_PROV[count][msg] = ""
                                        # preserve the current thread name across config_write()
                                        threadname = threading.currentThread().name
                                        lazylibrarian.config_write()
                                        threading.currentThread().name = threadname
                                    else:
                                        logger.error(
                                            "Unable to disable %s for %s [MANUAL=%s]"
                                            % (msg, provider['HOST'],
                                               provider['MANUAL']))
                                count += 1
                if not match:
                    BlockProvider(provider['HOST'], errormsg)
            else:
                resultxml = rootxml.getiterator('item')
                nzbcount = 0
                maxage = check_int(lazylibrarian.CONFIG['USENET_RETENTION'], 0)
                for nzb in resultxml:
                    try:
                        thisnzb = ReturnResultsFieldsBySearchType(
                            book, nzb, host, searchMode,
                            provider['DLPRIORITY'])
                        if not maxage:
                            nzbcount += 1
                            results.append(thisnzb)
                        else:
                            # example nzbdate format: Mon, 27 May 2013 02:12:09 +0200
                            nzbdate = thisnzb['nzbdate']
                            try:
                                parts = nzbdate.split(' ')
                                nzbdate = ' '.join(
                                    parts[:5])  # strip the +0200
                                dt = datetime.datetime.strptime(
                                    nzbdate,
                                    "%a, %d %b %Y %H:%M:%S").timetuple()
                                nzbage = age(
                                    '%04d-%02d-%02d' %
                                    (dt.tm_year, dt.tm_mon, dt.tm_mday))
                            except Exception as e:
                                logger.debug(
                                    'Unable to get age from [%s] %s %s' %
                                    (thisnzb['nzbdate'], type(e).__name__,
                                     str(e)))
                                nzbage = 0
                            if nzbage <= maxage:
                                nzbcount += 1
                                results.append(thisnzb)
                            else:
                                logger.debug('%s is too old (%s day%s)' %
                                             (thisnzb['nzbtitle'], nzbage,
                                              plural(nzbage)))

                    except IndexError:
                        logger.debug('No results from %s for %s' %
                                     (host, sterm))
                logger.debug('Found %s nzb at %s for: %s' %
                             (nzbcount, host, sterm))
        else:
            logger.debug('No data returned from %s for %s' % (host, sterm))
    return results
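
NewzNabPlus filters results by usenet retention: the report date (e.g. 'Mon, 27 May 2013 02:12:09 +0200') has its offset stripped, is parsed with strptime, and the age in days is compared against USENET_RETENTION, where 0 means no limit. Below is a minimal standalone sketch of that check using only the standard library; LazyLibrarian's own age() helper is replaced here with a plain datetime subtraction.

import datetime


def nzb_age_days(nzbdate):
    # drop the trailing '+0200' style offset, then parse as in the example above
    try:
        stripped = ' '.join(nzbdate.split(' ')[:5])
        posted = datetime.datetime.strptime(stripped, "%a, %d %b %Y %H:%M:%S")
        return (datetime.datetime.now() - posted).days
    except (ValueError, AttributeError):
        return 0


def within_retention(nzbdate, maxage):
    # maxage == 0 means "no retention limit", matching USENET_RETENTION above
    return not maxage or nzb_age_days(nzbdate) <= maxage


if __name__ == '__main__':
    print(within_retention('Mon, 27 May 2013 02:12:09 +0200', 0))     # True
    print(within_retention('Mon, 27 May 2013 02:12:09 +0200', 1500))  # False by now
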
Example #55
0
def GEN(book=None):

    provider = "libgen"
    host = lazylibrarian.GEN_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    searchURL = url_fix(host + "/search.php?view=simple&open=0&phrase=0&column=def&res=100&req=" +
                        book['searchterm'])

    result, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in result:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        elif '111' in result:
            # looks like libgen has ip based access limits
            logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, result))
        result = False

    results = []

    if result:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        soup = BeautifulSoup(result)
        try:
            table = soup.findAll('table')[2]
            rows = table.findAll('tr')
        except Exception:  # no results = no table in result page
            rows = []

        c1 = []
        c2 = []
        c7 = []
        c8 = []

        if len(rows) > 1:
            for row in rows[1:]:
                if len(row.findAll('td')) > 8:
                    c1.append(row.findAll('td')[1])
                    c2.append(row.findAll('td')[2])
                    c7.append(row.findAll('td')[7])
                    c8.append(row.findAll('td')[8])

        for col1, col2, col7, col8 in zip(c1, c2, c7, c8):
            try:
                author = unaccented(col1.text)
                title = unaccented(str(col2).split('>')[2].split('<')[0].strip())
                link = str(col2).split('href="')[1].split('?')[1].split('"')[0]
                size = unaccented(col7.text).upper()
                extn = col8.text

                try:
                    mult = 1
                    if 'K' in size:
                        size = size.split('K')[0]
                        mult = 1024
                    elif 'M' in size:
                        size = size.split('M')[0]
                        mult = 1024 * 1024
                    size = int(float(size) * mult)
                except (ValueError, IndexError) as e:
                    size = 0

                if link and title:
                    if author:
                        title = author.strip() + ' ' + title.strip()
                    if extn:
                        title = title + '.' + extn

                    bookURL = url_fix(host + "/ads.php?" + link)
                    bookresult, success = fetchURL(bookURL)
                    if not success:
                        # may return 404 if no results, not really an error
                        if '404' in bookresult:
                            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
                        else:
                            logger.debug(bookURL)
                            logger.debug('Error fetching data from %s: %s' % (provider, bookresult))
                        bookresult = False
                    if bookresult:
                        url = None
                        new_soup = BeautifulSoup(bookresult)
                        for link in new_soup.findAll('a'):
                            output = link.get('href')
                            if output and output.startswith('/get.php'):
                                url = output
                                break

                        if url:
                            url = url_fix(host + url)
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                            })
                            logger.debug('Found %s, Size %s' % (title, size))

            except Exception as e:
                logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results
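
The libgen example above fetches each result's ads.php page and then scans its anchors for the real download link. That second step reduces to one small helper; the sketch below assumes BeautifulSoup is importable from bs4 and that the download anchor starts with '/get.php', as in the code above.

from bs4 import BeautifulSoup


def find_download_link(page_html, host):
    # return the first '/get.php' anchor on the detail page as an absolute URL
    soup = BeautifulSoup(page_html, 'html.parser')
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        if href and href.startswith('/get.php'):
            return host.rstrip('/') + href
    return None


if __name__ == '__main__':
    html = '<html><body><a href="/other">x</a>' \
           '<a href="/get.php?md5=abc">GET</a></body></html>'
    print(find_download_link(html, 'http://libgen.example'))
    # http://libgen.example/get.php?md5=abc
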
Example #56
0
def GEN(book=None, prov=None, test=False):
    errmsg = ''
    provider = "libgen.io"
    if prov is None:
        prov = 'GEN'
    host = lazylibrarian.CONFIG[prov + '_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    search = lazylibrarian.CONFIG[prov + '_SEARCH']
    if not search or not search.endswith('.php'):
        search = 'search.php'
    if 'index.php' not in search and 'search.php' not in search:
        search = 'search.php'
    if search[0] == '/':
        search = search[1:]

    sterm = makeUnicode(book['searchterm'])

    page = 1
    results = []
    next_page = True

    while next_page:
        if 'index.php' in search:
            params = {
                "s": book['searchterm'],
                "f_lang": "All",
                "f_columns": 0,
                "f_ext": "All"
            }
        else:
            params = {
                "view": "simple",
                "open": 0,
                "phrase": 0,
                "column": "def",
                "res": 100,
                "req": book['searchterm']
            }

        if page > 1:
            params['page'] = page

        providerurl = url_fix(host + "/%s" % search)
        searchURL = providerurl + "?%s" % urllib.urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" %
                             (provider, sterm))
                success = True
            elif '111' in result:
                # looks like libgen has ip based access limits
                logger.error(
                    'Access forbidden. Please wait a while before trying %s again.'
                    % provider)
                errmsg = result
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching page data from %s: %s' %
                             (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' %
                         (searchURL, provider))
            try:
                soup = BeautifulSoup(result, 'html5lib')
                rows = []
                try:
                    table = soup.find_all('table')[2]  # un-named table
                    if table:
                        rows = table.find_all('tr')
                except IndexError:  # no results table in result page
                    pass

                if 'search.php' in search and len(rows) > 1:
                    rows = rows[1:]

                for row in rows:
                    author = ''
                    title = ''
                    size = ''
                    extn = ''
                    link = ''
                    td = row.find_all('td')
                    if 'index.php' in search and len(td) > 4:
                        try:
                            author = formatAuthorName(td[0].text)
                            title = td[2].text
                            newsoup = BeautifulSoup(str(td[4]), 'html5lib')
                            data = newsoup.find('a')
                            link = data.get('href')
                            extn = data.text.split('(')[0]
                            size = data.text.split('(')[1].split(')')[0]
                            size = size.upper()
                        except IndexError as e:
                            logger.debug(
                                'Error parsing libgen index.php results: %s' %
                                str(e))

                    elif 'search.php' in search and len(td) > 8:
                        try:
                            author = formatAuthorName(td[1].text)
                            title = td[2].text
                            size = td[7].text.upper()
                            extn = td[8].text
                            newsoup = BeautifulSoup(str(td[2]), 'html5lib')
                            data = newsoup.find('a')
                            if data:
                                link = data.get('href')
                        except IndexError as e:
                            logger.debug(
                                'Error parsing libgen search.php results: %s' %
                                str(e))

                    if not size:
                        size = 0
                    else:
                        try:
                            mult = 1
                            if 'K' in size:
                                size = size.split('K')[0]
                                mult = 1024
                            elif 'M' in size:
                                size = size.split('M')[0]
                                mult = 1024 * 1024
                            elif 'G' in size:
                                size = size.split('G')[0]
                                mult = 1024 * 1024 * 1024
                            size = int(float(size) * mult)
                        except (ValueError, IndexError):
                            size = 0

                    if link and title:
                        if author:
                            title = author.strip() + ' ' + title.strip()
                        if extn:
                            title = title + '.' + extn

                        if not link.startswith('http'):
                            if "/ads.php?" in link:
                                url = url_fix(host + link)
                            else:
                                url = url_fix(host + "/ads.php?" + link)
                        else:
                            url = redirect_url(host, link)

                        bookresult, success = fetchURL(url)
                        if not success:
                            # may return 404 if no results, not really an error
                            if '404' in bookresult:
                                logger.debug(
                                    "No results found from %s for %s" %
                                    (provider, sterm))
                            else:
                                logger.debug(url)
                                logger.debug(
                                    'Error fetching link data from %s: %s' %
                                    (provider, bookresult))
                                errmsg = bookresult
                            bookresult = False

                        if bookresult:
                            url = None
                            try:
                                new_soup = BeautifulSoup(
                                    bookresult, 'html5lib')
                                for link in new_soup.find_all('a'):
                                    output = link.get('href')
                                    if output:
                                        if output.startswith('http') and '/get.php' in output:
                                            url = output
                                            break
                                        elif '/get.php' in output:
                                            url = '/get.php' + output.split('/get.php')[1]
                                            break
                                        elif '/download/book' in output:
                                            url = '/download/book' + output.split('/download/book')[1]
                                            break

                                if url and not url.startswith('http'):
                                    url = url_fix(host + url)
                                elif url:
                                    url = redirect_url(host, url)
                            except Exception as e:
                                logger.debug(
                                    '%s parsing bookresult for %s: %s' %
                                    (type(e).__name__, link, str(e)))
                                url = None

                        if url:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider + '/' + search,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'direct',
                                'priority': lazylibrarian.CONFIG[prov + '_DLPRIORITY']
                            })
                            logger.debug('Found %s, Size %s' % (title, size))
                        next_page = True

            except Exception as e:
                logger.error("An error occurred in the %s parser: %s" %
                             (provider, str(e)))
                logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn(
                'Maximum results page search reached, still more results available'
            )
            next_page = False

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
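
Example #56 wraps its scraping in a paginated loop: request page 1, keep going while results come back, and stop once MAX_PAGES is exceeded (0 meaning unlimited). The loop shape is worth seeing on its own; the sketch below is generic and takes any fetch_page(page) callable returning a list, which is an assumption for illustration rather than a LazyLibrarian API.

def paged_search(fetch_page, max_pages=0):
    # keep asking for the next page while results keep coming,
    # but honour a MAX_PAGES style cap (0 = unlimited)
    page = 1
    results = []
    while True:
        batch = fetch_page(page)
        if not batch:
            break
        results.extend(batch)
        page += 1
        if 0 < max_pages < page:
            break
    return results


if __name__ == '__main__':
    fake = lambda page: ['hit-%d-%d' % (page, n) for n in range(2)] if page <= 3 else []
    print(len(paged_search(fake)))               # 6
    print(len(paged_search(fake, max_pages=2)))  # 4
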
Example #57
0
def search_rss_book(books=None, library=None):
    """
    books is a list of new books to add, or None for backlog search
    library is "eBook" or "AudioBook" or None to search all book types
    """
    if not (lazylibrarian.USE_RSS()):
        logger.warn('RSS search is disabled')
        scheduleJob(action='Stop', target='search_rss_book')
        return
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if not books:
                threading.currentThread().name = "SEARCHALLRSS"
            else:
                threading.currentThread().name = "SEARCHRSS"

        myDB = database.DBConnection()

        searchbooks = []
        if not books:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded, books.Status, AudioStatus '
            cmd += 'from books,authors WHERE (books.Status="Wanted" OR AudioStatus="Wanted") '
            cmd += 'and books.AuthorID = authors.AuthorID order by BookAdded desc'
            results = myDB.select(cmd)
            for terms in results:
                searchbooks.append(terms)
        else:
            # The user has added a new book
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub, books.Status, AudioStatus '
                cmd += 'from books,authors WHERE BookID=? AND books.AuthorID = authors.AuthorID'
                results = myDB.select(cmd, (book['bookid'],))
                for terms in results:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            logger.debug("SearchRSS - No books to search for")
            return

        resultlist, nproviders, _ = IterateOverRSSSites()
        if not nproviders:
            logger.warn('No rss providers are available')
            scheduleJob(action='Stop', target='search_rss_book')
            return  # No point in continuing

        logger.info('RSS Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks))))

        searchlist = []
        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName']
            if searchbook['BookSub']:
                searchterm = searchterm + ': ' + searchbook['BookSub']

            if library is None or library == 'eBook':
                if searchbook['Status'] == "Wanted":
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="eBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"],))
                    if snatched:
                        logger.warn('eBook %s %s already marked snatched in wanted table' %
                                    (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append(
                            {"bookid": searchbook['BookID'],
                             "bookName": searchbook['BookName'],
                             "bookSub": searchbook['BookSub'],
                             "authorName": searchbook['AuthorName'],
                             "library": "eBook",
                             "searchterm": searchterm})

            if library is None or library == 'AudioBook':
                if searchbook['AudioStatus'] == "Wanted":
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="AudioBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"],))
                    if snatched:
                        logger.warn('AudioBook %s %s already marked snatched in wanted table' %
                                    (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append(
                            {"bookid": searchbook['BookID'],
                             "bookName": searchbook['BookName'],
                             "bookSub": searchbook['BookSub'],
                             "authorName": searchbook['AuthorName'],
                             "library": "AudioBook",
                             "searchterm": searchterm})

        rss_count = 0
        for book in searchlist:
            if book['library'] == 'AudioBook':
                searchtype = 'audio'
            else:
                searchtype = 'book'
            found = processResultList(resultlist, book, searchtype, 'rss')

            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in book['bookName']:  # anything to shorten?
                searchtype = 'short' + searchtype
                found = processResultList(resultlist, book, searchtype, 'rss')

            if not found:
                logger.info("RSS Searches for %s %s returned no results." % (book['library'], book['searchterm']))
            else:
                rss_count += 1

        logger.info("RSS Search for Wanted items complete, found %s book%s" % (rss_count, plural(rss_count)))

    except Exception:
        logger.error('Unhandled exception in search_rss_book: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
Example #58
0
def cleanCache():
    """ Remove unused files from the cache - delete if expired or unused.
        Check JSONCache  WorkCache  XMLCache  SeriesCache Author  Book
        Check covers and authorimages referenced in the database exist and change database entry if missing """

    myDB = database.DBConnection()
    result = []
    cache = os.path.join(lazylibrarian.CACHEDIR, "JSONCache")
    # ensure directory is unicode so we get unicode results from listdir
    if isinstance(cache, str):
        cache = cache.decode(lazylibrarian.SYS_ENCODING)
    cleaned = 0
    kept = 0
    if os.path.isdir(cache):
        for cached_file in os.listdir(cache):
            target = os.path.join(cache, cached_file)
            cache_modified_time = os.stat(target).st_mtime
            time_now = time.time()
            if cache_modified_time < time_now - (
                    lazylibrarian.CONFIG['CACHE_AGE'] * 24 * 60 * 60):  # expire after this many seconds
                # Cache is old, delete entry
                os.remove(target)
                cleaned += 1
            else:
                kept += 1
    msg = "Cleaned %i file%s from JSONCache, kept %i" % (cleaned,
                                                         plural(cleaned), kept)
    result.append(msg)
    logger.debug(msg)

    cache = os.path.join(lazylibrarian.CACHEDIR, "XMLCache")
    # ensure directory is unicode so we get unicode results from listdir
    if isinstance(cache, str):
        cache = cache.decode(lazylibrarian.SYS_ENCODING)
    cleaned = 0
    kept = 0
    if os.path.isdir(cache):
        for cached_file in os.listdir(cache):
            target = os.path.join(cache, cached_file)
            cache_modified_time = os.stat(target).st_mtime
            time_now = time.time()
            if cache_modified_time < time_now - (
                    lazylibrarian.CONFIG['CACHE_AGE'] * 24 * 60 * 60):  # expire after this many seconds
                # Cache is old, delete entry
                os.remove(target)
                cleaned += 1
            else:
                kept += 1
    msg = "Cleaned %i file%s from XMLCache, kept %i" % (cleaned,
                                                        plural(cleaned), kept)
    result.append(msg)
    logger.debug(msg)

    cache = os.path.join(lazylibrarian.CACHEDIR, "WorkCache")
    # ensure directory is unicode so we get unicode results from listdir
    if isinstance(cache, str):
        cache = cache.decode(lazylibrarian.SYS_ENCODING)
    cleaned = 0
    kept = 0
    if os.path.isdir(cache):
        for cached_file in os.listdir(cache):
            target = os.path.join(cache, cached_file)
            try:
                bookid = cached_file.split('.')[0]
            except IndexError:
                logger.error('Clean Cache: Error splitting %s' % cached_file)
                continue
            item = myDB.match('select BookID from books where BookID="%s"' %
                              bookid)
            if not item:
                # WorkPage no longer referenced in database, delete cached_file
                os.remove(target)
                cleaned += 1
            else:
                kept += 1
    msg = "Cleaned %i file%s from WorkCache, kept %i" % (cleaned,
                                                         plural(cleaned), kept)
    result.append(msg)
    logger.debug(msg)

    cache = os.path.join(lazylibrarian.CACHEDIR, "SeriesCache")
    # ensure directory is unicode so we get unicode results from listdir
    if isinstance(cache, str):
        cache = cache.decode(lazylibrarian.SYS_ENCODING)
    cleaned = 0
    kept = 0
    if os.path.isdir(cache):
        for cached_file in os.listdir(cache):
            target = os.path.join(cache, cached_file)
            try:
                seriesid = cached_file.split('.')[0]
            except IndexError:
                logger.error('Clean Cache: Error splitting %s' % cached_file)
                continue
            item = myDB.match(
                'select SeriesID from series where SeriesID="%s"' % seriesid)
            if not item:
                # SeriesPage no longer referenced in database, delete cached_file
                os.remove(target)
                cleaned += 1
            else:
                kept += 1
    msg = "Cleaned %i file%s from SeriesCache, kept %i" % (
        cleaned, plural(cleaned), kept)
    result.append(msg)
    logger.debug(msg)

    cache = lazylibrarian.CACHEDIR
    cleaned = 0
    kept = 0
    cachedir = os.path.join(cache, 'author')
    if os.path.isdir(cachedir):
        for cached_file in os.listdir(cachedir):
            target = os.path.join(cachedir, cached_file)
            if os.path.isfile(target):
                try:
                    imgid = cached_file.split('.')[0].rsplit(os.sep)[-1]
                except IndexError:
                    logger.error('Clean Cache: Error splitting %s' %
                                 cached_file)
                    continue
                item = myDB.match(
                    'select AuthorID from authors where AuthorID="%s"' % imgid)
                if not item:
                    # Author Image no longer referenced in database, delete cached_file
                    os.remove(target)
                    cleaned += 1
                else:
                    kept += 1
    cachedir = os.path.join(cache, 'book')
    if os.path.isdir(cachedir):
        for cached_file in os.listdir(cachedir):
            target = os.path.join(cachedir, cached_file)
            if os.path.isfile(target):
                try:
                    imgid = cached_file.split('.')[0].rsplit(os.sep)[-1]
                except IndexError:
                    logger.error('Clean Cache: Error splitting %s' %
                                 cached_file)
                    continue
                item = myDB.match(
                    'select BookID from books where BookID="%s"' % imgid)
                if not item:
                    # Book Image no longer referenced in database, delete cached_file
                    os.remove(target)
                    cleaned += 1
                else:
                    kept += 1

    # at this point there should be no more .jpg files in the root of the cachedir
    # any that are still there are for books/authors deleted from database
    for cached_file in os.listdir(cache):
        if cached_file.endswith('.jpg'):
            os.remove(os.path.join(cache, cached_file))
            cleaned += 1
    msg = "Cleaned %i file%s from ImageCache, kept %i" % (
        cleaned, plural(cleaned), kept)
    result.append(msg)
    logger.debug(msg)

    # verify the cover images referenced in the database are present
    images = myDB.action('select BookImg,BookName,BookID from books')
    cachedir = os.path.join(lazylibrarian.CACHEDIR, 'book')
    cleaned = 0
    kept = 0
    for item in images:
        keep = True
        imgfile = ''
        if item['BookImg'] is None or item['BookImg'] == '':
            keep = False
        if keep and not item['BookImg'].startswith(
                'http') and not item['BookImg'] == "images/nocover.png":
            # html uses '/' as separator, but os might not
            imgname = item['BookImg'].rsplit('/')[-1]
            imgfile = os.path.join(cachedir, imgname)
            if not os.path.isfile(imgfile):
                keep = False
        if keep:
            kept += 1
        else:
            cleaned += 1
            logger.debug('Cover missing for %s %s' %
                         (item['BookName'], imgfile))
            myDB.action(
                'update books set BookImg="images/nocover.png" where Bookid="%s"'
                % item['BookID'])

    msg = "Cleaned %i missing cover file%s, kept %i" % (cleaned,
                                                        plural(cleaned), kept)
    result.append(msg)
    logger.debug(msg)

    # verify the author images referenced in the database are present
    images = myDB.action('select AuthorImg,AuthorName,AuthorID from authors')
    cachedir = os.path.join(lazylibrarian.CACHEDIR, 'author')
    cleaned = 0
    kept = 0
    for item in images:
        keep = True
        imgfile = ''
        if item['AuthorImg'] is None or item['AuthorImg'] == '':
            keep = False
        if keep and not item['AuthorImg'].startswith(
                'http') and not item['AuthorImg'] == "images/nophoto.png":
            # html uses '/' as separator, but os might not
            imgname = item['AuthorImg'].rsplit('/')[-1]
            imgfile = os.path.join(cachedir, imgname)
            if not os.path.isfile(imgfile):
                keep = False
        if keep:
            kept += 1
        else:
            cleaned += 1
            logger.debug('Image missing for %s %s' %
                         (item['AuthorName'], imgfile))
            myDB.action(
                'update authors set AuthorImg="images/nophoto.png" where AuthorID="%s"'
                % item['AuthorID'])

    msg = "Cleaned %i missing author image%s, kept %i" % (
        cleaned, plural(cleaned), kept)
    result.append(msg)
    logger.debug(msg)
    return result
Example #59
0
def searchItem(item=None, bookid=None, cat=None):
    """
    Call all active search providers to search for item
    return a list of results, each entry a dict of score, title, provider, size, date, url, mode
    item = searchterm to use for general search
    bookid = link to data for book/audio searches
    cat = category to search [general, book, audio]
    """
    results = []

    if not item:
        return results

    book = {}
    searchterm = unaccented_str(item)

    book['searchterm'] = searchterm
    if bookid:
        book['bookid'] = bookid
    else:
        book['bookid'] = searchterm

    if cat in ['book', 'audio']:
        myDB = database.DBConnection()
        cmd = 'SELECT authorName,bookName,bookSub from books,authors WHERE books.AuthorID=authors.AuthorID'
        cmd += ' and bookID=?'
        match = myDB.match(cmd, (bookid,))
        if match:
            book['authorName'] = match['authorName']
            book['bookName'] = match['bookName']
            book['bookSub'] = match['bookSub']
        else:
            logger.debug('Forcing general search')
            cat = 'general'

    nprov = lazylibrarian.USE_NZB() + lazylibrarian.USE_TOR() + lazylibrarian.USE_RSS() + lazylibrarian.USE_DIRECT()
    logger.debug('Searching %s provider%s (%s) for %s' % (nprov, plural(nprov), cat, searchterm))

    if lazylibrarian.USE_NZB():
        resultlist, nprov = IterateOverNewzNabSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_TOR():
        resultlist, nprov = IterateOverTorrentSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_DIRECT():
        resultlist, nprov = IterateOverDirectSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_RSS():
        resultlist, nprov = IterateOverRSSSites()
        if nprov:
            results += resultlist

    # reprocess to get consistent results
    searchresults = []
    for res in results:
        provider = ''
        title = ''
        url = ''
        size = ''
        date = ''
        mode = ''
        if 'nzbtitle' in res:
            title = res['nzbtitle']
        if 'nzburl' in res:
            url = res['nzburl']
        if 'nzbprov' in res:
            provider = res['nzbprov']
        if 'nzbsize' in res:
            size = res['nzbsize']
        if 'nzbdate' in res:
            date = res['nzbdate']
        if 'nzbmode' in res:
            mode = res['nzbmode']
        if 'tor_title' in res:
            title = res['tor_title']
        if 'tor_url' in res:
            url = res['tor_url']
        if 'tor_prov' in res:
            provider = res['tor_prov']
        if 'tor_size' in res:
            size = res['tor_size']
        if 'tor_date' in res:
            date = res['tor_date']
        if 'tor_type' in res:
            mode = res['tor_type']

        if title and provider and mode and url:
            # Not all results have a date or a size
            if not date:
                date = 'Fri, 01 Jan 1970 00:00:00 +0100'
            if not size:
                size = '1000'

            # calculate match percentage - torrents might have words_with_underscore_separator
            score = fuzz.token_set_ratio(searchterm, title.replace('_', ' '))
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(searchterm))
            words -= len(getList(title))
            score -= abs(words)
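            # e.g. a searchterm whose words all appear in a longer title scores 100
            # from token_set_ratio, then loses one point per extra word in the title,
            # so the shortest complete match ends up with the highest score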
            if score >= 40:  # ignore wildly wrong results?
                if not url.startswith('magnet'):
                    if mode not in ['torznab', 'direct']:
                        # strip the query string from the link; torznab and direct
                        # urls keep their query parameters
                        url = url.split('?')[0]
                result = {'score': score, 'title': title, 'provider': provider, 'size': size, 'date': date,
                          'url': urllib.quote_plus(url), 'mode': mode}

                searchresults.append(result)

    # to rank by match quality, sort the combined list by score, best first
    # from operator import itemgetter
    # searchresults = sorted(searchresults, key=itemgetter('score'), reverse=True)

    logger.debug('Found %s %s results for %s' % (len(searchresults), cat, searchterm))
    return searchresults
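A minimal usage sketch for searchItem (hypothetical calling code, not part of the
module): run a general search and keep only the best-scoring normalised results.
The helper name bestMatches and the search term are illustrative assumptions.

def bestMatches(term, limit=5):
    """ Return the top scoring provider results for a free-text search """
    # hypothetical caller - assumes the module imports and config are initialised
    found = searchItem(item=term, cat='general')
    # each entry holds score, title, provider, size, date, url and mode
    found.sort(key=lambda entry: entry['score'], reverse=True)
    return found[:limit]

for hit in bestMatches('war and peace'):
    logger.debug('%s%% %s from %s' % (hit['score'], hit['title'], hit['provider']))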
Example #60
0
def EXTRA(book=None, test=False):
    errmsg = ''
    provider = "Extratorrent"
    host = lazylibrarian.CONFIG['EXTRA_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/rss")

    params = {"type": "search", "s_cat": "2", "search": book['searchterm']}
    searchURL = providerurl + "/?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])

                    try:
                        seeders = int(item['seeders'])
                    except (KeyError, ValueError):
                        seeders = 0

                    try:
                        size = int(item['size'])
                    except (KeyError, ValueError):
                        size = 0

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['href']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < int(seeders):
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['EXTRA_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but only %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" %
                                 (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
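A small usage sketch for the provider function above (hypothetical caller; the
search term and logging are illustrative assumptions): test=True only checks that
Extratorrent responds, while a normal call needs a book dict with searchterm and
bookid keys and returns the result list plus any error message.

# hypothetical usage - assumes EXTRA_HOST is set in lazylibrarian.CONFIG
book = {'searchterm': 'war and peace tolstoy', 'bookid': '12345'}
if EXTRA(book=book, test=True):
    results, errmsg = EXTRA(book=book)
    logger.debug('EXTRA returned %i result%s' % (len(results), plural(len(results))))
else:
    logger.error('Extratorrent provider test failed')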