Example #1
def setAllBookSeries():
    """ Try to set series details for all books """
    myDB = database.DBConnection()
    books = myDB.select('select BookID,WorkID,BookName from books where Manual is not "1"')
    counter = 0
    if books:
        logger.info('Checking series for %s book%s' % (len(books), plural(len(books))))
        for book in books:
            if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
                workid = book['WorkID']
                if not workid:
                    logger.debug("No workid for book %s: %s" % (book['BookID'], book['BookName']))
            else:
                workid = book['BookID']
                if not workid:
                    logger.debug("No bookid for book: %s" % book['BookName'])
            if workid:
                serieslist = getWorkSeries(workid)
                if serieslist:
                    counter += 1
                    setSeries(serieslist, book['BookID'])
    deleteEmptySeries()
    msg = 'Updated %s book%s' % (counter, plural(counter))
    logger.info('Series check complete: ' + msg)
    return msg
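Every example in this listing builds its log messages with a plural() helper that isn't included in the snippets. A minimal sketch of the behaviour the format strings imply, assuming the helper simply appends an "s" for counts other than one (the real LazyLibrarian helper may differ):

def plural(count):
    # Return 's' when the count isn't exactly one, so '%s book%s' % (n, plural(n))
    # renders as "1 book" or "3 books".
    return '' if count == 1 else 's'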
Example #2
def setWorkPages():
    """ Set the workpage link for any books that don't already have one """

    myDB = database.DBConnection()
    cmd = 'select BookID,AuthorName,BookName from books,authors where length(WorkPage) < 4'
    cmd += ' and books.AuthorID = authors.AuthorID'
    books = myDB.select(cmd)
    if books:
        logger.debug('Setting WorkPage for %s book%s' % (len(books), plural(len(books))))
        counter = 0
        for book in books:
            bookid = book['BookID']
            worklink = getWorkPage(bookid)
            if worklink:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"WorkPage": worklink}
                myDB.upsert("books", newValueDict, controlValueDict)
                counter += 1
            else:
                logger.debug('No WorkPage found for %s: %s' % (book['AuthorName'], book['BookName']))
        msg = 'Updated %s page%s' % (counter, plural(counter))
        logger.debug("setWorkPages complete: " + msg)
    else:
        msg = 'No missing WorkPages'
        logger.debug(msg)
    return msg
Example #3
def showJobs():
    result = []
    result.append("Cache %i hit%s, %i miss" % (int(lazylibrarian.CACHE_HIT),
                  plural(int(lazylibrarian.CACHE_HIT)), int(lazylibrarian.CACHE_MISS)))
    myDB = database.DBConnection()
    snatched = myDB.match("SELECT count('Status') as counter from wanted WHERE Status = 'Snatched'")
    wanted = myDB.match("SELECT count('Status') as counter FROM books WHERE Status = 'Wanted'")
    result.append("%i item%s marked as Snatched" % (snatched['counter'], plural(snatched['counter'])))
    result.append("%i item%s marked as Wanted" % (wanted['counter'], plural(wanted['counter'])))
    for job in lazylibrarian.SCHED.get_jobs():
        job = str(job)
        if "search_magazines" in job:
            jobname = "Magazine search"
        elif "checkForUpdates" in job:
            jobname = "Check LazyLibrarian version"
        elif "search_tor_book" in job:
            jobname = "TOR book search"
        elif "search_nzb_book" in job:
            jobname = "NZB book search"
        elif "search_rss_book" in job:
            jobname = "RSS book search"
        elif "processDir" in job:
            jobname = "Process downloads"
        else:
            jobname = job.split(' ')[0].split('.')[2]

        jobinterval = job.split('[')[1].split(']')[0]
        jobtime = job.split('at: ')[1].split('.')[0]
        jobtime = next_run(jobtime)
        jobinfo = "%s: Next run in %s" % (jobname, jobtime)
        result.append(jobinfo)
    return result
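showJobs() depends on a next_run() helper that turns the scheduler's "next run at: ..." timestamp into a human-readable countdown. A rough sketch, assuming the timestamp extracted by split('at: ')[1].split('.')[0] looks like "2018-01-01 12:00:00"; the real helper may format the interval differently:

from datetime import datetime

def next_run(when_run):
    # Hypothetical countdown formatter: parse the scheduler timestamp and report
    # how long until that moment, clamped at zero if it has already passed.
    when = datetime.strptime(when_run, "%Y-%m-%d %H:%M:%S")
    seconds = max(int((when - datetime.now()).total_seconds()), 0)
    hours, remainder = divmod(seconds, 3600)
    return "%s hours %s minutes" % (hours, remainder // 60)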
Example #4
def getAuthorImages():
    """ Try to get an author image for all authors without one"""
    myDB = database.DBConnection()
    cmd = 'select AuthorID, AuthorName from authors where (AuthorImg like "%nophoto%" or AuthorImg is null)'
    cmd += ' and Manual is not "1"'
    authors = myDB.select(cmd)
    if authors:
        logger.info('Checking images for %s author%s' % (len(authors), plural(len(authors))))
        counter = 0
        for author in authors:
            authorid = author['AuthorID']
            imagelink = getAuthorImage(authorid)
            newValueDict = {}
            if not imagelink:
                logger.debug('No image found for %s' % author['AuthorName'])
                newValueDict = {"AuthorImg": 'images/nophoto.png'}
            elif 'nophoto' not in imagelink:
                logger.debug('Updating %s image to %s' % (author['AuthorName'], imagelink))
                newValueDict = {"AuthorImg": imagelink}

            if newValueDict:
                counter += 1
                controlValueDict = {"AuthorID": authorid}
                myDB.upsert("authors", newValueDict, controlValueDict)

        msg = 'Updated %s image%s' % (counter, plural(counter))
        logger.info('Author Image check complete: ' + msg)
    else:
        msg = 'No missing author images'
        logger.debug(msg)
    return msg
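Several examples (#2, #4, #7, #10 and others) write changes through DBConnection.upsert, passing a controlValueDict that identifies the row and a newValueDict with the columns to change. That method isn't shown here; a simplified sketch of the update-or-insert it presumably performs, written against a plain sqlite3 connection (the real implementation may differ in quoting, threading and commit handling):

def upsert(conn, table, new_values, control_values):
    # conn is assumed to be a sqlite3.Connection.
    # Update the row matched by control_values; insert a new row if nothing matched.
    sets = ', '.join('%s=?' % col for col in new_values)
    where = ' AND '.join('%s=?' % col for col in control_values)
    cur = conn.execute('UPDATE %s SET %s WHERE %s' % (table, sets, where),
                       list(new_values.values()) + list(control_values.values()))
    if cur.rowcount == 0:
        cols = list(control_values) + list(new_values)
        vals = list(control_values.values()) + list(new_values.values())
        placeholders = ', '.join('?' for _ in cols)
        conn.execute('INSERT INTO %s (%s) VALUES (%s)' % (table, ', '.join(cols), placeholders), vals)
    conn.commit()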
Example #5
def export_CSV(search_dir=None, status="Wanted"):
    """ Write a csv file to the search_dir containing all books marked as "Wanted" """

    if not search_dir or not os.path.isdir(search_dir):
        logger.warn("Please check Alternate Directory setting")
        return False

    csvFile = os.path.join(search_dir, "%s - %s.csv" % (status, now().replace(':', '-')))

    myDB = database.DBConnection()

    find_status = myDB.select('SELECT * FROM books WHERE Status = "%s"' % status)

    if not find_status:
        logger.warn(u"No books marked as %s" % status)
    else:
        count = 0
        with open(csvFile, 'wb') as csvfile:
            csvwrite = csv.writer(csvfile, delimiter=',',
                                  quotechar='"', quoting=csv.QUOTE_MINIMAL)

            # write headers, change AuthorName BookName BookIsbn to match import csv names (Author, Title, ISBN10)
            csvwrite.writerow(['BookID', 'Author', 'Title', 'ISBN', 'AuthorID'])

            for resulted in find_status:
                logger.debug(u"Exported CSV for book %s" % resulted['BookName'])
                row = ([resulted['BookID'], resulted['AuthorName'], resulted['BookName'],
                        resulted['BookIsbn'], resulted['AuthorID']])
                csvwrite.writerow([("%s" % s).encode(lazylibrarian.SYS_ENCODING) for s in row])
                count = count + 1
        logger.info(u"CSV exported %s book%s to %s" % (count, plural(count), csvFile))
Example #6
def export_CSV(search_dir=None, status="Wanted", library='eBook'):
    """ Write a csv file to the search_dir containing all books marked as "Wanted" """
    # noinspection PyBroadException
    try:
        if not search_dir:
            msg = "Alternate Directory not configured"
            logger.warn(msg)
            return msg
        elif not os.path.isdir(search_dir):
            msg = "Alternate Directory [%s] not found" % search_dir
            logger.warn(msg)
            return msg
        elif not os.access(search_dir, os.W_OK | os.X_OK):
            msg = "Alternate Directory [%s] not writable" % search_dir
            logger.warn(msg)
            return msg

        csvFile = os.path.join(search_dir, "%s %s - %s.csv" % (status, library, now().replace(':', '-')))

        myDB = database.DBConnection()

        cmd = 'SELECT BookID,AuthorName,BookName,BookIsbn,books.AuthorID FROM books,authors '
        if library == 'eBook':
            cmd += 'WHERE books.Status=? and books.AuthorID = authors.AuthorID'
        else:
            cmd += 'WHERE AudioStatus=? and books.AuthorID = authors.AuthorID'
        find_status = myDB.select(cmd, (status,))

        if not find_status:
            msg = "No %s marked as %s" % (library, status)
            logger.warn(msg)
        else:
            count = 0
            if PY2:
                fmode = 'wb'
            else:
                fmode = 'w'
            with open(csvFile, fmode) as csvfile:
                csvwrite = writer(csvfile, delimiter=',',
                                  quotechar='"', quoting=QUOTE_MINIMAL)

                # write headers, change AuthorName BookName BookIsbn to match import csv names
                csvwrite.writerow(['BookID', 'Author', 'Title', 'ISBN', 'AuthorID'])

                for resulted in find_status:
                    logger.debug("Exported CSV for %s %s" % (library, resulted['BookName']))
                    row = ([resulted['BookID'], resulted['AuthorName'], resulted['BookName'],
                            resulted['BookIsbn'], resulted['AuthorID']])
                    if PY2:
                        csvwrite.writerow([("%s" % s).encode(lazylibrarian.SYS_ENCODING) for s in row])
                    else:
                        csvwrite.writerow([("%s" % s) for s in row])
                    count += 1
            msg = "CSV exported %s %s%s to %s" % (count, library, plural(count), csvFile)
            logger.info(msg)
        return msg
    except Exception:
        msg = 'Unhandled exception in exportCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
Example #7
def getAuthorImages():
    """ Try to get an author image for all authors without one"""
    myDB = database.DBConnection()
    authors = myDB.select('select AuthorID from authors where AuthorImg like "%nophoto%"')
    if authors:
        logger.info('Checking images for %s author%s' % (len(authors), plural(len(authors))))
        counter = 0
        for author in authors:
            authorid = author['AuthorID']
            imagelink = getAuthorImage(authorid)
            if imagelink and not "nophoto" in imagelink:
                controlValueDict = {"AuthorID": authorid}
                newValueDict = {"AuthorImg": imagelink}
                myDB.upsert("authors", newValueDict, controlValueDict)
                counter += 1
        logger.info('Author Image check completed, updated %s image%s' % (counter, plural(counter)))
    else:
        logger.debug('No missing images')
Example #8
def GOODREADS(host=None, feednr=None, priority=0, dispname=None, test=False):
    """
    Goodreads RSS query function. Returns all results in a list; can handle multiple wishlists,
    but expects Goodreads format (looks for Goodreads category names).
    """
    results = []
    basehost = host
    if not str(host)[:4] == "http":
        host = 'http://' + host

    URL = host

    result, success = fetchURL(URL)

    if test:
        return success

    if success:
        data = feedparser.parse(result)
    else:
        logger.error('Error fetching data from %s: %s' % (host, result))
        BlockProvider(basehost, result)
        return []

    if data:
        logger.debug('Parsing results from %s' % URL)
        provider = data['feed']['link']
        if not dispname:
            dispname = provider
        logger.debug("RSS %s returned %i result%s" % (provider, len(data.entries), plural(len(data.entries))))
        for post in data.entries:
            title = ''
            book_id = ''
            author_name = ''
            isbn = ''
            if 'title' in post:
                title = post.title
            if 'book_id' in post:
                book_id = post.book_id
            if 'author_name' in post:
                author_name = post.author_name
            if 'isbn' in post:
                isbn = post.isbn
            if title and author_name:
                results.append({
                    'rss_prov': provider,
                    'rss_feed': feednr,
                    'rss_title': title,
                    'rss_author': author_name,
                    'rss_bookid': book_id,
                    'rss_isbn': isbn,
                    'priority': priority,
                    'dispname': dispname
                })
    else:
        logger.debug('No data returned from %s' % host)
    return results
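A hypothetical call to the function above; the shelf URL, feed number and display name are placeholders rather than values taken from LazyLibrarian's configuration:

# Fetch a Goodreads "to-read" shelf RSS feed and list what came back.
entries = GOODREADS(host='https://www.goodreads.com/review/list_rss/12345?shelf=to-read',
                    feednr=1, priority=0, dispname='Goodreads to-read')
for entry in entries:
    print('%s - %s (isbn %s)' % (entry['rss_author'], entry['rss_title'], entry['rss_isbn'] or 'unknown'))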
Example #9
    def _Magazine(self, **kwargs):
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        if 'magid' not in kwargs:
            self.data = self._error_with_message('No Magazine Provided')
            return
        links = []
        entries = []
        title = ''
        cmd = "SELECT Title,IssueID,IssueDate,IssueAcquired,IssueFile from issues "
        cmd += "WHERE Title='%s' order by IssueDate DESC"
        results = myDB.select(cmd % kwargs['magid'])
        page = results[index:(index + self.PAGE_SIZE)]
        for issue in page:
            title = makeUnicode(issue['Title'])
            entry = {'title': escape('%s (%s)' % (title, issue['IssueDate'])),
                     'id': escape('issue:%s' % issue['IssueID']),
                     'updated': opdstime(issue['IssueAcquired']),
                     'content': escape('%s - %s' % (title, issue['IssueDate'])),
                     'href': '%s?cmd=Serve&amp;issueid=%s' % (self.opdsroot, quote_plus(issue['IssueID'])),
                     'kind': 'acquisition',
                     'rel': 'file',
                     'type': mimeType(issue['IssueFile'])}
            if lazylibrarian.CONFIG['OPDS_METAINFO']:
                fname = os.path.splitext(issue['IssueFile'])[0]
                res = cache_img('magazine', issue['IssueID'], fname + '.jpg')
                entry['image'] = self.searchroot + '/' + res[0]
            entries.append(entry)

        feed = {}
        title = '%s (%s)' % (escape(title), len(entries))
        feed['title'] = 'LazyLibrarian OPDS - %s' % title
        feed['id'] = 'magazine:%s' % escape(kwargs['magid'])
        feed['updated'] = now()
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=Magazine&amp;magid=%s' % (self.opdsroot, quote_plus(kwargs['magid'])),
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=Magazine&amp;magid=%s&amp;index=%s' % (self.opdsroot, quote_plus(kwargs['magid']),
                                                                            index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=Magazine&amp;magid=%s&amp;index=%s' % (self.opdsroot, quote_plus(kwargs['magid']),
                                                                            index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s issue%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
Example #10
def getBookCovers():
    """ Try to get a cover image for all books """

    myDB = database.DBConnection()
    books = myDB.select('select BookID,BookImg from books where BookImg like "%nocover%"')
    if books:
        logger.info('Checking covers for %s book%s' % (len(books), plural(len(books))))
        counter = 0
        for book in books:
            bookid = book['BookID']
            coverlink = getBookCover(bookid)
            if coverlink and "nocover" not in coverlink:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": coverlink}
                myDB.upsert("books", newValueDict, controlValueDict)
                counter += 1
        logger.info('Cover check complete, updated %s cover%s' % (counter, plural(counter)))
    else:
        logger.debug('No missing book covers')
Example #11
    def _RecentAudio(self, **kwargs):
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        feed = {'title': 'LazyLibrarian OPDS - Recent AudioBooks', 'id': 'Recent AudioBooks', 'updated': now()}
        links = []
        entries = []
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=RecentAudio' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        links.append(getLink(href='%s/opensearchbooks.xml' % self.searchroot,
                             ftype='application/opensearchdescription+xml', rel='search', title='Search Books'))

        cmd = "select BookName,BookID,AudioLibrary,BookDate,BookImg,BookDesc,BookAdded,AuthorID from books WHERE "
        if 'query' in kwargs:
            cmd += "BookName LIKE '%" + kwargs['query'] + "%' AND "
        cmd += "AudioStatus='Open' order by AudioLibrary DESC"
        results = myDB.select(cmd)
        page = results[index:(index + self.PAGE_SIZE)]
        for book in page:
            title = makeUnicode(book['BookName'])
            entry = {'title': escape(title),
                     'id': escape('audio:%s' % book['BookID']),
                     'updated': opdstime(book['AudioLibrary']),
                     'href': '%s?cmd=Serve&amp;audioid=%s' % (self.opdsroot, quote_plus(book['BookID'])),
                     'kind': 'acquisition',
                     'rel': 'file',
                     'type': mimeType("we_send.zip")}
            if lazylibrarian.CONFIG['OPDS_METAINFO']:
                author = myDB.match("SELECT AuthorName from authors WHERE AuthorID='%s'" % book['AuthorID'])
                author = makeUnicode(author['AuthorName'])
                entry['image'] = self.searchroot + '/' + book['BookImg']
                entry['content'] = escape('%s - %s' % (title, book['BookDesc']))
                entry['author'] = escape('%s' % author)
            else:
                entry['content'] = escape('%s (%s)' % (title, book['BookAdded']))
            entries.append(entry)

        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=RecentAudio&amp;index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=RecentAudio&amp;index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s result%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
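The LIKE clause in _RecentAudio splices kwargs['query'] straight into the SQL string. Examples #6 and #22 show that myDB.select also accepts bound parameters, so the same query can be built with a placeholder instead; a hedged sketch (recent_audio_query is a hypothetical helper, not part of LazyLibrarian):

def recent_audio_query(myDB, query=None):
    # Build the RecentAudio query with a bound LIKE parameter rather than
    # concatenating user input into the SQL text.
    cmd = "select BookName,BookID,AudioLibrary,BookDate,BookImg,BookDesc,BookAdded,AuthorID from books "
    cmd += "WHERE AudioStatus='Open' "
    args = ()
    if query:
        cmd += "AND BookName LIKE ? "
        args = ('%' + query + '%',)
    cmd += "order by AudioLibrary DESC"
    return myDB.select(cmd, args) if args else myDB.select(cmd)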
Example #12
    def _Authors(self, **kwargs):
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        feed = {'title': 'LazyLibrarian OPDS - Authors', 'id': 'Authors', 'updated': now()}
        links = []
        entries = []
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=Authors' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        links.append(getLink(href='%s/opensearchauthors.xml' % self.searchroot,
                             ftype='application/opensearchdescription+xml', rel='search', title='Search Authors'))
        cmd = "SELECT AuthorName,AuthorID,HaveBooks,TotalBooks,DateAdded from Authors WHERE "
        if 'query' in kwargs:
            cmd += "AuthorName LIKE '%" + kwargs['query'] + "%' AND "
        cmd += "CAST(HaveBooks AS INTEGER) > 0 order by AuthorName"
        results = myDB.select(cmd)
        page = results[index:(index + self.PAGE_SIZE)]
        for author in page:
            totalbooks = check_int(author['TotalBooks'], 0)
            havebooks = check_int(author['HaveBooks'], 0)
            lastupdated = author['DateAdded']
            name = makeUnicode(author['AuthorName'])
            entry = {
                    'title': escape('%s (%s/%s)' % (name, havebooks, totalbooks)),
                    'id': escape('author:%s' % author['AuthorID']),
                    'updated': opdstime(lastupdated),
                    'content': escape('%s (%s)' % (name, havebooks)),
                    'href': '%s?cmd=Author&amp;authorid=%s' % (self.opdsroot, author['AuthorID']),
                    'author': escape('%s' % name),
                    'kind': 'navigation',
                    'rel': 'subsection',
                }
            # removed authorimg as it stops navigation ??
            # if lazylibrarian.CONFIG['OPDS_METAINFO']:
            #    entry['image'] = self.searchroot + '/' + author['AuthorImg']
            entries.append(entry)

        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=Authors&amp;index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=Authors&amp;index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s author%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
Example #13
    def _Magazines(self, **kwargs):
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        feed = {'title': 'LazyLibrarian OPDS - Magazines', 'id': 'Magazines', 'updated': now()}
        links = []
        entries = []
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=Magazines' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        links.append(getLink(href='%s/opensearchmagazines.xml' % self.searchroot,
                             ftype='application/opensearchdescription+xml', rel='search', title='Search Magazines'))
        cmd = 'select magazines.*,(select count(*) as counter from issues where magazines.title = issues.title)'
        cmd += ' as Iss_Cnt from magazines '
        if 'query' in kwargs:
            cmd += "WHERE magazines.title LIKE '%" + kwargs['query'] + "%' "
        cmd += 'order by magazines.title'
        results = myDB.select(cmd)
        page = results[index:(index + self.PAGE_SIZE)]
        for mag in page:
            if mag['Iss_Cnt'] > 0:
                title = makeUnicode(mag['Title'])
                entry = {
                    'title': escape('%s (%s)' % (title, mag['Iss_Cnt'])),
                    'id': escape('magazine:%s' % title),
                    'updated': opdstime(mag['LastAcquired']),
                    'content': escape('%s' % title),
                    'href': '%s?cmd=Magazine&amp;magid=%s' % (self.opdsroot, quote_plus(title)),
                    'kind': 'navigation',
                    'rel': 'subsection',
                }
                if lazylibrarian.CONFIG['OPDS_METAINFO']:
                    res = cache_img('magazine', md5_utf8(mag['LatestCover']), mag['LatestCover'], refresh=True)
                    entry['image'] = self.searchroot + '/' + res[0]
                entries.append(entry)

        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=Magazines&amp;index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=Magazines&amp;index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s magazine%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
Example #14
    def _RecentMags(self, **kwargs):
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        feed = {'title': 'LazyLibrarian OPDS - Recent Magazines', 'id': 'Recent Magazines', 'updated': now()}
        links = []
        entries = []
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=RecentMags' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        links.append(getLink(href='%s/opensearchmagazines.xml' % self.searchroot,
                             ftype='application/opensearchdescription+xml', rel='search', title='Search Magazines'))
        cmd = "select Title,IssueID,IssueAcquired,IssueDate,IssueFile from issues "
        cmd += "where IssueFile != '' "
        if 'query' in kwargs:
            cmd += "AND Title LIKE '%" + kwargs['query'] + "%' "
        cmd += "order by IssueAcquired DESC"
        results = myDB.select(cmd)
        page = results[index:(index + self.PAGE_SIZE)]
        for mag in page:
            title = makeUnicode(mag['Title'])
            entry = {'title': escape('%s' % mag['IssueDate']),
                     'id': escape('issue:%s' % mag['IssueID']),
                     'updated': opdstime(mag['IssueAcquired']),
                     'content': escape('%s - %s' % (title, mag['IssueDate'])),
                     'href': '%s?cmd=Serve&amp;issueid=%s' % (self.opdsroot, quote_plus(mag['IssueID'])),
                     'kind': 'acquisition',
                     'rel': 'file',
                     'author': escape(title),
                     'type': mimeType(mag['IssueFile'])}
            if lazylibrarian.CONFIG['OPDS_METAINFO']:
                fname = os.path.splitext(mag['IssueFile'])[0]
                res = cache_img('magazine', mag['IssueID'], fname + '.jpg')
                entry['image'] = self.searchroot + '/' + res[0]
            entries.append(entry)

        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=RecentMags&amp;index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=RecentMags&amp;index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s issue%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
Example #15
def createMagCovers(refresh=False):
    if not lazylibrarian.CONFIG['IMP_MAGCOVER']:
        logger.info('Cover creation is disabled in config')
        return
    myDB = database.DBConnection()
    #  <> '' ignores empty string or NULL
    issues = myDB.select("SELECT IssueFile from issues WHERE IssueFile <> ''")
    if refresh:
        logger.info("Creating covers for %s issue%s" % (len(issues), plural(len(issues))))
    else:
        logger.info("Checking covers for %s issue%s" % (len(issues), plural(len(issues))))
    cnt = 0
    for item in issues:
        try:
            createMagCover(item['IssueFile'], refresh=refresh)
            cnt += 1
        except Exception as why:
            logger.warn('Unable to create cover for %s, %s %s' % (item['IssueFile'], type(why).__name__, str(why)))
    logger.info("Cover creation completed")
    if refresh:
        return "Created covers for %s issue%s" % (cnt, plural(cnt))
    return "Checked covers for %s issue%s" % (cnt, plural(cnt))
Example #16
def dump_table(table, savedir=None, status=None):
    myDB = database.DBConnection()
    # noinspection PyBroadException
    try:
        columns = myDB.select('PRAGMA table_info(%s)' % table)
        if not columns:  # no such table
            logger.warn("No such table [%s]" % table)
            return 0

        if not os.path.isdir(savedir):
            savedir = lazylibrarian.DATADIR

        headers = ''
        for item in columns:
            if headers:
                headers += ','
            headers += item[1]
        if status:
            cmd = 'SELECT %s from %s WHERE status="%s"' % (headers, table, status)
        else:
            cmd = 'SELECT %s from %s' % (headers, table)
        data = myDB.select(cmd)
        count = 0
        if data is not None:
            label = table
            if status:
                label += '_%s' % status
            csvFile = os.path.join(savedir, "%s.csv" % label)

            if PY2:
                fmode = 'wb'
            else:
                fmode = 'w'
            with open(csvFile, fmode) as csvfile:
                csvwrite = writer(csvfile, delimiter=',', quotechar='"', quoting=QUOTE_MINIMAL)
                headers = headers.split(',')
                csvwrite.writerow(headers)
                for item in data:
                    if PY2:
                        csvwrite.writerow([makeBytestr(s) if s else '' for s in item])
                    else:
                        csvwrite.writerow([str(s) if s else '' for s in item])
                    count += 1
            msg = "Exported %s item%s to %s" % (count, plural(count), csvFile)
            logger.info(msg)
        return count

    except Exception:
        msg = 'Unhandled exception in dump_table: %s' % traceback.format_exc()
        logger.error(msg)
        return 0
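A hypothetical call to dump_table; the save directory is a placeholder:

# Export the Wanted rows of the books table to /tmp/books_Wanted.csv
exported = dump_table('books', savedir='/tmp', status='Wanted')
print('Exported %d row%s' % (exported, plural(exported)))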
Example #17
def sync_to_gr():
    msg = ''
    try:
        threading.currentThread().name = 'GRSync'
        if lazylibrarian.CONFIG['GR_WANTED']:
            to_read_shelf, ll_wanted = grsync('Wanted', lazylibrarian.CONFIG['GR_WANTED'])
            msg += "%s change%s to %s shelf\n" % (to_read_shelf, plural(to_read_shelf),
                                                  lazylibrarian.CONFIG['GR_WANTED'])
            msg += "%s change%s to Wanted from GoodReads\n" % (ll_wanted, plural(ll_wanted))
        else:
            msg += "Sync Wanted books is disabled\n"
        if lazylibrarian.CONFIG['GR_OWNED']:
            to_owned_shelf, ll_have = grsync('Open', lazylibrarian.CONFIG['GR_OWNED'])
            msg += "%s change%s to %s shelf\n" % (to_owned_shelf, plural(to_owned_shelf),
                                                  lazylibrarian.CONFIG['GR_OWNED'])
            msg += "%s change%s to Owned from GoodReads\n" % (ll_have, plural(ll_have))
        else:
            msg += "Sync Owned books is disabled\n"
        logger.info(msg.strip('\n').replace('\n', ', '))
    except Exception as e:
        logger.error("Exception in sync_to_gr: %s %s" % (type(e).__name__, str(e)))
    finally:
        threading.currentThread().name = 'WEBSERVER'
        return msg
Example #18
def dbUpdate(refresh=False):
    try:
        myDB = database.DBConnection()

        activeauthors = myDB.select('SELECT AuthorName from authors WHERE Status="Active" \
                                    or Status="Loading" order by DateAdded ASC')
        logger.info('Starting update for %i active author%s' % (len(activeauthors), plural(len(activeauthors))))

        for author in activeauthors:
            authorname = author[0]
            importer.addAuthorToDB(authorname, refresh=refresh)

        logger.info('Active author update complete')
    except Exception:
        logger.error('Unhandled exception in dbUpdate: %s' % traceback.format_exc())
Example #19
def setWorkPages():
    """ Set the workpage link for any books that don't already have one """

    myDB = database.DBConnection()

    books = myDB.select('select BookID,AuthorName,BookName from books where length(WorkPage) < 4')
    if books:
        logger.debug('Setting WorkPage for %s book%s' % (len(books), plural(len(books))))
        for book in books:
            bookid = book['BookID']
            worklink = getWorkPage(bookid)
            if worklink:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"WorkPage": worklink}
                myDB.upsert("books", newValueDict, controlValueDict)
            else:
                logger.debug('No WorkPage found for %s: %s' % (book['AuthorName'], book['BookName']))
        logger.debug('setWorkPages completed')
Example #20
def LIME(book=None, test=False):
    errmsg = ''
    provider = "Limetorrent"
    host = lazylibrarian.CONFIG['LIME_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    params = {"q": book['searchterm']}
    providerurl = url_fix(host + "/searchrss/other")
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = item['description']
                        seeders = int(
                            seeders.split('Seeds:')[1].split(',')[0].strip())
                    except (IndexError, ValueError):
                        seeders = 0

                    size = item['size']
                    try:
                        size = int(size)
                    except ValueError:
                        size = 0

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['url']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < int(seeders):
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['LIME_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # may have ip based access limits
                        logger.error(
                            'Access forbidden. Please wait a while before trying %s again.'
                            % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" %
                                     (provider, str(e)))
                        logger.debug('%s: %s' %
                                     (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
Example #21
def NewzNabPlus(book=None, provider=None, searchType=None, searchMode=None, test=False):
    """
    Generic NewzNabPlus query function.
    Takes host + api key + search type and returns the result set,
    regardless of which NewzNab+ based site is being queried.
    ref http://usenetreviewz.com/nzb-sites/
    """

    host = provider['HOST']
    api_key = provider['API']
    logger.debug('[NewzNabPlus] searchType [%s] with Host [%s] mode [%s] using api [%s] for item [%s]' % (
        searchType, host, searchMode, api_key, str(book)))

    results = []

    params = ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode)

    if params:
        if not str(host)[:4] == "http":
            host = 'http://' + host
        if host[-1:] == '/':
            host = host[:-1]
        URL = host + '/api?' + urllib.urlencode(params)

        sterm = makeUnicode(book['searchterm'])

        rootxml = None
        logger.debug("[NewzNabPlus] URL = %s" % URL)
        result, success = fetchURL(URL)

        if test:
            if result.startswith('<') and result.endswith('/>') and "error code" in result:
                result = result[1:-2]
                success = False
            if not success:
                logger.debug(result)
            return success

        if success:
            try:
                rootxml = ElementTree.fromstring(result)
            except Exception as e:
                logger.error('Error parsing data from %s: %s %s' % (host, type(e).__name__, str(e)))
                rootxml = None
        else:
            if not result or result == "''":
                result = "Got an empty response"
            logger.error('Error reading data from %s: %s' % (host, result))
            # maybe the host doesn't support the search type
            cancelled = cancelSearchType(searchType, result, provider)
            if not cancelled:  # it was some other problem
                BlockProvider(provider['HOST'], result)

        if rootxml is not None:
            # to debug because of api
            logger.debug('Parsing results from <a href="%s">%s</a>' % (URL, host))

            if rootxml.tag == 'error':
                errormsg = rootxml.get('description', default='unknown error')
                logger.error("%s - %s" % (host, errormsg))
                # maybe the host doesn't support the search type
                cancelled = cancelSearchType(searchType, errormsg, provider)
                if not cancelled:  # it was some other problem
                    BlockProvider(provider['HOST'], errormsg)
            else:
                resultxml = rootxml.getiterator('item')
                nzbcount = 0
                maxage = check_int(lazylibrarian.CONFIG['USENET_RETENTION'], 0)
                for nzb in resultxml:
                    try:
                        thisnzb = ReturnResultsFieldsBySearchType(book, nzb, host, searchMode, provider['DLPRIORITY'])
                        if not maxage:
                            nzbcount += 1
                            results.append(thisnzb)
                        else:
                            # example nzbdate format: Mon, 27 May 2013 02:12:09 +0200
                            nzbdate = thisnzb['nzbdate']
                            try:
                                parts = nzbdate.split(' ')
                                nzbdate = ' '.join(parts[:5])  # strip the +0200
                                dt = datetime.datetime.strptime(nzbdate, "%a, %d %b %Y %H:%M:%S").timetuple()
                                nzbage = age('%04d-%02d-%02d' % (dt.tm_year, dt.tm_mon, dt.tm_mday))
                            except Exception as e:
                                logger.debug('Unable to get age from [%s] %s %s' %
                                             (thisnzb['nzbdate'], type(e).__name__, str(e)))
                                nzbage = 0
                            if nzbage <= maxage:
                                nzbcount += 1
                                results.append(thisnzb)
                            else:
                                logger.debug('%s is too old (%s day%s)' % (thisnzb['nzbtitle'], nzbage, plural(nzbage)))

                    except IndexError:
                        logger.debug('No results from %s for %s' % (host, sterm))
                logger.debug('Found %s nzb at %s for: %s' % (nzbcount, host, sterm))
        else:
            logger.debug('No data returned from %s for %s' % (host, sterm))
    return results
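The retention check above compares age('YYYY-MM-DD') against USENET_RETENTION, which is configured in days. The age() helper isn't included in these examples; a minimal sketch of the assumed behaviour (days elapsed since the given date):

from datetime import date

def age(histdate):
    # Days elapsed since a 'YYYY-MM-DD' date string; sketch of the assumed helper.
    year, month, day = (int(part) for part in histdate.split('-'))
    return (date.today() - date(year, month, day)).days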
Example #22
    def _Members(self, **kwargs):
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        if 'seriesid' not in kwargs:
            self.data = self._error_with_message('No Series Provided')
            return
        links = []
        entries = []
        series = myDB.match("SELECT SeriesName from Series WHERE SeriesID=?", (kwargs['seriesid'],))
        cmd = "SELECT BookName,BookDate,BookAdded,BookDesc,BookImg,BookFile,AudioFile,books.BookID,SeriesNum "
        cmd += "from books,member where (Status='Open' or AudioStatus='Open') and SeriesID=? "
        cmd += "and books.bookid = member.bookid order by CAST(SeriesNum AS INTEGER)"
        results = myDB.select(cmd, (kwargs['seriesid'],))
        cmd = 'SELECT AuthorName from authors,books WHERE authors.authorid = books.authorid AND '
        cmd += 'books.bookid=?'
        res = myDB.match(cmd, (results[0]['BookID'],))
        author = res['AuthorName']
        page = results[index:(index + self.PAGE_SIZE)]
        for book in page:
            mime_type = None
            if book['BookFile']:
                mime_type = mimeType(book['BookFile'])
            elif book['AudioFile']:
                mime_type = mimeType(book['AudioFile'])
            if mime_type:
                if book['SeriesNum']:
                    snum = ' (%s)' % book['SeriesNum']
                else:
                    snum = ''
                entry = {'title': escape('%s%s' % (book['BookName'], snum)),
                         'id': escape('book:%s' % book['BookID']),
                         'updated': opdstime(book['BookAdded']),
                         'href': '%s?cmd=Serve&amp;bookid=%s' % (self.opdsroot, book['BookID']),
                         'kind': 'acquisition',
                         'rel': 'file',
                         'author': escape("%s" % author),
                         'type': mime_type}

                if lazylibrarian.CONFIG['OPDS_METAINFO']:
                    entry['image'] = self.searchroot + '/' + book['BookImg']
                    entry['content'] = escape('%s (%s %s) %s' % (book['BookName'], series['SeriesName'],
                                                                 book['SeriesNum'], book['BookDesc']))
                else:
                    entry['content'] = escape('%s (%s %s) %s' % (book['BookName'], series['SeriesName'],
                                                                 book['SeriesNum'], book['BookAdded']))
                entries.append(entry)

        feed = {}
        seriesname = '%s (%s) %s' % (escape(series['SeriesName']), len(entries), author)
        feed['title'] = 'LazyLibrarian OPDS - %s' % seriesname
        feed['id'] = 'series:%s' % escape(kwargs['seriesid'])
        feed['updated'] = now()
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=Series' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=Members&amp;seriesid=%s&amp;index=%s' % (self.opdsroot, kwargs['seriesid'],
                                                                              index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=Members&amp;seriesid=%s&amp;index=%s' % (self.opdsroot, kwargs['seriesid'],
                                                                              index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s book%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
Example #23
def processDir(reset=False):

    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "POSTPROCESS"

    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(
            lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' %
                     (processpath, why.strerror))
        return

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if len(snatched) == 0:
        logger.info('Nothing marked as snatched.')
        scheduleJob(action='Stop', target='processDir')
        return

    if len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
        return

    logger.info("Checking %s download%s for %s snatched file%s" %
                (len(downloads), plural(
                    len(downloads)), len(snatched), plural(len(snatched))))
    ppcount = 0
    for book in snatched:
        matches = []
        for fname in downloads:
            if not fname.endswith('.fail'):  # has this failed before?
                # this is to get round differences in torrent filenames.
                # Torrents aren't always returned with the name we searched for
                # there might be a better way...
                if isinstance(fname, str):
                    matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                else:
                    matchname = fname
                if ' LL.(' in matchname:
                    matchname = matchname.split(' LL.(')[0]
                matchtitle = book['NZBtitle']
                match = 0
                if matchtitle:
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                if match >= lazylibrarian.DLOAD_RATIO:
                    fname = matchname
                    if os.path.isfile(os.path.join(processpath, fname)):
                        # not a directory, handle single file downloads here. Book/mag file in download root.
                        # move the file into its own subdirectory so we don't move/delete things that aren't ours
                        if is_valid_booktype(fname, booktype="book") \
                                or is_valid_booktype(fname, booktype="mag"):
                            fname = os.path.splitext(fname)[0]
                            dirname = os.path.join(processpath, fname)
                            if not os.path.exists(dirname):
                                try:
                                    os.makedirs(dirname)
                                except OSError as why:
                                    logger.debug(
                                        'Failed to create directory %s, %s' %
                                        (dirname, why.strerror))
                            if os.path.exists(dirname):
                                # move the book and any related files too
                                # ie other book formats, or opf, jpg with same title
                                # can't move metadata.opf or cover.jpg or similar
                                # as can't be sure they are ours
                                # not sure if we need a new listdir here, or whether we can use the old one
                                list_dir = os.listdir(processpath)
                                for ourfile in list_dir:
                                    if ourfile.startswith(fname):
                                        if is_valid_booktype(ourfile, booktype="book") \
                                            or is_valid_booktype(ourfile, booktype="mag") \
                                                or os.path.splitext(ourfile)[1].lower() in ['.opf', '.jpg']:
                                            try:
                                                shutil.move(
                                                    os.path.join(
                                                        processpath, ourfile),
                                                    os.path.join(
                                                        dirname, ourfile))
                                            except Exception as why:
                                                logger.debug(
                                                    "Failed to move file %s to %s, %s"
                                                    % (ourfile, dirname,
                                                       str(why)))

                    if os.path.isdir(os.path.join(processpath, fname)):
                        pp_path = os.path.join(processpath, fname)
                        logger.debug('Found folder (%s%%) %s for %s' %
                                     (match, pp_path, book['NZBtitle']))
                        matches.append([match, pp_path, book])
                else:
                    logger.debug('No match (%s%%) %s for %s' %
                                 (match, matchname, matchtitle))
            else:
                logger.debug('Skipping %s' % fname)

        if matches:
            highest = max(matches, key=lambda x: x[0])
            match = highest[0]
            pp_path = highest[1]
            book = highest[2]
            logger.debug(u'Best match (%s%%): %s for %s' %
                         (match, pp_path, book['NZBtitle']))

            data = myDB.match('SELECT * from books WHERE BookID="%s"' %
                              book['BookID'])
            if data:
                logger.debug(u'Processing book %s' % book['BookID'])
                authorname = data['AuthorName']
                bookname = data['BookName']
                if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                    logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                    lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace(
                        '/', '\\')
                # Default destination path; should be configurable via the config file.
                dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace(
                    '$Author', authorname).replace('$Title', bookname)
                global_name = lazylibrarian.EBOOK_DEST_FILE.replace(
                    '$Author', authorname).replace('$Title', bookname)
                global_name = unaccented(global_name)
                # dest_path = authorname+'/'+bookname
                # global_name = bookname + ' - ' + authorname
                # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                dic = {
                    '<': '',
                    '>': '',
                    '...': '',
                    ' & ': ' ',
                    ' = ': ' ',
                    '?': '',
                    '$': 's',
                    ' + ': ' ',
                    '"': '',
                    ',': '',
                    '*': '',
                    ':': '',
                    ';': '',
                    '\'': ''
                }
                dest_path = unaccented_str(replace_all(dest_path, dic))
                dest_path = os.path.join(lazylibrarian.DESTINATION_DIR,
                                         dest_path).encode(
                                             lazylibrarian.SYS_ENCODING)
            else:
                data = myDB.match('SELECT * from magazines WHERE Title="%s"' %
                                  book['BookID'])
                if data:
                    logger.debug(u'Processing magazine %s' % book['BookID'])
                    # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                    # files are downloading, there will be an error in post-processing, trying to go to the
                    # same directory.
                    # keep for processing issues arriving out of order
                    mostrecentissue = data['IssueDate']
                    # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                    dic = {
                        '<': '',
                        '>': '',
                        '...': '',
                        ' & ': ' ',
                        ' = ': ' ',
                        '?': '',
                        '$': 's',
                        ' + ': ' ',
                        '"': '',
                        ',': '',
                        '*': '',
                        ':': '',
                        ';': '',
                        '\'': ''
                    }
                    mag_name = unaccented_str(replace_all(book['BookID'], dic))
                    # book auxinfo is a cleaned date, eg 2015-01-01
                    dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                        '$IssueDate',
                        book['AuxInfo']).replace('$Title', mag_name)
                    # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                    if lazylibrarian.MAG_RELATIVE:
                        if dest_path[0] not in '._':
                            dest_path = '_' + dest_path
                        dest_path = os.path.join(
                            lazylibrarian.DESTINATION_DIR,
                            dest_path).encode(lazylibrarian.SYS_ENCODING)
                    else:
                        dest_path = dest_path.encode(
                            lazylibrarian.SYS_ENCODING)
                    authorname = None
                    bookname = None
                    global_name = lazylibrarian.MAG_DEST_FILE.replace(
                        '$IssueDate',
                        book['AuxInfo']).replace('$Title', mag_name)
                    global_name = unaccented(global_name)
                    # global_name = book['AuxInfo']+' - '+title
                else:
                    logger.debug(
                        "Snatched magazine %s is not in download directory" %
                        (book['BookID']))
                    continue
        else:
            logger.debug("Snatched %s %s is not in download directory" %
                         (book['NZBmode'], book['NZBtitle']))
            continue

        processBook = processDestination(pp_path, dest_path, authorname,
                                         bookname, global_name,
                                         book['NZBmode'])

        if processBook:
            logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
            # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {
                "Status": "Processed",
                "NZBDate": now()
            }  # say when we processed it
            myDB.upsert("wanted", newValueDict, controlValueDict)

            if bookname is not None:  # it's a book, if None it's a magazine
                if len(lazylibrarian.IMP_CALIBREDB):
                    logger.debug(
                        'Calibre should have created the extras for us')
                else:
                    processExtras(myDB, dest_path, global_name, data)
            else:
                # update mags
                controlValueDict = {"Title": book['BookID']}
                if mostrecentissue:
                    if mostrecentissue.isdigit() and str(
                            book['AuxInfo']).isdigit():
                        older = int(mostrecentissue) > int(
                            book['AuxInfo'])  # issuenumber
                    else:
                        older = mostrecentissue > book['AuxInfo']  # YYYY-MM-DD
                else:
                    older = False
                if older:  # check this in case processing issues arriving out of order
                    newValueDict = {
                        "LastAcquired": today(),
                        "IssueStatus": "Open"
                    }
                else:
                    newValueDict = {
                        "IssueDate": book['AuxInfo'],
                        "LastAcquired": today(),
                        "IssueStatus": "Open"
                    }
                myDB.upsert("magazines", newValueDict, controlValueDict)
                # dest_path is where we put the magazine after processing, but we don't have the full filename
                # so look for any "book" in that directory
                dest_file = book_file(dest_path, booktype='mag')
                controlValueDict = {
                    "Title": book['BookID'],
                    "IssueDate": book['AuxInfo']
                }
                newValueDict = {
                    "IssueAcquired": today(),
                    "IssueFile": dest_file,
                    "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                }
                myDB.upsert("issues", newValueDict, controlValueDict)

                # create a thumbnail cover for the new issue
                create_cover(dest_file)

            logger.info('Successfully processed: %s' % global_name)
            ppcount = ppcount + 1
            notify_download("%s from %s at %s" %
                            (global_name, book['NZBprov'], now()))
        else:
            logger.error('Postprocessing for %s has failed.' % global_name)
            logger.error('Warning - Residual files remain in %s.fail' %
                         pp_path)
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {"Status": "Failed", "NZBDate": now()}
            myDB.upsert("wanted", newValueDict, controlValueDict)
            # if it's a book, reset status so we try for a different version
            # if it's a magazine, user can select a different one from pastissues table
            if bookname is not None:
                myDB.action(
                    'UPDATE books SET status = "Wanted" WHERE BookID=?',
                    (book['BookID'],))

            # at this point, as it failed we should move it or it will get postprocessed
            # again (and fail again)
            try:
                os.rename(pp_path, pp_path + '.fail')
            except Exception as e:
                logger.debug("Unable to rename %s, %s" % (pp_path, str(e)))

    downloads = os.listdir(
        processpath)  # check in case we processed/deleted some above
    for directory in downloads:
        if "LL.(" in directory and not directory.endswith('.fail'):
            bookID = str(directory).split("LL.(")[1].split(")")[0]
            logger.debug("Book with id: " + str(bookID) + " is in downloads")
            pp_path = os.path.join(processpath, directory)

            if os.path.isfile(pp_path):
                pp_path = os.path.join(processpath)

            if os.path.isdir(pp_path):
                logger.debug('Found LL folder %s.' % pp_path)
            if import_book(pp_path, bookID):
                ppcount = ppcount + 1

    if ppcount == 0:
        logger.info('No snatched books/mags have been found')
    else:
        logger.info('%s book%s/mag%s processed.' %
                    (ppcount, plural(ppcount), plural(ppcount)))

    if reset:
        scheduleJob(action='Restart', target='processDir')
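
The character-substitution dictionaries above are fed to the replace_all() helper so the magazine title is safe to use as a folder and file name. A minimal sketch of what that helper does, assuming plain sequential substitution rather than the project's exact implementation:

def replace_all(text, dic):
    """ Replace every key of dic that appears in text with its value (sketch) """
    for key, value in dic.items():
        text = text.replace(key, value)
    return text

# e.g. replace_all('Girls & Gadgets: Weekly?', {' & ': ' ', ':': '', '?': ''})
# returns 'Girls Gadgets Weekly'
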
Example #24
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if mags is None:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if mags is None:  # backlog search
            searchmags = myDB.select(
                'SELECT Title, Regex, LastAcquired, IssueDate from magazines WHERE Status="Active"')
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select(
                    'SELECT Title, Regex, LastAcquired, IssueDate from magazines WHERE Title=? AND Status="Active"',
                    (magazine['bookid'],))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        logger.debug("Removing old magazine search results")
        myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' %
                    (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']

            if not searchterm:
                dic = {
                    '...': '',
                    ' & ': ' ',
                    ' = ': ' ',
                    '?': '',
                    '$': 's',
                    ' + ': ' ',
                    '"': '',
                    ',': '',
                    '*': ''
                }
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless it's not a latin-1 encodable name
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)

                searchterm = re.sub(r'[.\-/]', ' ', searchterm)
                searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm})

        if not searchlist:
            logger.warn(
                'There is nothing to search for.  Mark some magazines as active.'
            )

        for book in searchlist:

            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    logger.warn(
                        'No nzb providers are available. Check config and blocklist'
                    )

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(
                    book, 'mag')
                if not nproviders:
                    logger.warn(
                        'No direct providers are available. Check config and blocklist'
                    )

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                            'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(
                    book, 'mag')
                if not nproviders:
                    logger.warn(
                        'No torrent providers are available. Check config and blocklist'
                    )

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                            'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders = IterateOverRSSSites()
                if not nproviders:
                    logger.warn(
                        'No rss providers are available. Check config and blocklist'
                    )

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        resultlist.append({
                            'bookid': book['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': item[
                                'tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                            'nzbsize': item['tor_size'],
                            'nzbmode': item['tor_type']
                        })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # strip quotes from the title
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(
                        nzbsize_temp, 1000
                    )  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    results = myDB.match(
                        'SELECT * from magazines WHERE Title=?', (bookid, ))
                    if not results:
                        logger.debug(
                            'Magazine [%s] does not match search term [%s].' %
                            (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(
                            lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(
                                lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" %
                                             nzbtitle)
                                rejected = True

                        if not rejected:
                            dic = {
                                '.': ' ',
                                '-': ' ',
                                '/': ' ',
                                '+': ' ',
                                '_': ' ',
                                '(': '',
                                ')': ''
                            }
                            nzbtitle_formatted = replace_all(nzbtitle,
                                                             dic).strip()
                            # Need to make sure that substrings of magazine titles don't get found
                            # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this
                            # remove extra spaces if they're in a row
                            if nzbtitle_formatted and nzbtitle_formatted[
                                    0] == '[' and nzbtitle_formatted[-1] == ']':
                                nzbtitle_formatted = nzbtitle_formatted[1:-1]
                            nzbtitle_exploded_temp = " ".join(
                                nzbtitle_formatted.split())
                            nzbtitle_exploded = nzbtitle_exploded_temp.split(
                                ' ')

                            if ' ' in bookid:
                                bookid_exploded = bookid.split(' ')
                            else:
                                bookid_exploded = [bookid]

                            # check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015

                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz
                                mag_title_match = fuzz.token_set_ratio(
                                    unaccented(bookid),
                                    unaccented(nzbtitle_formatted))

                                if mag_title_match < check_int(
                                        lazylibrarian.CONFIG['MATCH_RATIO'],
                                        90):
                                    logger.debug(
                                        u"Magazine token set Match failed: " +
                                        str(mag_title_match) + "% for " +
                                        nzbtitle_formatted)
                                    rejected = True
                                else:
                                    logger.debug(u"Magazine matched: " +
                                                 str(mag_title_match) + "% " +
                                                 bookid + " for " +
                                                 nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" %
                                             len(nzbtitle_exploded))
                                rejected = True

                        if not rejected:
                            blocked = myDB.match(
                                'SELECT * from wanted WHERE NZBurl=? and Status="Failed"',
                                (nzburl, ))
                            if blocked:
                                logger.debug(
                                    "Rejecting %s, blacklisted at %s" %
                                    (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(
                                str(results['Reject']).lower())
                            reject_list += getList(
                                lazylibrarian.CONFIG['REJECT_MAGS'])
                            lower_title = unaccented(
                                nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Reject: %s' %
                                                 str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" %
                                                 (nzbtitle_formatted, word))
                                    break

                        regex_pass = 0
                        if not rejected:
                            # Magazine names have many different styles of date
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            # MonthName DD YYYY or MonthName DD, YYYY
                            # YYYY MM or YYYY MM DD
                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            # nn YYYY issue number without "Nr" before it
                            # issue and year as a single 6 digit string eg 222015
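                            # Hypothetical examples (not from any real feed) of what the checks below yield:
                            #   "The MagPI 14 July 2015"   -> 2015-07-14
                            #   "Linux Format March 2016"  -> 2016-03-01
                            #   "Wired 2016 03"            -> 2016-03-01
                            #   "Custom PC Issue 151"      -> issue number 151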
                            newdatish = "none"
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            pos = 0
                            while pos < len(nzbtitle_exploded):
                                year = check_year(nzbtitle_exploded[pos])
                                if year and pos:
                                    month = month2num(nzbtitle_exploded[pos -
                                                                        1])
                                    if month:
                                        if pos - 1:
                                            day = check_int(
                                                nzbtitle_exploded[pos - 2], 1)
                                            if day > 31:  # probably issue number nn
                                                day = 1
                                        else:
                                            day = 1
                                        newdatish = "%04d-%02d-%02d" % (
                                            year, month, day)
                                        try:
                                            _ = datetime.date(year, month, day)
                                            regex_pass = 1
                                            break
                                        except ValueError:
                                            regex_pass = 0
                                pos += 1

                            # MonthName DD YYYY or MonthName DD, YYYY
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and (pos - 1):
                                        month = month2num(
                                            nzbtitle_exploded[pos - 2])
                                        if month:
                                            day = check_int(
                                                nzbtitle_exploded[
                                                    pos - 1].rstrip(','), 1)
                                            try:
                                                _ = datetime.date(
                                                    year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (
                                                    year, month, day)
                                                regex_pass = 2
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # YYYY MM or YYYY MM DD
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and pos + 1 < len(
                                            nzbtitle_exploded):
                                        month = check_int(
                                            nzbtitle_exploded[pos + 1], 0)
                                        if month:
                                            if pos + 2 < len(
                                                    nzbtitle_exploded):
                                                day = check_int(
                                                    nzbtitle_exploded[pos + 2],
                                                    1)
                                            else:
                                                day = 1
                                            try:
                                                _ = datetime.date(
                                                    year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (
                                                    year, month, day)
                                                regex_pass = 3
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    if nzbtitle_exploded[pos].lower() in [
                                            "issue", "no", "nr", "vol"
                                    ]:
                                        if pos + 1 < len(nzbtitle_exploded):
                                            issue = check_int(
                                                nzbtitle_exploded[pos + 1], 0)
                                            if issue:
                                                newdatish = str(
                                                    issue)  # 4 == 04 == 004
                                                if pos + 2 < len(
                                                        nzbtitle_exploded):
                                                    year = check_year(
                                                        nzbtitle_exploded[pos +
                                                                          2])
                                                    if year and year < int(
                                                            datetime.date.
                                                            today().year):
                                                        newdatish = '0'  # it's old
                                                    regex_pass = 4  # Issue/No/Nr/Vol nn, YYYY
                                                else:
                                                    regex_pass = 5  # Issue/No/Nr/Vol nn
                                                break
                                    pos += 1

                            # nn YYYY issue number without "Nr" before it
                            if not regex_pass:
                                pos = 1
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year:
                                        issue = check_int(
                                            nzbtitle_exploded[pos - 1], 0)
                                        if issue:
                                            newdatish = str(
                                                issue)  # 4 == 04 == 004
                                            regex_pass = 6
                                            if year < int(datetime.date.today(
                                            ).year):
                                                newdatish = '0'  # it's old
                                            break
                                    pos += 1

                            # issue and year as a single 6 digit string eg 222015
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    issue = nzbtitle_exploded[pos]
                                    if issue.isdigit() and len(issue) == 6:
                                        year = int(issue[2:])
                                        issue = int(issue[:2])
                                        newdatish = str(
                                            issue)  # 4 == 04 == 004
                                        regex_pass = 7
                                        if year < int(
                                                datetime.date.today().year):
                                            newdatish = '0'  # it's old
                                        break
                                    pos += 1

                            if not regex_pass:
                                logger.debug(
                                    'Magazine %s not in a recognised date format.'
                                    % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                regex_pass = 99

                        if rejected:
                            rejects += 1
                        else:
                            if lazylibrarian.LOGLEVEL > 2:
                                logger.debug("regex %s [%s] %s" %
                                             (regex_pass, nzbtitle_formatted,
                                              newdatish))
                            # wanted issues go into wanted table marked "Wanted"
                            #  the rest into pastissues table marked "Skipped"
                            insert_table = "pastissues"
                            insert_status = "Skipped"

                            control_date = results['IssueDate']
                            if control_date is None:  # we haven't got any copies of this magazine yet
                                # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                                # could perhaps calc differently for weekly, biweekly etc
                                # or for magazines with only an issue number, use zero
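                                # e.g. with MAG_AGE = 31 (hypothetical value) and today being 2015-08-01,
                                # control_date becomes 2015-07-01, so only issues dated after that are grabbed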

                                if str(newdatish).isdigit():
                                    logger.debug(
                                        'Magazine comparing issue numbers (%s)'
                                        % newdatish)
                                    control_date = 0
                                elif re.match(r'\d+-\d\d-\d\d', str(newdatish)):
                                    start_time = time.time()
                                    start_time -= int(
                                        lazylibrarian.CONFIG['MAG_AGE']
                                    ) * 24 * 60 * 60  # number of seconds in days
                                    if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                        start_time = 0
                                    control_date = time.strftime(
                                        "%Y-%m-%d", time.localtime(start_time))
                                    logger.debug(
                                        'Magazine date comparing to %s' %
                                        control_date)
                                else:
                                    logger.debug(
                                        'Magazine unable to find comparison type [%s]'
                                        % newdatish)
                                    control_date = 0

                            if str(control_date).isdigit() and str(
                                    newdatish).isdigit():
                                # for issue numbers, check if later than last one we have
                                comp_date = int(newdatish) - int(control_date)
                                newdatish = "%s" % newdatish
                                newdatish = newdatish.zfill(
                                    4)  # pad so we sort correctly
                            elif re.match(r'\d+-\d\d-\d\d', str(control_date)) and \
                                    re.match(r'\d+-\d\d-\d\d', str(newdatish)):
                                # only grab a copy if it's newer than the most recent we have,
                                # or newer than a month ago if we have none
                                comp_date = datecompare(
                                    newdatish, control_date)
                            else:
                                # invalid comparison of date and issue number
                                if re.match(r'\d+-\d\d-\d\d', str(control_date)):
                                    logger.debug(
                                        'Magazine %s failed: Expecting a date'
                                        % nzbtitle_formatted)
                                else:
                                    logger.debug(
                                        'Magazine %s failed: Expecting issue number'
                                        % nzbtitle_formatted)
                                bad_date += 1
                                newdatish = "1970-01-01"  # this is our fake date for ones we can't decipher
                                comp_date = 0

                            if comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + newdatish
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug(
                                        'This issue of %s is new, downloading'
                                        % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' %
                                                 len(issues))
                                    if lazylibrarian.LOGLEVEL > 2:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    insert_status = "Wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug(
                                        'This issue of %s is already flagged for download'
                                        % issue)
                            else:
                                if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                    logger.debug(
                                        'This issue of %s is old; skipping.' %
                                        nzbtitle_formatted)
                                    old_date += 1

                            # store only the _new_ matching results
                            #  Don't add a new entry if this issue has been found on an earlier search
                            #  and status has been user-set ( we only delete the "Skipped" ones )
                            #  In "wanted" table it might be already snatched/downloading/processing

                            mag_entry = myDB.match(
                                'SELECT * from %s WHERE NZBtitle=? and NZBprov=?'
                                % insert_table, (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug(
                                        '%s is already in %s marked %s' %
                                        (nzbtitle, insert_table,
                                         insert_status))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": newdatish,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict,
                                            controlValueDict)
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Added %s to %s marked %s' %
                                                 (nzbtitle, insert_table,
                                                  insert_status))

                msg = 'Found %i result%s for %s. %i new,' % (
                    total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (
                    old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch = TORDownloadMethod(magazine['bookid'],
                                                   magazine['nzbtitle'],
                                                   magazine['nzburl'],
                                                   'magazine')
                    else:
                        snatch = NZBDownloadMethod(magazine['bookid'],
                                                   magazine['nzbtitle'],
                                                   magazine['nzburl'],
                                                   'magazine')
                    if snatch:
                        logger.info(
                            'Downloading %s from %s' %
                            (magazine['nzbtitle'], magazine["nzbprov"]))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']),
                                       magazine["nzbprov"], now()))
                        custom_notify_snatch(magazine['bookid'])
                        scheduleJob(action='Start', target='processDir')

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' %
                     traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
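
The title matching in search_magazines relies on fuzzywuzzy's token_set_ratio so a magazine name cannot match a longer title that merely contains it (the Maxim USA / Maximum PC USA case noted in the comments). A small, hypothetical sketch of that check, using the same 90% fallback threshold the code above uses for MATCH_RATIO:

from fuzzywuzzy import fuzz

def title_matches(wanted, candidate, ratio=90):
    """ True if candidate looks like an issue of the wanted magazine (sketch) """
    return fuzz.token_set_ratio(wanted, candidate) >= ratio

# title_matches('The MagPi', 'The MagPi July 2015')        -> True (scores 100)
# title_matches('Maxim USA', 'Maximum PC USA July 2015')   -> False (scores well below 90)
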
Example #25
0
def LIME(book=None, test=False):
    errmsg = ''
    provider = "Limetorrent"
    host = lazylibrarian.CONFIG['LIME_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    params = {
        "q": book['searchterm']
    }
    providerurl = url_fix(host + "/searchrss/other")
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
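    # note: the "- 1" above plus the strict "minimumseeders < seeders" test below means we only
    # accept torrents with at least NUMBEROFSEEDERS seeders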
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = item['description']
                        seeders = int(seeders.split('Seeds:')[1].split(' ,')[0].replace(',', '').strip())
                    except (IndexError, ValueError):
                        seeders = 0

                    size = item['size']
                    try:
                        size = int(size)
                    except ValueError:
                        size = 0

                    try:
                        pubdate = item['published']
                    except KeyError:
                        pubdate = None

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['url']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        res = {
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['LIME_DLPRIORITY']
                        }
                        if pubdate:
                            res['tor_date'] = pubdate
                        results.append(res)
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # may have ip based access limits
                        logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
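
Limetorrents reports the seeder count inside the item description rather than as a separate RSS element, which is why the parser above splits the text apart. A self-contained sketch of that extraction, assuming the same 'Seeds: n ,' layout the code above expects:

def extract_seeders(description):
    """ Pull the seeder count out of a 'Seeds: nnn , ...' string; 0 if absent (sketch) """
    try:
        return int(description.split('Seeds:')[1].split(' ,')[0].replace(',', '').strip())
    except (AttributeError, IndexError, ValueError):
        return 0

# extract_seeders('Seeds: 1,234 , Leechers: 56')  -> 1234
# extract_seeders('no seeder info')               -> 0
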
Example #26
0
    def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped",
                         audiostatus="Skipped", entrystatus='Active', refresh=False):
        # noinspection PyBroadException
        try:
            logger.debug('[%s] Now processing books with Google Books API' % authorname)
            # Google doesn't like accents in author names
            set_url = self.url + quote('inauthor:"%s"' % unaccented_str(authorname))

            api_hits = 0
            gr_lang_hits = 0
            lt_lang_hits = 0
            gb_lang_change = 0
            cache_hits = 0
            not_cached = 0
            startindex = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            locked_count = 0
            book_ignore_count = 0
            total_count = 0
            number_results = 1

            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            # mark the author as Loading while we fetch their books
            myDB = database.DBConnection()
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            try:
                while startindex < number_results:

                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urlencode(self.params)

                    try:
                        jsonresults, in_cache = gb_json_request(URL, useCache=not refresh)
                        if not jsonresults:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                    except Exception as err:
                        if hasattr(err, 'reason'):
                            errmsg = err.reason
                        else:
                            errmsg = str(err)
                        logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                        break

                    if number_results == 0:
                        logger.warn('Found no results for %s' % authorname)
                        break
                    else:
                        logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname))

                    startindex += 40

                    for item in jsonresults['items']:

                        total_count += 1
                        book = bookdict(item)
                        # skip any result without an author; a book with no author is no use to us
                        if not book['author']:
                            logger.debug('Skipped a result without authorfield.')
                            continue

                        isbnhead = ""
                        if len(book['isbn']) == 10:
                            isbnhead = book['isbn'][0:3]
                        elif len(book['isbn']) == 13:
                            isbnhead = book['isbn'][3:6]
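                        # e.g. (hypothetical) ISBN 9782070360024 gives isbnhead '207'; the prefix lookups
                        # below match on its leading digits (ISBN group 2 is the French-language group)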

                        booklang = book['lang']
                        # do we care about language?
                        if "All" not in valid_langs:
                            if book['isbn']:
                                # seems google lies to us, sometimes tells us books are in english when they are not
                                if booklang == "Unknown" or booklang == "en":
                                    googlelang = booklang
                                    match = False
                                    lang = myDB.match('SELECT lang FROM languages where isbn=?', (isbnhead,))
                                    if lang:
                                        booklang = lang['lang']
                                        cache_hits += 1
                                        logger.debug("Found cached language [%s] for [%s]" % (booklang, isbnhead))
                                        match = True
                                    if not match:  # no match in cache, try lookup dict
                                        if isbnhead:
                                            if len(book['isbn']) == 13 and book['isbn'].startswith('979'):
                                                for lang in lazylibrarian.isbn_979_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_979_dict[lang]
                                                        logger.debug("ISBN979 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            elif (len(book['isbn']) == 10) or \
                                                    (len(book['isbn']) == 13 and book['isbn'].startswith('978')):
                                                for lang in lazylibrarian.isbn_978_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_978_dict[lang]
                                                        logger.debug("ISBN979 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            if match:
                                                controlValueDict = {"isbn": isbnhead}
                                                newValueDict = {"lang": booklang}
                                                myDB.upsert("languages", newValueDict, controlValueDict)

                                    if not match:
                                        booklang = thingLang(book['isbn'])
                                        lt_lang_hits += 1
                                        if booklang:
                                            match = True
                                            myDB.action('insert into languages values (?, ?)', (isbnhead, booklang))

                                    if match:
                                        # We found a better language match
                                        if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]:
                                            # these are all english, may need to expand this list
                                            logger.debug("%s Google thinks [%s], we think [%s]" %
                                                         (book['name'], googlelang, booklang))
                                            gb_lang_change += 1
                                    else:  # No match anywhere, accept google language
                                        booklang = googlelang

                            # skip if language is in ignore list
                            if booklang not in valid_langs:
                                logger.debug('Skipped [%s] with language %s' % (book['name'], booklang))
                                ignored += 1
                                continue

                        ignorable = ['future', 'date', 'isbn']
                        if lazylibrarian.CONFIG['NO_LANG']:
                            ignorable.append('lang')
                        rejected = None
                        check_status = False
                        existing_book = None
                        bookname = book['name']
                        bookid = item['id']
                        if not bookname:
                            logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorname))
                            rejected = 'name', 'No bookname'
                        else:
                            bookname = replace_all(unaccented(bookname), {':': '.', '"': '', '\'': ''}).strip()
                            if re.match(r'[^\w-]', bookname):  # remove books with bad characters in title
                                logger.debug("[%s] removed book for bad characters" % bookname)
                                rejected = 'chars', 'Bad characters in bookname'

                        if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                            # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                            if book['date'] > today()[:len(book['date'])]:
                                logger.debug('Rejecting %s, future publication date %s' % (bookname, book['date']))
                                rejected = 'future', 'Future publication date [%s]' % book['date']

                        if not rejected and lazylibrarian.CONFIG['NO_PUBDATE']:
                            if not book['date']:
                                logger.debug('Rejecting %s, no publication date' % bookname)
                                rejected = 'date', 'No publication date'

                        if not rejected and lazylibrarian.CONFIG['NO_ISBN']:
                            if not isbnhead:
                                logger.debug('Rejecting %s, no isbn' % bookname)
                                rejected = 'isbn', 'No ISBN'

                        if not rejected:
                            cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                            cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                            match = myDB.match(cmd, (bookname, authorname))
                            if match:
                                if match['BookID'] != bookid:  # we have a different book with this author/title already
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                                 (match['BookID'], authorname, bookname, bookid))
                                    rejected = 'bookid', 'Got under different bookid %s' % bookid
                                    duplicates += 1

                        cmd = 'SELECT AuthorName,BookName,AudioStatus,books.Status FROM books,authors'
                        cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                        match = myDB.match(cmd, (bookid,))
                        if match:  # we have a book with this bookid already
                            if bookname != match['BookName'] or authorname != match['AuthorName']:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                             (bookid, authorname, bookname, match['AuthorName'], match['BookName']))
                            else:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                             (bookid, authorname, bookname))
                                check_status = True
                            duplicates += 1
                            rejected = 'got', 'Already got this book in database'

                            # Make sure we don't reject books we have got
                            if match['Status'] in ['Open', 'Have'] or match['AudioStatus'] in ['Open', 'Have']:
                                rejected = None

                        if rejected and rejected[0] not in ignorable:
                            removedResults += 1
                        if check_status or rejected is None or (
                                lazylibrarian.CONFIG['IMP_IGNORE'] and rejected[0] in ignorable):  # dates, isbn

                            cmd = 'SELECT Status,AudioStatus,BookFile,AudioFile,Manual,BookAdded,BookName '
                            cmd += 'FROM books WHERE BookID=?'
                            existing = myDB.match(cmd, (bookid,))
                            if existing:
                                book_status = existing['Status']
                                audio_status = existing['AudioStatus']
                                if lazylibrarian.CONFIG['FOUND_STATUS'] == 'Open':
                                    if book_status == 'Have' and existing['BookFile']:
                                        book_status = 'Open'
                                    if audio_status == 'Have' and existing['AudioFile']:
                                        audio_status = 'Open'
                                locked = existing['Manual']
                                added = existing['BookAdded']
                                if locked is None:
                                    locked = False
                                elif locked.isdigit():
                                    locked = bool(int(locked))
                            else:
                                book_status = bookstatus  # new_book status, or new_author status
                                audio_status = audiostatus
                                added = today()
                                locked = False

                            if rejected:
                                reason = rejected[1]
                                if rejected[0] in ignorable:
                                    book_status = 'Ignored'
                                    audio_status = 'Ignored'
                                    book_ignore_count += 1
                            else:
                                reason = ''

                            if locked:
                                locked_count += 1
                            else:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorID": authorid,
                                    "BookName": bookname,
                                    "BookSub": book['sub'],
                                    "BookDesc": book['desc'],
                                    "BookIsbn": book['isbn'],
                                    "BookPub": book['pub'],
                                    "BookGenre": book['genre'],
                                    "BookImg": book['img'],
                                    "BookLink": book['link'],
                                    "BookRate": float(book['rate']),
                                    "BookPages": book['pages'],
                                    "BookDate": book['date'],
                                    "BookLang": booklang,
                                    "Status": book_status,
                                    "AudioStatus": audio_status,
                                    "BookAdded": added,
                                    "WorkID": '',
                                    "ScanResult": reason
                                }

                                myDB.upsert("books", newValueDict, controlValueDict)
                                logger.debug("Book found: " + bookname + " " + book['date'])
                                if 'nocover' in book['img'] or 'nophoto' in book['img']:
                                    # try to get a cover from another source
                                    workcover, source = getBookCover(bookid)
                                    if workcover:
                                        logger.debug('Updated cover for %s using %s' % (bookname, source))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": workcover}
                                        myDB.upsert("books", newValueDict, controlValueDict)

                                elif book['img'] and book['img'].startswith('http'):
                                    link, success, _ = cache_img("book", bookid, book['img'], refresh=refresh)
                                    if success:
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": link}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                    else:
                                        logger.debug('Failed to cache image for %s' % book['img'])

                                serieslist = []
                                if book['series']:
                                    serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))]
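                                # the entries appear to be (seriesid, seriesnum, seriesname) tuples;
                                # the first field is left blank here because only the name/number
                                # parsed from the book data are known at this point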
                                if lazylibrarian.CONFIG['ADD_SERIES']:
                                    newserieslist = getWorkSeries(bookid)
                                    if newserieslist:
                                        serieslist = newserieslist
                                        logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
                                    setSeries(serieslist, bookid)

                                new_status = setStatus(bookid, serieslist, bookstatus)

                                if new_status != book_status:
                                    book_status = new_status

                                worklink = getWorkPage(bookid)
                                if worklink:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"WorkPage": worklink}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                                if not existing_book:
                                    logger.debug("[%s] Added book: %s [%s] status %s" %
                                                 (authorname, bookname, booklang, book_status))
                                    added_count += 1
                                else:
                                    logger.debug("[%s] Updated book: %s [%s] status %s" %
                                                 (authorname, bookname, booklang, book_status))
                                    updated_count += 1
            except KeyError:
                pass

            deleteEmptySeries()
            logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' %
                         (authorname, api_hits, plural(api_hits)))
            cmd = 'SELECT BookName, BookLink, BookDate, BookImg, BookID from books WHERE AuthorID=?'
            cmd += ' AND Status != "Ignored" order by BookDate DESC'
            lastbook = myDB.match(cmd, (authorid,))

            if lastbook:  # maybe there are no books [remaining] for this author
                lastbookname = lastbook['BookName']
                lastbooklink = lastbook['BookLink']
                lastbookdate = lastbook['BookDate']
                lastbookid = lastbook['BookID']
                lastbookimg = lastbook['BookImg']
            else:
                lastbookname = ""
                lastbooklink = ""
                lastbookdate = ""
                lastbookid = ""
                lastbookimg = ""

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {
                "Status": entrystatus,
                "LastBook": lastbookname,
                "LastLink": lastbooklink,
                "LastDate": lastbookdate,
                "LastBookID": lastbookid,
                "LastBookImg": lastbookimg
            }

            myDB.upsert("authors", newValueDict, controlValueDict)
            resultcount = added_count + updated_count
            logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
            logger.debug("Found %s locked book%s" % (locked_count, plural(locked_count)))
            logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
            logger.debug("Removed %s incorrect/incomplete result%s" % (removedResults, plural(removedResults)))
            logger.debug("Removed %s duplicate result%s" % (duplicates, plural(duplicates)))
            logger.debug("Ignored %s book%s" % (book_ignore_count, plural(book_ignore_count)))
            logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount)))

            myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                        (authorname, api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                         cache_hits, ignored, removedResults, not_cached, duplicates))

            if refresh:
                logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                            (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
            else:
                logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                            (authorname, added_count, plural(added_count)))

        except Exception:
            logger.error('Unhandled exception in GB.get_author_books: %s' % traceback.format_exc())
Example #27
0
    def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped",
                         entrystatus='Active', refresh=False):
        # noinspection PyBroadException
        try:
            logger.debug('[%s] Now processing books with Google Books API' % authorname)
            # Google doesn't like accents in author names
            set_url = self.url + quote('inauthor:"%s"' % unaccented_str(authorname))

            api_hits = 0
            gr_lang_hits = 0
            lt_lang_hits = 0
            gb_lang_change = 0
            cache_hits = 0
            not_cached = 0
            startindex = 0
            resultcount = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            book_ignore_count = 0
            total_count = 0
            number_results = 1

            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            # Author is loading
            myDB = database.DBConnection()
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            try:
                while startindex < number_results:

                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urlencode(self.params)

                    try:
                        jsonresults, in_cache = get_json_request(URL, useCache=not refresh)
                        if jsonresults is None:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                    except Exception as err:
                        if hasattr(err, 'reason'):
                            errmsg = err.reason
                        else:
                            errmsg = str(err)
                        logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                        break

                    if number_results == 0:
                        logger.warn('Found no results for %s' % authorname)
                        break
                    else:
                        logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname))

                    startindex += 40
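                    # Google Books returns at most 40 volumes per request; self.params is assumed
                    # to ask for maxResults=40, so stepping startIndex by 40 pages through
                    # totalItems until the while condition above is no longer met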

                    for item in jsonresults['items']:

                        total_count += 1

                        # skip if there is no author; a result without an author is not a usable book
                        try:
                            _ = item['volumeInfo']['authors'][0]
                        except KeyError:
                            logger.debug('Skipped a result with no author field.')
                            continue

                        try:
                            if item['volumeInfo']['industryIdentifiers'][0]['type'] == 'ISBN_10':
                                bookisbn = item['volumeInfo'][
                                    'industryIdentifiers'][0]['identifier']
                            else:
                                bookisbn = ""
                        except KeyError:
                            bookisbn = ""

                        isbnhead = ""
                        if len(bookisbn) == 10:
                            isbnhead = bookisbn[0:3]
                        elif len(bookisbn) == 13:
                            isbnhead = bookisbn[3:6]
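                        # e.g. an ISBN-10 of '0552124354' gives isbnhead '055', and an ISBN-13 such
                        # as '9780552124355' also gives '055' (the digits just after the 978/979
                        # prefix); isbnhead is used below for the cached-language lookup and the
                        # ISBN group tables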

                        try:
                            booklang = item['volumeInfo']['language']
                        except KeyError:
                            booklang = "Unknown"

                        # do we care about language?
                        if "All" not in valid_langs:
                            if bookisbn != "":
                                # Google sometimes reports a book as English when it is not, so double-check
                                if booklang == "Unknown" or booklang == "en":
                                    googlelang = booklang
                                    match = False
                                    lang = myDB.match('SELECT lang FROM languages where isbn=?', (isbnhead,))
                                    if lang:
                                        booklang = lang['lang']
                                        cache_hits += 1
                                        logger.debug("Found cached language [%s] for [%s]" % (booklang, isbnhead))
                                        match = True
                                    if not match:  # no match in cache, try lookup dict
                                        if isbnhead:
                                            if len(bookisbn) == 13 and bookisbn.startswith('979'):
                                                for lang in lazylibrarian.isbn_979_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_979_dict[lang]
                                                        logger.debug("ISBN979 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            elif (len(bookisbn) == 10) or \
                                                    (len(bookisbn) == 13 and bookisbn.startswith('978')):
                                                for lang in lazylibrarian.isbn_978_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_978_dict[lang]
                                                        logger.debug("ISBN978 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            if match:
                                                myDB.action('insert into languages values (?, ?)',
                                                            (isbnhead, booklang))
                                                logger.debug("GB language: " + booklang)

                                    if not match:
                                        # try searching librarything for a language code using the isbn
                                        # if no language found, librarything return value is "invalid" or "unknown"
                                        # librarything returns plain text, not xml
                                        BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + bookisbn
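                                        # thingLang appears to return a bare language code such as
                                        # 'eng' on success, hence the plain-text checks below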
                                        proxies = proxyList()
                                        try:
                                            librarything_wait()
                                            timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
                                            r = requests.get(BOOK_URL, timeout=timeout, proxies=proxies)
                                            resp = r.text
                                            lt_lang_hits += 1
                                            logger.debug(
                                                "LibraryThing reports language [%s] for %s" % (resp, isbnhead))
                                            if resp != 'invalid' and resp != 'unknown':
                                                booklang = resp  # found a language code
                                                match = True
                                                myDB.action('insert into languages values (?, ?)',
                                                            (isbnhead, booklang))
                                                logger.debug("LT language: " + booklang)
                                        except Exception as e:
                                            booklang = ""
                                            logger.error("%s finding language: %s" % (type(e).__name__, str(e)))

                                    if match:
                                        # We found a better language match
                                        if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]:
                                            # these are all english, may need to expand this list
                                            booknamealt = item['volumeInfo']['title']
                                            logger.debug("%s Google thinks [%s], we think [%s]" %
                                                         (booknamealt, googlelang, booklang))
                                            gb_lang_change += 1
                                    else:  # No match anywhere, accept google language
                                        booklang = googlelang

                            # skip if the language is not one of the wanted languages
                            if booklang not in valid_langs:
                                booknamealt = item['volumeInfo']['title']
                                logger.debug(
                                    'Skipped [%s] with language %s' %
                                    (booknamealt, booklang))
                                ignored += 1
                                continue

                        try:
                            bookpub = item['volumeInfo']['publisher']
                        except KeyError:
                            bookpub = ""

                        try:
                            booksub = item['volumeInfo']['subtitle']
                        except KeyError:
                            booksub = ""

                        if not booksub:
                            series = ""
                            seriesNum = ""
                        else:
                            try:
                                series = booksub.split('(')[1].split(' Series ')[0]
                            except IndexError:
                                series = ""
                            if series.endswith(')'):
                                series = series[:-1]
                            try:
                                seriesNum = booksub.split('(')[1].split(' Series ')[1].split(')')[0]
                                if seriesNum[0] == '#':
                                    seriesNum = seriesNum[1:]
                            except IndexError:
                                seriesNum = ""

                            if not seriesNum and '#' in series:
                                words = series.rsplit('#', 1)
                                series = words[0].strip()
                                seriesNum = words[1].strip()
                            if not seriesNum and ' ' in series:
                                words = series.rsplit(' ', 1)
                                # has to be unicode for isnumeric()
                                if (u"%s" % words[1]).isnumeric():
                                    series = words[0]
                                    seriesNum = words[1]
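                            # e.g. a subtitle of 'A Novel (The Culling Series #3)' is parsed above
                            # into series 'The Culling' and seriesNum '3'; a subtitle like
                            # '(Discworld #2)' falls through to the '#' fallback, giving 'Discworld' / '2'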

                        try:
                            bookdate = item['volumeInfo']['publishedDate']
                        except KeyError:
                            bookdate = '0000-00-00'

                        try:
                            bookimg = item['volumeInfo']['imageLinks']['thumbnail']
                        except KeyError:
                            bookimg = 'images/nocover.png'

                        try:
                            bookrate = item['volumeInfo']['averageRating']
                        except KeyError:
                            bookrate = 0

                        try:
                            bookpages = item['volumeInfo']['pageCount']
                        except KeyError:
                            bookpages = 0

                        try:
                            bookgenre = item['volumeInfo']['categories'][0]
                        except KeyError:
                            bookgenre = ""

                        try:
                            bookdesc = item['volumeInfo']['description']
                        except KeyError:
                            bookdesc = ""

                        rejected = False
                        check_status = False

                        bookname = item['volumeInfo']['title']
                        # fetch the volume id up front so the rejection messages below can use it
                        bookid = item['id']

                        if not bookname:
                            logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorname))
                            removedResults += 1
                            rejected = True
                        else:
                            bookname = replace_all(unaccented(bookname), {':': '.', '"': '', '\'': ''}).strip()
                            booklink = item['volumeInfo']['canonicalVolumeLink']
                            bookrate = float(bookrate)

                            # GoodReads sometimes has multiple bookids for the same book (same author/title, different
                            # editions) and sometimes uses the same bookid if the book is the same but the title is
                            # slightly different. Not sure if googlebooks does too, but we only want one...
                            existing_book = myDB.match('SELECT Status,Manual,BookAdded FROM books WHERE BookID=?',
                                                       (bookid,))
                            if existing_book:
                                book_status = existing_book['Status']
                                locked = existing_book['Manual']
                                added = existing_book['BookAdded']
                                if locked is None:
                                    locked = False
                                elif locked.isdigit():
                                    locked = bool(int(locked))
                            else:
                                book_status = bookstatus  # new_book status, or new_author status
                                added = today()
                                locked = False

                        if not rejected and re.match('[^\w-]', bookname):  # remove books with bad characters in title
                            logger.debug("[%s] removed book for bad characters" % bookname)
                            removedResults += 1
                            rejected = True

                        if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                            # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                            if bookdate > today()[:len(bookdate)]:
                                logger.debug('Rejecting %s, future publication date %s' % (bookname, bookdate))
                                removedResults += 1
                                rejected = True
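                            # today() is assumed to return 'yyyy-mm-dd', so a partial bookdate such
                            # as '2025' or '2025-06' is compared against the same-length prefix of
                            # today's date, rejecting publication dates still in the future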

                        if not rejected:
                            cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                            cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                            match = myDB.match(cmd, (bookname.replace('"', '""'), authorname.replace('"', '""')))
                            if match:
                                if match['BookID'] != bookid:  # we have a different book with this author/title already
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                                 (match['BookID'], authorname, bookname, bookid))
                                    rejected = True
                                    duplicates += 1

                        if not rejected:
                            cmd = 'SELECT AuthorName,BookName FROM books,authors'
                            cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                            match = myDB.match(cmd, (bookid,))
                            if match:  # we have a book with this bookid already
                                if bookname != match['BookName'] or authorname != match['AuthorName']:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                                 (bookid, authorname, bookname, match['AuthorName'], match['BookName']))
                                else:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                                 (bookid, authorname, bookname))
                                    check_status = True
                                duplicates += 1
                                rejected = True

                        if check_status or not rejected:
                            if book_status != "Ignored" and not locked:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorID": authorid,
                                    "BookName": bookname,
                                    "BookSub": booksub,
                                    "BookDesc": bookdesc,
                                    "BookIsbn": bookisbn,
                                    "BookPub": bookpub,
                                    "BookGenre": bookgenre,
                                    "BookImg": bookimg,
                                    "BookLink": booklink,
                                    "BookRate": bookrate,
                                    "BookPages": bookpages,
                                    "BookDate": bookdate,
                                    "BookLang": booklang,
                                    "Status": book_status,
                                    "AudioStatus": lazylibrarian.CONFIG['NEWAUDIO_STATUS'],
                                    "BookAdded": added
                                }
                                resultcount += 1

                                myDB.upsert("books", newValueDict, controlValueDict)
                                logger.debug("Book found: " + bookname + " " + bookdate)
                                updated = False
                                if 'nocover' in bookimg or 'nophoto' in bookimg:
                                    # try to get a cover from librarything
                                    workcover = getBookCover(bookid)
                                    if workcover:
                                        logger.debug('Updated cover for %s to %s' % (bookname, workcover))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": workcover}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True

                                elif bookimg and bookimg.startswith('http'):
                                    link, success = cache_img("book", bookid, bookimg, refresh=refresh)
                                    if success:
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": link}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True
                                    else:
                                        logger.debug('Failed to cache image for %s' % bookimg)

                                seriesdict = {}
                                if lazylibrarian.CONFIG['ADD_SERIES']:  # prefer series info from librarything
                                    seriesdict = getWorkSeries(bookid)
                                    if seriesdict:
                                        logger.debug('Updated series: %s [%s]' % (bookid, seriesdict))
                                        updated = True
                                    # librarything doesn't have series info. Any in the title?
                                    elif series:
                                        seriesdict = {cleanName(unaccented(series)): seriesNum}
                                    setSeries(seriesdict, bookid)

                                new_status = setStatus(bookid, seriesdict, bookstatus)

                                if new_status != book_status:
                                    book_status = new_status
                                    updated = True

                                worklink = getWorkPage(bookid)
                                if worklink:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"WorkPage": worklink}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                                if not existing_book:
                                    logger.debug("[%s] Added book: %s [%s] status %s" %
                                                 (authorname, bookname, booklang, book_status))
                                    added_count += 1
                                elif updated:
                                    logger.debug("[%s] Updated book: %s [%s] status %s" %
                                                 (authorname, bookname, booklang, book_status))
                                    updated_count += 1
                            else:
                                book_ignore_count += 1
            except KeyError:
                pass

            deleteEmptySeries()
            logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' %
                         (authorname, api_hits, plural(api_hits)))
            cmd = 'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID=?'
            cmd += ' AND Status != "Ignored" order by BookDate DESC'
            lastbook = myDB.match(cmd, (authorid,))

            if lastbook:  # maybe there are no books [remaining] for this author
                lastbookname = lastbook['BookName']
                lastbooklink = lastbook['BookLink']
                lastbookdate = lastbook['BookDate']
                lastbookimg = lastbook['BookImg']
            else:
                lastbookname = ""
                lastbooklink = ""
                lastbookdate = ""
                lastbookimg = ""

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {
                "Status": entrystatus,
                "LastBook": lastbookname,
                "LastLink": lastbooklink,
                "LastDate": lastbookdate,
                "LastBookImg": lastbookimg
            }

            myDB.upsert("authors", newValueDict, controlValueDict)

            logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s for author" % (ignored, plural(ignored)))
            logger.debug("Removed %s bad character or no-name result%s for author" %
                         (removedResults, plural(removedResults)))
            logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates)))
            logger.debug("Found %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count)))
            logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount)))

            myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                        (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                         cache_hits, ignored, removedResults, not_cached, duplicates))

            if refresh:
                logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                            (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
            else:
                logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                            (authorname, added_count, plural(added_count)))

        except Exception:
            logger.error('Unhandled exception in GB.get_author_books: %s' % traceback.format_exc())
Example #28
0
def TDL(book=None):

    provider = "torrentdownloads"
    host = lazylibrarian.TDL_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host)

    params = {"type": "search", "cid": "2", "search": book['searchterm']}
    searchURL = providerurl + "/rss.xml?%s" % urllib.urlencode(params)

    try:
        request = urllib2.Request(searchURL)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST,
                              lazylibrarian.PROXY_TYPE)
        request.add_header('User-Agent', USER_AGENT)
        data = urllib2.urlopen(request, timeout=90)
    except socket.timeout:
        logger.debug('Timeout fetching data from %s' % provider)
        data = False
    except (urllib2.HTTPError, urllib2.URLError, ssl.SSLError) as e:
        # may return 404 if no results, not really an error
        if hasattr(e, 'code') and e.code == 404:
            logger.debug(searchURL)
            logger.debug(u"No results found from %s for %s" %
                         (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            if hasattr(e, 'reason'):
                errmsg = e.reason
            else:
                errmsg = str(e)
            logger.debug('Error fetching data from %s: %s' %
                         (provider, errmsg))
        data = False

    results = []

    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
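    # NUMBEROFSEEDERS - 1 so that the strict 'minimumseeders < seeders' checks below
    # accept results with at least NUMBEROFSEEDERS seeders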
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = item['title']
                    seeders = int(item['seeders'])
                    link = item['link']
                    size = int(item['size'])
                    url = None

                    if link and minimumseeders < seeders:
                        # no point requesting the magnet link if not enough seeders
                        request = urllib2.Request(link)
                        if lazylibrarian.PROXY_HOST:
                            request.set_proxy(lazylibrarian.PROXY_HOST,
                                              lazylibrarian.PROXY_TYPE)
                        request.add_header('User-Agent', USER_AGENT)

                        conn = urllib2.urlopen(request, timeout=90)
                        result = conn.read()
                        url = None
                        new_soup = BeautifulSoup(result)
                        for link in new_soup.findAll('a'):
                            output = link.get('href')
                            if output and output.startswith('magnet'):
                                url = output
                                break

                    if minimumseeders < int(seeders):
                        if not url or not title:
                            logger.debug('Missing url or title')
                        else:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                            })
                            logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error(u"An error occurred in the %s parser: %s" %
                                 (provider, str(e)))

    logger.debug(
        u"Found %i result%s from %s for %s" %
        (len(results), plural(len(results)), provider, book['searchterm']))
    return results
Example #29
0
def GEN(book=None):

    provider = "libgen"
    host = lazylibrarian.GEN_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    searchURL = url_fix(
        host +
        "/search.php?view=simple&open=0&phrase=0&column=def&res=100&req=" +
        book['searchterm'])

    result, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in result:
            logger.debug(u"No results found from %s for %s" %
                         (provider, book['searchterm']))
        elif '111' in result:
            # looks like libgen has ip based access limits
            logger.error(
                'Access forbidden. Please wait a while before trying %s again.'
                % provider)
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' %
                         (provider, result))
        result = False

    results = []

    if result:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        soup = BeautifulSoup(result)
        try:
            table = soup.findAll('table')[2]
            rows = table.findAll('tr')
        except Exception:  # no results = no table in result page
            rows = []

        c1 = []
        c2 = []
        c7 = []
        c8 = []

        if len(rows) > 1:
            for row in rows[1:]:
                if len(row.findAll('td')) > 8:
                    c1.append(row.findAll('td')[1])
                    c2.append(row.findAll('td')[2])
                    c7.append(row.findAll('td')[7])
                    c8.append(row.findAll('td')[8])

        for col1, col2, col7, col8 in zip(c1, c2, c7, c8):
            try:
                author = unaccented(col1.text)
                title = unaccented(
                    str(col2).split('>')[2].split('<')[0].strip())
                link = str(col2).split('href="')[1].split('?')[1].split('"')[0]
                size = unaccented(col7.text).upper()
                extn = col8.text

                try:
                    mult = 1
                    if 'K' in size:
                        size = size.split('K')[0]
                        mult = 1024
                    elif 'M' in size:
                        size = size.split('M')[0]
                        mult = 1024 * 1024
                    size = int(float(size) * mult)
                except (ValueError, IndexError):
                    size = 0
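                # e.g. a size column of '1.5 MB' parses to int(1.5 * 1024 * 1024) = 1572864 bytes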

                if link and title:
                    if author:
                        title = author.strip() + ' ' + title.strip()
                    if extn:
                        title = title + '.' + extn

                    bookURL = url_fix(host + "/ads.php?" + link)
                    bookresult, success = fetchURL(bookURL)
                    if not success:
                        # may return 404 if no results, not really an error
                        if '404' in bookresult:
                            logger.debug(u"No results found from %s for %s" %
                                         (provider, book['searchterm']))
                            bookresult = False
                        else:
                            logger.debug(bookURL)
                            logger.debug('Error fetching data from %s: %s' %
                                         (provider, bookresult))
                        bookresult = False
                    if bookresult:
                        url = None
                        new_soup = BeautifulSoup(bookresult)
                        for link in new_soup.findAll('a'):
                            output = link.get('href')
                            if output and output.startswith('/get.php'):
                                url = output
                                break

                        if url:
                            url = url_fix(host + url)
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                            })
                            logger.debug('Found %s, Size %s' % (title, size))

            except Exception as e:
                logger.error(u"An error occurred in the %s parser: %s" %
                             (provider, str(e)))

    logger.debug(
        u"Found %i result%s from %s for %s" %
        (len(results), plural(len(results)), provider, book['searchterm']))
    return results
Example #30
0
def LIME(book=None):

    provider = "Limetorrent"
    host = lazylibrarian.LIME_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    searchURL = url_fix(host + "/searchrss/other/?q=" + book['searchterm'])

    try:
        request = urllib2.Request(searchURL)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST,
                              lazylibrarian.PROXY_TYPE)
        request.add_header('User-Agent', USER_AGENT)
        data = urllib2.urlopen(request, timeout=90)
    except socket.timeout:
        logger.debug('Timeout fetching data from %s' % provider)
        data = False
    except (urllib2.HTTPError, urllib2.URLError, ssl.SSLError) as e:
        # may return 404 if no results, not really an error
        if hasattr(e, 'code') and e.code == 404:
            logger.debug(u"No results found from %s for %s" %
                         (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            if hasattr(e, 'reason'):
                errmsg = e.reason
            else:
                errmsg = str(e)
            logger.debug('Error fetching data from %s: %s' %
                         (provider, errmsg))
        data = False

    results = []

    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = item['description']
                        seeders = int(
                            seeders.split('Seeds:')[1].split(',')[0].strip())
                    except (IndexError, ValueError):
                        seeders = 0

                    size = item['size']
                    try:
                        size = int(size)
                    except ValueError:
                        size = 0

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['url']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # may have ip based access limits
                        logger.error(
                            'Access forbidden. Please wait a while before trying %s again.'
                            % provider)
                    else:
                        logger.error(
                            u"An error occurred in the %s parser: %s" %
                            (provider, str(e)))

    logger.debug(
        u"Found %i result%s from %s for %s" %
        (len(results), plural(len(results)), provider, book['searchterm']))
    return results
Example #31
0
def ZOO(book=None):

    provider = "zooqle"
    host = lazylibrarian.ZOO_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/search?q=" + book['searchterm'])

    params = {"category": "books", "fmt": "rss"}
    searchURL = providerurl + "&%s" % urllib.urlencode(params)

    try:
        request = urllib2.Request(searchURL)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST,
                              lazylibrarian.PROXY_TYPE)
        request.add_header('User-Agent', USER_AGENT)
        data = urllib2.urlopen(request, timeout=90)
    except socket.timeout:
        logger.debug('Timeout fetching data from %s' % provider)
        data = False
    except (urllib2.HTTPError, urllib2.URLError, ssl.SSLError) as e:
        # may return 404 if no results, not really an error
        if hasattr(e, 'code') and e.code == 404:
            logger.debug(u"No results found from %s for %s" %
                         (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            if hasattr(e, 'reason'):
                errmsg = e.reason
            else:
                errmsg = str(e)
            logger.debug('Error fetching data from %s: %s' %
                         (provider, errmsg))
        data = False

    results = []

    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    seeders = int(item['torrent_seeds'])
                    link = item['links'][1]['href']
                    size = int(item['links'][1]['length'])
                    magnet = item['torrent_magneturi']

                    url = None
                    if link:
                        url = link
                    if magnet:  # if both, prefer magnet over torrent
                        url = magnet

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # looks like zooqle has ip based access limits
                        logger.error(
                            'Access forbidden. Please wait a while before trying %s again.'
                            % provider)
                    else:
                        logger.error(
                            u"An error occurred in the %s parser: %s" %
                            (provider, str(e)))

    logger.debug(
        u"Found %i result%s from %s for %s" %
        (len(results), plural(len(results)), provider, book['searchterm']))
    return results
Example #32
0
def oldKAT(book=None):

    provider = "KAT"
    host = lazylibrarian.KAT_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/usearch/" + book['searchterm'])
    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1

    params = {
        "category": "books",
        "field": "seeders",
        "sorder": "desc",
        "rss": "1"
    }
    searchURL = providerurl + "/?%s" % urllib.urlencode(params)

    try:
        request = urllib2.Request(searchURL)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST,
                              lazylibrarian.PROXY_TYPE)
        request.add_header('User-Agent', USER_AGENT)
        data = urllib2.urlopen(request, timeout=90)
    except socket.timeout:
        logger.debug('Timeout fetching data from %s' % provider)
        data = False
    except (urllib2.HTTPError, urllib2.URLError, ssl.SSLError) as e:
        # seems KAT returns 404 if no results, not really an error
        if hasattr(e, 'code') and e.code == 404:
            logger.debug(u"No results found from %s for %s" %
                         (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            if hasattr(e, 'reason'):
                errmsg = e.reason
            else:
                errmsg = str(e)
            logger.debug('Error fetching data from %s: %s' %
                         (provider, errmsg))
        data = False

    results = []

    if data:
        logger.debug(u'Parsing results from <a href="%s">KAT</a>' % searchURL)
        d = feedparser.parse(data)
        if len(d.entries):
            logger.debug(
                u"Found %i result%s from %s for %s, checking seeders" %
                (len(d.entries), plural(len(
                    d.entries)), provider, book['searchterm']))
            for item in d.entries:
                try:
                    title = item['title']

                    seeders = item['torrent_seeds']
                    url = item['links'][1]['href']
                    size = int(item['links'][1]['length'])

                    if minimumseeders < int(seeders):
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': "KAT",
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                        })

                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug(
                            'Found %s but %s seeder%s' %
                            (title, int(seeders), plural(int(seeders))))

                except Exception as e:
                    logger.error(
                        u"An unknown error occurred in the KAT parser: %s" %
                        str(e))

    logger.debug(
        u"Found %i result%s from %s for %s" %
        (len(results), plural(len(results)), provider, book['searchterm']))
    return results
Example #33
0
def KAT(book=None):

    provider = "KAT"
    host = lazylibrarian.KAT_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/usearch/" + book['searchterm'])

    params = {"category": "books", "field": "seeders", "sorder": "desc"}
    searchURL = providerurl + "/?%s" % urllib.urlencode(params)

    result, success = fetchURL(searchURL)
    if not success:
        # seems KAT returns 404 if no results, not really an error
        if '404' in result:
            logger.debug(u"No results found from %s for %s" %
                         (provider, book['searchterm']))
            result = False
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' %
                         (provider, result))
        result = False

    results = []

    if result:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
        soup = BeautifulSoup(result)

        try:
            table = soup.findAll('table')[1]
            rows = table.findAll('tr')
        except Exception:  # no results = no table in result page
            rows = []

        c0 = []
        c1 = []
        c3 = []

        if len(rows) > 1:
            for row in rows[1:]:
                if len(row.findAll('td')) > 3:
                    c0.append(row.findAll('td')[0])
                    c1.append(row.findAll('td')[1])
                    c3.append(row.findAll('td')[3])

        for col0, col1, col3 in zip(c0, c1, c3):
            try:
                title = unaccented(
                    str(col0).split('cellMainLink">')[1].split('<')[0])
                # kat can return magnet or torrent or both. If both, prefer magnet...
                try:
                    url = 'magnet' + str(col0).split('href="magnet')[1].split(
                        '"')[0]
                except IndexError:
                    url = 'http' + str(col0).split('href="http')[1].split(
                        '.torrent?')[0] + '.torrent'

                try:
                    size = str(col1.text).replace('&nbsp;', '').upper()
                    mult = 1
                    if 'K' in size:
                        size = size.split('K')[0]
                        mult = 1024
                    elif 'M' in size:
                        size = size.split('M')[0]
                        mult = 1024 * 1024
                    size = int(float(size) * mult)
                except (ValueError, IndexError):
                    size = 0
                try:
                    seeders = int(col3.text)
                except ValueError:
                    seeders = 0

                if not url or not title:
                    logger.debug('Missing url or title')
                elif minimumseeders < seeders:
                    results.append({
                        'bookid': book['bookid'],
                        'tor_prov': provider,
                        'tor_title': title,
                        'tor_url': url,
                        'tor_size': str(size),
                    })
                    logger.debug('Found %s. Size: %s' % (title, size))
                else:
                    logger.debug('Found %s but %s seeder%s' %
                                 (title, seeders, plural(seeders)))
            except Exception as e:
                logger.error(u"An error occurred in the %s parser: %s" %
                             (provider, str(e)))

    logger.debug(
        u"Found %i result%s from %s for %s" %
        (len(results), plural(len(results)), provider, book['searchterm']))
    return results
Example #34
0
def KAT(book=None):

    provider = "KAT"
    host = lazylibrarian.KAT_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/usearch/" + book['searchterm'])

    params = {
        "category": "books",
        "field": "seeders",
        "sorder": "desc"
    }
    searchURL = providerurl + "/?%s" % urllib.urlencode(params)

    result, success = fetchURL(searchURL)
    if not success:
        # seems KAT returns 404 if no results, not really an error
        if '404' in result:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
            result = False
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, result))
        result = False

    results = []

    if result:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
        soup = BeautifulSoup(result)

        try:
            table = soup.findAll('table')[1]
            rows = table.findAll('tr')
        except Exception:  # no results = no table in result page
            rows = []

        c0 = []
        c1 = []
        c3 = []

        if len(rows) > 1:
            for row in rows[1:]:
                if len(row.findAll('td')) > 3:
                    c0.append(row.findAll('td')[0])
                    c1.append(row.findAll('td')[1])
                    c3.append(row.findAll('td')[3])

        for col0, col1, col3 in zip(c0, c1, c3):
            try:
                title = unaccented(str(col0).split('cellMainLink">')[1].split('<')[0])
                # kat can return magnet or torrent or both. If both, prefer magnet...
                try:
                    url = 'magnet' + str(col0).split('href="magnet')[1].split('"')[0]
                except IndexError:
                    url = 'http' + str(col0).split('href="http')[1].split('.torrent?')[0] + '.torrent'

                try:
                    size = str(col1.text).replace('&nbsp;', '').upper()
                    mult = 1
                    if 'K' in size:
                        size = size.split('K')[0]
                        mult = 1024
                    elif 'M' in size:
                        size = size.split('M')[0]
                        mult = 1024 * 1024
                    size = int(float(size) * mult)
                except (ValueError, IndexError):
                    size = 0
                try:
                    seeders = int(col3.text)
                except ValueError:
                    seeders = 0

                if not url or not title:
                    logger.debug('Missing url or title')
                elif minimumseeders < seeders:
                    results.append({
                        'bookid': book['bookid'],
                        'tor_prov': provider,
                        'tor_title': title,
                        'tor_url': url,
                        'tor_size': str(size),
                    })
                    logger.debug('Found %s. Size: %s' % (title, size))
                else:
                    logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
            except Exception as e:
                logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results
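Like the other torrent providers here, KAT only keeps a result when it has at least the configured number of seeders: minimumseeders is NUMBEROFSEEDERS - 1 and the test is minimumseeders < seeders. A minimal sketch of that filter with an illustrative helper name:

def enough_seeders(seeders, number_of_seeders):
    """True when a result has at least the configured number of seeders,
    matching the providers above (minimumseeders = NUMBEROFSEEDERS - 1)."""
    minimumseeders = int(number_of_seeders) - 1
    return minimumseeders < int(seeders)

# e.g. with NUMBEROFSEEDERS = 2: enough_seeders(2, 2) is True, enough_seeders(1, 2) is False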
Example #35
0
    def get_shelf_list(self):
        global consumer, client, token, user_id
        if not lazylibrarian.CONFIG['GR_API'] or not lazylibrarian.CONFIG['GR_SECRET'] or not \
                lazylibrarian.CONFIG['GR_OAUTH_TOKEN'] or not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
            logger.warn("Goodreads get shelf error: Please authorise first")
            return []
        else:
            #
            # loop over each page of shelves
            #     loop over each shelf
            #         add shelf to list
            #
            consumer = oauth.Consumer(key=str(lazylibrarian.CONFIG['GR_API']),
                                      secret=str(
                                          lazylibrarian.CONFIG['GR_SECRET']))
            token = oauth.Token(lazylibrarian.CONFIG['GR_OAUTH_TOKEN'],
                                lazylibrarian.CONFIG['GR_OAUTH_SECRET'])
            client = oauth.Client(consumer, token)
            user_id = self.getUserId()

            current_page = 0
            shelves = []
            page_shelves = 1
            while page_shelves:
                current_page = current_page + 1
                page_shelves = 0
                shelf_template = Template(
                    '${base}/shelf/list.xml?user_id=${user_id}&key=${key}&page=${page}'
                )
                body = urllib.urlencode({})
                headers = {'Content-Type': 'application/x-www-form-urlencoded'}
                request_url = shelf_template.substitute(
                    base='https://www.goodreads.com',
                    user_id=user_id,
                    page=current_page,
                    key=lazylibrarian.CONFIG['GR_API'])
                time_now = int(time.time())
                if time_now <= lazylibrarian.LAST_GOODREADS:
                    time.sleep(1)
                    lazylibrarian.LAST_GOODREADS = time_now
                try:
                    response, content = client.request(request_url, 'GET',
                                                       body, headers)
                except Exception as e:
                    logger.error("Exception in client.request: %s %s" %
                                 (type(e).__name__, str(e)))
                    return shelves

                if response['status'] != '200':
                    raise Exception('Failure status: %s for page %s' %
                                    (response['status'], current_page))
                xmldoc = xml.dom.minidom.parseString(content)

                shelf_list = xmldoc.getElementsByTagName('shelves')[0]
                for item in shelf_list.getElementsByTagName('user_shelf'):
                    shelf_name = item.getElementsByTagName(
                        'name')[0].firstChild.nodeValue
                    shelf_count = item.getElementsByTagName(
                        'book_count')[0].firstChild.nodeValue
                    shelf_exclusive = item.getElementsByTagName(
                        'exclusive_flag')[0].firstChild.nodeValue
                    shelves.append({
                        'name': shelf_name,
                        'books': shelf_count,
                        'exclusive': shelf_exclusive
                    })
                    page_shelves += 1

                    if lazylibrarian.LOGLEVEL > 2:
                        logger.debug(
                            'Shelf %s : %s: Exclusive %s' %
                            (shelf_name, shelf_count, shelf_exclusive))

                if lazylibrarian.LOGLEVEL > 2:
                    logger.debug('Found %s shelves on page %s' %
                                 (page_shelves, current_page))

            logger.debug(
                'Found %s shelves on %s page%s' %
                (len(shelves), current_page - 1, plural(current_page - 1)))
            # print shelves
            return shelves
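get_shelf_list paces its Goodreads calls so that no more than one request per second is issued, using LAST_GOODREADS as the timestamp of the previous call. A minimal sketch of that throttle, under the assumption that the timestamp is refreshed on every call (the code above only refreshes it when it sleeps):

import time

_last_goodreads = 0  # stand-in for lazylibrarian.LAST_GOODREADS

def wait_for_goodreads():
    """Sleep for a second if the previous Goodreads request happened within
    the current second, then record this call's timestamp."""
    global _last_goodreads
    time_now = int(time.time())
    if time_now <= _last_goodreads:
        time.sleep(1)
    _last_goodreads = time_now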
Example #36
0
def ZOO(book=None):

    provider = "zooqle"
    host = lazylibrarian.ZOO_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/search?q=" + book['searchterm'])

    params = {
        "category": "books",
        "fmt": "rss"
    }
    searchURL = providerurl + "&%s" % urllib.urlencode(params)

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
        data = False  # prevent the failed fetch text from being parsed below

    results = []

    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    seeders = int(item['torrent_seeds'])
                    link = item['links'][1]['href']
                    size = int(item['links'][1]['length'])
                    magnet = item['torrent_magneturi']

                    url = None
                    if link:
                        url = link
                    if magnet:  # if both, prefer magnet over torrent
                        url = magnet

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # looks like zooqle has ip based access limits
                        logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                    else:
                        logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results
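When a zooqle feed entry carries both a .torrent link and a magnet URI, the parser above prefers the magnet. A minimal sketch of that choice for a feedparser entry; the second links element is the enclosure, as read above:

def pick_download_url(item):
    """Prefer the magnet URI over the .torrent enclosure when both are
    present, as in the ZOO parser above. Returns None if neither exists."""
    links = item.get('links', [])
    link = links[1].get('href') if len(links) > 1 else None
    magnet = item.get('torrent_magneturi')
    if magnet:  # if both, prefer magnet over torrent
        return magnet
    return link or None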
Example #37
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if mags is None:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if mags is None:  # backlog search
            searchmags = myDB.select(
                'SELECT Title, Regex, DateType, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"'
            )
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select(
                    'SELECT Title,Regex,DateType,LastAcquired,IssueDate from magazines \
                                          WHERE Title=? AND Status="Active"',
                    (magazine['bookid'], ))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        # logger.debug("Removing old magazine search results")
        # myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' %
                    (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']
            datetype = searchmag['DateType']
            if not datetype:
                datetype = ''

            if not searchterm:
                dic = {
                    '...': '',
                    ' & ': ' ',
                    ' = ': ' ',
                    '?': '',
                    '$': 's',
                    ' + ': ' ',
                    '"': '',
                    ',': '',
                    '*': ''
                }
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)

                searchterm = re.sub('[.\-/]', ' ', searchterm)
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({
                "bookid": bookid,
                "searchterm": searchterm,
                "datetype": datetype
            })

        if not searchlist:
            logger.warn(
                'There is nothing to search for.  Mark some magazines as active.'
            )

        for book in searchlist:

            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No nzb providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(
                    book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG,
                                 0) + 1200 < timenow:
                        logger.warn(
                            'No direct providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_DIRECT_MSG = timenow

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                            'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(
                    book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No tor providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_TOR_MSG = timenow

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                            'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders = IterateOverRSSSites()
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No rss providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_RSS_MSG = timenow

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        resultlist.append({
                            'bookid': book['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': item[
                                'tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                            'nzbsize': item['tor_size'],
                            'nzbmode': item['tor_type']
                        })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace(
                        "'", "")  # suppress " in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(
                        nzbsize_temp, 1000
                    )  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    # Need to make sure that substrings of magazine titles don't get found
                    # (e.g. Maxim USA will find Maximum PC USA) so split into "words"
                    dic = {
                        '.': ' ',
                        '-': ' ',
                        '/': ' ',
                        '+': ' ',
                        '_': ' ',
                        '(': '',
                        ')': '',
                        '[': ' ',
                        ']': ' ',
                        '#': '# '
                    }
                    nzbtitle_formatted = replace_all(nzbtitle, dic).strip()
                    # remove extra spaces if they're in a row
                    nzbtitle_formatted = " ".join(nzbtitle_formatted.split())
                    nzbtitle_exploded = nzbtitle_formatted.split(' ')

                    results = myDB.match(
                        'SELECT * from magazines WHERE Title=?', (bookid, ))
                    if not results:
                        logger.debug(
                            'Magazine [%s] does not match search term [%s].' %
                            (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(
                            lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(
                                lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" %
                                             nzbtitle)
                                rejected = True

                        if not rejected:
                            if ' ' in bookid:
                                bookid_exploded = bookid.split(' ')
                            else:
                                bookid_exploded = [bookid]

                            # Check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015

                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = []
                                for word in nzbtitle_exploded:
                                    word = unaccented(word).lower()
                                    if word:
                                        wlist.append(word)
                                for word in bookid_exploded:
                                    word = unaccented(word).lower()
                                    if word and word not in wlist:
                                        logger.debug(
                                            "Rejecting %s, missing %s" %
                                            (nzbtitle, word))
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug(
                                        "Magazine title match failed " +
                                        bookid + " for " + nzbtitle_formatted)
                                else:
                                    logger.debug("Magazine title matched " +
                                                 bookid + " for " +
                                                 nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" %
                                             len(nzbtitle_exploded))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG[
                                'BLACKLIST_FAILED']:
                            blocked = myDB.match(
                                'SELECT * from wanted WHERE NZBurl=? and Status="Failed"',
                                (nzburl, ))
                            if blocked:
                                logger.debug(
                                    "Rejecting %s, blacklisted at %s" %
                                    (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG[
                                'BLACKLIST_PROCESSED']:
                            blocked = myDB.match(
                                'SELECT * from wanted WHERE NZBurl=?',
                                (nzburl, ))
                            if blocked:
                                logger.debug(
                                    "Rejecting %s, blacklisted at %s" %
                                    (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(
                                str(results['Reject']).lower())
                            reject_list += getList(
                                lazylibrarian.CONFIG['REJECT_MAGS'])
                            lower_title = unaccented(
                                nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Reject: %s' %
                                                 str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" %
                                                 (nzbtitle_formatted, word))
                                    break

                        if rejected:
                            rejects += 1
                        else:
                            regex_pass, issuedate, year = get_issue_date(
                                nzbtitle_exploded)
                            if regex_pass:
                                logger.debug('Issue %s (regex %s) for %s ' %
                                             (issuedate, regex_pass,
                                              nzbtitle_formatted))
                                datetype_ok = True
                                datetype = book['datetype']
                                if datetype:
                                    # check all wanted parts are in the regex result
                                    # Day Month Year Vol Iss (MM needs two months)

                                    if 'M' in datetype and regex_pass not in [
                                            1, 2, 3, 4, 5, 6, 7, 12
                                    ]:
                                        datetype_ok = False
                                    elif 'D' in datetype and regex_pass not in [
                                            3, 5, 6
                                    ]:
                                        datetype_ok = False
                                    elif 'MM' in datetype and regex_pass not in [
                                            1
                                    ]:  # bi monthly
                                        datetype_ok = False
                                    elif 'V' in datetype and 'I' in datetype and regex_pass not in [
                                            8, 9, 17, 18
                                    ]:
                                        datetype_ok = False
                                    elif 'V' in datetype and regex_pass not in [
                                            2, 10, 11, 12, 13, 14, 17, 18
                                    ]:
                                        datetype_ok = False
                                    elif 'I' in datetype and regex_pass not in [
                                            2, 10, 11, 12, 13, 14, 16, 17, 18
                                    ]:
                                        datetype_ok = False
                                    elif 'Y' in datetype and regex_pass not in [
                                            1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 13,
                                            15, 16, 18
                                    ]:
                                        datetype_ok = False
                            else:
                                datetype_ok = False
                                logger.debug(
                                    'Magazine %s not in a recognised date format.'
                                    % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                issuedate = "1970-01-01"  # provide a fake date for bad-date issues

                            # wanted issues go into wanted table marked "Wanted"
                            #  the rest into pastissues table marked "Skipped" or "Have"
                            insert_table = "pastissues"
                            comp_date = 0
                            if datetype_ok:
                                control_date = results['IssueDate']
                                logger.debug("Control date: [%s]" %
                                             control_date)
                                if not control_date:  # we haven't got any copies of this magazine yet
                                    # get a rough time just over MAX_AGE days ago to compare to, in format yyyy-mm-dd
                                    # could perhaps calc differently for weekly, biweekly etc
                                    # For magazines with only an issue number use zero as we can't tell age

                                    if str(issuedate).isdigit():
                                        logger.debug(
                                            'Magazine comparing issue numbers (%s)'
                                            % issuedate)
                                        control_date = 0
                                    elif re.match('\d+-\d\d-\d\d',
                                                  str(issuedate)):
                                        start_time = time.time()
                                        start_time -= int(
                                            lazylibrarian.CONFIG['MAG_AGE']
                                        ) * 24 * 60 * 60  # number of seconds in days
                                        if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                            start_time = 0
                                        control_date = time.strftime(
                                            "%Y-%m-%d",
                                            time.localtime(start_time))
                                        logger.debug(
                                            'Magazine date comparing to %s' %
                                            control_date)
                                    else:
                                        logger.debug(
                                            'Magazine unable to find comparison type [%s]'
                                            % issuedate)
                                        control_date = 0

                                if str(control_date).isdigit() and str(
                                        issuedate).isdigit():
                                    # for issue numbers, check if later than last one we have
                                    if regex_pass in [10, 12, 13] and year:
                                        issuedate = "%s%04d" % (year,
                                                                int(issuedate))
                                    else:
                                        issuedate = str(issuedate).zfill(4)
                                    if not control_date:
                                        comp_date = 1
                                    else:
                                        comp_date = int(issuedate) - int(
                                            control_date)
                                elif re.match('\d+-\d\d-\d\d', str(control_date)) and \
                                        re.match('\d+-\d\d-\d\d', str(issuedate)):
                                    # only grab a copy if it's newer than the most recent we have,
                                    # or newer than a month ago if we have none
                                    comp_date = datecompare(
                                        issuedate, control_date)
                                else:
                                    # invalid comparison of date and issue number
                                    comp_date = 0
                                    if re.match('\d+-\d\d-\d\d',
                                                str(control_date)):
                                        if regex_pass > 9 and year:
                                            # we assumed it was an issue number, but it could be a date
                                            year = check_int(year, 0)
                                            if regex_pass in [10, 12, 13]:
                                                issuedate = int(issuedate[:4])
                                            issuenum = check_int(issuedate, 0)
                                            if year and 1 <= issuenum <= 12:
                                                issuedate = "%04d-%02d-01" % (
                                                    year, issuenum)
                                                comp_date = datecompare(
                                                    issuedate, control_date)
                                        if not comp_date:
                                            logger.debug(
                                                'Magazine %s failed: Expecting a date'
                                                % nzbtitle_formatted)
                                    else:
                                        logger.debug(
                                            'Magazine %s failed: Expecting issue number'
                                            % nzbtitle_formatted)
                                    if not comp_date:
                                        bad_date += 1
                                        issuedate = "1970-01-01"

                            if issuedate == "1970-01-01":
                                logger.debug(
                                    'This issue of %s is of unknown age; skipping.'
                                    % nzbtitle_formatted)
                            elif not datetype_ok:
                                logger.debug(
                                    'This issue of %s not in a wanted date format.'
                                    % nzbtitle_formatted)
                            elif comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + issuedate
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug(
                                        'This issue of %s is new, downloading'
                                        % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' %
                                                 len(issues))
                                    if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug(
                                        'This issue of %s is already flagged for download'
                                        % issue)
                            else:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug(
                                        'This issue of %s is old; skipping.' %
                                        nzbtitle_formatted)
                                old_date += 1

                            # store only the _new_ matching results
                            #  Don't add a new entry if this issue has been found on an earlier search
                            #  and status has been user-set ( we only delete the "Skipped" ones )
                            #  In "wanted" table it might be already snatched/downloading/processing

                            mag_entry = myDB.match(
                                'SELECT Status from %s WHERE NZBtitle=? and NZBprov=?'
                                % insert_table, (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug(
                                        '%s is already in %s marked %s' %
                                        (nzbtitle, insert_table,
                                         mag_entry['Status']))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                if insert_table == 'pastissues':
                                    # try to mark ones we've already got
                                    match = myDB.match(
                                        "SELECT * from issues WHERE Title=? AND IssueDate=?",
                                        (bookid, issuedate))
                                    if match:
                                        insert_status = "Have"
                                    else:
                                        insert_status = "Skipped"
                                else:
                                    insert_status = "Wanted"
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": issuedate,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict,
                                            controlValueDict)
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Added %s to %s marked %s' %
                                                 (nzbtitle, insert_table,
                                                  insert_status))

                msg = 'Found %i result%s for %s. %i new,' % (
                    total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (
                    old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch, res = TORDownloadMethod(
                            magazine['bookid'], magazine['nzbtitle'],
                            magazine['nzburl'], 'Magazine')
                    elif magazine['nzbmode'] == 'direct':
                        snatch, res = DirectDownloadMethod(
                            magazine['bookid'], magazine['nzbtitle'],
                            magazine['nzburl'], 'Magazine')
                    elif magazine['nzbmode'] == 'nzb':
                        snatch, res = NZBDownloadMethod(
                            magazine['bookid'], magazine['nzbtitle'],
                            magazine['nzburl'], 'Magazine')
                    else:
                        res = 'Unhandled NZBmode [%s] for %s' % (
                            magazine['nzbmode'], magazine["nzburl"])
                        logger.error(res)
                        snatch = 0

                    if snatch:
                        logger.info(
                            'Downloading %s from %s' %
                            (magazine['nzbtitle'], magazine["nzbprov"]))
                        custom_notify_snatch(
                            "%s %s" % (magazine['bookid'], magazine['nzburl']))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']),
                                       magazine["nzbprov"], now()))
                        scheduleJob(action='Start', target='processDir')
                    else:
                        myDB.action(
                            'UPDATE wanted SET status="Failed",DLResult=? WHERE NZBurl=?',
                            (res, magazine["nzburl"]))

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' %
                     traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
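One of the checks above rejects a release unless every word of the magazine title appears in the release title, and the release title must contain extra words so it can still carry a date or issue number (so Maxim USA does not match Maximum PC USA). A minimal standalone sketch of that word check, using plain lower-casing instead of unaccented():

def title_words_match(mag_title, release_title):
    """True when every word of the magazine title appears in the release
    title and the release title has extra words for a date or issue number,
    mirroring the check in search_magazines above."""
    mag_words = [w.lower() for w in mag_title.split() if w]
    rel_words = [w.lower() for w in release_title.split() if w]
    if len(rel_words) <= len(mag_words):
        return False  # too short to include a date or issue number
    return all(word in rel_words for word in mag_words)

# e.g. title_words_match('Maxim USA', 'Maxim USA July 2015') is True
#      title_words_match('Maxim USA', 'Maximum PC USA July 2015') is False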
Example #38
0
def LIME(book=None):

    provider = "Limetorrent"
    host = lazylibrarian.LIME_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    searchURL = url_fix(host + "/searchrss/other/?q=" + book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
        data = False  # prevent the failed fetch text from being parsed below

    results = []

    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = item['description']
                        seeders = int(seeders.split('Seeds:')[1].split(',')[0].strip())
                    except (IndexError, ValueError) as e:
                        seeders = 0

                    size = item['size']
                    try:
                        size = int(size)
                    except ValueError:
                        size = 0

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['url']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # may have ip based access limits
                        logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                    else:
                        logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results
Example #39
0
    def _RecentBooks(self, **kwargs):
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        feed = {'title': 'LazyLibrarian OPDS - Recent Books', 'id': 'Recent Books', 'updated': now()}
        links = []
        entries = []
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=RecentBooks' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        links.append(getLink(href='%s/opensearchbooks.xml' % self.searchroot,
                             ftype='application/opensearchdescription+xml', rel='search', title='Search Books'))
        cmd = "select BookName,BookID,BookLibrary,BookDate,BookImg,BookDesc,BookAdded,BookFile,AuthorID "
        cmd += "from books where Status='Open' "
        if 'query' in kwargs:
            cmd += "AND BookName LIKE '%" + kwargs['query'] + "%' "
        cmd += "order by BookLibrary DESC"
        results = myDB.select(cmd)
        page = results[index:(index + self.PAGE_SIZE)]
        for book in page:
            mime_type = None
            if book['BookFile']:
                mime_type = mimeType(book['BookFile'])
            elif book['AudioFile']:
                mime_type = mimeType(book['AudioFile'])
            if mime_type:
                title = makeUnicode(book['BookName'])
                entry = {'title': escape(title),
                         'id': escape('book:%s' % book['BookID']),
                         'updated': opdstime(book['BookLibrary']),
                         'href': '%s?cmd=Serve&amp;bookid=%s' % (self.opdsroot, quote_plus(book['BookID'])),
                         'kind': 'acquisition',
                         'rel': 'file',
                         'type': mime_type}
                if lazylibrarian.CONFIG['OPDS_METAINFO']:
                    author = myDB.match("SELECT AuthorName from authors WHERE AuthorID='%s'" % book['AuthorID'])
                    author = makeUnicode(author['AuthorName'])
                    entry['image'] = self.searchroot + '/' + book['BookImg']
                    entry['content'] = escape('%s - %s' % (title, book['BookDesc']))
                    entry['author'] = escape('%s' % author)
                else:
                    entry['content'] = escape('%s (%s)' % (title, book['BookAdded']))
                entries.append(entry)

            """
                <link type="application/epub+zip" rel="http://opds-spec.org/acquisition"
                title="EPUB (no images)" length="18552" href="//www.gutenberg.org/ebooks/57490.epub.noimages"/>
                <link type="application/x-mobipocket-ebook" rel="http://opds-spec.org/acquisition"
                title="Kindle (no images)" length="110360" href="//www.gutenberg.org/ebooks/57490.kindle.noimages"/>
            """

        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=RecentBooks&amp;index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=RecentBooks&amp;index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s book%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
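_RecentBooks serves one PAGE_SIZE window of rows at a time and only emits next/previous links when there are rows beyond the current window. A minimal sketch of that paging arithmetic with illustrative names:

def page_window(results, index, page_size):
    """Return the slice to serve plus the next/previous offsets (None when
    there is no such page), mirroring the index handling in _RecentBooks."""
    page = results[index:index + page_size]
    next_index = index + page_size if len(results) > index + page_size else None
    prev_index = index - page_size if index >= page_size else None
    return page, next_index, prev_index

# e.g. page_window(list(range(75)), 30, 30) serves rows 30-59 with next=60 and previous=0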
Example #40
0
def TDL(book=None):

    provider = "torrentdownloads"
    host = lazylibrarian.TDL_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host)

    params = {
        "type": "search",
        "cid": "2",
        "search": book['searchterm']
    }
    searchURL = providerurl + "/rss.xml?%s" % urllib.urlencode(params)

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
        data = False  # prevent the failed fetch text from being parsed below

    results = []

    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = item['title']
                    seeders = int(item['seeders'])
                    link = item['link']
                    size = int(item['size'])
                    url = None

                    if link and minimumseeders < seeders:
                        # no point requesting the magnet link if not enough seeders
                        # TDL gives us a relative link
                        result, success = fetchURL(providerurl+link)
                        url = None
                        if success:
                            new_soup = BeautifulSoup(result)
                            for link in new_soup.findAll('a'):
                                output = link.get('href')
                                if output and output.startswith('magnet'):
                                    url = output
                                    break

                    if minimumseeders < int(seeders):
                        if not url or not title:
                            logger.debug('Missing url or title')
                        else:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                            })
                            logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results
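TDL's RSS feed only provides a relative details link, so the parser above fetches that page and pulls the first magnet href out of it. A minimal sketch of that step, assuming requests in place of LazyLibrarian's fetchURL:

import requests
from bs4 import BeautifulSoup

def first_magnet_link(details_url):
    """Fetch a torrent details page and return its first magnet link, or
    None on failure, mirroring the TDL parser above."""
    try:
        resp = requests.get(details_url, timeout=30)
        resp.raise_for_status()
    except requests.RequestException:
        return None
    soup = BeautifulSoup(resp.text, 'html.parser')
    for anchor in soup.findAll('a'):
        href = anchor.get('href')
        if href and href.startswith('magnet'):
            return href
    return None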
Example #41
0
def search_wishlist():
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            threading.currentThread().name = "SEARCHWISHLIST"

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverWishLists()
        new_books = 0
        if not wishproviders:
            logger.debug('No wishlists are set')
            scheduleJob(action='Stop', target='search_wishlist')
            return  # No point in continuing

        # for each item in resultlist, add to database if necessary, and mark as wanted
        logger.debug('Processing %s item%s in wishlists' % (len(resultlist), plural(len(resultlist))))
        for book in resultlist:
            # we get rss_author, rss_title, maybe rss_isbn, rss_bookid (goodreads bookid)
            # we can just use bookid if goodreads, or try isbn and name matching on author/title if not
            # eg NYTimes wishlist
            if 'E' in book['types']:
                ebook_status = "Wanted"
            else:
                ebook_status = "Skipped"
            if 'A' in book['types']:
                audio_status = "Wanted"
            else:
                audio_status = "Skipped"
            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and book['rss_bookid']:
                cmd = 'select books.Status as Status,AudioStatus,authors.Status as AuthorStatus,'
                cmd += 'AuthorName,BookName,Requester,AudioRequester from books,authors '
                cmd += 'where books.AuthorID = authors.AuthorID and bookid=?'
                bookmatch = myDB.match(cmd, (book['rss_bookid'],))
                if bookmatch:
                    cmd = 'SELECT SeriesName,Status from series,member '
                    cmd += 'where series.SeriesID=member.SeriesID and member.BookID=?'
                    series = myDB.select(cmd, (book['rss_bookid'],))
                    reject_series = None
                    for ser in series:
                        if ser['Status'] in ['Paused', 'Ignored']:
                            reject_series = {"Name": ser['SeriesName'], "Status": ser['Status']}
                            break
                    bookname = bookmatch['BookName']
                    if bookmatch['Status'] in ['Open', 'Wanted', 'Have']:
                        logger.info('Found book %s, already marked %s' % (bookname, bookmatch['Status']))
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {"Requester": bookmatch["Requester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": book['rss_bookid']}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    elif bookmatch['AuthorStatus'] in ['Paused', 'Ignored']:
                        logger.info('Found book %s, but author is %s' % (bookname, bookmatch['AuthorStatus']))
                    elif reject_series:
                        logger.info('Found book %s, but series "%s" is %s' %
                                    (bookname, reject_series['Name'], reject_series['Status']))
                    elif ebook_status == "Wanted":  # skipped/ignored
                        logger.info('Found book %s, marking as "Wanted"' % bookname)
                        controlValueDict = {"BookID": book['rss_bookid']}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {"Requester": bookmatch["Requester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": book['rss_bookid']}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    if bookmatch['AudioStatus'] in ['Open', 'Wanted', 'Have']:
                        logger.info('Found audiobook %s, already marked %s' % (bookname, bookmatch['AudioStatus']))
                        if bookmatch["AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["AudioRequester"]:
                                newValueDict = {"AudioRequester": bookmatch["AudioRequester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": book['rss_bookid']}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    elif bookmatch['AuthorStatus'] in ['Paused', 'Ignored']:
                        logger.info('Found book %s, but author is %s' % (bookname, bookmatch['AuthorStatus']))
                    elif reject_series:
                        logger.info('Found book %s, but series "%s" is %s' %
                                    (bookname, reject_series['Name'], reject_series['Status']))
                    elif audio_status == "Wanted":  # skipped/ignored
                        logger.info('Found audiobook %s, marking as "Wanted"' % bookname)
                        controlValueDict = {"BookID": book['rss_bookid']}
                        newValueDict = {"AudioStatus": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch["AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["AudioRequester"]:
                                newValueDict = {"AudioRequester": bookmatch["AudioRequester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": book['rss_bookid']}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                else:
                    import_book(book['rss_bookid'], ebook_status, audio_status)
                    new_books += 1
                    newValueDict = {"Requester": book["dispname"] + ' '}
                    controlValueDict = {"BookID": book['rss_bookid']}
                    myDB.upsert("books", newValueDict, controlValueDict)
                    newValueDict = {"AudioRequester": book["dispname"] + ' '}
                    controlValueDict = {"BookID": book['rss_bookid']}
                    myDB.upsert("books", newValueDict, controlValueDict)
            else:
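                # hedged note: match the feed entry against the database using its title, BookID and/or ISBN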
                item = {}
                results = None
                item['Title'] = book['rss_title']
                if book['rss_bookid']:
                    item['BookID'] = book['rss_bookid']
                if book['rss_isbn']:
                    item['ISBN'] = book['rss_isbn']
                bookmatch = finditem(item, book['rss_author'])
                if bookmatch:  # it's already in the database
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    auth_res = myDB.match('SELECT Status from authors WHERE authorname=?', (authorname,))
                    if auth_res:
                        auth_status = auth_res['Status']
                    else:
                        auth_status = 'Unknown'
                    cmd = 'SELECT SeriesName,Status from series,member '
                    cmd += 'where series.SeriesID=member.SeriesID and member.BookID=?'
                    series = myDB.select(cmd, (bookid,))
                    reject_series = None
                    for ser in series:
                        if ser['Status'] in ['Paused', 'Ignored']:
                            reject_series = {"Name": ser['SeriesName'], "Status": ser['Status']}
                            break
                    if bookmatch['Status'] in ['Open', 'Wanted', 'Have']:
                        logger.info(
                            'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookmatch['Status']))
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {"Requester": bookmatch["Requester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    elif auth_status in ['Paused', 'Ignored']:
                        logger.info('Found book %s, but author is "%s"' % (bookname, auth_status))
                    elif reject_series:
                        logger.info('Found book %s, but series "%s" is %s' %
                                    (bookname, reject_series['Name'], reject_series['Status']))
                    elif ebook_status == 'Wanted':  # skipped/ignored
                        logger.info('Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {"Requester": bookmatch["Requester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    if bookmatch['AudioStatus'] in ['Open', 'Wanted', 'Have']:
                        logger.info(
                            'Found audiobook %s by %s, already marked as "%s"' %
                            (bookname, authorname, bookmatch['AudioStatus']))
                        if bookmatch["AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["AudioRequester"]:
                                newValueDict = {"AudioRequester": bookmatch["AudioRequester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    elif auth_status in ['Paused', 'Ignored']:
                        logger.info('Found book %s, but author is "%s"' % (bookname, auth_status))
                    elif reject_series:
                        logger.info('Found book %s, but series "%s" is %s' %
                                    (bookname, reject_series['Name'], reject_series['Status']))
                    elif audio_status == 'Wanted':  # skipped/ignored
                        logger.info('Found audiobook %s by %s, marking as "Wanted"' % (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"AudioStatus": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch["AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["AudioRequester"]:
                                newValueDict = {"AudioRequester": bookmatch["AudioRequester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict, controlValueDict)
                else:  # not in database yet
                    if book['rss_isbn']:
                        results = search_for(book['rss_isbn'])
                    if results:
                        result = results[0]  # type: dict
                        if result['isbn_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                            logger.info("Found (%s%%) %s: %s" %
                                        (result['isbn_fuzz'], result['authorname'], result['bookname']))
                            import_book(result['bookid'], ebook_status, audio_status)
                            new_books += 1
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": result['bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            bookmatch = True
                    if not results:
                        searchterm = "%s <ll> %s" % (item['Title'], formatAuthorName(book['rss_author']))
                        results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]  # type: dict
                        if result['author_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90) \
                                and result['book_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                            logger.info("Found (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                      result['authorname'], result['bookname']))
                            import_book(result['bookid'], ebook_status, audio_status)
                            new_books += 1
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": result['bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            bookmatch = True

                    if not bookmatch:
                        msg = "Skipping book %s by %s" % (item['Title'], book['rss_author'])
                        if not results:
                            msg += ', No results returned'
                            logger.warn(msg)
                        else:
                            msg += ', No match found'
                            logger.warn(msg)
                            result = results[0]  # type: dict
                            msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                        result['authorname'], result['bookname'])
                            logger.warn(msg)
        if new_books:
            logger.info("Wishlist marked %s book%s as Wanted" % (new_books, plural(new_books)))

    except Exception:
        logger.error('Unhandled exception in search_wishlist: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
Example #42
0
def ZOO(book=None, test=False):
    errmsg = ''
    provider = "zooqle"
    host = lazylibrarian.CONFIG['ZOO_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/search")

    params = {"q": book['searchterm'], "category": "books", "fmt": "rss"}
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
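    # subtract one so the strict "minimumseeders < seeders" test below accepts the configured minimum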
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    seeders = int(item['torrent_seeds'])
                    link = item['links'][1]['href']
                    size = int(item['links'][1]['length'])
                    magnet = item['torrent_magneturi']

                    url = None
                    mode = 'torrent'
                    if link:
                        url = link
                    if magnet and (not url or lazylibrarian.CONFIG['PREFER_MAGNET']):
                        url = magnet
                        mode = 'magnet'

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < int(seeders):
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': mode,
                            'priority': lazylibrarian.CONFIG['ZOO_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # looks like zooqle has ip based access limits
                        logger.error(
                            'Access forbidden. Please wait a while before trying %s again.'
                            % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" %
                                     (provider, str(e)))
                        logger.debug('%s: %s' %
                                     (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
Example #43
0
    def _Author(self, **kwargs):
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        if 'authorid' not in kwargs:
            self.data = self._error_with_message('No Author Provided')
            return
        links = []
        entries = []
        links.append(getLink(href='%s/opensearchbooks.xml' % self.searchroot,
                             ftype='application/opensearchdescription+xml', rel='search', title='Search Books'))
        author = myDB.match("SELECT AuthorName from authors WHERE AuthorID=?", (kwargs['authorid'],))
        if not author:
            self.data = self._error_with_message('Unknown Author')
            return
        author = makeUnicode(author['AuthorName'])
        cmd = "SELECT BookName,BookDate,BookID,BookAdded,BookDesc,BookImg,BookFile,AudioFile from books WHERE "
        if 'query' in kwargs:
            cmd += "BookName LIKE '%" + kwargs['query'] + "%' AND "
        cmd += "(Status='Open' or AudioStatus='Open') and AuthorID=? order by BookDate DESC"
        results = myDB.select(cmd, (kwargs['authorid'],))
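        # serve one page of PAGE_SIZE entries starting at the requested index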
        page = results[index:(index + self.PAGE_SIZE)]

        for book in page:
            mime_type = None
            if book['BookFile']:
                mime_type = mimeType(book['BookFile'])
            elif book['AudioFile']:
                mime_type = mimeType(book['AudioFile'])

            if mime_type:
                entry = {'title': escape('%s (%s)' % (book['BookName'], book['BookDate'])),
                         'id': escape('book:%s' % book['BookID']),
                         'updated': opdstime(book['BookAdded']),
                         'href': '%s?cmd=Serve&amp;bookid=%s' % (self.opdsroot, book['BookID']),
                         'kind': 'acquisition',
                         'rel': 'file',
                         'type': mime_type}
                if lazylibrarian.CONFIG['OPDS_METAINFO']:
                    entry['image'] = self.searchroot + '/' + book['BookImg']
                    entry['content'] = escape('%s - %s' % (book['BookName'], book['BookDesc']))
                    entry['author'] = escape('%s' % author)
                else:
                    entry['content'] = escape('%s (%s)' % (book['BookName'], book['BookAdded']))
                entries.append(entry)

        feed = {}
        authorname = '%s (%s)' % (escape(author), len(entries))
        feed['title'] = 'LazyLibrarian OPDS - %s' % authorname
        feed['id'] = 'author:%s' % escape(kwargs['authorid'])
        feed['updated'] = now()
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=Authors' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=Author&amp;authorid=%s&amp;index=%s' % (self.opdsroot,
                                                                             quote_plus(kwargs['authorid']),
                                                                             index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=Author&amp;authorid=%s&amp;index=%s' % (self.opdsroot,
                                                                             quote_plus(kwargs['authorid']),
                                                                             index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))
        feed['links'] = links
        feed['entries'] = entries
        self.data = feed
        logger.debug("Returning %s book%s" % (len(entries), plural(len(entries))))
        return
Example #44
0
def search_nzb_book(books=None, reset=False):
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "SEARCHNZB"

    if not lazylibrarian.USE_NZB():
        logger.warn('No NEWZNAB/TORZNAB providers set, check config')
        return
    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select(
            'SELECT BookID, AuthorName, BookName, BookSub, BookAdded from books WHERE Status="Wanted" order by BookAdded desc'
        )
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            searchbook = myDB.select(
                'SELECT BookID, AuthorName, BookName, BookSub from books '
                'WHERE BookID="%s" AND Status="Wanted"' % book['bookid'])
            for terms in searchbook:
                searchbooks.append(terms)

    if len(searchbooks) == 0:
        logger.debug("NZB search requested for no books or invalid BookID")
        return
    else:
        logger.info('NZB Searching for %i book%s' %
                    (len(searchbooks), plural(len(searchbooks))))

    for searchbook in searchbooks:
        # searchterm is only used for display purposes
        searchterm = searchbook['AuthorName'] + ' "' + searchbook['BookName']
        if searchbook['BookSub']:
            searchterm = searchterm + ': ' + searchbook['BookSub']
        searchterm = searchterm + '"'

        searchlist.append({
            "bookid": searchbook['BookID'],
            "bookName": searchbook['BookName'],
            "bookSub": searchbook['BookSub'],
            "authorName": searchbook['AuthorName'],
            "searchterm": searchterm
        })

    if not lazylibrarian.SAB_HOST and not lazylibrarian.NZB_DOWNLOADER_BLACKHOLE and not lazylibrarian.NZBGET_HOST:
        logger.warn(
            'No download method is set, use SABnzbd/NZBGet or blackhole, check config'
        )

    nzb_count = 0
    for book in searchlist:
        # first attempt, try author/title in category "book"
        resultlist, nproviders = providers.IterateOverNewzNabSites(
            book, 'book')

        if not nproviders:
            logger.warn(
                'No NewzNab or TorzNab providers are set, check config')
            return  # no point in continuing

        found = processResultList(resultlist, book, "book")

        # if you can't find the book, try author/title without any "(extended details, series etc)"
        if not found and '(' in book['bookName']:
            resultlist, nproviders = providers.IterateOverNewzNabSites(
                book, 'shortbook')
            found = processResultList(resultlist, book, "shortbook")

        # if you can't find the book under "books", you might find under general search
        if not found:
            resultlist, nproviders = providers.IterateOverNewzNabSites(
                book, 'general')
            found = processResultList(resultlist, book, "general")

        if not found:
            logger.debug("NZB Searches for %s returned no results." %
                         book['searchterm'])
        else:
            nzb_count = nzb_count + 1  # we found it

    logger.info("NZBSearch for Wanted items complete, found %s book%s" %
                (nzb_count, plural(nzb_count)))

    if reset:
        scheduleJob(action='Restart', target='search_nzb_book')
Example #45
0
def TDL(book=None, test=False):
    errmsg = ''
    provider = "torrentdownloads"
    host = lazylibrarian.CONFIG['TDL_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host)

    params = {"type": "search", "cid": "2", "search": book['searchterm']}
    searchURL = providerurl + "/rss.xml?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = item['title']
                    seeders = int(item['seeders'])
                    link = item['link']
                    size = int(item['size'])
                    url = None

                    if link and minimumseeders < int(seeders):
                        # no point requesting the magnet link if not enough seeders
                        # TDL gives us a relative link
                        result, success = fetchURL(providerurl + link)
                        if success:
                            new_soup = BeautifulSoup(result, 'html5lib')
                            for link in new_soup.find_all('a'):
                                output = link.get('href')
                                if output and output.startswith('magnet'):
                                    url = output
                                    break

                        if not url or not title:
                            logger.debug('Missing url or title')
                        else:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'magnet',
                                'priority': lazylibrarian.CONFIG['TDL_DLPRIORITY']
                            })
                            logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" %
                                 (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
Example #46
0
def TPB(book=None):

    provider = "TPB"
    host = lazylibrarian.TPB_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/s/?q=" + book['searchterm'])

    params = {"category": "601", "page": "0", "orderby": "99"}
    searchURL = providerurl + "&%s" % urllib.urlencode(params)

    result, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in result:
            logger.debug(u"No results found from %s for %s" %
                         (provider, book['searchterm']))
            result = False
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' %
                         (provider, result))
        result = False

    results = []

    if result:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
        soup = BeautifulSoup(result)
        try:
            table = soup.findAll('table')[0]
            rows = table.findAll('tr')
        except Exception:  # no results = no table in result page
            rows = []

        c1 = []
        c2 = []
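        # c1 collects the name/link cell, c2 the seeder-count cell from each result row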

        if len(rows) > 1:
            for row in rows[1:]:
                if len(row.findAll('td')) > 2:
                    c1.append(row.findAll('td')[1])
                    c2.append(row.findAll('td')[2])

        for col1, col2 in zip(c1, c2):
            try:
                title = unaccented(
                    str(col1).split('title=')[1].split('>')[1].split('<')[0])
                magnet = str(col1).split('href="')[1].split('"')[0]
                size = unaccented(col1.text.split(', Size ')[1].split('iB')[0])
                mult = 1
                try:
                    if 'K' in size:
                        size = size.split('K')[0]
                        mult = 1024
                    elif 'M' in size:
                        size = size.split('M')[0]
                        mult = 1024 * 1024
                    size = int(float(size) * mult)
                except (ValueError, IndexError):
                    size = 0
                try:
                    seeders = int(col2.text)
                except ValueError:
                    seeders = 0

                if magnet and minimumseeders < seeders:
                    # no point in asking for magnet link if not enough seeders
                    magurl = '%s/%s' % (host, magnet)
                    result, success = fetchURL(magurl)
                    if not success:
                        logger.debug('Error fetching url %s, %s' %
                                     (magurl, result))
                    else:
                        magnet = None
                        new_soup = BeautifulSoup(result)
                        for link in new_soup.findAll('a'):
                            output = link.get('href')
                            if output and output.startswith('magnet'):
                                magnet = output
                                break
                    if not magnet or not title:
                        logger.debug('Missing magnet or title')
                    else:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': magnet,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                else:
                    logger.debug('Found %s but %s seeder%s' %
                                 (title, seeders, plural(seeders)))
            except Exception as e:
                logger.error(u"An error occurred in the %s parser: %s" %
                             (provider, str(e)))

    logger.debug(
        u"Found %i result%s from %s for %s" %
        (len(results), plural(len(results)), provider, book['searchterm']))
    return results
Example #47
0
    def find_results(self, searchterm=None, queue=None):
        """ GoogleBooks performs much better if we search for author OR title
            not both at once, so if searchterm is not isbn, two searches needed.
            Lazylibrarian searches use <ll> to separate title from author in searchterm
            If this token isn't present, it's an isbn or searchterm as supplied by user
        """
        try:
            myDB = database.DBConnection()
            resultlist = []
            # See if we should check ISBN field, otherwise ignore it
            api_strings = ['inauthor:', 'intitle:']
            if is_valid_isbn(searchterm):
                api_strings = ['isbn:']

            api_hits = 0

            ignored = 0
            total_count = 0
            no_author_count = 0
            title = ''
            authorname = ''

            if ' <ll> ' in searchterm:  # special token separates title from author
                title, authorname = searchterm.split(' <ll> ')

            fullterm = searchterm.replace(' <ll> ', ' ')
            logger.debug('Now searching Google Books API with searchterm: %s' % fullterm)

            for api_value in api_strings:
                set_url = self.url
                if api_value == "isbn:":
                    set_url = set_url + quote(api_value + searchterm)
                elif api_value == 'intitle:':
                    searchterm = fullterm
                    if title:  # just search for title
                        # noinspection PyUnresolvedReferences
                        title = title.split(' (')[0]  # without any series info
                        searchterm = title
                    searchterm = searchterm.replace("'", "").replace('"', '').strip()  # and no quotes
                    if PY2:
                        searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
                    set_url = set_url + quote(api_value + '"' + searchterm + '"')
                elif api_value == 'inauthor:':
                    searchterm = fullterm
                    if authorname:
                        searchterm = authorname  # just search for author
                    searchterm = searchterm.strip()
                    if PY2:
                        searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
                    set_url = set_url + quote_plus(api_value + '"' + searchterm + '"')

                startindex = 0
                resultcount = 0
                ignored = 0
                number_results = 1
                total_count = 0
                no_author_count = 0
                try:
                    while startindex < number_results:
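                        # page through the API results until every reported item has been fetched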

                        self.params['startIndex'] = startindex
                        URL = set_url + '&' + urlencode(self.params)

                        try:
                            jsonresults, in_cache = gb_json_request(URL)
                            if not jsonresults:
                                number_results = 0
                            else:
                                if not in_cache:
                                    api_hits += 1
                                number_results = jsonresults['totalItems']
                                logger.debug('Searching url: ' + URL)
                            if number_results == 0:
                                logger.warn('Found no results for %s with value: %s' % (api_value, searchterm))
                                break
                        except Exception as err:
                            if hasattr(err, 'reason'):
                                errmsg = err.reason
                            else:
                                errmsg = str(err)
                            logger.warn(
                                'Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                            break

                        startindex += 40

                        for item in jsonresults['items']:
                            total_count += 1

                            book = bookdict(item)
                            if not book['author']:
                                logger.debug('Skipped a result without authorfield.')
                                no_author_count += 1
                                continue

                            if not book['name']:
                                logger.debug('Skipped a result without title.')
                                continue

                            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
                            if "All" not in valid_langs:  # don't care about languages, accept all
                                try:
                                    # skip if language is not in valid list -
                                    booklang = book['lang']
                                    if booklang not in valid_langs:
                                        logger.debug(
                                            'Skipped %s with language %s' % (book['name'], booklang))
                                        ignored += 1
                                        continue
                                except KeyError:
                                    ignored += 1
                                    logger.debug('Skipped %s where no language is found' % book['name'])
                                    continue

                            if authorname:
                                author_fuzz = fuzz.ratio(book['author'], authorname)
                            else:
                                author_fuzz = fuzz.ratio(book['author'], fullterm)

                            if title:
                                if title.endswith(')'):
                                    title = title.rsplit('(', 1)[0]
                                book_fuzz = fuzz.token_set_ratio(book['name'], title)
                                # lose a point for each extra word in the fuzzy matches so we get the closest match
                                words = len(getList(book['name']))
                                words -= len(getList(title))
                                book_fuzz -= abs(words)
                            else:
                                book_fuzz = fuzz.token_set_ratio(book['name'], fullterm)

                            isbn_fuzz = 0
                            if is_valid_isbn(fullterm):
                                isbn_fuzz = 100

                            highest_fuzz = max((author_fuzz + book_fuzz) / 2, isbn_fuzz)

                            dic = {':': '.', '"': '', '\'': ''}
                            bookname = replace_all(book['name'], dic)

                            bookname = unaccented(bookname)
                            bookname = bookname.strip()  # strip whitespace

                            AuthorID = ''
                            if book['author']:
                                match = myDB.match(
                                    'SELECT AuthorID FROM authors WHERE AuthorName=?', (book['author'],))
                                if match:
                                    AuthorID = match['AuthorID']

                            resultlist.append({
                                'authorname': book['author'],
                                'authorid': AuthorID,
                                'bookid': item['id'],
                                'bookname': bookname,
                                'booksub': book['sub'],
                                'bookisbn': book['isbn'],
                                'bookpub': book['pub'],
                                'bookdate': book['date'],
                                'booklang': book['lang'],
                                'booklink': book['link'],
                                'bookrate': float(book['rate']),
                                'bookrate_count': book['rate_count'],
                                'bookimg': book['img'],
                                'bookpages': book['pages'],
                                'bookgenre': book['genre'],
                                'bookdesc': book['desc'],
                                'author_fuzz': author_fuzz,
                                'book_fuzz': book_fuzz,
                                'isbn_fuzz': isbn_fuzz,
                                'highest_fuzz': highest_fuzz,
                                'num_reviews': book['ratings']
                            })

                            resultcount += 1

                except KeyError:
                    break

                logger.debug("Returning %s result%s for (%s) with keyword: %s" %
                             (resultcount, plural(resultcount), api_value, searchterm))

            logger.debug("Found %s result%s" % (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
            logger.debug("Removed %s book%s with no author" % (no_author_count, plural(no_author_count)))
            logger.debug('The Google Books API was hit %s time%s for searchterm: %s' %
                         (api_hits, plural(api_hits), fullterm))
            queue.put(resultlist)

        except Exception:
            logger.error('Unhandled exception in GB.find_results: %s' % traceback.format_exc())
Example #48
0
def EXTRA(book=None):

    provider = "Extratorrent"
    host = lazylibrarian.EXTRA_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/rss")

    params = {"type": "search", "s_cat": "2", "search": book['searchterm']}
    searchURL = providerurl + "/?%s" % urllib.urlencode(params)

    try:
        request = urllib2.Request(searchURL)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST,
                              lazylibrarian.PROXY_TYPE)
        request.add_header('User-Agent', USER_AGENT)
        data = urllib2.urlopen(request, timeout=90)
    except (socket.timeout) as e:
        logger.debug('Timeout fetching data from %s' % provider)
        data = False
    except (urllib2.HTTPError, urllib2.URLError, ssl.SSLError) as e:
        # may return 404 if no results, not really an error
        if hasattr(e, 'code') and e.code == 404:
            logger.debug(u"No results found from %s for %s" %
                         (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            if hasattr(e, 'reason'):
                errmsg = e.reason
            else:
                errmsg = str(e)
            logger.debug('Error fetching data from %s: %s' %
                         (provider, errmsg))
        data = False

    results = []

    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])

                    try:
                        seeders = int(item['seeders'])
                    except ValueError:
                        seeders = 0

                    try:
                        size = int(item['size'])
                    except ValueError:
                        size = 0

                    url = None
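                    # pick the .torrent download link from the feed entry's links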
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['href']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error(u"An error occurred in the %s parser: %s" %
                                 (provider, str(e)))

    logger.debug(
        u"Found %i result%s from %s for %s" %
        (len(results), plural(len(results)), provider, book['searchterm']))
    return results
Example #49
0
def TDL(book=None, test=False):
    errmsg = ''
    provider = "torrentdownloads"
    host = lazylibrarian.CONFIG['TDL_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host)

    params = {
        "type": "search",
        "cid": "2",
        "search": book['searchterm']
    }
    searchURL = providerurl + "/rss.xml?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = item['title']
                    seeders = int(item['seeders'].replace(',', ''))
                    link = item['link']
                    size = int(item['size'])
                    url = None

                    try:
                        pubdate = item['published']
                    except KeyError:
                        pubdate = None

                    if link and minimumseeders < seeders:
                        # no point requesting the magnet link if not enough seeders
                        # TDL gives us a relative link
                        result, success = fetchURL(providerurl+link)
                        if success:
                            new_soup = BeautifulSoup(result, 'html5lib')
                            for link in new_soup.find_all('a'):
                                output = link.get('href')
                                if output and output.startswith('magnet'):
                                    url = output
                                    break

                        if not url or not title:
                            logger.debug('Missing url or title')
                        else:
                            res = {
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'magnet',
                                'priority': lazylibrarian.CONFIG['TDL_DLPRIORITY']
                            }
                            if pubdate:
                                res['tor_date'] = pubdate
                            logger.debug('Found %s. Size: %s' % (title, size))
                            results.append(res)
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
Example #50
0
def LibraryScan(startdir=None):
    """ Scan a directory tree adding new books into database
        Return how many books you added """
    if not startdir:
        if not lazylibrarian.DESTINATION_DIR:
            return 0
        else:
            startdir = lazylibrarian.DESTINATION_DIR

    if not os.path.isdir(startdir):
        logger.warn('Cannot find directory: %s. Not scanning' % startdir)
        return 0

    myDB = database.DBConnection()

    # keep statistics of full library scans
    if startdir == lazylibrarian.DESTINATION_DIR:
        myDB.action('DELETE from stats')

    logger.info('Scanning ebook directory: %s' % startdir)

    new_book_count = 0
    file_count = 0
    author = ""

    if lazylibrarian.FULL_SCAN and startdir == lazylibrarian.DESTINATION_DIR:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"'
        )
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            bookID = book['BookID']
            bookfile = book['BookFile']

            if not (bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status="%s" where BookID="%s"' %
                            (status, bookID))
                myDB.action('update books set BookFile="" where BookID="%s"' %
                            bookID)
                logger.warn('Book %s - %s updated as not found on disk' %
                            (bookAuthor, bookName))

    # to save repeat-scans of the same directory if it contains multiple formats of the same book,
    # keep track of which directories we've already looked at
    processed_subdirectories = []

    matchString = ''
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + '\\' + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ''
    count = -1
    booktype_list = getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + '|' + book_type
    matchString = matchString.replace(
        "\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(startdir):
        for directory in d[:]:
            # prevent magazine being scanned
            if directory.startswith("_") or directory.startswith("."):
                d.remove(directory)

        for files in f:
            file_count += 1

            if isinstance(r, str):
                r = r.decode(lazylibrarian.SYS_ENCODING)

            subdirectory = r.replace(startdir, '')
            # Added new code to skip if we've done this directory before.
            # Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (
                    subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
                # If this is a book, try to get author/title/isbn/language
                # if epub or mobi, read metadata from the book
                # If metadata.opf exists, use that allowing it to override
                # embedded metadata. User may have edited metadata.opf
                # to merge author aliases together
                # If all else fails, try pattern match for author/title
                # and look up isbn/lang from LT or GR later
                match = 0
                if is_valid_booktype(files):

                    logger.debug("[%s] Now scanning subdirectory %s" %
                                 (startdir, subdirectory))

                    language = "Unknown"
                    isbn = ""
                    book = ""
                    author = ""
                    extn = os.path.splitext(files)[1]

                    # if it's an epub or a mobi we can try to read metadata from it
                    if (extn == ".epub") or (extn == ".mobi"):
                        book_filename = os.path.join(
                            r.encode(lazylibrarian.SYS_ENCODING),
                            files.encode(lazylibrarian.SYS_ENCODING))

                        try:
                            res = get_book_info(book_filename)
                        except:
                            res = {}
                        if 'title' in res and 'creator' in res:  # this is the minimum we need
                            match = 1
                            book = res['title']
                            author = res['creator']
                            if 'language' in res:
                                language = res['language']
                            if 'identifier' in res:
                                isbn = res['identifier']
                            if 'type' in res:
                                extn = res['type']

                            logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                         (isbn, language, author, book, extn))
                        else:

                            logger.debug("Book meta incomplete in %s" %
                                         book_filename)

                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know
                    # LL preferred authorname/bookname at this point.
                    # Allow metadata in file to override book contents as may be users pref

                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if 'title' in res and 'creator' in res:  # this is the minimum we need
                        match = 1
                        book = res['title']
                        author = res['creator']
                        if 'language' in res:
                            language = res['language']
                        if 'identifier' in res:
                            isbn = res['identifier']
                        logger.debug("file meta [%s] [%s] [%s] [%s]" %
                                     (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)

                    if not match:  # no author/book from metadata file, and not embedded either
                        match = pattern.match(files)
                        if match:
                            author = match.group("author")
                            book = match.group("book")
                        else:
                            logger.debug("Pattern match failed [%s]" % files)

                    if match:
                        # flag that we found a book in this subdirectory
                        processed_subdirectories.append(subdirectory)

                        # If we have a valid looking isbn, and language != "Unknown", add it to cache
                        if language != "Unknown" and is_valid_isbn(isbn):
                            logger.debug("Found Language [%s] ISBN [%s]" %
                                         (language, isbn))
                            # we need to add it to language cache if not already
                            # there, is_valid_isbn has checked length is 10 or 13
                            if len(isbn) == 10:
                                isbnhead = isbn[0:3]
                            else:
                                isbnhead = isbn[3:6]
                            match = myDB.action(
                                'SELECT lang FROM languages where isbn = "%s"'
                                % (isbnhead)).fetchone()
                            if not match:
                                myDB.action(
                                    'insert into languages values ("%s", "%s")'
                                    % (isbnhead, language))
                                logger.debug("Cached Lang [%s] ISBN [%s]" %
                                             (language, isbnhead))
                            else:
                                logger.debug(
                                    "Already cached Lang [%s] ISBN [%s]" %
                                    (language, isbnhead))

                        # get authors name in a consistent format
                        if "," in author:  # "surname, forename"
                            words = author.split(',')
                            author = words[1].strip() + ' ' + words[0].strip(
                            )  # "forename surname"
                        if author[1] == ' ':
                            author = author.replace(' ', '.')
                            author = author.replace('..', '.')

                        # Check if the author exists, and import the author if not,
                        # before starting any complicated book-name matching to save repeating the search
                        #
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName="%s"' %
                            author).fetchone()
                        if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                            # no match for supplied author, but we're allowed to
                            # add new ones

                            GR = GoodReads(author)
                            try:
                                author_gr = GR.find_author_id()
                            except:
                                logger.warn(
                                    "Error finding author id for [%s]" %
                                    author)
                                continue

                            # only try to add if GR data matches found author data
                            if author_gr:
                                authorname = author_gr['authorname']

                                # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                                match_auth = author.replace('.', '_')
                                match_auth = match_auth.replace(' ', '_')
                                match_auth = match_auth.replace('__', '_')
                                match_name = authorname.replace('.', '_')
                                match_name = match_name.replace(' ', '_')
                                match_name = match_name.replace('__', '_')
                                match_name = unaccented(match_name)
                                match_auth = unaccented(match_auth)
                                # allow a degree of fuzziness to cater for different accented character handling.
                                # some author names have accents,
                                # filename may have the accented or un-accented version of the character
                                # The currently non-configurable value of fuzziness might need to go in config
                                # We stored GoodReads unmodified author name in
                                # author_gr, so store in LL db under that
                                # fuzz.ratio doesn't lowercase for us
                                match_fuzz = fuzz.ratio(
                                    match_auth.lower(), match_name.lower())
                                if match_fuzz < 90:
                                    logger.debug(
                                        "Failed to match author [%s] fuzz [%d]"
                                        % (author, match_fuzz))
                                    logger.debug(
                                        "Failed to match author [%s] to authorname [%s]"
                                        % (match_auth, match_name))

                                # To save loading hundreds of books by unknown
                                # authors at GR or GB, ignore if author "Unknown"
                                if (author != "Unknown") and (match_fuzz >=
                                                              90):
                                    # use "intact" name for author that we stored in
                                    # GR author_dict, not one of the various mangled versions
                                    # otherwise the books appear to be by a different author!
                                    author = author_gr['authorname']
                                    # this new authorname may already be in the
                                    # database, so check again
                                    check_exist_author = myDB.action(
                                        'SELECT * FROM authors where AuthorName="%s"'
                                        % author).fetchone()
                                    if not check_exist_author:
                                        logger.info("Adding new author [%s]" %
                                                    author)
                                        try:
                                            addAuthorToDB(author)
                                            check_exist_author = myDB.action(
                                                'SELECT * FROM authors where AuthorName="%s"'
                                                % author).fetchone()
                                        except Exception:
                                            continue

                        # check author exists in db, either newly loaded or already there
                        if not check_exist_author:
                            logger.debug(
                                "Failed to match author [%s] in database" %
                                author)
                        else:
                            # author exists, check if this book by this author is in our database
                            # metadata might have quotes in book name
                            book = book.replace('"', '').replace("'", "")
                            bookid = find_book_in_db(myDB, author, book)

                            if bookid:
                                # check if book is already marked as "Open" (if so,
                                # we already had it)

                                check_status = myDB.action(
                                    'SELECT Status from books where BookID="%s"'
                                    % bookid).fetchone()
                                if check_status['Status'] != 'Open':
                                    # update status as we've got this book

                                    myDB.action(
                                        'UPDATE books set Status="Open" where BookID="%s"'
                                        % bookid)

                                    book_filename = os.path.join(r, files)

                                    # update book location so we can check if it
                                    # gets removed, or allow click-to-open

                                    myDB.action(
                                        'UPDATE books set BookFile="%s" where BookID="%s"'
                                        % (book_filename, bookid))

                                    # update cover file to cover.jpg in book folder (if exists)
                                    bookdir = book_filename.rsplit(os.sep, 1)[0]
                                    coverimg = os.path.join(bookdir, 'cover.jpg')
                                    cachedir = os.path.join(str(lazylibrarian.PROG_DIR),
                                                            'data', 'images', 'cache')
                                    cacheimg = os.path.join(cachedir, bookid + '.jpg')
                                    if os.path.isfile(coverimg):
                                        copyfile(coverimg, cacheimg)

                                    new_book_count += 1
                            else:
                                logger.debug(
                                    "Failed to match book [%s] by [%s] in database"
                                    % (book, author))

    logger.info("%s new/modified book%s found and added to the database" %
                (new_book_count, plural(new_book_count)))
    logger.info("%s file%s processed" % (file_count, plural(file_count)))

    # show statistics of full library scans
    if startdir == lazylibrarian.DESTINATION_DIR:
        stats = myDB.action(
            "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
                sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats"
        ).fetchone()
        if stats['sum(GR_book_hits)'] is not None:
            # only show stats if new books added
            if lazylibrarian.BOOK_API == "GoogleBooks":
                logger.debug("GoogleBooks was hit %s time%s for books" %
                             (stats['sum(GR_book_hits)'],
                              plural(stats['sum(GR_book_hits)'])))
                logger.debug("GoogleBooks language was changed %s time%s" %
                             (stats['sum(GB_lang_change)'],
                              plural(stats['sum(GB_lang_change)'])))
            if lazylibrarian.BOOK_API == "GoodReads":
                logger.debug("GoodReads was hit %s time%s for books" %
                             (stats['sum(GR_book_hits)'],
                              plural(stats['sum(GR_book_hits)'])))
                logger.debug("GoodReads was hit %s time%s for languages" %
                             (stats['sum(GR_lang_hits)'],
                              plural(stats['sum(GR_lang_hits)'])))
            logger.debug("LibraryThing was hit %s time%s for languages" %
                         (stats['sum(LT_lang_hits)'],
                          plural(stats['sum(LT_lang_hits)'])))
            logger.debug(
                "Language cache was hit %s time%s" %
                (stats['sum(cache_hits)'], plural(stats['sum(cache_hits)'])))
            logger.debug(
                "Unwanted language removed %s book%s" %
                (stats['sum(bad_lang)'], plural(stats['sum(bad_lang)'])))
            logger.debug(
                "Unwanted characters removed %s book%s" %
                (stats['sum(bad_char)'], plural(stats['sum(bad_char)'])))
            logger.debug(
                "Unable to cache %s book%s with missing ISBN" %
                (stats['sum(uncached)'], plural(stats['sum(uncached)'])))
            logger.debug(
                "Found %s duplicate book%s" %
                (stats['sum(duplicates)'], plural(stats['sum(duplicates)'])))
            logger.debug(
                "Cache %s hit%s, %s miss" %
                (lazylibrarian.CACHE_HIT, plural(
                    lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS))
            cachesize = myDB.action(
                "select count('ISBN') as counter from languages").fetchone()
            logger.debug("ISBN Language cache holds %s entries" %
                         cachesize['counter'])
            nolang = len(
                myDB.select(
                    'select BookID from Books where status="Open" and BookLang="Unknown"'
                ))
            if nolang:
                logger.warn(
                    "Found %s book%s in your library with unknown language" %
                    (nolang, plural(nolang)))

        authors = myDB.select('select AuthorID from authors')
        # Update bookcounts for all authors, not just new ones - refresh may have located
        # new books for existing authors especially if switched provider gb/gr
    else:
        # single author/book import
        authors = myDB.select(
            'select AuthorID from authors where AuthorName = "%s"' % author)

    logger.debug('Updating bookcounts for %i author%s' %
                 (len(authors), plural(len(authors))))
    for author in authors:
        update_totals(author['AuthorID'])

    images = myDB.select(
        'select bookid, bookimg, bookname from books where bookimg like "http%"'
    )
    if len(images):
        logger.info("Caching cover%s for %i book%s" %
                    (plural(len(images)), len(images), plural(len(images))))
        for item in images:
            bookid = item['bookid']
            bookimg = item['bookimg']
            bookname = item['bookname']
            newimg = cache_cover(bookid, bookimg)
            if newimg is not None:
                myDB.action('update books set BookImg="%s" where BookID="%s"' %
                            (newimg, bookid))

    images = myDB.select(
        'select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"'
    )
    if len(images):
        logger.info("Caching image%s for %i author%s" %
                    (plural(len(images)), len(images), plural(len(images))))
        for item in images:
            authorid = item['authorid']
            authorimg = item['authorimg']
            authorname = item['authorname']
            newimg = cache_cover(authorid, authorimg)
            if newimg is not None:
                myDB.action(
                    'update authors set AuthorImg="%s" where AuthorID="%s"' %
                    (newimg, authorid))
    setWorkPages()
    logger.info('Library scan complete')
    return new_book_count
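
The author-matching block above collapses spacing and punctuation differences ("J.R.R. Tolkien" vs "J R R Tolkien"), strips accents, and only accepts the GoodReads name when a fuzzy ratio reaches 90. Below is a minimal standalone sketch of that idea; it uses difflib from the standard library in place of the fuzz.ratio call (LazyLibrarian bundles fuzzywuzzy), so exact scores will differ slightly, and the names and threshold are illustrative only.

import unicodedata
from difflib import SequenceMatcher


def _normalise(name):
    # strip accents and unify separators: "J.R.R. Tolkien" -> "j_r_r_tolkien"
    name = unicodedata.normalize('NFKD', name)
    name = ''.join(c for c in name if not unicodedata.combining(c))
    name = name.replace('.', '_').replace(' ', '_')
    while '__' in name:
        name = name.replace('__', '_')
    return name.strip('_').lower()


def authors_match(found_name, api_name, threshold=90):
    # same shape as the check above: normalise both names, compare, apply a cutoff
    ratio = 100 * SequenceMatcher(None, _normalise(found_name),
                                  _normalise(api_name)).ratio()
    return ratio >= threshold


if __name__ == '__main__':
    print(authors_match("J R R Tolkien", "J.R.R. Tolkien"))  # True
    print(authors_match("Terry Pratchett", "Stephen King"))  # False
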
Example #51
0
def EXTRA(book=None):

    provider = "Extratorrent"
    host = lazylibrarian.EXTRA_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/rss")

    params = {
        "type": "search",
        "s_cat": "2",
        "search": book['searchterm']
    }
    searchURL = providerurl + "/?%s" % urllib.urlencode(params)

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        else:
            logger.debug('Error fetching data from %s: %s' % (provider, data))
        data = False

    results = []

    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])

                    try:
                        seeders = int(item['seeders'])
                    except ValueError:
                        seeders = 0

                    try:
                        size = int(item['size'])
                    except ValueError:
                        size = 0

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['href']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results
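
The Extratorrent example above relies on two small decisions: pick the enclosure whose MIME type contains 'x-bittorrent', and drop results whose seeder count is not above the configured minimum. Here is a self-contained sketch of both checks, written against plain dicts shaped like feedparser entries; the field names are assumptions for illustration, not a LazyLibrarian API.

def pick_torrent_url(entry):
    # return the first link whose MIME type looks like a .torrent enclosure
    for link in entry.get('links', []):
        if 'x-bittorrent' in link.get('type', ''):
            return link.get('href')
    return None


def keep_result(entry, minimumseeders):
    # mirror the "minimumseeders < seeders" test used by the provider above
    try:
        seeders = int(entry.get('seeders', 0))
    except (TypeError, ValueError):
        seeders = 0
    return pick_torrent_url(entry) is not None and seeders > minimumseeders


if __name__ == '__main__':
    entry = {'title': 'Some Book',
             'seeders': '5',
             'links': [{'type': 'text/html', 'href': 'http://example.com/page'},
                       {'type': 'application/x-bittorrent',
                        'href': 'http://example.com/file.torrent'}]}
    print(keep_result(entry, minimumseeders=0))  # True
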
Example #52
0
def RSS(host=None, feednr=None, priority=0):
    """
    Generic RSS query function, just return all the results from the RSS feed in a list
    """
    results = []

    if not str(host)[:4] == "http":
        host = 'http://' + host

    URL = host

    result, success = fetchURL(URL)
    if success:
        data = feedparser.parse(result)
    else:
        logger.error('Error fetching data from %s: %s' % (host, result))
        BlockProvider(host, result)
        data = None

    if data:
        # to debug because of api
        logger.debug('Parsing results from %s' % URL)
        provider = data['feed']['link']
        logger.debug("RSS %s returned %i result%s" %
                     (provider, len(data.entries), plural(len(data.entries))))
        for post in data.entries:
            title = None
            magnet = None
            size = None
            torrent = None
            nzb = None
            url = None
            tortype = 'torrent'

            if 'title' in post:
                title = post.title
            if 'links' in post:
                for f in post.links:
                    if 'x-bittorrent' in f['type']:
                        size = f['length']
                        torrent = f['href']
                        break
                    if 'x-nzb' in f['type']:
                        size = f['length']
                        nzb = f['href']
                        break

            if 'torrent_magneturi' in post:
                magnet = post.torrent_magneturi

            if torrent:
                url = torrent
                tortype = 'torrent'

            if magnet:
                if not url or (url and lazylibrarian.CONFIG['PREFER_MAGNET']):
                    url = magnet
                    tortype = 'magnet'

            if nzb:  # prefer nzb over torrent/magnet
                url = nzb
                tortype = 'nzb'

            if not url:
                if 'link' in post:
                    url = post.link

            tor_date = 'Fri, 01 Jan 1970 00:00:00 +0100'
            if 'newznab_attr' in post:
                if post.newznab_attr['name'] == 'usenetdate':
                    tor_date = post.newznab_attr['value']

            if not size:
                size = 1000
            if title and url:
                results.append({
                    'tor_prov': provider,
                    'tor_title': title,
                    'tor_url': url,
                    'tor_size': str(size),
                    'tor_date': tor_date,
                    'tor_feed': feednr,
                    'tor_type': tortype,
                    'priority': priority
                })
    else:
        logger.debug('No data returned from %s' % host)
    return results
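
The RSS reader above picks which link to hand to the downloader in a fixed order: an nzb always wins, a magnet is used when there is no .torrent or when PREFER_MAGNET is set, and the .torrent link is the fallback. That priority can be isolated into one small pure function; this is only a sketch of the same ordering, not part of LazyLibrarian itself, and it omits the final fallback to post.link.

def choose_download(torrent=None, magnet=None, nzb=None, prefer_magnet=False):
    # returns (url, type) using the same precedence as the loop above
    url, tortype = None, 'torrent'
    if torrent:
        url, tortype = torrent, 'torrent'
    if magnet and (not url or prefer_magnet):
        url, tortype = magnet, 'magnet'
    if nzb:  # nzb is preferred over torrent/magnet
        url, tortype = nzb, 'nzb'
    return url, tortype


if __name__ == '__main__':
    print(choose_download(torrent='a.torrent', magnet='magnet:?xt=...'))
    # ('a.torrent', 'torrent')
    print(choose_download(torrent='a.torrent', magnet='magnet:?xt=...',
                          prefer_magnet=True))
    # ('magnet:?xt=...', 'magnet')
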
Example #53
0
def TPB(book=None):

    provider = "TPB"
    host = lazylibrarian.TPB_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/s/?q=" + book['searchterm'])

    params = {
        "category": "601",
        "page": "0",
        "orderby": "99"
    }
    searchURL = providerurl + "&%s" % urllib.urlencode(params)

    result, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in result:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, result))
        result = False

    results = []

    if result:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
        soup = BeautifulSoup(result)
        try:
            table = soup.findAll('table')[0]
            rows = table.findAll('tr')
        except Exception:   # no results = no table in result page
            rows = []

        c1 = []
        c2 = []

        if len(rows) > 1:
            for row in rows[1:]:
                if len(row.findAll('td')) > 2:
                    c1.append(row.findAll('td')[1])
                    c2.append(row.findAll('td')[2])

        for col1, col2 in zip(c1, c2):
            try:
                title = unaccented(str(col1).split('title=')[1].split('>')[1].split('<')[0])
                magnet = str(col1).split('href="')[1].split('"')[0]
                size = unaccented(col1.text.split(', Size ')[1].split('iB')[0])
                mult = 1
                try:
                    if 'K' in size:
                        size = size.split('K')[0]
                        mult = 1024
                    elif 'M' in size:
                        size = size.split('M')[0]
                        mult = 1024 * 1024
                    size = int(float(size) * mult)
                except (ValueError, IndexError):
                    size = 0
                try:
                    seeders = int(col2.text)
                except ValueError:
                    seeders = 0

                if magnet and minimumseeders < seeders:
                    # no point in asking for the magnet link if not enough seeders
                    magurl = '%s/%s' % (host, magnet)
                    result, success = fetchURL(magurl)
                    magnet = None  # only keep a real magnet uri found on the detail page
                    if not success:
                        logger.debug('Error fetching url %s, %s' % (magurl, result))
                    else:
                        new_soup = BeautifulSoup(result)
                        for link in new_soup.findAll('a'):
                            output = link.get('href')
                            if output and output.startswith('magnet'):
                                magnet = output
                                break
                    if not magnet or not title:
                        logger.debug('Missing magnet or title')
                    else:
                        # seeders were already checked above
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': magnet,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                else:
                    logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
            except Exception as e:
                logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results
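
Both the TPB parser above and the libgen parsers further down turn a human-readable size such as '1.2 M' or '950 K' into a byte count with the same split-and-multiply pattern. A standalone helper version of that conversion is sketched below, with GiB handling added as in Example #56; it is an illustration, not LazyLibrarian code.

def size_to_bytes(size_str):
    # '1.2 M' -> 1258291, '950 K' -> 972800; returns 0 if it cannot be parsed
    size_str = str(size_str).strip().upper()
    mult = 1
    try:
        if 'K' in size_str:
            size_str, mult = size_str.split('K')[0], 1024
        elif 'M' in size_str:
            size_str, mult = size_str.split('M')[0], 1024 ** 2
        elif 'G' in size_str:
            size_str, mult = size_str.split('G')[0], 1024 ** 3
        return int(float(size_str) * mult)
    except (ValueError, IndexError):
        return 0


if __name__ == '__main__':
    print(size_to_bytes('1.2 M'))    # 1258291
    print(size_to_bytes('950 K'))    # 972800
    print(size_to_bytes('unknown'))  # 0
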
Example #54
0
def NewzNabPlus(book=None, provider=None, searchType=None, searchMode=None):
    """
    Generic NewzNabplus query function
    takes in host+key+type and returns the result set regardless of who
    based on site running NewzNab+
    ref http://usenetreviewz.com/nzb-sites/
    """

    host = provider['HOST']
    api_key = provider['API']
    logger.debug(
        '[NewzNabPlus] searchType [%s] with Host [%s] mode [%s] using api [%s] for item [%s]'
        % (searchType, host, searchMode, api_key, str(book)))

    results = []

    params = ReturnSearchTypeStructure(provider, api_key, book, searchType,
                                       searchMode)

    if params:
        if not str(host)[:4] == "http":
            host = 'http://' + host
        if host[-1:] == '/':
            host = host[:-1]
        URL = host + '/api?' + urllib.urlencode(params)

        sterm = book['searchterm']
        if isinstance(sterm, str) and hasattr(sterm, "decode"):
            sterm = sterm.decode('utf-8')

        rootxml = None
        logger.debug("[NewzNabPlus] URL = %s" % URL)
        result, success = fetchURL(URL)
        if success:
            try:
                rootxml = ElementTree.fromstring(result)
            except Exception as e:
                logger.error('Error parsing data from %s: %s %s' %
                             (host, type(e).__name__, str(e)))
                rootxml = None
        else:
            if not result or result == "''":
                result = "Got an empty response"
            logger.error('Error reading data from %s: %s' % (host, result))
            BlockProvider(host, result)

        if rootxml is not None:
            # to debug because of api
            logger.debug('Parsing results from <a href="%s">%s</a>' %
                         (URL, host))

            if rootxml.tag == 'error':
                errormsg = rootxml.get('description', default='unknown error')
                logger.error("%s - %s" % (host, errormsg))
                # maybe the host doesn't support the search type
                match = False
                if (provider['BOOKSEARCH'] and searchType in ["book", "shortbook"]) or \
                        (provider['AUDIOSEARCH'] and searchType in ["audio", "shortaudio"]):
                    errorlist = [
                        'no such function', 'unknown parameter',
                        'unknown function', 'bad request',
                        'incorrect parameter', 'does not support'
                    ]
                    for item in errorlist:
                        if item in errormsg.lower():
                            match = True
                    if match:
                        count = 0
                        if searchType in ["book", "shortbook"]:
                            msg = 'BOOKSEARCH'
                        elif searchType in ["audio", "shortaudio"]:
                            msg = 'AUDIOSEARCH'
                        else:
                            msg = ''
                        if not msg:
                            logger.error(
                                'Error trying to disable searchtype [%s] for %s'
                                % (searchType, host))
                        else:
                            while count < len(lazylibrarian.NEWZNAB_PROV):
                                if lazylibrarian.NEWZNAB_PROV[count]['HOST'] == provider['HOST']:
                                    if str(provider['MANUAL']) == 'False':
                                        logger.error("Disabled %s=%s for %s" %
                                                     (msg, provider[msg], provider['HOST']))
                                        lazylibrarian.NEWZNAB_PROV[count][msg] = ""
                                        # preserve the current thread name across config_write()
                                        threadname = threading.currentThread().name
                                        lazylibrarian.config_write()
                                        threading.currentThread().name = threadname
                                    else:
                                        logger.error(
                                            "Unable to disable %s for %s [MANUAL=%s]"
                                            % (msg, provider['HOST'],
                                               provider['MANUAL']))
                                count += 1
                if not match:
                    BlockProvider(provider['HOST'], errormsg)
            else:
                resultxml = rootxml.getiterator('item')
                nzbcount = 0
                maxage = check_int(lazylibrarian.CONFIG['USENET_RETENTION'], 0)
                for nzb in resultxml:
                    try:
                        thisnzb = ReturnResultsFieldsBySearchType(
                            book, nzb, host, searchMode,
                            provider['DLPRIORITY'])
                        if not maxage:
                            nzbcount += 1
                            results.append(thisnzb)
                        else:
                            # example nzbdate format: Mon, 27 May 2013 02:12:09 +0200
                            nzbdate = thisnzb['nzbdate']
                            try:
                                parts = nzbdate.split(' ')
                                nzbdate = ' '.join(
                                    parts[:5])  # strip the +0200
                                dt = datetime.datetime.strptime(
                                    nzbdate,
                                    "%a, %d %b %Y %H:%M:%S").timetuple()
                                nzbage = age(
                                    '%04d-%02d-%02d' %
                                    (dt.tm_year, dt.tm_mon, dt.tm_mday))
                            except Exception as e:
                                logger.debug(
                                    'Unable to get age from [%s] %s %s' %
                                    (thisnzb['nzbdate'], type(e).__name__,
                                     str(e)))
                                nzbage = 0
                            if nzbage <= maxage:
                                nzbcount += 1
                                results.append(thisnzb)
                            else:
                                logger.debug('%s is too old (%s day%s)' %
                                             (thisnzb['nzbtitle'], nzbage,
                                              plural(nzbage)))

                    except IndexError:
                        logger.debug('No results from %s for %s' %
                                     (host, sterm))
                logger.debug('Found %s nzb at %s for: %s' %
                             (nzbcount, host, sterm))
        else:
            logger.debug('No data returned from %s for %s' % (host, sterm))
    return results
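
NewzNabPlus filters results by usenet retention: the report date (e.g. 'Mon, 27 May 2013 02:12:09 +0200') has its offset stripped, is parsed with strptime, and the age in days is compared against USENET_RETENTION, where 0 means no limit. Below is a minimal standalone sketch of that check using only the standard library; LazyLibrarian's own age() helper is replaced here with a plain datetime subtraction.

import datetime


def nzb_age_days(nzbdate):
    # drop the trailing '+0200' style offset, then parse as in the example above
    try:
        stripped = ' '.join(nzbdate.split(' ')[:5])
        posted = datetime.datetime.strptime(stripped, "%a, %d %b %Y %H:%M:%S")
        return (datetime.datetime.now() - posted).days
    except (ValueError, AttributeError):
        return 0


def within_retention(nzbdate, maxage):
    # maxage == 0 means "no retention limit", matching USENET_RETENTION above
    return not maxage or nzb_age_days(nzbdate) <= maxage


if __name__ == '__main__':
    print(within_retention('Mon, 27 May 2013 02:12:09 +0200', 0))     # True
    print(within_retention('Mon, 27 May 2013 02:12:09 +0200', 1500))  # False by now
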
Example #55
0
def GEN(book=None):

    provider = "libgen"
    host = lazylibrarian.GEN_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    searchURL = url_fix(host + "/search.php?view=simple&open=0&phrase=0&column=def&res=100&req=" +
                        book['searchterm'])

    result, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in result:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        elif '111' in result:
            # looks like libgen has ip based access limits
            logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, result))
        result = False

    results = []

    if result:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        soup = BeautifulSoup(result)
        try:
            table = soup.findAll('table')[2]
            rows = table.findAll('tr')
        except Exception:  # no results = no table in result page
            rows = []

        c1 = []
        c2 = []
        c7 = []
        c8 = []

        if len(rows) > 1:
            for row in rows[1:]:
                if len(row.findAll('td')) > 8:
                    c1.append(row.findAll('td')[1])
                    c2.append(row.findAll('td')[2])
                    c7.append(row.findAll('td')[7])
                    c8.append(row.findAll('td')[8])

        for col1, col2, col7, col8 in zip(c1, c2, c7, c8):
            try:
                author = unaccented(col1.text)
                title = unaccented(str(col2).split('>')[2].split('<')[0].strip())
                link = str(col2).split('href="')[1].split('?')[1].split('"')[0]
                size = unaccented(col7.text).upper()
                extn = col8.text

                try:
                    mult = 1
                    if 'K' in size:
                        size = size.split('K')[0]
                        mult = 1024
                    elif 'M' in size:
                        size = size.split('M')[0]
                        mult = 1024 * 1024
                    size = int(float(size) * mult)
                except (ValueError, IndexError) as e:
                    size = 0

                if link and title:
                    if author:
                        title = author.strip() + ' ' + title.strip()
                    if extn:
                        title = title + '.' + extn

                    bookURL = url_fix(host + "/ads.php?" + link)
                    bookresult, success = fetchURL(bookURL)
                    if not success:
                        # may return 404 if no results, not really an error
                        if '404' in bookresult:
                            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
                        else:
                            logger.debug(bookURL)
                            logger.debug('Error fetching data from %s: %s' % (provider, bookresult))
                        bookresult = False
                    if bookresult:
                        url = None
                        new_soup = BeautifulSoup(bookresult)
                        for link in new_soup.findAll('a'):
                            output = link.get('href')
                            if output and output.startswith('/get.php'):
                                url = output
                                break

                        if url:
                            url = url_fix(host + url)
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                            })
                            logger.debug('Found %s, Size %s' % (title, size))

            except Exception as e:
                logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results
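
The libgen example above fetches each result's ads.php page and then scans its anchors for the real download link. That second step reduces to one small helper; the sketch below assumes BeautifulSoup is importable from bs4 and that the download anchor starts with '/get.php', as in the code above.

from bs4 import BeautifulSoup


def find_download_link(page_html, host):
    # return the first '/get.php' anchor on the detail page as an absolute URL
    soup = BeautifulSoup(page_html, 'html.parser')
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        if href and href.startswith('/get.php'):
            return host.rstrip('/') + href
    return None


if __name__ == '__main__':
    html = '<html><body><a href="/other">x</a>' \
           '<a href="/get.php?md5=abc">GET</a></body></html>'
    print(find_download_link(html, 'http://libgen.example'))
    # http://libgen.example/get.php?md5=abc
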
Example #56
0
def GEN(book=None, prov=None, test=False):
    errmsg = ''
    provider = "libgen.io"
    if prov is None:
        prov = 'GEN'
    host = lazylibrarian.CONFIG[prov + '_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    search = lazylibrarian.CONFIG[prov + '_SEARCH']
    if not search or not search.endswith('.php'):
        search = 'search.php'
    if 'index.php' not in search and 'search.php' not in search:
        search = 'search.php'
    if search[0] == '/':
        search = search[1:]

    sterm = makeUnicode(book['searchterm'])

    page = 1
    results = []
    next_page = True

    while next_page:
        if 'index.php' in search:
            params = {
                "s": book['searchterm'],
                "f_lang": "All",
                "f_columns": 0,
                "f_ext": "All"
            }
        else:
            params = {
                "view": "simple",
                "open": 0,
                "phrase": 0,
                "column": "def",
                "res": 100,
                "req": book['searchterm']
            }

        if page > 1:
            params['page'] = page

        providerurl = url_fix(host + "/%s" % search)
        searchURL = providerurl + "?%s" % urllib.urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" %
                             (provider, sterm))
                success = True
            elif '111' in result:
                # looks like libgen has ip based access limits
                logger.error(
                    'Access forbidden. Please wait a while before trying %s again.'
                    % provider)
                errmsg = result
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching page data from %s: %s' %
                             (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' %
                         (searchURL, provider))
            try:
                soup = BeautifulSoup(result, 'html5lib')
                rows = []
                try:
                    table = soup.find_all('table')[2]  # un-named table
                    if table:
                        rows = table.find_all('tr')
                except IndexError:  # no results table in result page
                    pass

                if 'search.php' in search and len(rows) > 1:
                    rows = rows[1:]

                for row in rows:
                    author = ''
                    title = ''
                    size = ''
                    extn = ''
                    link = ''
                    td = row.find_all('td')
                    if 'index.php' in search and len(td) > 4:
                        try:
                            author = formatAuthorName(td[0].text)
                            title = td[2].text
                            newsoup = BeautifulSoup(str(td[4]), 'html5lib')
                            data = newsoup.find('a')
                            link = data.get('href')
                            extn = data.text.split('(')[0]
                            size = data.text.split('(')[1].split(')')[0]
                            size = size.upper()
                        except IndexError as e:
                            logger.debug(
                                'Error parsing libgen index.php results: %s' %
                                str(e))

                    elif 'search.php' in search and len(td) > 8:
                        try:
                            author = formatAuthorName(td[1].text)
                            title = td[2].text
                            size = td[7].text.upper()
                            extn = td[8].text
                            newsoup = BeautifulSoup(str(td[2]), 'html5lib')
                            data = newsoup.find('a')
                            if data:
                                link = data.get('href')
                        except IndexError as e:
                            logger.debug(
                                'Error parsing libgen search.php results: %s' %
                                str(e))

                    if not size:
                        size = 0
                    else:
                        try:
                            mult = 1
                            if 'K' in size:
                                size = size.split('K')[0]
                                mult = 1024
                            elif 'M' in size:
                                size = size.split('M')[0]
                                mult = 1024 * 1024
                            elif 'G' in size:
                                size = size.split('G')[0]
                                mult = 1024 * 1024 * 1024
                            size = int(float(size) * mult)
                        except (ValueError, IndexError):
                            size = 0

                    if link and title:
                        if author:
                            title = author.strip() + ' ' + title.strip()
                        if extn:
                            title = title + '.' + extn

                        if not link.startswith('http'):
                            if "/ads.php?" in link:
                                url = url_fix(host + link)
                            else:
                                url = url_fix(host + "/ads.php?" + link)
                        else:
                            url = redirect_url(host, link)

                        bookresult, success = fetchURL(url)
                        if not success:
                            # may return 404 if no results, not really an error
                            if '404' in bookresult:
                                logger.debug(
                                    "No results found from %s for %s" %
                                    (provider, sterm))
                            else:
                                logger.debug(url)
                                logger.debug(
                                    'Error fetching link data from %s: %s' %
                                    (provider, bookresult))
                                errmsg = bookresult
                            bookresult = False

                        if bookresult:
                            url = None
                            try:
                                new_soup = BeautifulSoup(
                                    bookresult, 'html5lib')
                                for link in new_soup.find_all('a'):
                                    output = link.get('href')
                                    if output:
                                        if output.startswith('http') and '/get.php' in output:
                                            url = output
                                            break
                                        elif '/get.php' in output:
                                            url = '/get.php' + output.split('/get.php')[1]
                                            break
                                        elif '/download/book' in output:
                                            url = '/download/book' + output.split('/download/book')[1]
                                            break

                                if url and not url.startswith('http'):
                                    url = url_fix(host + url)
                                elif url:
                                    url = redirect_url(host, url)
                            except Exception as e:
                                logger.debug(
                                    '%s parsing bookresult for %s: %s' %
                                    (type(e).__name__, link, str(e)))
                                url = None

                        if url:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider + '/' + search,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'direct',
                                'priority': lazylibrarian.CONFIG[prov + '_DLPRIORITY']
                            })
                            logger.debug('Found %s, Size %s' % (title, size))
                        next_page = True

            except Exception as e:
                logger.error("An error occurred in the %s parser: %s" %
                             (provider, str(e)))
                logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn(
                'Maximum results page search reached, still more results available'
            )
            next_page = False

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
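
Example #56 wraps its scraping in a paginated loop: request page 1, keep going while results come back, and stop once MAX_PAGES is exceeded (0 meaning unlimited). The loop shape is worth seeing on its own; the sketch below is generic and takes any fetch_page(page) callable returning a list, which is an assumption for illustration rather than a LazyLibrarian API.

def paged_search(fetch_page, max_pages=0):
    # keep asking for the next page while results keep coming,
    # but honour a MAX_PAGES style cap (0 = unlimited)
    page = 1
    results = []
    while True:
        batch = fetch_page(page)
        if not batch:
            break
        results.extend(batch)
        page += 1
        if 0 < max_pages < page:
            break
    return results


if __name__ == '__main__':
    fake = lambda page: ['hit-%d-%d' % (page, n) for n in range(2)] if page <= 3 else []
    print(len(paged_search(fake)))               # 6
    print(len(paged_search(fake, max_pages=2)))  # 4
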
Example #57
0
def search_rss_book(books=None, library=None):
    """
    books is a list of new books to add, or None for backlog search
    library is "eBook" or "AudioBook" or None to search all book types
    """
    if not (lazylibrarian.USE_RSS()):
        logger.warn('RSS search is disabled')
        scheduleJob(action='Stop', target='search_rss_book')
        return
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if not books:
                threading.currentThread().name = "SEARCHALLRSS"
            else:
                threading.currentThread().name = "SEARCHRSS"

        myDB = database.DBConnection()

        searchbooks = []
        if not books:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded, books.Status, AudioStatus '
            cmd += 'from books,authors WHERE (books.Status="Wanted" OR AudioStatus="Wanted") '
            cmd += 'and books.AuthorID = authors.AuthorID order by BookAdded desc'
            results = myDB.select(cmd)
            for terms in results:
                searchbooks.append(terms)
        else:
            # The user has added a new book
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub, books.Status, AudioStatus '
                cmd += 'from books,authors WHERE BookID=? AND books.AuthorID = authors.AuthorID'
                results = myDB.select(cmd, (book['bookid'],))
                for terms in results:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            logger.debug("SearchRSS - No books to search for")
            return

        resultlist, nproviders, _ = IterateOverRSSSites()
        if not nproviders:
            logger.warn('No rss providers are available')
            scheduleJob(action='Stop', target='search_rss_book')
            return  # No point in continuing

        logger.info('RSS Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks))))

        searchlist = []
        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName']
            if searchbook['BookSub']:
                searchterm = searchterm + ': ' + searchbook['BookSub']

            if library is None or library == 'eBook':
                if searchbook['Status'] == "Wanted":
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="eBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"],))
                    if snatched:
                        logger.warn('eBook %s %s already marked snatched in wanted table' %
                                    (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append(
                            {"bookid": searchbook['BookID'],
                             "bookName": searchbook['BookName'],
                             "bookSub": searchbook['BookSub'],
                             "authorName": searchbook['AuthorName'],
                             "library": "eBook",
                             "searchterm": searchterm})

            if library is None or library == 'AudioBook':
                if searchbook['AudioStatus'] == "Wanted":
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="AudioBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"],))
                    if snatched:
                        logger.warn('AudioBook %s %s already marked snatched in wanted table' %
                                    (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append(
                            {"bookid": searchbook['BookID'],
                             "bookName": searchbook['BookName'],
                             "bookSub": searchbook['BookSub'],
                             "authorName": searchbook['AuthorName'],
                             "library": "AudioBook",
                             "searchterm": searchterm})

        rss_count = 0
        for book in searchlist:
            if book['library'] == 'AudioBook':
                searchtype = 'audio'
            else:
                searchtype = 'book'
            found = processResultList(resultlist, book, searchtype, 'rss')

            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in book['bookName']:  # anything to shorten?
                searchtype = 'short' + searchtype
                found = processResultList(resultlist, book, searchtype, 'rss')

            if not found:
                logger.info("RSS Searches for %s %s returned no results." % (book['library'], book['searchterm']))
            else:
                rss_count += 1

        logger.info("RSS Search for Wanted items complete, found %s book%s" % (rss_count, plural(rss_count)))

    except Exception:
        logger.error('Unhandled exception in search_rss_book: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
Example #58
0
def cleanCache():
    """ Remove unused files from the cache - delete if expired or unused.
        Check JSONCache  WorkCache  XMLCache  SeriesCache Author  Book
        Check covers and authorimages referenced in the database exist and change database entry if missing """

    myDB = database.DBConnection()
    result = []
    cache = os.path.join(lazylibrarian.CACHEDIR, "JSONCache")
    # ensure directory is unicode so we get unicode results from listdir
    if isinstance(cache, str):
        cache = cache.decode(lazylibrarian.SYS_ENCODING)
    cleaned = 0
    kept = 0
    if os.path.isdir(cache):
        for cached_file in os.listdir(cache):
            target = os.path.join(cache, cached_file)
            cache_modified_time = os.stat(target).st_mtime
            time_now = time.time()
            if cache_modified_time < time_now - (
                    lazylibrarian.CONFIG['CACHE_AGE'] * 24 * 60 * 60):  # expire after this many seconds
                # Cache is old, delete entry
                os.remove(target)
                cleaned += 1
            else:
                kept += 1
    msg = "Cleaned %i file%s from JSONCache, kept %i" % (cleaned,
                                                         plural(cleaned), kept)
    result.append(msg)
    logger.debug(msg)

    cache = os.path.join(lazylibrarian.CACHEDIR, "XMLCache")
    # ensure directory is unicode so we get unicode results from listdir
    if isinstance(cache, str):
        cache = cache.decode(lazylibrarian.SYS_ENCODING)
    cleaned = 0
    kept = 0
    if os.path.isdir(cache):
        for cached_file in os.listdir(cache):
            target = os.path.join(cache, cached_file)
            cache_modified_time = os.stat(target).st_mtime
            time_now = time.time()
            if cache_modified_time < time_now - (
                    lazylibrarian.CONFIG['CACHE_AGE'] * 24 * 60 * 60):  # expire after this many seconds
                # Cache is old, delete entry
                os.remove(target)
                cleaned += 1
            else:
                kept += 1
    msg = "Cleaned %i file%s from XMLCache, kept %i" % (cleaned,
                                                        plural(cleaned), kept)
    result.append(msg)
    logger.debug(msg)

    cache = os.path.join(lazylibrarian.CACHEDIR, "WorkCache")
    # ensure directory is unicode so we get unicode results from listdir
    if isinstance(cache, str):
        cache = cache.decode(lazylibrarian.SYS_ENCODING)
    cleaned = 0
    kept = 0
    if os.path.isdir(cache):
        for cached_file in os.listdir(cache):
            target = os.path.join(cache, cached_file)
            try:
                bookid = cached_file.split('.')[0]
            except IndexError:
                logger.error('Clean Cache: Error splitting %s' % cached_file)
                continue
            item = myDB.match('select BookID from books where BookID="%s"' %
                              bookid)
            if not item:
                # WorkPage no longer referenced in database, delete cached_file
                os.remove(target)
                cleaned += 1
            else:
                kept += 1
    msg = "Cleaned %i file%s from WorkCache, kept %i" % (cleaned,
                                                         plural(cleaned), kept)
    result.append(msg)
    logger.debug(msg)

    cache = os.path.join(lazylibrarian.CACHEDIR, "SeriesCache")
    # ensure directory is unicode so we get unicode results from listdir
    if isinstance(cache, str):
        cache = cache.decode(lazylibrarian.SYS_ENCODING)
    cleaned = 0
    kept = 0
    if os.path.isdir(cache):
        for cached_file in os.listdir(cache):
            target = os.path.join(cache, cached_file)
            try:
                seriesid = cached_file.split('.')[0]
            except IndexError:
                logger.error('Clean Cache: Error splitting %s' % cached_file)
                continue
            item = myDB.match(
                'select SeriesID from series where SeriesID="%s"' % seriesid)
            if not item:
                # SeriesPage no longer referenced in database, delete cached_file
                os.remove(target)
                cleaned += 1
            else:
                kept += 1
    msg = "Cleaned %i file%s from SeriesCache, kept %i" % (
        cleaned, plural(cleaned), kept)
    result.append(msg)
    logger.debug(msg)

    cache = lazylibrarian.CACHEDIR
    cleaned = 0
    kept = 0
    cachedir = os.path.join(cache, 'author')
    if os.path.isdir(cachedir):
        for cached_file in os.listdir(cachedir):
            target = os.path.join(cachedir, cached_file)
            if os.path.isfile(target):
                try:
                    imgid = cached_file.split('.')[0].rsplit(os.sep)[-1]
                except IndexError:
                    logger.error('Clean Cache: Error splitting %s' %
                                 cached_file)
                    continue
                item = myDB.match(
                    'select AuthorID from authors where AuthorID="%s"' % imgid)
                if not item:
                    # Author Image no longer referenced in database, delete cached_file
                    os.remove(target)
                    cleaned += 1
                else:
                    kept += 1
    cachedir = os.path.join(cache, 'book')
    if os.path.isdir(cachedir):
        for cached_file in os.listdir(cachedir):
            target = os.path.join(cachedir, cached_file)
            if os.path.isfile(target):
                try:
                    imgid = cached_file.split('.')[0].rsplit(os.sep)[-1]
                except IndexError:
                    logger.error('Clean Cache: Error splitting %s' %
                                 cached_file)
                    continue
                item = myDB.match(
                    'select BookID from books where BookID="%s"' % imgid)
                if not item:
                    # Book Image no longer referenced in database, delete cached_file
                    os.remove(target)
                    cleaned += 1
                else:
                    kept += 1

    # at this point there should be no more .jpg files in the root of the cachedir
    # any that are still there are for books/authors deleted from database
    for cached_file in os.listdir(cache):
        if cached_file.endswith('.jpg'):
            os.remove(os.path.join(cache, cached_file))
            cleaned += 1
    msg = "Cleaned %i file%s from ImageCache, kept %i" % (
        cleaned, plural(cleaned), kept)
    result.append(msg)
    logger.debug(msg)

    # verify the cover images referenced in the database are present
    images = myDB.action('select BookImg,BookName,BookID from books')
    cachedir = os.path.join(lazylibrarian.CACHEDIR, 'book')
    cleaned = 0
    kept = 0
    for item in images:
        keep = True
        imgfile = ''
        if item['BookImg'] is None or item['BookImg'] == '':
            keep = False
        if keep and not item['BookImg'].startswith(
                'http') and not item['BookImg'] == "images/nocover.png":
            # html uses '/' as separator, but os might not
            imgname = item['BookImg'].rsplit('/')[-1]
            imgfile = os.path.join(cachedir, imgname)
            if not os.path.isfile(imgfile):
                keep = False
        if keep:
            kept += 1
        else:
            cleaned += 1
            logger.debug('Cover missing for %s %s' %
                         (item['BookName'], imgfile))
            myDB.action(
                'update books set BookImg="images/nocover.png" where Bookid="%s"'
                % item['BookID'])

    msg = "Cleaned %i missing cover file%s, kept %i" % (cleaned,
                                                        plural(cleaned), kept)
    result.append(msg)
    logger.debug(msg)

    # verify the author images referenced in the database are present
    images = myDB.action('select AuthorImg,AuthorName,AuthorID from authors')
    cachedir = os.path.join(lazylibrarian.CACHEDIR, 'author')
    cleaned = 0
    kept = 0
    for item in images:
        keep = True
        imgfile = ''
        if item['AuthorImg'] is None or item['AuthorImg'] == '':
            keep = False
        if keep and not item['AuthorImg'].startswith(
                'http') and not item['AuthorImg'] == "images/nophoto.png":
            # html uses '/' as separator, but os might not
            imgname = item['AuthorImg'].rsplit('/')[-1]
            imgfile = os.path.join(cachedir, imgname)
            if not os.path.isfile(imgfile):
                keep = False
        if keep:
            kept += 1
        else:
            cleaned += 1
            logger.debug('Image missing for %s %s' %
                         (item['AuthorName'], imgfile))
            myDB.action(
                'update authors set AuthorImg="images/nophoto.png" where AuthorID="%s"'
                % item['AuthorID'])

    msg = "Cleaned %i missing author image%s, kept %i" % (
        cleaned, plural(cleaned), kept)
    result.append(msg)
    logger.debug(msg)
    return result
Example #59
0
def searchItem(item=None, bookid=None, cat=None):
    """
    Call all active search providers to search for item
    return a list of results, each entry a dict of score, title, provider, size, date, url, mode
    item = searchterm to use for general search
    bookid = link to data for book/audio searches
    cat = category to search [general, book, audio]
    """
    results = []

    if not item:
        return results

    book = {}
    searchterm = unaccented_str(item)

    book['searchterm'] = searchterm
    if bookid:
        book['bookid'] = bookid
    else:
        book['bookid'] = searchterm

    if cat in ['book', 'audio']:
        myDB = database.DBConnection()
        cmd = 'SELECT authorName,bookName,bookSub from books,authors WHERE books.AuthorID=authors.AuthorID'
        cmd += ' and bookID=?'
        match = myDB.match(cmd, (bookid,))
        if match:
            book['authorName'] = match['authorName']
            book['bookName'] = match['bookName']
            book['bookSub'] = match['bookSub']
        else:
            logger.debug('Forcing general search')
            cat = 'general'

    nprov = lazylibrarian.USE_NZB() + lazylibrarian.USE_TOR() + lazylibrarian.USE_RSS() + lazylibrarian.USE_DIRECT()
    logger.debug('Searching %s provider%s (%s) for %s' % (nprov, plural(nprov), cat, searchterm))

    if lazylibrarian.USE_NZB():
        resultlist, nprov = IterateOverNewzNabSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_TOR():
        resultlist, nprov = IterateOverTorrentSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_DIRECT():
        resultlist, nprov = IterateOverDirectSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_RSS():
        resultlist, nprov = IterateOverRSSSites()
        if nprov:
            results += resultlist

    # reprocess to get consistent results
    searchresults = []
    for res in results:
        provider = ''
        title = ''
        url = ''
        size = ''
        date = ''
        mode = ''
        if 'nzbtitle' in res:
            title = res['nzbtitle']
        if 'nzburl' in res:
            url = res['nzburl']
        if 'nzbprov' in res:
            provider = res['nzbprov']
        if 'nzbsize' in res:
            size = res['nzbsize']
        if 'nzbdate' in res:
            date = res['nzbdate']
        if 'nzbmode' in res:
            mode = res['nzbmode']
        if 'tor_title' in res:
            title = res['tor_title']
        if 'tor_url' in res:
            url = res['tor_url']
        if 'tor_prov' in res:
            provider = res['tor_prov']
        if 'tor_size' in res:
            size = res['tor_size']
        if 'tor_date' in res:
            date = res['tor_date']
        if 'tor_type' in res:
            mode = res['tor_type']

        if title and provider and mode and url:
            # Not all results have a date or a size
            if not date:
                date = 'Fri, 01 Jan 1970 00:00:00 +0100'
            if not size:
                size = '1000'

            # calculate match percentage - torrents might have words_with_underscore_separator
            score = fuzz.token_set_ratio(searchterm, title.replace('_', ' '))
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(searchterm))
            words -= len(getList(title))
            score -= abs(words)
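            # e.g. a searchterm whose words all appear in a longer title scores 100
            # from token_set_ratio, then loses one point per extra word in the title,
            # so the shortest complete match ends up with the highest score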
            if score >= 40:  # ignore wildly wrong results?
                if not url.startswith('magnet'):
                    if mode not in ['torznab', 'direct']:
                        # strip the query string from the link; torznab and direct
                        # urls keep their query parameters
                        url = url.split('?')[0]
                result = {'score': score, 'title': title, 'provider': provider, 'size': size, 'date': date,
                          'url': urllib.quote_plus(url), 'mode': mode}

                searchresults.append(result)

    # to rank by match quality, sort the combined list by score, best first
    # from operator import itemgetter
    # searchresults = sorted(searchresults, key=itemgetter('score'), reverse=True)

    logger.debug('Found %s %s results for %s' % (len(searchresults), cat, searchterm))
    return searchresults
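A minimal usage sketch for searchItem (hypothetical calling code, not part of the
module): run a general search and keep only the best-scoring normalised results.
The helper name bestMatches and the search term are illustrative assumptions.

def bestMatches(term, limit=5):
    """ Return the top scoring provider results for a free-text search """
    # hypothetical caller - assumes the module imports and config are initialised
    found = searchItem(item=term, cat='general')
    # each entry holds score, title, provider, size, date, url and mode
    found.sort(key=lambda entry: entry['score'], reverse=True)
    return found[:limit]

for hit in bestMatches('war and peace'):
    logger.debug('%s%% %s from %s' % (hit['score'], hit['title'], hit['provider']))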
Example #60
0
def EXTRA(book=None, test=False):
    errmsg = ''
    provider = "Extratorrent"
    host = lazylibrarian.CONFIG['EXTRA_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/rss")

    params = {"type": "search", "s_cat": "2", "search": book['searchterm']}
    searchURL = providerurl + "/?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])

                    try:
                        seeders = int(item['seeders'])
                    except (KeyError, ValueError):
                        seeders = 0

                    try:
                        size = int(item['size'])
                    except (KeyError, ValueError):
                        size = 0

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['href']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < int(seeders):
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['EXTRA_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but only %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" %
                                 (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
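A small usage sketch for the provider function above (hypothetical caller; the
search term and logging are illustrative assumptions): test=True only checks that
Extratorrent responds, while a normal call needs a book dict with searchterm and
bookid keys and returns the result list plus any error message.

# hypothetical usage - assumes EXTRA_HOST is set in lazylibrarian.CONFIG
book = {'searchterm': 'war and peace tolstoy', 'bookid': '12345'}
if EXTRA(book=book, test=True):
    results, errmsg = EXTRA(book=book)
    logger.debug('EXTRA returned %i result%s' % (len(results), plural(len(results))))
else:
    logger.error('Extratorrent provider test failed')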