Example #1
def NewzNabPlus(book=None, host=None, api_key=None, searchType=None, searchMode=None):

    # logger.info('[NewzNabPlus] Searching term [%s] for author [%s] and title [%s] on host [%s] for a
    # [%s] item' % (book['searchterm'], book['authorName'], book['bookName'], host, searchType))
    logger.debug('[NewzNabPlus] searchType [%s] with Host [%s] mode [%s] using api [%s] for item [%s]' % (
                 searchType, host, searchMode, api_key, str(book)))

    results = []

    params = ReturnSearchTypeStructure(api_key, book, searchType, searchMode)

    if not str(host).startswith("http"):
        host = 'http://' + host

    URL = host + '/api?' + urllib.urlencode(params)

    try:
        request = urllib2.Request(URL)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
        request.add_header('User-Agent', common.USER_AGENT)
        # do we really want to cache this, new feeds/torrents are added all the time
        # if we do, call goodreads.get_request(request, expireafter)
        # where expireafter is max cache age in days (0 for non-cached, 7 for up to a week old, etc.
        # Default is 30 days)
        resp = urllib2.urlopen(request, timeout=90)
        try:
            data = ElementTree.parse(resp)
        except (urllib2.URLError, IOError, EOFError) as e:
            logger.error('Error fetching data from %s: %s' % (host, e))
            data = None

    except Exception as e:
        logger.error("Error opening URL %s: %s" % (URL, e))
        data = None
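
For context, a hypothetical call could look like the sketch below; the host, API key and book values are invented, and the book keys mirror those referenced by the commented-out logger line above.

# Hypothetical usage of NewzNabPlus (all values invented for illustration):
book = {'searchterm': 'Dune Frank Herbert',
        'authorName': 'Frank Herbert',
        'bookName': 'Dune'}
NewzNabPlus(book=book, host='nzb.example.com', api_key='0123456789abcdef',
            searchType='book', searchMode='nzb')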
Example #2
    def _sendNMA(nma_api=None, nma_priority=None, event=None, message=None, force=False):

        title = "LazyLibrarian"

        # suppress notifications if the notifier is disabled but the notify options are checked
        if not lazylibrarian.CONFIG['USE_NMA'] and not force:
            return False

        if nma_api is None:
            nma_api = lazylibrarian.CONFIG['NMA_APIKEY']

        if nma_priority is None:
            nma_priority = lazylibrarian.CONFIG['NMA_PRIORITY']

        logger.debug("NMA: title: " + title)
        logger.debug("NMA: event: " + event)
        logger.debug("NMA: message: " + message)

        batch = False

        p = pynma.PyNMA()
        keys = nma_api.split(',')
        p.addkey(keys)

        if len(keys) > 1:
            batch = True

        response = p.push(title, event, message, priority=nma_priority, batch_mode=batch)

        if response[nma_api][u'code'] != u'200':
            logger.error(u"NMA: Could not send notification to NotifyMyAndroid")
            return False
        else:
            logger.debug(u"NMA: Success. NotifyMyAndroid returned : %s" % response[nma_api][u'code'])
            return True
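
A usage sketch, assuming invented API keys: force=True bypasses the USE_NMA config check, and two comma-separated keys trigger the batch mode shown above.

# Hypothetical test notification (keys invented):
_sendNMA(nma_api='key1,key2', nma_priority=0, event='Test',
         message='LazyLibrarian test notification', force=True)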
Example #3
def DownloadMethod(bookid=None, nzbprov=None, nzbtitle=None, nzburl=None):

    myDB = database.DBConnection()

    if lazylibrarian.SAB_HOST and not lazylibrarian.BLACKHOLE:
        download = sabnzbd.SABnzbd(nzbtitle, nzburl)

    elif lazylibrarian.BLACKHOLE:

        try:
            nzbfile = urllib2.urlopen(nzburl, timeout=30).read()

        except urllib2.URLError as e:
            logger.warn('Error fetching nzb from url: %s %s' % (nzburl, e))
            nzbfile = None  # make sure the name is defined if the fetch failed

        nzbname = nzbtitle.replace(' ', '_') + '.nzb'
        nzbpath = os.path.join(lazylibrarian.BLACKHOLEDIR, nzbname)

        try:
            with open(nzbpath, 'w') as f:
                f.write(nzbfile)
            logger.info('NZB file saved to: ' + nzbpath)
            download = True
        except Exception as e:
            logger.error('%s not writable, NZB not saved. Error: %s' % (nzbpath, e))
            download = False
Example #4
def request_json(url, **kwargs):
    """
    Wrapper for `request_response', which will decode the response as JSON
    object and return the result, if no exceptions are raised.

    As an option, a validator callback can be given, which should return True
    if the result is valid.
    """

    validator = kwargs.pop("validator", None)
    response = request_response(url, **kwargs)

    if response is not None:
        try:
            result = response.json()

            if validator and not validator(result):
                logger.error("JSON validation result failed")
            else:
                return result
        except ValueError:
            logger.error("Response returned invalid JSON data")

            # Debug response
            if lazylibrarian.VERBOSE:
                server_message(response)
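
A usage sketch for the validator hook; the URL and the expected key are invented.

# Hypothetical usage: reject JSON responses that lack an 'arguments' key.
result = request_json('http://transmission.example.com/rpc',
                      validator=lambda data: 'arguments' in data)
if result is None:
    logger.warn('No valid JSON response received')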
Example #5
def runGit(args):

    git_locations = ['git']

    if platform.system().lower() == 'darwin':
        git_locations.append('/usr/local/git/bin/git')

    output = err = None

    for cur_git in git_locations:

        cmd = cur_git + ' ' + args

        try:
            logger.debug('(RunGit)Trying to execute: "' + cmd + '" with shell in ' + lazylibrarian.PROG_DIR)
            p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, cwd=lazylibrarian.PROG_DIR)
            output, err = p.communicate()
            logger.debug('(RunGit)Git output: [%s]' % output)

        except OSError:
            logger.debug('(RunGit)Command ' + cmd + ' didn\'t work, couldn\'t find git')
            continue

        if 'not found' in output or "not recognized as an internal or external command" in output:
            logger.debug('(RunGit)Unable to find git with command ' + cmd)
            output = None
        elif 'fatal:' in output or err:
            logger.error('(RunGit)Git returned bad info. Are you sure this is a git installation?')
            output = None
        elif output:
            break

    return (output, err)
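
A usage sketch; 'rev-parse HEAD' is a standard git subcommand, used here purely for illustration.

# Ask git for the current commit hash via the helper above.
output, err = runGit('rev-parse HEAD')
if output:
    logger.debug('Current commit: %s' % output.strip())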
Example #6
def fetchURL(URL, headers=None, retry=True):
    """ Return the result of fetching a URL and True if success
        Otherwise return error message and False
        Allow one retry on timeout by default"""
    request = urllib2.Request(URL)
    if lazylibrarian.PROXY_HOST:
        request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
    if headers is None:
        # some sites insist on having a user-agent, default is to add one
        # if you don't want any headers, send headers=[]
        request.add_header('User-Agent', USER_AGENT)
    else:
        for item in headers:
            request.add_header(item, headers[item])
    try:
        resp = urllib2.urlopen(request, timeout=30)
        if str(resp.getcode()).startswith("2"):  # (200 OK etc)
            try:
                result = resp.read()
            except socket.error as e:
                return str(e), False
            return result, True
        return str(resp.getcode()), False
    except socket.timeout as e:
        if not retry:
            logger.error(u"fetchURL: Timeout getting response from %s" % URL)
            return str(e), False
        logger.warn(u"fetchURL: retrying - got timeout on %s" % URL)
        result, success = fetchURL(URL, headers=headers, retry=False)
        return result, success
    except (urllib2.HTTPError, urllib2.URLError, ssl.SSLError) as e:
        if hasattr(e, 'reason'):
            return e.reason, False
        return str(e), False
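
Usage sketch, following the docstring's (result, success) contract; the URL is invented.

result, success = fetchURL('http://example.com/feed.xml')
if not success:
    logger.warn('Fetch failed: %s' % result)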
Example #7
def setSeedRatio(result):
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug('Deluge: Setting seed ratio')
    if not any(delugeweb_auth):
        _get_auth()

    timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
    try:
        ratio = None
        if result['ratio']:
            ratio = result['ratio']

        if not ratio:
            return True

        post_json = {"method": "core.set_torrent_stop_at_ratio", "params": [result['hash'], True], "id": 5}

        response = requests.post(delugeweb_url, json=post_json, cookies=delugeweb_auth,
                                 verify=deluge_verify_cert, headers=headers, timeout=timeout)
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            logger.debug('Status code: %s' % response.status_code)
            logger.debug(response.text)

        post_json = {"method": "core.set_torrent_stop_ratio", "params": [result['hash'], float(ratio)], "id": 6}

        response = requests.post(delugeweb_url, json=post_json, cookies=delugeweb_auth,
                                 verify=deluge_verify_cert, headers=headers, timeout=timeout)
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            logger.debug('Status code: %s' % response.status_code)
            logger.debug(response.text)

        return not response.json()['error']
    except Exception as err:
        logger.error('Deluge %s: Setting seedratio failed: %s' % (type(err).__name__, str(err)))
        return False
Example #8
def getServer():
    host = lazylibrarian.CONFIG['RTORRENT_HOST']
    if not host:
        logger.error("rtorrent error: No host found, check your config")
        return False

    if not host.startswith("http://") and not host.startswith("https://"):
        host = 'http://' + host
    if host.endswith('/'):
        host = host[:-1]

    if lazylibrarian.CONFIG['RTORRENT_USER']:
        user = lazylibrarian.CONFIG['RTORRENT_USER']
        password = lazylibrarian.CONFIG['RTORRENT_PASS']
        parts = host.split('://')
        host = parts[0] + '://' + user + ':' + password + '@' + parts[1]

    try:
        socket.setdefaulttimeout(20)  # so we don't freeze if server is not there
        server = xmlrpc_client.ServerProxy(host)
        result = server.system.client_version()
        socket.setdefaulttimeout(None)  # reset timeout
        logger.debug("rTorrent client version = %s" % result)
    except Exception as e:
        socket.setdefaulttimeout(None)  # reset timeout if failed
        logger.error("xmlrpc_client error: %s" % repr(e))
        return False
    if result:
        return server
    else:
        logger.warn('No response from rTorrent server')
        return False
Example #9
def _add_torrent_url(result):
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug('Deluge: Adding URL')
    if not any(delugeweb_auth):
        _get_auth()

    timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
    try:
        post_json = {"method": "core.add_torrent_url", "params": [result['url'], {}], "id": 32}

        response = requests.post(delugeweb_url, json=post_json, cookies=delugeweb_auth,
                                 verify=deluge_verify_cert, headers=headers, timeout=timeout)

        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            logger.debug('Status code: %s' % response.status_code)
            logger.debug(response.text)

        result['hash'] = response.json()['result']
        msg = 'Deluge: Response was %s' % result['hash']
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            logger.debug(msg)
        if 'was None' in msg:
            logger.error('Deluge: Adding torrent URL failed: Is the WebUI running?')
        return response.json()['result']
    except Exception as err:
        logger.error('Deluge %s: Adding torrent URL failed: %s' % (type(err).__name__, str(err)))
        return False
Example #10
def _add_torrent_file(result):
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug('Deluge: Adding file')
    if not any(delugeweb_auth):
        _get_auth()

    timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
    try:
        # content is torrent file contents that needs to be encoded to base64
        post_json = {"method": "core.add_torrent_file",
                     "params": [result['name'] + '.torrent', b64encode(result['content']), {}],
                     "id": 2}

        response = requests.post(delugeweb_url, json=post_json, cookies=delugeweb_auth,
                                 verify=deluge_verify_cert, headers=headers, timeout=timeout)

        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            logger.debug('Status code: %s' % response.status_code)
            logger.debug(response.text)

        result['hash'] = response.json()['result']
        msg = 'Deluge: Response was %s' % result['hash']
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            logger.debug(msg)
        if 'was None' in msg:
            logger.error('Deluge: Adding torrent file failed: Is the WebUI running?')
        return response.json()['result']
    except Exception as err:
        logger.error('Deluge %s: Adding torrent file failed: %s' % (type(err).__name__, str(err)))
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            formatted_lines = traceback.format_exc().splitlines()
            logger.debug('; '.join(formatted_lines))
        return False
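
The comment above notes that the torrent contents must be base64-encoded; a minimal sketch of preparing result['content'] (the file path is invented):

from base64 import b64encode

with open('/tmp/book.torrent', 'rb') as tor:
    raw = tor.read()       # raw bytes go into result['content']
encoded = b64encode(raw)   # what _add_torrent_file sends in "params"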
Example #11
def getFolder(hash):
    logger.debug('getFolder(%s)' % hash)

    qbclient = qbittorrentclient()

    # Get Active Directory from settings
    settings = qbclient._get_settings()
    active_dir = settings['temp_path']
    completed_dir = settings['save_path']

    if not active_dir:
        logger.error(
            'Could not get "Keep incomplete torrents in:" directory from QBitTorrent settings, please ensure it is set')
        return None

    # Get Torrent Folder Name
    torrent_folder = qbclient.get_savepath(hash)

    # If there's no folder yet then it's probably a magnet, try until folder is populated
    if torrent_folder == active_dir or not torrent_folder:
        tries = 1
        while (torrent_folder == active_dir or torrent_folder is None) and tries <= 10:
            tries += 1
            time.sleep(6)
            torrent_folder = qbclient.get_savepath(hash)

    if torrent_folder == active_dir or not torrent_folder:
        torrent_folder = qbclient.get_savepath(hash)
        return torrent_folder
    else:
        if 'windows' not in platform.system().lower():
            torrent_folder = torrent_folder.replace('\\', '/')
        return os.path.basename(os.path.normpath(torrent_folder))
Example #12
def export_CSV(search_dir=None, status="Wanted", library='eBook'):
    """ Write a csv file to the search_dir containing all books marked as "Wanted" """
    # noinspection PyBroadException
    try:
        if not search_dir:
            msg = "Alternate Directory not configured"
            logger.warn(msg)
            return msg
        elif not os.path.isdir(search_dir):
            msg = "Alternate Directory [%s] not found" % search_dir
            logger.warn(msg)
            return msg
        elif not os.access(search_dir, os.W_OK | os.X_OK):
            msg = "Alternate Directory [%s] not writable" % search_dir
            logger.warn(msg)
            return msg

        csvFile = os.path.join(search_dir, "%s %s - %s.csv" % (status, library, now().replace(':', '-')))

        myDB = database.DBConnection()

        cmd = 'SELECT BookID,AuthorName,BookName,BookIsbn,books.AuthorID FROM books,authors '
        if library == 'eBook':
            cmd += 'WHERE books.Status=? and books.AuthorID = authors.AuthorID'
        else:
            cmd += 'WHERE AudioStatus=? and books.AuthorID = authors.AuthorID'
        find_status = myDB.select(cmd, (status,))

        if not find_status:
            msg = "No %s marked as %s" % (library, status)
            logger.warn(msg)
        else:
            count = 0
            if PY2:
                fmode = 'wb'
            else:
                fmode = 'w'
            with open(csvFile, fmode) as csvfile:
                csvwrite = writer(csvfile, delimiter=',',
                                  quotechar='"', quoting=QUOTE_MINIMAL)

                # write headers, change AuthorName BookName BookIsbn to match import csv names
                csvwrite.writerow(['BookID', 'Author', 'Title', 'ISBN', 'AuthorID'])

                for resulted in find_status:
                    logger.debug("Exported CSV for %s %s" % (library, resulted['BookName']))
                    row = ([resulted['BookID'], resulted['AuthorName'], resulted['BookName'],
                            resulted['BookIsbn'], resulted['AuthorID']])
                    if PY2:
                        csvwrite.writerow([("%s" % s).encode(lazylibrarian.SYS_ENCODING) for s in row])
                    else:
                        csvwrite.writerow([("%s" % s) for s in row])
                    count += 1
            msg = "CSV exported %s %s%s to %s" % (count, library, plural(count), csvFile)
            logger.info(msg)
        return msg
    except Exception:
        msg = 'Unhandled exception in exportCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
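
A hypothetical call; the directory is invented, and the function returns a human-readable status message on both success and failure.

# msg holds e.g. "CSV exported ..." or one of the warning strings above.
msg = export_CSV(search_dir='/books/csv', status='Wanted', library='AudioBook')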
Example #13
def torrentAction(method, arguments):

    host = lazylibrarian.TRANSMISSION_HOST
    username = lazylibrarian.TRANSMISSION_USER
    password = lazylibrarian.TRANSMISSION_PASS

    if not host.startswith("http"):
        host = "http://" + host

    if host.endswith("/"):
        host = host[:-1]

    # Fix the URL. We assume that the user does not point to the RPC endpoint,
    # so add it if it is missing.
    parts = list(urlparse.urlparse(host))

    if not parts[0] in ("http", "https"):
        parts[0] = "http"

    if not parts[2].endswith("/rpc"):
        parts[2] += "/transmission/rpc"

    host = urlparse.urlunparse(parts)

    # Retrieve session id
    auth = (username, password) if username and password else None
    response = request.request_response(host, auth=auth, whitelist_status_code=[401, 409])

    if response is None:
        logger.error("Error gettings Transmission session ID")
        return

    # Parse response
    if response.status_code == 401:
        if auth:
            logger.error("Username and/or password not accepted by " "Transmission")
        else:
            logger.error("Transmission authorization required")

        return
    elif response.status_code == 409:
        session_id = response.headers["x-transmission-session-id"]

        if not session_id:
            logger.error("Expected a Session ID from Transmission")
            return
    else:
        # any other status leaves us without a session id for the next request
        logger.error("Unexpected response from Transmission: %s" % response.status_code)
        return

    # Prepare next request
    headers = {"x-transmission-session-id": session_id}
    data = {"method": method, "arguments": arguments}

    response = request.request_json(host, method="POST", data=json.dumps(data), headers=headers, auth=auth)

    # print response

    if not response:
        logger.error("Error sending torrent to Transmission")
        return

    return response
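
A hedged usage sketch: Transmission's RPC exposes a "torrent-add" method that accepts a "filename" argument (a URL or magnet link); the link below is invented.

response = torrentAction("torrent-add", {"filename": "magnet:?xt=urn:btih:..."})
if response and response.get("result") == "success":
    logger.debug("Torrent added")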
Example #14
    def find_author_id(self):
        # URL-encode the request; quote() escapes the author name and
        # urlencode() serialises the API parameter dict
        URL = 'http://www.goodreads.com/api/author_url/' + urllib.parse.quote(self.name) + '?' + \
              urllib.parse.urlencode(self.params)
        logger.info("Searching for author with name: %s" % self.name)
        # Parse the XML response
        try:
            sourcexml = ElementTree.parse(urllib.request.urlopen(URL, timeout=20))
        except (urllib.error.URLError, IOError, EOFError) as e:
            logger.error("Error fetching authorid: %s" % e)
            return []

        rootxml = sourcexml.getroot()
        resultxml = rootxml.getiterator('author')
        authorlist = []
        # Parse the XML tree for authors
        if not len(rootxml):
            logger.info('No authors found with name: %s' % self.name)
            return authorlist
        else:
            # Display the authors that were found
            for author in resultxml:
                authorid = author.attrib.get("id")
                logger.info('Found author: %s with GoodReads-id: %s' % (author[0].text, authorid))

            time.sleep(1)
            authorlist = self.get_author_info(authorid)
        return authorlist
Example #15
def sendNZB(nzb):

    addToTop = False
    nzbgetXMLrpc = "%(username)s:%(password)s@%(host)s/xmlrpc"

    if lazylibrarian.NZBGET_HOST is None:
        logger.error(u"No NZBget host found in configuration. Please configure it.")
        return False

    if lazylibrarian.NZBGET_HOST.startswith("https://"):
        nzbgetXMLrpc = "https://" + nzbgetXMLrpc
        lazylibrarian.NZBGET_HOST = lazylibrarian.NZBGET_HOST.replace("https://", "", 1)
    else:
        nzbgetXMLrpc = "http://" + nzbgetXMLrpc
        lazylibrarian.NZBGET_HOST = lazylibrarian.NZBGET_HOST.replace("http://", "", 1)

    url = nzbgetXMLrpc % {
        "host": lazylibrarian.NZBGET_HOST,
        "username": lazylibrarian.NZBGET_USER,
        "password": lazylibrarian.NZBGET_PASS,
    }

    nzbGetRPC = xmlrpclib.ServerProxy(url)
    try:
        if nzbGetRPC.writelog("INFO", "lazylibrarian connected to drop of %s any moment now." % (nzb.name + ".nzb")):
            logger.debug(u"Successfully connected to NZBget")
        else:
            logger.info(u"Successfully connected to NZBget, but unable to send a message" % (nzb.name + ".nzb"))

    except httplib.socket.error as e:
        logger.error(
            u"Please check your NZBget host and port (if it is running). NZBget is not responding to this combination"
        )
        return False
Example #16
    def action(self, query, args=None):
        with db_lock:

            if not query:
                return

            sqlResult = None
            attempt = 0

            while attempt < 5:

                try:
                    if not args:
                        # logger.debug(self.filename+": "+query)
                        sqlResult = self.connection.execute(query)
                    else:
                        # logger.debug(self.filename+": "+query+" with args "+str(args))
                        sqlResult = self.connection.execute(query, args)
                    self.connection.commit()
                    break

                except sqlite3.OperationalError as e:
                    if "unable to open database file" in e.message or "database is locked" in e.message:
                        logger.warn('Database Error: %s' % e)
                        attempt += 1
                        time.sleep(1)
                    else:
                        logger.error('Database error: %s' % e)
                        raise

                except sqlite3.DatabaseError as e:
                    logger.error('Fatal error executing %s :: %s' % (query, e))
                    raise
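
Writes elsewhere in these examples go through this retry-aware wrapper; a minimal sketch in the same parameterised style:

myDB = database.DBConnection()
myDB.action('UPDATE books SET Status=? WHERE BookID=?', ("Wanted", "12345"))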
Example #17
def search_tor_book(books=None, mags=None):
    if not lazylibrarian.USE_TOR:
        logger.warn('Torrent search is disabled')
        return
    # rename this thread
    threading.currentThread().name = "SEARCHTORBOOKS"
    myDB = database.DBConnection()
    searchlist = []
    #searchlist1 = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select('SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"')

        # Clear cache
        providercache = os.path.join(lazylibrarian.DATADIR, ".ProviderCache")
        if os.path.exists(providercache):
            try:
                shutil.rmtree(providercache)
                os.mkdir(providercache)
            except OSError as e:
                logger.error('Failed to clear cache: ' + str(e))

        # Clearing throttling timeouts
        t = SimpleCache.ThrottlingProcessor()
        t.lastRequestTime.clear()
Example #18
def NZBDownloadMethod(bookid=None, nzbprov=None, nzbtitle=None, nzburl=None):

    myDB = database.DBConnection()
    if (lazylibrarian.NZB_DOWNLOADER_SABNZBD and lazylibrarian.SAB_HOST) and not lazylibrarian.NZB_DOWNLOADER_BLACKHOLE:
        download = sabnzbd.SABnzbd(nzbtitle, nzburl)
    elif (
        lazylibrarian.NZB_DOWNLOADER_NZBGET and lazylibrarian.NZBGET_HOST
    ) and not lazylibrarian.NZB_DOWNLOADER_BLACKHOLE:
        headers = {"User-Agent": USER_AGENT}
        data = request.request_content(url=nzburl, headers=headers)
        nzb = classes.NZBDataSearchResult()
        nzb.extraInfo.append(data)
        nzb.name = nzbtitle
        nzb.url = nzburl
        download = nzbget.sendNZB(nzb)

    elif lazylibrarian.NZB_DOWNLOADER_BLACKHOLE:

        try:
            req = urllib2.Request(nzburl)
            if lazylibrarian.PROXY_HOST:
                req.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
            req.add_header("User-Agent", USER_AGENT)
            nzbfile = urllib2.urlopen(req, timeout=90).read()

        except (urllib2.URLError, socket.timeout) as e:
            logger.warn("Error fetching nzb from url: %s, %s" % (nzburl, e))
            nzbfile = False

        if nzbfile:

            nzbname = str(nzbtitle) + ".nzb"
            nzbpath = os.path.join(lazylibrarian.NZB_BLACKHOLEDIR, nzbname)

            try:
                with open(nzbpath, "w") as f:
                    f.write(nzbfile)
                logger.debug("NZB file saved to: " + nzbpath)
                download = True
                # try:
                #    os.chmod(nzbpath, 0777)
                # except Exception, e:
                #    logger.error("Could not chmod path: " + str(nzbpath))
            except Exception as e:
                logger.error("%s not writable, NZB not saved. Error: %s" % (nzbpath, e))
                download = False

    else:
        logger.warn("No NZB download method is enabled, check config.")
        return False

    if download:
        logger.debug("Nzbfile has been downloaded from " + str(nzburl))
        myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?', (bookid,))
        myDB.action('UPDATE wanted SET status="Snatched" WHERE NZBurl=?', (nzburl,))
        return True
    else:
        logger.error(u'Failed to download nzb @ <a href="%s">%s</a>' % (nzburl, nzbprov))
        myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (nzburl,))
        return False
Example #19
    def goodreads_oauth2():
        global request_token, consumer, token, client
        try:
            if request_token and 'oauth_token' in request_token and 'oauth_token_secret' in request_token:
                token = oauth.Token(request_token['oauth_token'], request_token['oauth_token_secret'])
            else:
                return "Unable to run oAuth2 - Have you run oAuth1?"
        except Exception as e:
            logger.error("Exception in oAuth2: %s %s" % (type(e).__name__, traceback.format_exc()))
            return "Unable to run oAuth2 - Have you run oAuth1?"

        access_token_url = '%s/oauth/access_token' % 'https://www.goodreads.com'

        client = oauth.Client(consumer, token)

        try:
            response, content = client.request(access_token_url, 'POST')
        except Exception as e:
            logger.error("Exception in oauth2 client.request: %s %s" % (type(e).__name__, traceback.format_exc()))
            return "Error in oauth2 client request: see error log"

        if not response['status'].startswith('2'):
            return 'Invalid response [%s] from %s' % (response['status'], access_token_url)

        access_token = dict(parse_qsl(content))
        if not PY2:
            access_token = {key.decode("utf-8"): access_token[key].decode("utf-8") for key in access_token}
        # print access_token
        lazylibrarian.CONFIG['GR_OAUTH_TOKEN'] = access_token['oauth_token']
        lazylibrarian.CONFIG['GR_OAUTH_SECRET'] = access_token['oauth_token_secret']
        lazylibrarian.config_write('API')
        return "Authorisation complete"
Example #20
    def _get_credentials(self, key):
        request_token = {}

        request_token["oauth_token"] = lazylibrarian.TWITTER_USERNAME
        request_token["oauth_token_secret"] = lazylibrarian.TWITTER_PASSWORD
        request_token["oauth_callback_confirmed"] = "true"

        token = oauth.Token(request_token["oauth_token"], request_token["oauth_token_secret"])
        token.set_verifier(key)

        logger.info("Generating and signing request for an access token using key " + key)

        signature_method_hmac_sha1 = oauth.SignatureMethod_HMAC_SHA1()  # @UnusedVariable
        oauth_consumer = oauth.Consumer(key=self.consumer_key, secret=self.consumer_secret)
        logger.info("oauth_consumer: " + str(oauth_consumer))
        oauth_client = oauth.Client(oauth_consumer, token)
        logger.info("oauth_client: " + str(oauth_client))
        resp, content = oauth_client.request(self.ACCESS_TOKEN_URL, method="POST", body="oauth_verifier=%s" % key)
        logger.info("resp, content: " + str(resp) + "," + str(content))

        access_token = dict(parse_qsl(content))
        logger.info("access_token: " + str(access_token))

        logger.info("resp[status] = " + str(resp["status"]))
        if resp["status"] != "200":
            logger.error("The request for a token with did not succeed: " + str(resp["status"]))
            return False
        else:
            logger.info("Your Twitter Access Token key: %s" % access_token["oauth_token"])
            logger.info("Access Token secret: %s" % access_token["oauth_token_secret"])
            lazylibrarian.TWITTER_USERNAME = access_token["oauth_token"]
            lazylibrarian.TWITTER_PASSWORD = access_token["oauth_token_secret"]
            return True
Example #21
    def get_author_info(self, authorid=None, authorname=None, refresh=False):

        URL = 'http://www.goodreads.com/author/show/' + authorid + '.xml?' + urllib.urlencode(self.params)
        author_dict = {}

        try:
            rootxml, in_cache = get_xml_request(URL)
        except Exception as e:
            logger.error("Error getting author info: %s" % e)
            return author_dict
        if rootxml is None:
            logger.debug("Error requesting author info")
            return author_dict

        resultxml = rootxml.find('author')

        if not len(resultxml):
            logger.warn('No author found with ID: ' + authorid)
        else:
            logger.debug("[%s] Processing info for authorID: %s" % (authorname, authorid))

            # PAB added authorname to author_dict - this holds the intact name preferred by GR
            author_dict = {
                'authorid': resultxml[0].text,
                'authorlink': resultxml.find('link').text,
                'authorimg': resultxml.find('image_url').text,
                'authorborn': resultxml.find('born_at').text,
                'authordeath': resultxml.find('died_at').text,
                'totalbooks': resultxml.find('works_count').text,
                'authorname': authorname
            }
        return author_dict
Example #22
    def create_shelf(self, shelf='lazylibrarian'):
        global consumer, client, token, user_id
        if not lazylibrarian.CONFIG['GR_API'] or not lazylibrarian.CONFIG['GR_SECRET'] or not \
                lazylibrarian.CONFIG['GR_OAUTH_TOKEN'] or not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
            logger.warn("Goodreads create shelf error: Please authorise first")
            return False, 'Unauthorised'

        consumer = oauth.Consumer(key=str(lazylibrarian.CONFIG['GR_API']),
                                  secret=str(lazylibrarian.CONFIG['GR_SECRET']))
        token = oauth.Token(lazylibrarian.CONFIG['GR_OAUTH_TOKEN'], lazylibrarian.CONFIG['GR_OAUTH_SECRET'])
        client = oauth.Client(consumer, token)
        user_id = self.getUserId()

        # could also pass [featured] [exclusive_flag] [sortable_flag] all default to False
        body = urlencode({'user_shelf[name]': shelf.lower()})
        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
        gr_api_sleep()

        try:
            response, content = client.request('%s/user_shelves.xml' % 'https://www.goodreads.com', 'POST',
                                               body, headers)
        except Exception as e:
            logger.error("Exception in client.request: %s %s" % (type(e).__name__, traceback.format_exc()))
            return False, "Error in client.request: see error log"

        if not response['status'].startswith('2'):
            msg = 'Failure status: %s' % response['status']
            return False, msg
        return True, ''
Example #23
    def get_author_books(self, authorid=None, authorname=None, refresh=False):

        api_hits = 0
        gr_lang_hits = 0
        lt_lang_hits = 0
        gb_lang_change = 0
        cache_hits = 0
        not_cached = 0
        URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(self.params)

        # Artist is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)

        try:
            # Cache our request
            request = urllib2.Request(URL)
            if lazylibrarian.PROXY_HOST:
                request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
            request.add_header('User-Agent', USER_AGENT)
            opener = urllib2.build_opener(SimpleCache.CacheHandler(".AuthorCache"), SimpleCache.ThrottlingProcessor(5))
            resp = opener.open(request)
            api_hits = api_hits + 1
            sourcexml = ElementTree.parse(resp)
        except Exception as e:
            logger.error("Error fetching author info: " + str(e))
Example #24
def processAutoAdd(src_path=None):
    # Called to copy the book files to an auto add directory for the likes of Calibre which can't do nested dirs
    autoadddir = lazylibrarian.IMP_AUTOADD
    logger.debug('AutoAdd - Attempt to copy from [%s] to [%s]' % (src_path, autoadddir))

    if not os.path.exists(autoadddir):
        logger.info('AutoAdd directory [%s] is missing or not set - cannot perform autoadd copy' % autoadddir)
        return False
    else:
        # Now try and copy all the book files into a single dir.

        try:
            names = os.listdir(src_path)
            # TODO : n files jpg, opf & book(s) should have same name
            # Caution - book may be pdf, mobi, epub or all 3.
            # for now simply copy all files, and let the autoadder sort it out

            # os.makedirs(autoadddir)
            #errors = []
            for name in names:
                srcname = os.path.join(src_path, name)
                dstname = os.path.join(autoadddir, name)
                logger.debug('AutoAdd Copying named file [%s] as copy [%s] to [%s]' % (name, srcname, dstname))
                try:
                    shutil.copy2(srcname, dstname)
                except (IOError, os.error) as why:
                    logger.error('AutoAdd - Failed to copy file because [%s] ' % str(why))

        except OSError as why:
            logger.error('AutoAdd - Failed because [%s]' % str(why))
            return False

    logger.info('Auto Add completed for [%s]' % dstname)
    return True
Example #25
def NewzNabPlus(book=None, host=None, api_key=None, searchType=None, searchMode=None):

    # logger.info('[NewzNabPlus] Searching term [%s] for author [%s] and title [%s] on host [%s] for a [%s] item' % (book['searchterm'], book['authorName'], book['bookName'], host, searchType))
    logger.debug('[NewzNabPlus] searchType [%s] with Host [%s] mode [%s] using api [%s] for item [%s]' % (searchType, host, searchMode, api_key, str(book)))

    results = []

    params = ReturnSearchTypeStructure(api_key, book, searchType, searchMode)

    if not str(host).startswith("http"):
        host = 'http://' + host

    URL = host + '/api?' + urllib.urlencode(params)

    try:
        request = urllib2.Request(URL)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
        request.add_header('User-Agent', common.USER_AGENT)
        opener = urllib2.build_opener(SimpleCache.CacheHandler(".ProviderCache"), SimpleCache.ThrottlingProcessor(5))
        resp = opener.open(request)

        try:
            data = ElementTree.parse(resp)
        except (urllib2.URLError, IOError, EOFError) as e:
            logger.warn('Error fetching data from %s: %s' % (host, e))
            data = None

    except Exception as e:
        logger.error("Error opening URL %s: %s" % (URL, e))
        data = None
Example #26
def getSeriesMembers(seriesID=None):
    """ Ask librarything or goodreads for details on all books in a series
        order, bookname, authorname, workid, authorid
        (workid and authorid are goodreads only)
        Return as a list of lists """
    results = []
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        params = {"format": "xml", "key": lazylibrarian.CONFIG['GR_API']}
        URL = 'https://www.goodreads.com/series/' + seriesID + '?' + urlencode(params)
        try:
            rootxml, in_cache = gr_xml_request(URL)
            if rootxml is None:
                logger.debug("Error requesting series %s" % seriesID)
                return []
        except Exception as e:
            logger.error("%s finding series %s: %s" % (type(e).__name__, seriesID, str(e)))
            return []

        works = rootxml.find('series/series_works')
        books = works.getiterator('series_work')
        if books is None:
            logger.warn('No books found for %s' % seriesID)
            return []
        for book in books:
            mydict = {}
            for mykey, location in [('order', 'user_position'),
                                    ('bookname', 'work/best_book/title'),
                                    ('authorname', 'work/best_book/author/name'),
                                    ('workid', 'work/id'),
                                    ('authorid', 'work/best_book/author/id')
                                    ]:
                if book.find(location) is not None:
                    mydict[mykey] = book.find(location).text
                else:
                    mydict[mykey] = ""
            results.append([mydict['order'], mydict['bookname'], mydict['authorname'],
                            mydict['workid'], mydict['authorid']])
    else:
        data = getBookWork(None, "SeriesPage", seriesID)
        if data:
            try:
                table = data.split('class="worksinseries"')[1].split('</table>')[0]
                rows = table.split('<tr')
                for row in rows:
                    if 'href=' in row:
                        booklink = row.split('href="')[1]
                        bookname = booklink.split('">')[1].split('<')[0]
                        # booklink = booklink.split('"')[0]
                        try:
                            authorlink = row.split('href="')[2]
                            authorname = authorlink.split('">')[1].split('<')[0]
                            # authorlink = authorlink.split('"')[0]
                            order = row.split('class="order">')[1].split('<')[0]
                            results.append([order, bookname, authorname, '', ''])
                        except IndexError:
                            logger.debug('Incomplete data in series table for series %s' % seriesID)
            except IndexError:
                if 'class="worksinseries"' in data:  # error parsing, or just no series data available?
                    logger.debug('Error in series table for series %s' % seriesID)
    return results
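
Usage sketch, relying on the documented list-of-lists return shape; the series id is invented.

for order, bookname, authorname, workid, authorid in getSeriesMembers('40321'):
    logger.debug('%s: %s by %s' % (order, bookname, authorname))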
Example #27
def processDestination(pp_path=None, dest_path=None, authorname=None, bookname=None):

    if not os.path.exists(dest_path):
        logger.debug('%s does not exist, so it\'s safe to create it' % dest_path)
        try:
            if lazylibrarian.DESTINATION_COPY:
                shutil.copytree(pp_path, dest_path)
                logger.info('Successfully copied %s to %s.' % (pp_path, dest_path))
            else:
                shutil.move(pp_path, dest_path)
                logger.info('Successfully moved %s to %s.' % (pp_path, dest_path))
            pp = True
            # try and rename the actual book file(s), leaving .jpg covers and .opf metadata alone
            for file2 in os.listdir(dest_path):
                extn = str(file2).split('.')[-1]
                logger.debug('file extension: ' + extn)
                if not file2.lower().endswith(('.jpg', '.opf')):
                    logger.debug('file: ' + str(file2))
                    os.rename(os.path.join(dest_path, file2),
                              os.path.join(dest_path, bookname + '.' + extn))
            try:
                os.chmod(dest_path, 0o777)
            except Exception:
                logger.info("Could not chmod path: " + str(dest_path))
        except OSError:
            logger.error('Could not create destination folder or rename the downloaded ebook. '
                         'Check permissions of: ' + lazylibrarian.DESTINATION_DIR)
            pp = False
    else:
        pp = False
    return pp
Example #28
    def find_author_id(self, refresh=False):
        author = self.name
        # Goodreads doesn't like initials followed by spaces,
        # eg "M L Hamilton", needs "M. L. Hamilton" or "M.L.Hamilton"
        # but DOES need spaces if not initials eg "Tom.Holt" fails, but "Tom Holt" works
        if author[1] == ' ':
            author = author.replace(' ', '.')
            author = author.replace('..', '.')
        URL = 'http://www.goodreads.com/api/author_url/' + urllib.quote(author) + '?' + urllib.urlencode(self.params)
        logger.debug("Searching for author with name: %s" % author)

        authorlist = []
        try:
            rootxml, in_cache = self.get_request(URL)
        except Exception as e:
            logger.error("Error finding authorid: " + str(e) + str(URL))
            return authorlist

        resultxml = rootxml.getiterator('author')

        if not len(resultxml):
            logger.warn('No authors found with name: %s' % author)
        else:
            # In spite of how this looks, goodreads only returns one result, even if there are multiple matches
            # we just have to hope we get the right one. eg search for "James Lovelock" returns "James E. Lovelock"
            # who only has one book listed under googlebooks, the rest are under "James Lovelock"
            # goodreads has all his books under "James E. Lovelock". Can't come up with a good solution yet.
            # For now we'll have to let the user handle this by selecting/adding the author manually
            for author in resultxml:
                authorid = author.attrib.get("id")
                authorname = author[0].text
                authorlist = self.get_author_info(authorid, authorname, refresh)
        return authorlist
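
A quick illustration of the initials fix-up described in the comment above:

name = "M L Hamilton"
if name[1] == ' ':
    name = name.replace(' ', '.')   # "M.L.Hamilton"
    name = name.replace('..', '.')  # collapses doubled dots from names like "M L. Hamilton"
assert name == "M.L.Hamilton"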
Example #29
    def _get_credentials(self, key):
        request_token = {}

        request_token['oauth_token'] = lazylibrarian.TWITTER_USERNAME
        request_token['oauth_token_secret'] = lazylibrarian.TWITTER_PASSWORD
        request_token['oauth_callback_confirmed'] = 'true'

        token = oauth.Token(request_token['oauth_token'], request_token['oauth_token_secret'])
        token.set_verifier(key)

        logger.info('Generating and signing request for an access token using key ' + key)

        signature_method_hmac_sha1 = oauth.SignatureMethod_HMAC_SHA1()  # @UnusedVariable
        oauth_consumer = oauth.Consumer(key=self.consumer_key, secret=self.consumer_secret)
        logger.info('oauth_consumer: ' + str(oauth_consumer))
        oauth_client = oauth.Client(oauth_consumer, token)
        logger.info('oauth_client: ' + str(oauth_client))
        resp, content = oauth_client.request(self.ACCESS_TOKEN_URL, method='POST', body='oauth_verifier=%s' % key)
        logger.info('resp, content: ' + str(resp) + ',' + str(content))

        access_token = dict(parse_qsl(content))
        logger.info('access_token: ' + str(access_token))

        logger.info('resp[status] = ' + str(resp['status']))
        if resp['status'] != '200':
            logger.error('The request for a token did not succeed: ' + str(resp['status']))
            return False
        else:
            logger.info('Your Twitter Access Token key: %s' % access_token['oauth_token'])
            logger.info('Access Token secret: %s' % access_token['oauth_token_secret'])
            lazylibrarian.TWITTER_USERNAME = access_token['oauth_token']
            lazylibrarian.TWITTER_PASSWORD = access_token['oauth_token_secret']
            return True
Example #30
    def get_shelf_list(self):
        global consumer, client, token, user_id
        if not lazylibrarian.CONFIG['GR_API'] or not lazylibrarian.CONFIG['GR_SECRET'] or not \
                lazylibrarian.CONFIG['GR_OAUTH_TOKEN'] or not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
            logger.warn("Goodreads get shelf error: Please authorise first")
            return []
        else:
            #
            # loop over each page of shelves
            #     loop over each shelf
            #         add shelf to list
            #
            consumer = oauth.Consumer(key=str(lazylibrarian.CONFIG['GR_API']),
                                      secret=str(lazylibrarian.CONFIG['GR_SECRET']))
            token = oauth.Token(lazylibrarian.CONFIG['GR_OAUTH_TOKEN'], lazylibrarian.CONFIG['GR_OAUTH_SECRET'])
            client = oauth.Client(consumer, token)
            user_id = self.getUserId()

            current_page = 0
            shelves = []
            page_shelves = 1
            while page_shelves:
                current_page = current_page + 1
                page_shelves = 0
                shelf_template = Template('${base}/shelf/list.xml?user_id=${user_id}&key=${key}&page=${page}')
                body = urlencode({})
                headers = {'Content-Type': 'application/x-www-form-urlencoded'}
                request_url = shelf_template.substitute(base='https://www.goodreads.com', user_id=user_id,
                                                        page=current_page, key=lazylibrarian.CONFIG['GR_API'])
                gr_api_sleep()
                try:
                    response, content = client.request(request_url, 'GET', body, headers)
                except Exception as e:
                    logger.error("Exception in client.request: %s %s" % (type(e).__name__, traceback.format_exc()))
                    return shelves

                if not response['status'].startswith('2'):
                    logger.error('Failure status: %s for page %s' % (response['status'], current_page))
                    if lazylibrarian.LOGLEVEL & lazylibrarian.log_grsync:
                        logger.debug(request_url)
                else:
                    xmldoc = xml.dom.minidom.parseString(content)

                    shelf_list = xmldoc.getElementsByTagName('shelves')[0]
                    for item in shelf_list.getElementsByTagName('user_shelf'):
                        shelf_name = item.getElementsByTagName('name')[0].firstChild.nodeValue
                        shelf_count = item.getElementsByTagName('book_count')[0].firstChild.nodeValue
                        shelf_exclusive = item.getElementsByTagName('exclusive_flag')[0].firstChild.nodeValue
                        shelves.append({'name': shelf_name, 'books': shelf_count, 'exclusive': shelf_exclusive})
                        page_shelves += 1

                        if lazylibrarian.LOGLEVEL & lazylibrarian.log_grsync:
                            logger.debug('Shelf %s : %s: Exclusive %s' % (shelf_name, shelf_count, shelf_exclusive))

                    if lazylibrarian.LOGLEVEL & lazylibrarian.log_grsync:
                        logger.debug('Found %s shelves on page %s' % (page_shelves, current_page))

            logger.debug('Found %s shelves on %s page%s' % (len(shelves), current_page - 1, plural(current_page - 1)))
            # print shelves
            return shelves
Example #31
def setTorrentLabel(result):
    logger.debug('Deluge: Setting label')
    label = lazylibrarian.CONFIG['DELUGE_LABEL']

    if not any(delugeweb_auth):
        _get_auth()

    if ' ' in label:
        logger.error(
            'Deluge: Invalid label. Label can\'t contain spaces - replacing with underscores'
        )
        label = label.replace(' ', '_')
    if label:
        # check if label already exists and create it if not
        post_data = json.dumps({
            "method": 'label.get_labels',
            "params": [],
            "id": 3
        })
        if PY2:
            post_data = post_data.encode(lazylibrarian.SYS_ENCODING)
        response = requests.post(delugeweb_url,
                                 data=post_data,
                                 cookies=delugeweb_auth,
                                 headers=headers)
        labels = json.loads(response.text)['result']

        if labels:
            if label not in labels:
                try:
                    logger.debug(
                        'Deluge: %s label doesn\'t exist in Deluge, let\'s add it'
                        % label)
                    post_data = json.dumps({
                        "method": 'label.add',
                        "params": [label],
                        "id": 4
                    })
                    if PY2:
                        post_data = post_data.encode(
                            lazylibrarian.SYS_ENCODING)
                    _ = requests.post(delugeweb_url,
                                      data=post_data,
                                      cookies=delugeweb_auth,
                                      headers=headers)
                    logger.debug('Deluge: %s label added to Deluge' % label)
                except Exception as err:
                    logger.error('Deluge %s: Setting label failed: %s' %
                                 (type(err).__name__, str(err)))
                    formatted_lines = traceback.format_exc().splitlines()
                    logger.error('; '.join(formatted_lines))

            # add label to torrent
            post_data = json.dumps({
                "method": 'label.set_torrent',
                "params": [result['hash'], label],
                "id": 5
            })
            if PY2:
                post_data = post_data.encode(lazylibrarian.SYS_ENCODING)
            response = requests.post(delugeweb_url,
                                     data=post_data,
                                     cookies=delugeweb_auth,
                                     headers=headers)
            logger.debug('Deluge: %s label added to torrent' % label)
            return not json.loads(response.text)['error']
        else:
            logger.debug('Deluge: Label plugin not detected')
            return False
    else:
        logger.debug('Deluge: No Label set')
        return True
Example #32
def downloadResult(match, book):
    """ match:  best result from search providers
        book:   book we are downloading
        return: True if already snatched, False if failed to snatch, >True if we snatched it
    """
    try:
        myDB = database.DBConnection()

        resultTitle = match[1]
        newValueDict = match[2]
        controlValueDict = match[3]

        if book['library'] == 'AudioBook':
            auxinfo = 'AudioBook'
        else:  # elif book['library'] == 'eBook':
            auxinfo = 'eBook'

        if auxinfo == 'eBook':
            snatchedbooks = myDB.match(
                'SELECT BookID from books WHERE BookID=? and Status="Snatched"',
                (newValueDict["BookID"], ))
        else:
            snatchedbooks = myDB.match(
                'SELECT BookID from books WHERE BookID=? and AudioStatus="Snatched"',
                (newValueDict["BookID"], ))

        if snatchedbooks:
            logger.debug('%s %s already marked snatched' %
                         (book['authorName'], book['bookName']))
            return True  # someone else already found it
        else:
            myDB.upsert("wanted", newValueDict, controlValueDict)
            if 'libgen' in newValueDict[
                    "NZBprov"]:  # for libgen we use direct download links
                snatch = DirectDownloadMethod(newValueDict["BookID"],
                                              newValueDict["NZBtitle"],
                                              controlValueDict["NZBurl"],
                                              resultTitle, auxinfo)
            elif newValueDict['NZBmode'] in ["torznab", "torrent", "magnet"]:
                snatch = TORDownloadMethod(newValueDict["BookID"],
                                           newValueDict["NZBtitle"],
                                           controlValueDict["NZBurl"], auxinfo)
            elif newValueDict['NZBmode'] == 'nzb':
                snatch = NZBDownloadMethod(newValueDict["BookID"],
                                           newValueDict["NZBtitle"],
                                           controlValueDict["NZBurl"], auxinfo)
            else:
                logger.error(
                    'Unhandled NZBmode [%s] for %s' %
                    (newValueDict['NZBmode'], controlValueDict["NZBurl"]))
                snatch = False

            if snatch:
                logger.info('Downloading %s %s from %s' %
                            (auxinfo, newValueDict["NZBtitle"],
                             newValueDict["NZBprov"]))
                notify_snatch("%s %s from %s at %s" %
                              (auxinfo, newValueDict["NZBtitle"],
                               newValueDict["NZBprov"], now()))
                custom_notify_snatch(newValueDict["BookID"])
                # at this point we could add NZBprov to the blocklist with a short timeout, a second or two?
                # This would implement a round-robin search system. Blocklist with an incremental counter.
                # If number of active providers == number blocklisted, so no unblocked providers are left,
                # either sleep for a while, or unblock the one with the lowest counter.
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
        return False
    except Exception:
        logger.error('Unhandled exception in downloadResult: %s' %
                     traceback.format_exc())
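
A sketch of acting on the three-way return described in the docstring: a falsy result (False, or None from the exception path) means the snatch failed, True means the book was already snatched, and True + True (2) means this call snatched it; match and book are assumed to come from a prior provider search.

outcome = downloadResult(match, book)
if not outcome:
    logger.debug('Snatch failed')
elif outcome > True:
    logger.debug('Newly snatched')
else:
    logger.debug('Already snatched')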
Example #33
def getWorkSeries(bookID=None):
    """ Return the series names and numbers in series for the given id as a list of tuples
        For goodreads the id is a WorkID, for librarything it's a BookID """
    myDB = database.DBConnection()
    serieslist = []
    if not bookID:
        logger.error("getWorkSeries - No bookID")
        return serieslist

    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        URL = "https://www.goodreads.com/work/"
        seriesurl = URL + bookID + "/series?format=xml&key=" + lazylibrarian.CONFIG[
            'GR_API']

        rootxml, in_cache = gr_xml_request(seriesurl)
        if rootxml is None:
            logger.warn('Error getting XML for %s' % seriesurl)
        else:
            resultxml = rootxml.getiterator('series_work')
            for item in resultxml:
                try:
                    seriesname = item.find('./series/title').text
                    seriesname = seriesname.strip('\n').strip()
                    seriesid = item.find('./series/id').text
                    seriesnum = item.find('./user_position').text
                except (KeyError, AttributeError):
                    continue
                if seriesname and seriesid:
                    seriesname = cleanName(unaccented(seriesname), '&/')
                    seriesnum = cleanName(unaccented(seriesnum))
                    serieslist.append((seriesid, seriesnum, seriesname))
                    match = myDB.match(
                        'SELECT SeriesID from series WHERE SeriesName=?',
                        (seriesname, ))
                    if not match:
                        myDB.action(
                            'INSERT INTO series VALUES (?, ?, ?, ?, ?)',
                            (seriesid, seriesname, "Active", 0, 0))
                    elif match['SeriesID'] != seriesid:
                        myDB.action(
                            'UPDATE series SET SeriesID=? WHERE SeriesName=?',
                            (seriesid, seriesname))
    else:
        work = getBookWork(bookID, "Series")
        if work:
            try:
                slist = work.split('<h3><b>Series:')[1].split(
                    '</h3>')[0].split('<a href="/series/')
                for item in slist[1:]:
                    try:
                        series = item.split('">')[1].split('</a>')[0]
                        if series and '(' in series:
                            seriesnum = series.split('(')[1].split(
                                ')')[0].strip()
                            series = series.split(' (')[0].strip()
                        else:
                            seriesnum = ''
                            series = series.strip()
                        seriesname = cleanName(unaccented(series), '&/')
                        seriesnum = cleanName(unaccented(seriesnum))
                        serieslist.append(('', seriesnum, seriesname))
                    except IndexError:
                        pass
            except IndexError:
                pass

    return serieslist
Exemple #34
0
def getSeriesMembers(seriesID=None):
    """ Ask librarything or goodreads for details on all books in a series
        order, bookname, authorname, workid, authorid
        (workid and authorid are goodreads only)
        Return as a list of lists """
    results = []
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        params = {"format": "xml", "key": lazylibrarian.CONFIG['GR_API']}
        URL = 'https://www.goodreads.com/series/' + seriesID + '?' + urlencode(
            params)
        try:
            rootxml, in_cache = gr_xml_request(URL)
            if rootxml is None:
                logger.debug("Error requesting series %s" % seriesID)
                return []
        except Exception as e:
            logger.error("%s finding series %s: %s" %
                         (type(e).__name__, seriesID, str(e)))
            return []

        works = rootxml.find('series/series_works')
        if works is None:
            logger.warn('No books found for %s' % seriesID)
            return []
        books = works.getiterator('series_work')
        for book in books:
            mydict = {}
            for mykey, location in [('order', 'user_position'),
                                    ('bookname', 'work/best_book/title'),
                                    ('authorname',
                                     'work/best_book/author/name'),
                                    ('workid', 'work/id'),
                                    ('authorid', 'work/best_book/author/id')]:
                if book.find(location) is not None:
                    mydict[mykey] = book.find(location).text
                else:
                    mydict[mykey] = ""
            results.append([
                mydict['order'], mydict['bookname'], mydict['authorname'],
                mydict['workid'], mydict['authorid']
            ])
    else:
        data = getBookWork(None, "SeriesPage", seriesID)
        if data:
            try:
                table = data.split('class="worksinseries"')[1].split(
                    '</table>')[0]
                rows = table.split('<tr')
                for row in rows:
                    if 'href=' in row:
                        booklink = row.split('href="')[1]
                        bookname = booklink.split('">')[1].split('<')[0]
                        # booklink = booklink.split('"')[0]
                        try:
                            authorlink = row.split('href="')[2]
                            authorname = authorlink.split('">')[1].split(
                                '<')[0]
                            # authorlink = authorlink.split('"')[0]
                            order = row.split('class="order">')[1].split(
                                '<')[0]
                            results.append(
                                [order, bookname, authorname, '', ''])
                        except IndexError:
                            logger.debug(
                                'Incomplete data in series table for series %s'
                                % seriesID)
            except IndexError:
                if 'class="worksinseries"' in data:  # error parsing, or just no series data available?
                    logger.debug('Error in series table for series %s' %
                                 seriesID)
    return results
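
A hedged usage sketch for getSeriesMembers; the seriesID is a placeholder and the unpacking matches the list-of-lists shape documented above:

# hypothetical usage - each row is [order, bookname, authorname, workid, authorid]
for order, bookname, authorname, workid, authorid in getSeriesMembers('49075'):
    print("%s. %s by %s" % (order, bookname, authorname))
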
Exemple #35
0
def getSeriesAuthors(seriesid):
    """ Get a list of authors contributing to a series
        and import those authors (and their books) into the database
        Return how many authors you added """
    myDB = database.DBConnection()
    result = myDB.match("select count(*) as counter from authors")
    start = int(result['counter'])
    result = myDB.match('select SeriesName from series where SeriesID=?',
                        (seriesid, ))
    seriesname = result['SeriesName']
    members = getSeriesMembers(seriesid)
    dic = {
        u'\u2018': "",
        u'\u2019': "",
        u'\u201c': '',
        u'\u201d': '',
        "'": "",
        '"': ''
    }

    if members:
        for member in members:
            # order = member[0]
            bookname = member[1]
            authorname = member[2]
            # workid = member[3]
            authorid = member[4]
            bookname = replace_all(bookname, dic)
            if not authorid:
                # goodreads gives us all the info we need, librarything/google doesn't
                base_url = 'https://www.goodreads.com/search.xml?q='
                params = {"key": lazylibrarian.CONFIG['GR_API']}
                searchname = bookname + ' ' + authorname
                searchname = cleanName(unaccented(searchname))
                if PY2:
                    searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                searchterm = quote_plus(searchname)
                set_url = base_url + searchterm + '&' + urlencode(params)
                try:
                    rootxml, in_cache = gr_xml_request(set_url)
                    if rootxml is None:
                        logger.warn('Error getting XML for %s' % searchname)
                    else:
                        resultxml = rootxml.getiterator('work')
                        for item in resultxml:
                            try:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                            except (KeyError, AttributeError):
                                booktitle = ""
                            book_fuzz = fuzz.token_set_ratio(
                                booktitle, bookname)
                            if book_fuzz >= 98:
                                try:
                                    author = item.find(
                                        './best_book/author/name').text
                                except (KeyError, AttributeError):
                                    author = ""
                                # try:
                                #     workid = item.find('./work/id').text
                                # except (KeyError, AttributeError):
                                #     workid = ""
                                try:
                                    authorid = item.find(
                                        './best_book/author/id').text
                                except (KeyError, AttributeError):
                                    authorid = ""
                                logger.debug(
                                    "Author Search found %s %s, authorid %s" %
                                    (author, booktitle, authorid))
                                break
                    if not authorid:  # try again with title only
                        searchname = cleanName(unaccented(bookname))
                        if PY2:
                            searchname = searchname.encode(
                                lazylibrarian.SYS_ENCODING)
                        searchterm = quote_plus(searchname)
                        set_url = base_url + searchterm + '&' + urlencode(
                            params)
                        rootxml, in_cache = gr_xml_request(set_url)
                        if rootxml is None:
                            logger.warn('Error getting XML for %s' %
                                        searchname)
                        else:
                            resultxml = rootxml.getiterator('work')
                            for item in resultxml:
                                try:
                                    booktitle = item.find('./best_book/title').text
                                    booktitle = replace_all(booktitle, dic)
                                except (KeyError, AttributeError):
                                    booktitle = ""
                                book_fuzz = fuzz.token_set_ratio(
                                    booktitle, bookname)
                                if book_fuzz >= 98:
                                    try:
                                        author = item.find(
                                            './best_book/author/name').text
                                    except (KeyError, AttributeError):
                                        author = ""
                                    # try:
                                    #     workid = item.find('./work/id').text
                                    # except (KeyError, AttributeError):
                                    #     workid = ""
                                    try:
                                        authorid = item.find(
                                            './best_book/author/id').text
                                    except (KeyError, AttributeError):
                                        authorid = ""
                                    logger.debug(
                                        "Title Search found %s %s, authorid %s"
                                        % (author, booktitle, authorid))
                                    break
                    if not authorid:
                        logger.warn("GoodReads doesn't know about %s %s" %
                                    (authorname, bookname))
                except Exception as e:
                    logger.error("Error finding goodreads results: %s %s" %
                                 (type(e).__name__, str(e)))

            if authorid:
                lazylibrarian.importer.addAuthorToDB(refresh=False,
                                                     authorid=authorid)

    result = myDB.match("select count(*) as counter from authors")
    finish = int(result['counter'])
    newauth = finish - start
    logger.info("Added %s new author%s for %s" %
                (newauth, plural(newauth), seriesname))
    return newauth
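
The author lookup above only accepts a search result once fuzz.token_set_ratio scores 98 or better against the wanted title. A standalone sketch of that matching step, assuming the fuzzywuzzy package is importable (LazyLibrarian carries its own copy):

from fuzzywuzzy import fuzz

# token_set_ratio ignores word order and duplicated tokens,
# so a reordered title still scores a perfect 100
print(fuzz.token_set_ratio("The Colour of Magic", "Colour of Magic, The"))  # 100
print(fuzz.token_set_ratio("The Colour of Magic", "The Light Fantastic"))   # well below the 98 cutoff
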
Exemple #36
0
def getBookAuthors(bookid):
    """ Get a list of authors contributing to a book from the goodreads bookpage or the librarything bookwork file """
    authorlist = []
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        params = {"key": lazylibrarian.CONFIG['GR_API']}
        URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urlencode(
            params)
        try:
            rootxml, in_cache = gr_xml_request(URL)
            if rootxml is None:
                logger.debug("Error requesting book %s" % bookid)
                return []
        except Exception as e:
            logger.error("%s finding book %s: %s" %
                         (type(e).__name__, bookid, str(e)))
            return []

        book = rootxml.find('book')
        authors = book.find('authors') if book is not None else None
        if authors is None:
            logger.warn('No authors found for %s' % bookid)
            return []
        anames = authors.getiterator('author')
        for aname in anames:
            author = {}
            if aname.find('id') is not None:
                author['id'] = aname.find('id').text
            if aname.find('name') is not None:
                author['name'] = aname.find('name').text
            if aname.find('role') is not None:
                role = aname.find('role').text
                if not role:
                    role = ''
                author['role'] = role
            if author:
                authorlist.append(author)
    else:
        data = getBookWork(bookid, "Authors")
        if data:
            try:
                data = data.split('otherauthors_container')[1].split(
                    '</table>')[0].split('<table')[1].split('>', 1)[1]
            except IndexError:
                data = ''

        if data and 'Work?' in data:
            try:
                rows = data.split('<tr')
                for row in rows[2:]:
                    author = {}
                    col = row.split('<td>')
                    author['name'] = col[1].split('">')[1].split('<')[0]
                    author['role'] = col[2].split('<')[0]
                    author['type'] = col[3].split('<')[0]
                    author['work'] = col[4].split('<')[0]
                    author['status'] = col[5].split('<')[0]
                    authorlist.append(author)
            except IndexError:
                logger.debug('Error parsing authorlist for %s' % bookid)
    return authorlist
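
Usage sketch; the bookid is a placeholder. In GoodReads mode each dict can carry 'id', 'name' and 'role' keys; the LibraryThing branch fills 'name', 'role', 'type', 'work' and 'status':

# hypothetical usage
for author in getBookAuthors('12345'):
    print("%s (%s)" % (author.get('name', ''), author.get('role', '')))
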
Exemple #37
0
def getBookWork(bookID=None, reason=None, seriesID=None):
    """ return the contents of the LibraryThing workpage for the given bookid, or seriespage if seriesID given
        preferably from the cache. If not already cached cache the results
        Return None if no workpage/seriespage available """
    global ALLOW_NEW, LAST_NEW
    if not bookID and not seriesID:
        logger.error("getBookWork - No bookID or seriesID")
        return None

    if not reason:
        reason = ""

    myDB = database.DBConnection()
    if bookID:
        cmd = 'select BookName,AuthorName,BookISBN from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        cacheLocation = "WorkCache"
        item = myDB.match(cmd, (bookID, ))
    else:
        cmd = 'select SeriesName from series where SeriesID=?'
        cacheLocation = "SeriesCache"
        item = myDB.match(cmd, (seriesID, ))
    if item:
        cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation)
        if bookID:
            workfile = os.path.join(cacheLocation, str(bookID) + '.html')
        else:
            workfile = os.path.join(cacheLocation, str(seriesID) + '.html')

        # does the workpage need to expire? For now only expire if it was an error page
        # (small file) or a series page as librarything might get better info over time, more series members etc
        if os.path.isfile(workfile):
            if seriesID or os.path.getsize(workfile) < 500:
                cache_modified_time = os.stat(workfile).st_mtime
                time_now = time.time()
                expiry = lazylibrarian.CONFIG[
                    'CACHE_AGE'] * 24 * 60 * 60  # expire cache after this many seconds
                if cache_modified_time < time_now - expiry:
                    # Cache entry is too old, delete it
                    if ALLOW_NEW:
                        os.remove(workfile)

        if os.path.isfile(workfile):
            # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
            lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
            if bookID:
                if reason:
                    logger.debug(
                        "getBookWork: Returning Cached entry for %s %s" %
                        (bookID, reason))
                else:
                    logger.debug(
                        "getBookWork: Returning Cached workpage for %s" %
                        bookID)
            else:
                logger.debug(
                    "getBookWork: Returning Cached seriespage for %s" %
                    item['seriesName'])

            if PY2:
                with open(workfile, "r") as cachefile:
                    source = cachefile.read()
            else:
                # noinspection PyArgumentList
                with open(workfile, "r",
                          errors="backslashreplace") as cachefile:
                    source = cachefile.read()
            return source
        else:
            lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
            if not ALLOW_NEW:
                # don't nag. Show message no more than every 12 hrs
                timenow = int(time.time())
                if check_int(LAST_NEW, 0) + 43200 < timenow:
                    logger.warn("New WhatWork is disabled")
                    LAST_NEW = timenow
                return None
            if bookID:
                title = safe_unicode(item['BookName'])
                author = safe_unicode(item['AuthorName'])
                if PY2:
                    title = title.encode(lazylibrarian.SYS_ENCODING)
                    author = author.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/api/whatwork.php?author=%s&title=%s' % \
                      (quote_plus(author), quote_plus(title))
            else:
                seriesname = safe_unicode(item['seriesName'])
                if PY2:
                    seriesname = seriesname.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/series/%s' % quote_plus(
                    seriesname)

            librarything_wait()
            result, success = fetchURL(URL)
            if bookID and success:
                # noinspection PyBroadException
                try:
                    workpage = result.split('<link>')[1].split('</link>')[0]
                    librarything_wait()
                    result, success = fetchURL(workpage)
                except Exception:
                    try:
                        errmsg = result.split('<error>')[1].split(
                            '</error>')[0]
                    except IndexError:
                        errmsg = "Unknown Error"
                    # if no workpage link, try isbn instead
                    if item['BookISBN']:
                        URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + item[
                            'BookISBN']
                        librarything_wait()
                        result, success = fetchURL(URL)
                        if success:
                            # noinspection PyBroadException
                            try:
                                workpage = result.split('<link>')[1].split(
                                    '</link>')[0]
                                librarything_wait()
                                result, success = fetchURL(workpage)
                            except Exception:
                                # no workpage link found by isbn
                                try:
                                    errmsg = result.split('<error>')[1].split(
                                        '</error>')[0]
                                except IndexError:
                                    errmsg = "Unknown Error"
                                # still cache if whatwork returned a result without a link, so we don't keep retrying
                                logger.debug("Librarything: [%s] for ISBN %s" %
                                             (errmsg, item['BookISBN']))
                                success = True
                    else:
                        # still cache if whatwork returned a result without a link, so we don't keep retrying
                        msg = "Librarything: [" + errmsg + "] for "
                        logger.debug(msg + item['AuthorName'] + ' ' +
                                     item['BookName'])
                        success = True
            if success:
                with open(workfile, "w") as cachefile:
                    cachefile.write(result)
                    if bookID:
                        logger.debug("getBookWork: Caching workpage for %s" %
                                     workfile)
                    else:
                        logger.debug(
                            "getBookWork: Caching series page for %s" %
                            workfile)
                    # return None if we got an error page back
                    if '</request><error>' in result:
                        return None
                return result
            else:
                if bookID:
                    logger.debug(
                        "getBookWork: Unable to cache workpage, got %s" %
                        result)
                else:
                    logger.debug(
                        "getBookWork: Unable to cache series page, got %s" %
                        result)
            return None
    else:
        if bookID:
            logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        else:
            logger.debug('Get Book Work - Invalid seriesID [%s]' % seriesID)
        return None
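
A minimal sketch of how the callers above (getWorkSeries, getBookAuthors, getBookCover) consume the cached page; the bookID is a placeholder:

# hypothetical usage - returns the raw LibraryThing HTML, or None
work = getBookWork('12345', reason="Series")
if work and '<h3><b>Series:' in work:
    print("workpage contains a series section")
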
Exemple #38
0
def request_response(url,
                     method="get",
                     auto_raise=True,
                     whitelist_status_code=None,
                     **kwargs):
    """
    Convenience wrapper for the `requests` request methods, which captures
    exceptions and logs them. On success, the Response object is returned. In
    case of an exception, None is returned.

    Additionally, there is support for rate limiting. To use this feature,
    supply a tuple of (lock, request_limit). The lock is used to make sure no
    other request with the same lock is executed. The request limit is the
    minimal time between two requests (so 1/request_limit is the number of
    requests per second).
    """

    # Convert whitelist_status_code to a list if needed
    if whitelist_status_code and not isinstance(whitelist_status_code, list):
        whitelist_status_code = [whitelist_status_code]

    # Disable verification of SSL certificates if requested. Note: this could
    # pose a security issue!
    # kwargs["verify"] = bool(lazylibrarian.VERIFY_SSL_CERT)

    # Map method to the request.XXX method. This is a simple hack, but it
    # allows requests to apply more magic per method. See lib/requests/api.py.
    request_method = getattr(requests, method.lower())

    try:
        # Request URL and wait for response
        # with lock:
        logger.debug("Requesting URL via %s method: %s" %
                     (method.upper(), url))
        response = request_method(url, **kwargs)

        # If the status code is not OK, raise an exception, unless the
        # status code is whitelisted.
        if whitelist_status_code and auto_raise:
            if response.status_code not in whitelist_status_code:
                try:
                    response.raise_for_status()
                except requests.HTTPError:
                    logger.debug("Response status code %d is not "
                                 "whitelisted, raised exception" %
                                 response.status_code)
                    raise
        elif auto_raise:
            response.raise_for_status()

        return response
    except requests.exceptions.SSLError as e:
        logger.error("SSL error raised during connection: %s" % e)
    except requests.ConnectionError:
        logger.error("Unable to connect to remote host. Check if the remote "
                     "host is up and running.")
    except requests.Timeout:
        logger.error(
            "Request timed out. The remote host did not respond timely.")
    except requests.HTTPError as e:
        if e.response is not None:
            if e.response.status_code >= 500:
                cause = "remote server error"
            elif e.response.status_code >= 400:
                cause = "local client error"
            else:
                # I don't think we will end up here, but for completeness
                cause = "unknown"

            logger.error("Request raise HTTP error with status code %d (%s)." %
                         (e.response.status_code, cause))

            # Debug response
            server_message(e.response)
        else:
            logger.error("Request raised HTTP error.")
    except requests.RequestException as e:
        logger.error("Request raised exception: %s" % e)
Exemple #39
0
                    url = item['links'][1]['href']
                    size = int(item['links'][1]['length'])
                    
                    if minimumseeders < int(seeders):
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': "KAT",
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                        })

                    logger.info('Found %s. Size: %s' % (title, size))
                
                except Exception, e:
                    logger.error(u"An unknown error occurred in the KAT parser: %s" % e)

    return results

def UsenetCrawler(book=None, searchType=None):
    # thin wrapper: delegates to the generic NewzNabPlus search
    results = NewzNabPlus(book, lazylibrarian.USENETCRAWLER_HOST,
                          lazylibrarian.USENETCRAWLER_API, searchType)
    return results

def search_book(books=None, library=None):
    """
    books is a list of new books to add, or None for backlog search
    library is "eBook" or "AudioBook" or None to search all book types
    """
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if books is None:
                threading.currentThread().name = "SEARCHALLBOOKS"
            else:
                threading.currentThread().name = "SEARCHBOOKS"

        myDB = database.DBConnection()
        searchlist = []
        searchbooks = []

        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded, books.Status, AudioStatus '
            cmd += 'from books,authors WHERE (books.Status="Wanted" OR AudioStatus="Wanted") '
            cmd += 'and books.AuthorID = authors.AuthorID order by BookAdded desc'
            results = myDB.select(cmd)
            for terms in results:
                searchbooks.append(terms)
        else:
            # The user has added a new book
            for book in books:
                if book['bookid'] not in ['booklang', 'library', 'ignored']:
                    cmd = 'SELECT BookID, AuthorName, BookName, BookSub, books.Status, AudioStatus '
                    cmd += 'from books,authors WHERE BookID=? AND books.AuthorID = authors.AuthorID'
                    results = myDB.select(cmd, (book['bookid'], ))
                    if results:
                        for terms in results:
                            searchbooks.append(terms)
                    else:
                        logger.debug(
                            "SearchBooks - BookID %s is not in the database" %
                            book['bookid'])

        if len(searchbooks) == 0:
            logger.debug("SearchBooks - No books to search for")
            return

        nprov = (lazylibrarian.USE_NZB() + lazylibrarian.USE_TOR() +
                 lazylibrarian.USE_RSS() + lazylibrarian.USE_DIRECT())

        if nprov == 0:
            logger.debug("SearchBooks - No providers to search")
            return

        modelist = []
        if lazylibrarian.USE_NZB():
            modelist.append('nzb')
        if lazylibrarian.USE_TOR():
            modelist.append('tor')
        if lazylibrarian.USE_DIRECT():
            modelist.append('direct')
        if lazylibrarian.USE_RSS():
            modelist.append('rss')

        logger.info('Searching %s provider%s %s for %i book%s' %
                    (nprov, plural(nprov), str(modelist), len(searchbooks),
                     plural(len(searchbooks))))

        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName']
            if searchbook['BookSub']:
                searchterm = searchterm + ': ' + searchbook['BookSub']

            if library is None or library == 'eBook':
                if searchbook['Status'] == "Wanted":
                    searchlist.append({
                        "bookid": searchbook['BookID'],
                        "bookName": searchbook['BookName'],
                        "bookSub": searchbook['BookSub'],
                        "authorName": searchbook['AuthorName'],
                        "library": "eBook",
                        "searchterm": searchterm
                    })

            if library is None or library == 'AudioBook':
                if searchbook['AudioStatus'] == "Wanted":
                    searchlist.append({
                        "bookid": searchbook['BookID'],
                        "bookName": searchbook['BookName'],
                        "bookSub": searchbook['BookSub'],
                        "authorName": searchbook['AuthorName'],
                        "library": "AudioBook",
                        "searchterm": searchterm
                    })

        # only get rss results once per run, as they are not search specific
        rss_resultlist = None
        if 'rss' in modelist:
            rss_resultlist, nprov = IterateOverRSSSites()
            if not nprov:
                modelist.remove('rss')

        book_count = 0
        for book in searchlist:
            matches = []
            for mode in modelist:
                # first attempt, try author/title in category "book"
                if book['library'] == 'AudioBook':
                    searchtype = 'audio'
                else:
                    searchtype = 'book'

                resultlist = None
                if mode == 'nzb':
                    resultlist, nprov = IterateOverNewzNabSites(
                        book, searchtype)
                    if not nprov:
                        logger.debug("No active nzb providers found")
                        if 'nzb' in modelist:
                            modelist.remove('nzb')
                elif mode == 'tor':
                    resultlist, nprov = IterateOverTorrentSites(
                        book, searchtype)
                    if not nprov:
                        logger.debug("No active tor providers found")
                        if 'tor' in modelist:
                            modelist.remove('tor')
                elif mode == 'direct':
                    resultlist, nprov = IterateOverDirectSites(
                        book, searchtype)
                    if not nprov:
                        logger.debug("No active direct providers found")
                        if 'direct' in modelist:
                            modelist.remove('direct')
                elif mode == 'rss':
                    if rss_resultlist:
                        resultlist = rss_resultlist
                    else:
                        logger.debug("No active rss providers found")
                        if 'rss' in modelist:
                            modelist.remove('rss')

                if resultlist:
                    match = findBestResult(resultlist, book, searchtype, mode)
                else:
                    match = None

                # if you can't find the book, try author/title without any "(extended details, series etc)"
                if not goodEnough(match) and '(' in book['bookName']:
                    searchtype = 'short' + searchtype
                    if mode == 'nzb':
                        resultlist, nprov = IterateOverNewzNabSites(
                            book, searchtype)
                        if not nprov:
                            logger.debug("No active nzb providers found")
                            if 'nzb' in modelist:
                                modelist.remove('nzb')
                    elif mode == 'tor':
                        resultlist, nprov = IterateOverTorrentSites(
                            book, searchtype)
                        if not nprov:
                            logger.debug("No active tor providers found")
                            if 'tor' in modelist:
                                modelist.remove('tor')
                    elif mode == 'direct':
                        resultlist, nprov = IterateOverDirectSites(
                            book, searchtype)
                        if not nprov:
                            logger.debug("No active direct providers found")
                            if 'direct' in modelist:
                                modelist.remove('direct')
                    elif mode == 'rss':
                        resultlist = rss_resultlist

                    if resultlist:
                        match = findBestResult(resultlist, book, searchtype,
                                               mode)
                    else:
                        match = None

                # if you can't find the book under "books", you might find under general search
                # general search is the same as booksearch for torrents and rss, no need to check again
                if not goodEnough(match):
                    searchtype = 'general'
                    if mode == 'nzb':
                        resultlist, nprov = IterateOverNewzNabSites(
                            book, searchtype)
                        if not nprov:
                            logger.debug("No active nzb providers found")
                            if 'nzb' in modelist:
                                modelist.remove('nzb')
                        if resultlist:
                            match = findBestResult(resultlist, book,
                                                   searchtype, mode)
                        else:
                            match = None

                # if still not found, try general search again without any "(extended details, series etc)"
                if not goodEnough(match) and '(' in book['searchterm']:
                    searchtype = 'shortgeneral'
                    if mode == 'nzb':
                        resultlist, nprov = IterateOverNewzNabSites(
                            book, searchtype)
                        if not nprov:
                            logger.debug("No active nzb providers found")
                            if 'nzb' in modelist:
                                modelist.remove('nzb')
                        if resultlist:
                            match = findBestResult(resultlist, book,
                                                   searchtype, mode)
                        else:
                            match = None

                if not goodEnough(match):
                    logger.info(
                        "%s Searches for %s %s returned no results." %
                        (mode.upper(), book['library'], book['searchterm']))
                else:
                    logger.info("Found %s result: %s %s%%, %s priority %s" %
                                (mode.upper(), searchtype, match[0],
                                 match[2]['NZBprov'], match[4]))
                    matches.append(match)

            if matches:
                highest = max(matches, key=lambda s:
                              (s[0], s[4]))  # best percentage first, then priority
                logger.info("Requesting %s download: %s%% %s: %s" %
                            (book['library'], highest[0],
                             highest[2]['NZBprov'], highest[1]))
                if downloadResult(highest, book) > 1:
                    book_count += 1  # we found it

        logger.info("Search for Wanted items complete, found %s book%s" %
                    (book_count, plural(book_count)))

    except Exception:
        logger.error('Unhandled exception in search_book: %s' %
                     traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
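
The best-match selection above relies on max() over (percentage, priority) tuples, so ties on match percentage fall back to provider priority. An illustrative sketch with made-up tuples shaped like findBestResult output:

# hypothetical data: (percent, display name, result dict, extra, priority)
matches = [
    (95, 'result A', {'NZBprov': 'provA'}, None, 0),
    (95, 'result B', {'NZBprov': 'provB'}, None, 1),
    (90, 'result C', {'NZBprov': 'provC'}, None, 5),
]
highest = max(matches, key=lambda s: (s[0], s[4]))
print(highest[1])  # 'result B': equal percentage, higher priority wins
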
Exemple #41
0
def getBookCover(bookID=None, src=None):
    """ Return link to a local file containing a book cover image for a bookid, and which source used.
        Try 1. Local file cached from goodreads/googlebooks when book was imported
            2. cover.jpg if we have the book
            3. LibraryThing cover image (if you have a dev key)
            4. LibraryThing whatwork (if available)
            5. Goodreads search (if book was imported from goodreads)
            6. Google isbn search (if google has a link to book for sale)
            7. Google images search (if lazylibrarian config allows)

        src = cache, cover, goodreads, librarything, whatwork, googleisbn, googleimage
        Return None if no cover available. """
    if not bookID:
        logger.error("getBookCover- No bookID")
        return None, src

    if not src:
        src = ''
    logger.debug("Getting %s cover for %s" % (src, bookID))
    # noinspection PyBroadException
    try:
        cachedir = lazylibrarian.CACHEDIR
        coverfile = os.path.join(cachedir, "book", bookID + '.jpg')
        if not src or src == 'cache' or src == 'current':
            if os.path.isfile(coverfile):  # use cached image if there is one
                lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
                coverlink = 'cache/book/' + bookID + '.jpg'
                return coverlink, 'cache'
            elif src:
                lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
                return None, src

        myDB = database.DBConnection()
        if not src or src == 'cover':
            item = myDB.match('select BookFile from books where bookID=?',
                              (bookID, ))
            if item:
                bookfile = item['BookFile']
                if bookfile:  # we may have a cover.jpg in the same folder
                    bookdir = os.path.dirname(bookfile)
                    coverimg = os.path.join(bookdir, "cover.jpg")
                    if os.path.isfile(coverimg):
                        if src:
                            coverfile = os.path.join(cachedir, "book",
                                                     bookID + '_cover.jpg')
                            coverlink = 'cache/book/' + bookID + '_cover.jpg'
                            logger.debug("Caching cover.jpg for %s" % bookID)
                        else:
                            coverlink = 'cache/book/' + bookID + '.jpg'
                            logger.debug("Caching cover.jpg for %s" %
                                         coverfile)
                        _ = safe_copy(coverimg, coverfile)
                        return coverlink, src
            if src:
                logger.debug('No cover.jpg found for %s' % bookID)
                return None, src

        # see if librarything has a cover
        if not src or src == 'librarything':
            if lazylibrarian.CONFIG['LT_DEVKEY']:
                cmd = 'select BookISBN from books where bookID=?'
                item = myDB.match(cmd, (bookID, ))
                if item and item['BookISBN']:
                    img = 'https://www.librarything.com/devkey/%s/large/isbn/%s' % (
                        lazylibrarian.CONFIG['LT_DEVKEY'], item['BookISBN'])
                    if src:
                        coverlink, success, _ = cache_img(
                            "book", bookID + '_lt', img)
                    else:
                        coverlink, success, _ = cache_img("book",
                                                          bookID,
                                                          img,
                                                          refresh=True)

                    # if librarything has no image they return a 1x1 gif
                    data = ''
                    coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                    if os.path.isfile(coverfile):
                        with open(coverfile, 'rb') as f:
                            data = f.read()
                    if len(data) < 50:
                        logger.debug(
                            'Got an empty librarything image for %s [%s]' %
                            (bookID, coverlink))
                    elif success:
                        logger.debug("Caching librarything cover for %s" %
                                     bookID)
                        return coverlink, 'librarything'
                    else:
                        logger.debug('Failed to cache image for %s [%s]' %
                                     (img, coverlink))
                else:
                    logger.debug("No isbn for %s" % bookID)
            if src:
                return None, src

        # see if librarything workpage has a cover
        if not src or src == 'whatwork':
            work = getBookWork(bookID, "Cover")
            if work:
                try:
                    img = work.split('workCoverImage')[1].split('="')[1].split(
                        '"')[0]
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img(
                                "book", bookID + '_ww', img)
                        else:
                            coverlink, success, _ = cache_img("book",
                                                              bookID,
                                                              img,
                                                              refresh=True)

                        # if librarything has no image they return a 1x1 gif
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR,
                                                 coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug(
                                'Got an empty whatwork image for %s [%s]' %
                                (bookID, coverlink))
                        elif success:
                            logger.debug("Caching whatwork cover for %s" %
                                         bookID)
                            return coverlink, 'whatwork'
                        else:
                            logger.debug('Failed to cache image for %s [%s]' %
                                         (img, coverlink))
                    else:
                        logger.debug("No image found in work page for %s" %
                                     bookID)
                except IndexError:
                    logger.debug(
                        'workCoverImage not found in work page for %s' %
                        bookID)

                try:
                    img = work.split('og:image')[1].split('="')[1].split(
                        '"')[0]
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img(
                                "book", bookID + '_ww', img)
                        else:
                            coverlink, success, _ = cache_img("book",
                                                              bookID,
                                                              img,
                                                              refresh=True)

                        # if librarything has no image they return a 1x1 gif
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR,
                                                 coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug(
                                'Got an empty whatwork image for %s [%s]' %
                                (bookID, coverlink))
                        elif success:
                            logger.debug("Caching whatwork cover for %s" %
                                         bookID)
                            return coverlink, 'whatwork'
                        else:
                            logger.debug('Failed to cache image for %s [%s]' %
                                         (img, coverlink))
                    else:
                        logger.debug("No image found in work page for %s" %
                                     bookID)
                except IndexError:
                    logger.debug('og:image not found in work page for %s' %
                                 bookID)
            else:
                logger.debug('No work page for %s' % bookID)
            if src:
                return None, src

        cmd = 'select BookName,AuthorName,BookLink from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        item = myDB.match(cmd, (bookID, ))
        safeparams = ''
        booklink = ''
        if item:
            title = safe_unicode(item['BookName'])
            author = safe_unicode(item['AuthorName'])
            if PY2:
                title = title.encode(lazylibrarian.SYS_ENCODING)
                author = author.encode(lazylibrarian.SYS_ENCODING)
            booklink = item['BookLink']
            safeparams = quote_plus("%s %s" % (author, title))

        # try to get a cover from goodreads
        if not src or src == 'goodreads':
            if booklink and 'goodreads' in booklink:
                # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
                # and scrape the page for og:image
                # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/
                # 1388267702i/16304._UY475_SS475_.jpg"/>
                # to get the cover
                result, success = fetchURL(booklink)
                if success:
                    try:
                        img = result.split('id="coverImage"')[1].split(
                            'src="')[1].split('"')[0]
                    except IndexError:
                        try:
                            img = result.split('og:image')[1].split(
                                '="')[1].split('"')[0]
                        except IndexError:
                            img = None
                    if img and img.startswith(
                            'http'
                    ) and 'nocover' not in img and 'nophoto' not in img:
                        if src == 'goodreads':
                            coverlink, success, _ = cache_img(
                                "book", bookID + '_gr', img)
                        else:
                            coverlink, success, _ = cache_img("book",
                                                              bookID,
                                                              img,
                                                              refresh=True)

                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR,
                                                 coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug(
                                'Got an empty goodreads image for %s [%s]' %
                                (bookID, coverlink))
                        elif success:
                            logger.debug(
                                "Caching goodreads cover for %s %s" %
                                (item['AuthorName'], item['BookName']))
                            return coverlink, 'goodreads'
                        else:
                            logger.debug(
                                "Error getting goodreads image for %s, [%s]" %
                                (img, coverlink))
                    else:
                        logger.debug(
                            "No image found in goodreads page for %s" % bookID)
                else:
                    logger.debug("Error getting goodreads page %s, [%s]" %
                                 (booklink, result))
            if src:
                return None, src

        if not src or src == 'googleisbn':
            # try a google isbn page search...
            # there is no image returned if google doesn't have a link for buying the book
            if safeparams:
                URL = "http://www.google.com/search?q=ISBN+" + safeparams
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('imgurl=')[1].split('&imgrefurl')[0]
                    except IndexError:
                        try:
                            img = result.split('img src="')[1].split('"')[0]
                        except IndexError:
                            img = None

                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img(
                                "book", bookID + '_gi', img)
                        else:
                            coverlink, success, _ = cache_img("book",
                                                              bookID,
                                                              img,
                                                              refresh=True)

                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR,
                                                 coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug(
                                'Got an empty google image for %s [%s]' %
                                (bookID, coverlink))
                        elif success:
                            logger.debug(
                                "Caching google isbn cover for %s %s" %
                                (item['AuthorName'], item['BookName']))
                            return coverlink, 'google isbn'
                        else:
                            logger.debug(
                                "Error caching google image %s, [%s]" %
                                (img, coverlink))
                    else:
                        logger.debug(
                            "No image found in google isbn page for %s" %
                            bookID)
                else:
                    logger.debug("Failed to fetch url from google")
            else:
                logger.debug("No parameters for google isbn search for %s" %
                             bookID)
            if src:
                return None, src

        if src == 'googleimage' or (not src and
                                    lazylibrarian.CONFIG['IMP_GOOGLEIMAGE']):
            # try a google image search...
            # tbm=isch      search images
            # tbs=isz:l     large images
            # ift:jpg       jpeg file type
            if safeparams:
                URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
                img = None
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('url?q=')[1].split('">')[1].split(
                            'src="')[1].split('"')[0]
                    except IndexError:
                        img = None

                if img and img.startswith('http'):
                    if src:
                        coverlink, success, _ = cache_img(
                            "book", bookID + '_gb', img)
                    else:
                        coverlink, success, _ = cache_img("book",
                                                          bookID,
                                                          img,
                                                          refresh=True)

                    data = ''
                    coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                    if os.path.isfile(coverfile):
                        with open(coverfile, 'rb') as f:
                            data = f.read()
                    if len(data) < 50:
                        logger.debug(
                            'Got an empty google image for %s [%s]' %
                            (bookID, coverlink))
                    elif success:
                        logger.debug("Caching google search cover for %s %s" %
                                     (item['AuthorName'], item['BookName']))
                        return coverlink, 'google image'
                    else:
                        logger.debug("Error getting google image %s, [%s]" %
                                     (img, coverlink))
                else:
                    logger.debug("No image found in google page for %s" %
                                 bookID)
            else:
                logger.debug("No parameters for google image search for %s" %
                             bookID)
            if src:
                return None, src

        logger.debug("No image found from any configured source")
        return None, src
    except Exception:
        logger.error('Unhandled exception in getBookCover: %s' %
                     traceback.format_exc())
    return None, src
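
Usage sketch with a placeholder bookID: with no src the function walks the sources in the order given in its docstring; with an explicit src it tries that source only:

# hypothetical usage
coverlink, source = getBookCover('12345')
if coverlink:
    print("cover cached at %s via %s" % (coverlink, source))

coverlink, source = getBookCover('12345', src='librarything')  # single source
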
def update():

    if lazylibrarian.INSTALL_TYPE == 'win':
        logger.debug('(update) Windows install - no update available')
        logger.info('(update) Windows .exe updating not supported yet.')
    elif lazylibrarian.INSTALL_TYPE == 'git':

        branch = getCurrentGitBranch()

        output, err = runGit('stash clear')
        output, err = runGit('pull origin ' + branch)

        if not output:
            logger.error('(update) Couldn\'t download latest version')
            return

        for line in output.split('\n'):

            if 'Already up-to-date.' in line:
                logger.info('(update) No update available, not updating')
                logger.info('(update) Output: ' + str(output))
            elif line.endswith('Aborting.'):
                logger.error('(update) Unable to update from git: ' + line)
                logger.info('(update) Output: ' + str(output))

    elif lazylibrarian.INSTALL_TYPE == 'source':

        # As this is a non-git install, we assume that the comparison is
        # always to master.
        branch = lazylibrarian.CURRENT_BRANCH

        tar_download_url = 'https://github.com/%s/%s/tarball/%s' % (
            lazylibrarian.GIT_USER, lazylibrarian.GIT_REPO, branch)
        update_dir = os.path.join(lazylibrarian.PROG_DIR, 'update')
        version_path = os.path.join(lazylibrarian.PROG_DIR, 'version.txt')

        try:
            logger.info('(update) Downloading update from: ' +
                        tar_download_url)
            data = urllib2.urlopen(tar_download_url)
        except (IOError, URLError):
            logger.error("(update) Unable to retrieve new version from " +
                         tar_download_url + ", can't update")
            return

        download_name = data.geturl().split('/')[-1]

        tar_download_path = os.path.join(lazylibrarian.PROG_DIR, download_name)

        # Save tar to disk
        with open(tar_download_path, 'wb') as f:
            f.write(data.read())

        # Extract the tar to update folder
        logger.info('(update) Extracting file ' + tar_download_path)
        tar = tarfile.open(tar_download_path)
        tar.extractall(update_dir)
        tar.close()

        # Delete the tar.gz
        logger.info('(update) Deleting file ' + tar_download_path)
        os.remove(tar_download_path)

        # Find update dir name
        update_dir_contents = [
            x for x in os.listdir(update_dir)
            if os.path.isdir(os.path.join(update_dir, x))
        ]
        if len(update_dir_contents) != 1:
            logger.error(u"(update) Invalid update data, update failed: " +
                         str(update_dir_contents))
            return
        content_dir = os.path.join(update_dir, update_dir_contents[0])

        # walk temp folder and move files to main folder
        for dirname, dirnames, filenames in os.walk(content_dir):
            dirname = dirname[len(content_dir) + 1:]
            for curfile in filenames:
                old_path = os.path.join(content_dir, dirname, curfile)
                new_path = os.path.join(lazylibrarian.PROG_DIR, dirname,
                                        curfile)

                if os.path.isfile(new_path):
                    os.remove(new_path)
                os.renames(old_path, new_path)

        # Update version.txt
        updateVersionFile(lazylibrarian.LATEST_VERSION)
    else:
        logger.error("(update) Cannot perform update - Install Type not set")
        return
Exemple #43
0
def search_wishlist():
    if not lazylibrarian.USE_RSS():
        logger.warn('RSS search is disabled')
        scheduleJob(action='Stop', target='search_wishlist')
        return
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            threading.currentThread().name = "SEARCHWISHLIST"

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverWishLists()
        new_books = 0
        if not wishproviders:
            logger.debug('No wishlists are set')
            scheduleJob(action='Stop', target='search_wishlist')
            return  # No point in continuing

        # for each item in resultlist, add to database if necessary, and mark as wanted
        logger.debug('Processing %s item%s in wishlists' %
                     (len(resultlist), plural(len(resultlist))))
        for book in resultlist:
            # we get rss_author, rss_title, maybe rss_isbn, rss_bookid (goodreads bookid)
            # we can just use bookid if goodreads, or try isbn and name matching on author/title if googlebooks
            # not sure if anyone would use a goodreads wishlist if not using goodreads interface...
            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and book['rss_bookid']:
                bookmatch = myDB.match(
                    'select Status,BookName from books where bookid=?',
                    (book['rss_bookid'], ))
                if bookmatch:
                    bookstatus = bookmatch['Status']
                    bookname = bookmatch['BookName']
                    if bookstatus in ['Open', 'Wanted', 'Have']:
                        logger.info('Found book %s, already marked as "%s"' %
                                    (bookname, bookstatus))
                    else:  # skipped/ignored
                        logger.info('Found book %s, marking as "Wanted"' %
                                    bookname)
                        controlValueDict = {"BookID": book['rss_bookid']}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                else:
                    import_book(book['rss_bookid'])
                    new_books += 1
            else:
                item = {}
                results = None
                item['Title'] = book['rss_title']
                if book['rss_bookid']:
                    item['BookID'] = book['rss_bookid']
                if book['rss_isbn']:
                    item['ISBN'] = book['rss_isbn']
                bookmatch = finditem(item, book['rss_author'])
                if bookmatch:  # it's already in the database
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    bookstatus = bookmatch['Status']
                    if bookstatus in ['Open', 'Wanted', 'Have']:
                        logger.info(
                            'Found book %s by %s, already marked as "%s"' %
                            (bookname, authorname, bookstatus))
                    else:  # skipped/ignored
                        logger.info(
                            'Found book %s by %s, marking as "Wanted"' %
                            (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                else:  # not in database yet
                    if book['rss_isbn']:
                        results = search_for(book['rss_isbn'])
                    if results:
                        result = results[0]  # type: dict
                        if result['isbn_fuzz'] > check_int(
                                lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                            logger.info(
                                "Found (%s%%) %s: %s" %
                                (result['isbn_fuzz'], result['authorname'],
                                 result['bookname']))
                            import_book(result['bookid'])
                            new_books += 1
                            bookmatch = True
                    if not results:
                        searchterm = "%s <ll> %s" % (item['Title'],
                                                     formatAuthorName(
                                                         book['rss_author']))
                        results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]  # type: dict
                        if result['author_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90) \
                                and result['book_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                            logger.info(
                                "Found (%s%% %s%%) %s: %s" %
                                (result['author_fuzz'], result['book_fuzz'],
                                 result['authorname'], result['bookname']))
                            import_book(result['bookid'])
                            new_books += 1
                            bookmatch = True

                    if not bookmatch:
                        msg = "Skipping book %s by %s" % (item['Title'],
                                                          book['rss_author'])
                        if not results:
                            msg += ', No results returned'
                            logger.warn(msg)
                        else:
                            msg += ', No match found'
                            logger.warn(msg)
                            result = results[0]  # type: dict
                            msg = "Closest match (%s%% %s%%) %s: %s" % (
                                result['author_fuzz'], result['book_fuzz'],
                                result['authorname'], result['bookname'])
                            logger.warn(msg)
        if new_books:
            logger.info("Wishlist marked %s book%s as Wanted" %
                        (new_books, plural(new_books)))

    except Exception:
        logger.error('Unhandled exception in search_wishlist: %s' %
                     traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
Example #44
def createMagCover(issuefile=None, refresh=False):
    # 'None' is a special flag meaning "no covers required"
    if lazylibrarian.CONFIG['IMP_CONVERT'] == 'None':
        return
    if issuefile is None or not os.path.isfile(issuefile):
        logger.debug('No issuefile %s' % issuefile)
        return

    base, extn = os.path.splitext(issuefile)
    if not extn:
        logger.debug('Unable to create cover for %s, no extension?' %
                     issuefile)
        return

    coverfile = base + '.jpg'

    if os.path.isfile(coverfile):
        if refresh:
            os.remove(coverfile)
        else:
            logger.debug('Cover for %s exists' % issuefile)
            return  # quit if cover already exists and we didn't want to refresh

    logger.debug('Creating cover for %s' % issuefile)
    data = ''  # result from unzip or unrar
    extn = extn.lower()
    if extn in ['.cbz', '.epub']:
        try:
            data = zipfile.ZipFile(issuefile)
        except Exception as why:
            logger.error("Failed to read zip file %s, %s %s" %
                         (issuefile, type(why).__name__, str(why)))
            data = ''
    elif extn in ['.cbr']:
        try:
            # unrar will complain if the library isn't installed, needs to be compiled separately
            # see https://pypi.python.org/pypi/unrar/ for instructions
            # Download source from http://www.rarlab.com/rar_add.htm
            # note we need LIBRARY SOURCE not a binary package
            # make lib; sudo make install-lib; sudo ldconfig
            # lib.unrar should then be able to find libunrar.so
            from lib.unrar import rarfile
            data = rarfile.RarFile(issuefile)
        except Exception as why:
            logger.error("Failed to read rar file %s, %s %s" %
                         (issuefile, type(why).__name__, str(why)))
            data = ''
    if data:
        img = None
        try:
            for member in data.namelist():
                memlow = member.lower()
                if '-00.' in memlow or '000.' in memlow or 'cover.' in memlow:
                    if memlow.endswith('.jpg') or memlow.endswith('.jpeg'):
                        img = data.read(member)
                        break
            if img:
                # zipfile/rarfile read() returns bytes on both Python 2 and 3,
                # so the image data can be written out directly
                with open(coverfile, 'wb') as f:
                    f.write(img)
                return
            else:
                logger.debug("Failed to find image in %s" % issuefile)
        except Exception as why:
            logger.error("Failed to extract image from %s, %s %s" %
                         (issuefile, type(why).__name__, str(why)))

    elif extn == '.pdf':
        generator = ""
        if lazylibrarian.CONFIG['IMP_CONVERT']:  # allow external convert to override libraries
            generator = "external program: %s" % lazylibrarian.CONFIG['IMP_CONVERT']
            if "gsconvert.py" in lazylibrarian.CONFIG['IMP_CONVERT']:
                msg = "Use of gsconvert.py is deprecated, equivalent functionality is now built in. "
                msg += "Support for gsconvert.py may be removed in a future release. See wiki for details."
                logger.warn(msg)
            converter = lazylibrarian.CONFIG['IMP_CONVERT']
            postfix = ''
            # if not os.path.isfile(converter):  # full path given, or just program_name?
            #     converter = os.path.join(os.getcwd(), lazylibrarian.CONFIG['IMP_CONVERT'])
            if 'convert' in converter and 'gs' not in converter:
                # tell imagemagick to only convert first page
                postfix = '[0]'
            try:
                params = [
                    converter,
                    '%s%s' % (issuefile, postfix),
                    '%s' % coverfile
                ]
                res = subprocess.check_output(params, stderr=subprocess.STDOUT)
                res = makeUnicode(res).strip()
                if res:
                    logger.debug('%s reports: %s' %
                                 (lazylibrarian.CONFIG['IMP_CONVERT'], res))
            except Exception as e:
                # logger.debug(params)
                logger.warn('External "convert" failed %s %s' %
                            (type(e).__name__, str(e)))

        elif platform.system() == "Windows":
            GS = os.path.join(os.getcwd(), "gswin64c.exe")
            generator = "local gswin64c"
            if not os.path.isfile(GS):
                GS = os.path.join(os.getcwd(), "gswin32c.exe")
                generator = "local gswin32c"
            if not os.path.isfile(GS):
                params = ["where", "gswin64c"]
                try:
                    GS = subprocess.check_output(params,
                                                 stderr=subprocess.STDOUT)
                    GS = makeUnicode(GS).strip()
                    generator = "gswin64c"
                except Exception as e:
                    logger.debug("where gswin64c failed: %s %s" %
                                 (type(e).__name__, str(e)))
            if not os.path.isfile(GS):
                params = ["where", "gswin32c"]
                try:
                    GS = subprocess.check_output(params,
                                                 stderr=subprocess.STDOUT)
                    GS = makeUnicode(GS).strip()
                    generator = "gswin32c"
                except Exception as e:
                    logger.debug("where gswin32c failed: %s %s" %
                                 (type(e).__name__, str(e)))
            if not os.path.isfile(GS):
                logger.debug("No gswin found")
                generator = "(no windows ghostscript found)"
            else:
                # noinspection PyBroadException
                try:
                    params = [GS, "--version"]
                    res = subprocess.check_output(params,
                                                  stderr=subprocess.STDOUT)
                    res = makeUnicode(res).strip()
                    logger.debug("Found %s [%s] version %s" %
                                 (generator, GS, res))
                    generator = "%s version %s" % (generator, res)
                    issuefile = issuefile.split('[')[0]
                    params = [
                        GS, "-sDEVICE=jpeg", "-dNOPAUSE", "-dBATCH", "-dSAFER",
                        "-dFirstPage=1", "-dLastPage=1", "-dUseCropBox",
                        "-sOutputFile=%s" % coverfile, issuefile
                    ]

                    res = subprocess.check_output(params,
                                                  stderr=subprocess.STDOUT)
                    res = makeUnicode(res).strip()

                    if not os.path.isfile(coverfile):
                        logger.debug("Failed to create jpg: %s" % res)
                except Exception:  # as why:
                    logger.warn("Failed to create jpg for %s" % issuefile)
                    logger.debug('Exception in gswin create_cover: %s' %
                                 traceback.format_exc())
        else:  # not windows
            try:
                # noinspection PyUnresolvedReferences
                from wand.image import Image
                interface = "wand"
            except ImportError:
                try:
                    # No PythonMagick in python3
                    # noinspection PyUnresolvedReferences
                    import PythonMagick
                    interface = "pythonmagick"
                except ImportError:
                    interface = ""
            try:
                if interface == 'wand':
                    generator = "wand interface"
                    with Image(filename=issuefile + '[0]') as img:
                        img.save(filename=coverfile)

                elif interface == 'pythonmagick':
                    generator = "pythonmagick interface"
                    img = PythonMagick.Image()
                    # PythonMagick requires filenames to be bytestr, not unicode
                    if type(issuefile) is text_type:
                        issuefile = makeBytestr(issuefile)
                    if type(coverfile) is text_type:
                        coverfile = makeBytestr(coverfile)
                    img.read(issuefile + '[0]')
                    img.write(coverfile)

                else:
                    GS = os.path.join(os.getcwd(), "gs")
                    generator = "local gs"
                    if not os.path.isfile(GS):
                        GS = ""
                        params = ["which", "gs"]
                        try:
                            GS = subprocess.check_output(
                                params, stderr=subprocess.STDOUT)
                            GS = makeUnicode(GS).strip()
                            generator = GS
                        except Exception as e:
                            logger.debug("which gs failed: %s %s" %
                                         (type(e).__name__, str(e)))
                        if not os.path.isfile(GS):
                            logger.debug("Cannot find gs")
                            generator = "(no gs found)"
                        else:
                            params = [GS, "--version"]
                            res = subprocess.check_output(
                                params, stderr=subprocess.STDOUT)
                            res = makeUnicode(res).strip()
                            logger.debug("Found gs [%s] version %s" %
                                         (GS, res))
                            generator = "%s version %s" % (generator, res)
                            issuefile = issuefile.split('[')[0]
                            params = [
                                GS, "-sDEVICE=jpeg", "-dNOPAUSE", "-dBATCH",
                                "-dSAFER", "-dFirstPage=1", "-dLastPage=1",
                                "-dUseCropBox",
                                "-sOutputFile=%s" % coverfile, issuefile
                            ]
                            res = subprocess.check_output(
                                params, stderr=subprocess.STDOUT)
                            res = makeUnicode(res).strip()
                            if not os.path.isfile(coverfile):
                                logger.debug("Failed to create jpg: %s" % res)
            except Exception as e:
                logger.warn("Unable to create cover for %s using %s %s" %
                            (issuefile, type(e).__name__, generator))
                logger.debug('Exception in create_cover: %s' %
                             traceback.format_exc())

        if os.path.isfile(coverfile):
            setperm(coverfile)
            logger.debug("Created cover for %s using %s" %
                         (issuefile, generator))
            return

    # if not recognised extension or cover creation failed
    try:
        coverfile = safe_copy(
            os.path.join(lazylibrarian.PROG_DIR, 'data/images/nocover.jpg'),
            coverfile)
        setperm(coverfile)
    except Exception as why:
        logger.error("Failed to copy nocover file, %s %s" %
                     (type(why).__name__, str(why)))
    return
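
The ghostscript branches above reduce to a single command that renders only the first page of a PDF to a JPEG. A standalone sketch of that call, assuming gs is on the PATH (issue.pdf and cover.jpg are example names):

import subprocess

params = ["gs", "-sDEVICE=jpeg", "-dNOPAUSE", "-dBATCH", "-dSAFER",
          "-dFirstPage=1", "-dLastPage=1", "-dUseCropBox",
          "-sOutputFile=cover.jpg", "issue.pdf"]
# check_output raises CalledProcessError if gs exits non-zero
res = subprocess.check_output(params, stderr=subprocess.STDOUT)
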
Example #45
def magazineScan():
    lazylibrarian.MAG_UPDATE = 1
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()

        mag_path = lazylibrarian.CONFIG['MAG_DEST_FOLDER']
        mag_path = mag_path.split('$')[0]

        if lazylibrarian.CONFIG['MAG_RELATIVE']:
            if mag_path[0] not in '._':
                mag_path = '_' + mag_path
            mag_path = os.path.join(lazylibrarian.DIRECTORY('eBook'), mag_path)

        mag_path = mag_path.encode(lazylibrarian.SYS_ENCODING)
        if lazylibrarian.CONFIG['FULL_SCAN']:
            mags = myDB.select('select * from Issues')
            # check all the issues are still there, delete entry if not
            for mag in mags:
                title = mag['Title']
                issuedate = mag['IssueDate']
                issuefile = mag['IssueFile']

                if issuefile and not os.path.isfile(issuefile):
                    myDB.action('DELETE from Issues where issuefile=?',
                                (issuefile, ))
                    logger.info('Issue %s - %s deleted as not found on disk' %
                                (title, issuedate))
                    controlValueDict = {"Title": title}
                    newValueDict = {
                        "LastAcquired": None,  # clear magazine dates
                        "IssueDate": None,  # we will fill them in again later
                        "LatestCover": None,
                        "IssueStatus":
                        "Skipped"  # assume there are no issues now
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    logger.debug('Magazine %s details reset' % title)

            mags = myDB.select('SELECT * from magazines')
            # now check the magazine titles and delete any with no issues
            for mag in mags:
                title = mag['Title']
                count = myDB.select(
                    'SELECT COUNT(Title) as counter FROM issues WHERE Title=?',
                    (title, ))
                issues = count[0]['counter']
                if not issues:
                    logger.debug('Magazine %s deleted as no issues found' %
                                 title)
                    myDB.action('DELETE from magazines WHERE Title=?',
                                (title, ))

        logger.info(' Checking [%s] for magazines' % mag_path)

        # massage the MAG_DEST_FILE config parameter into something we can use
        # with regular expression matching
        matchString = ''
        for char in lazylibrarian.CONFIG['MAG_DEST_FILE']:
            matchString = matchString + '\\' + char
        booktypes = '|'.join(getList(lazylibrarian.CONFIG['MAG_TYPE']))
        # use a group, not a character class, so multi-character
        # extensions like pdf|cbz|cbr match correctly
        match = matchString.replace(
            "\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
                "\\$\\T\\i\\t\\l\\e",
                "(?P<title>.*?)") + '\.(' + booktypes + ')'
        title_pattern = re.compile(match, re.VERBOSE)
        match = matchString.replace(
            "\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
                "\\$\\T\\i\\t\\l\\e", "") + '\.(' + booktypes + ')'
        date_pattern = re.compile(match, re.VERBOSE)

        # try to ensure startdir is str as os.walk can fail if it tries to convert a subdir or file
        # to utf-8 and fails (eg scandinavian characters in ascii 8bit)
        if isinstance(mag_path, unicode):
            try:
                mag_path = mag_path.encode('ASCII')
            except UnicodeEncodeError:
                logger.debug('Unicode error converting %s' % repr(mag_path))

        for dirname, dirnames, filenames in os.walk(mag_path):
            for fname in filenames[:]:
                # maybe not all magazines will be pdf?
                if is_valid_booktype(fname, booktype='mag'):
                    issuedate = ''
                    # noinspection PyBroadException
                    try:
                        match = title_pattern.match(fname)
                        if match:
                            issuedate = match.group("issuedate")
                            title = match.group("title")
                            match = True
                        else:
                            match = False
                    except Exception:
                        match = False

                    if not match:
                        try:
                            match = date_pattern.match(fname)
                            if match:
                                issuedate = match.group("issuedate")
                                title = os.path.basename(dirname)
                            else:
                                logger.debug("Pattern match failed for [%s]" %
                                             fname)
                                continue
                        except Exception as e:
                            logger.debug("Invalid name format for [%s] %s %s" %
                                         (fname, type(e).__name__, str(e)))
                            continue

                    logger.debug("Found %s Issue %s" % (title, fname))

                    issuefile = os.path.join(dirname,
                                             fname)  # full path to issue.pdf
                    mtime = os.path.getmtime(issuefile)
                    iss_acquired = datetime.date.isoformat(
                        datetime.date.fromtimestamp(mtime))

                    controlValueDict = {"Title": title}

                    # is this magazine already in the database?
                    mag_entry = myDB.match(
                        'SELECT LastAcquired, IssueDate, MagazineAdded from magazines WHERE Title=?',
                        (title, ))
                    if not mag_entry:
                        # need to add a new magazine to the database
                        newValueDict = {
                            "Reject": None,
                            "Status": "Active",
                            "MagazineAdded": None,
                            "LastAcquired": None,
                            "LatestCover": None,
                            "IssueDate": None,
                            "IssueStatus": "Skipped",
                            "Regex": None
                        }
                        logger.debug("Adding magazine %s" % title)
                        myDB.upsert("magazines", newValueDict,
                                    controlValueDict)
                        magissuedate = None
                        magazineadded = None
                    else:
                        maglastacquired = mag_entry['LastAcquired']
                        magissuedate = mag_entry['IssueDate']
                        magazineadded = mag_entry['MagazineAdded']
                        magissuedate = str(magissuedate).zfill(4)

                    issuedate = str(issuedate).zfill(4)  # pad for sorting issue numbers

                    # is this issue already in the database?
                    controlValueDict = {"Title": title, "IssueDate": issuedate}
                    issue_id = create_id("%s %s" % (title, issuedate))
                    iss_entry = myDB.match(
                        'SELECT Title from issues WHERE Title=? and IssueDate=?',
                        (title, issuedate))
                    if not iss_entry:
                        newValueDict = {
                            "IssueAcquired": iss_acquired,
                            "IssueID": issue_id,
                            "IssueFile": issuefile
                        }
                        myDB.upsert("Issues", newValueDict, controlValueDict)
                        logger.debug("Adding issue %s %s" % (title, issuedate))

                    create_cover(issuefile)
                    lazylibrarian.postprocess.processMAGOPF(
                        issuefile, title, issuedate, issue_id)

                    # see if this issues date values are useful
                    controlValueDict = {"Title": title}
                    if not mag_entry:  # new magazine, this is the only issue
                        newValueDict = {
                            "MagazineAdded": iss_acquired,
                            "LastAcquired": iss_acquired,
                            "LatestCover":
                            os.path.splitext(issuefile)[0] + '.jpg',
                            "IssueDate": issuedate,
                            "IssueStatus": "Open"
                        }
                        myDB.upsert("magazines", newValueDict,
                                    controlValueDict)
                    else:
                        # Set magazine_issuedate to issuedate of most recent issue we have
                        # Set latestcover to most recent issue cover
                        # Set magazine_added to acquired date of earliest issue we have
                        # Set magazine_lastacquired to acquired date of most recent issue we have
                        # acquired dates are read from magazine file timestamps
                        newValueDict = {"IssueStatus": "Open"}
                        if not magazineadded or iss_acquired < magazineadded:
                            newValueDict["MagazineAdded"] = iss_acquired
                        if not maglastacquired or iss_acquired > maglastacquired:
                            newValueDict["LastAcquired"] = iss_acquired
                        if not magissuedate or issuedate >= magissuedate:
                            newValueDict["IssueDate"] = issuedate
                            newValueDict["LatestCover"] = os.path.splitext(
                                issuefile)[0] + '.jpg'
                        myDB.upsert("magazines", newValueDict,
                                    controlValueDict)

        magcount = myDB.match("select count(*) from magazines")
        isscount = myDB.match("select count(*) from issues")

        logger.info("Magazine scan complete, found %s magazine%s, %s issue%s" %
                    (magcount['count(*)'], plural(magcount['count(*)']),
                     isscount['count(*)'], plural(isscount['count(*)'])))
        lazylibrarian.MAG_UPDATE = 0

    except Exception:
        lazylibrarian.MAG_UPDATE = 0
        logger.error('Unhandled exception in magazineScan: %s' %
                     traceback.format_exc())
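
magazineScan derives issue ids with create_id("title issuedate"). A plausible sketch, assuming the id is an md5 hex digest of that string (the real helper may normalise the text first):

import hashlib

def create_id(issuename):
    # Stable id from a 'title issuedate' string; md5 digest assumed.
    return hashlib.md5(issuename.encode('utf-8')).hexdigest()
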
Example #46
def search_rss_book(books=None, library=None):
    """
    books is a list of new books to add, or None for backlog search
    library is "eBook" or "AudioBook" or None to search all book types
    """
    if not lazylibrarian.USE_RSS():
        logger.warn('RSS search is disabled')
        scheduleJob(action='Stop', target='search_rss_book')
        return
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if books is None:
                threading.currentThread().name = "SEARCHALLRSS"
            else:
                threading.currentThread().name = "SEARCHRSS"

        myDB = database.DBConnection()

        searchbooks = []
        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded, books.Status, AudioStatus '
            cmd += 'from books,authors WHERE (books.Status="Wanted" OR AudioStatus="Wanted") '
            cmd += 'and books.AuthorID = authors.AuthorID order by BookAdded desc'
            results = myDB.select(cmd)
            for terms in results:
                searchbooks.append(terms)
        else:
            # The user has added a new book
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub, books.Status, AudioStatus '
                cmd += 'from books,authors WHERE BookID=? AND books.AuthorID = authors.AuthorID'
                results = myDB.select(cmd, (book['bookid'], ))
                for terms in results:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            logger.debug("SearchRSS - No books to search for")
            return

        resultlist, nproviders = IterateOverRSSSites()
        if not nproviders:
            logger.warn('No rss providers are available')
            scheduleJob(action='Stop', target='search_rss_book')
            return  # No point in continuing

        logger.info('RSS Searching for %i book%s' %
                    (len(searchbooks), plural(len(searchbooks))))

        searchlist = []
        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName']
            if searchbook['BookSub']:
                searchterm = searchterm + ': ' + searchbook['BookSub']

            if library is None or library == 'eBook':
                if searchbook['Status'] == "Wanted":
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="eBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"], ))
                    if snatched:
                        logger.warn('eBook %s %s already marked snatched in wanted table' %
                                    (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append({
                            "bookid": searchbook['BookID'],
                            "bookName": searchbook['BookName'],
                            "bookSub": searchbook['BookSub'],
                            "authorName": searchbook['AuthorName'],
                            "library": "eBook",
                            "searchterm": searchterm
                        })

            if library is None or library == 'AudioBook':
                if searchbook['AudioStatus'] == "Wanted":
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="AudioBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"], ))
                    if snatched:
                        logger.warn('AudioBook %s %s already marked snatched in wanted table' %
                                    (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append({
                            "bookid": searchbook['BookID'],
                            "bookName": searchbook['BookName'],
                            "bookSub": searchbook['BookSub'],
                            "authorName": searchbook['AuthorName'],
                            "library": "AudioBook",
                            "searchterm": searchterm
                        })

        rss_count = 0
        for book in searchlist:
            if book['library'] == 'AudioBook':
                searchtype = 'audio'
            else:
                searchtype = 'book'
            found = processResultList(resultlist, book, searchtype, 'rss')

            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in book['bookName']:  # anything to shorten?
                searchtype = 'short' + searchtype
                found = processResultList(resultlist, book, searchtype, 'rss')

            if not found:
                logger.info("RSS searches for %s %s returned no results." %
                            (book['library'], book['searchterm']))
            else:
                rss_count += 1

        logger.info("RSS Search for Wanted items complete, found %s book%s" %
                    (rss_count, plural(rss_count)))

    except Exception:
        logger.error('Unhandled exception in search_rss_book: %s' %
                     traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
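
The log messages above use a small plural helper. A minimal sketch, assuming it just returns an 's' suffix for counts other than one:

def plural(count):
    # 'found 1 book' but 'found 2 books'
    return '' if count == 1 else 's'
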
Example #47
def import_book(pp_path=None, bookID=None):

    # Separated this into a function so we can more easily import books from an alternate directory
    # and move them into LL folder structure given just the bookID, returns True or False
    # eg if import_book(source_directory, bookID):
    #         ppcount = ppcount + 1
    #
    myDB = database.DBConnection()
    data = myDB.select('SELECT * from books WHERE BookID="%s"' % bookID)
    if data:
        authorname = data[0]['AuthorName']
        bookname = data[0]['BookName']

        # try:
        #    auth_dir = os.path.join(lazylibrarian.DESTINATION_DIR, authorname).encode(lazylibrarian.SYS_ENCODING)
        #    os.chmod(auth_dir, 0777)
        # except Exception, e:
        #    logger.debug("Could not chmod author directory: " + str(auth_dir))

        if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
            logger.warn('Please check your EBOOK_DEST_FOLDER setting')
            lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

        dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace(
            '$Author', authorname).replace('$Title', bookname)
        global_name = lazylibrarian.EBOOK_DEST_FILE.replace(
            '$Author', authorname).replace('$Title', bookname)
        global_name = common.remove_accents(global_name)
        # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
        # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
        dic = {
            '<': '',
            '>': '',
            '...': '',
            ' & ': ' ',
            ' = ': ' ',
            '?': '',
            '$': 's',
            ' + ': ' ',
            '"': '',
            ',': '',
            '*': '',
            ':': '',
            ';': '',
            '\'': ''
        }
        dest_path = formatter.latinToAscii(
            formatter.replace_all(dest_path, dic))
        dest_path = os.path.join(lazylibrarian.DESTINATION_DIR,
                                 dest_path).encode(lazylibrarian.SYS_ENCODING)

        processBook = processDestination(pp_path, dest_path, authorname,
                                         bookname, global_name)

        if processBook:
            # update nzbs
            controlValueDict = {"BookID": bookID}
            newValueDict = {
                "Status": "Processed",
                "NZBDate": formatter.now()
            }  # say when we processed it
            myDB.upsert("wanted", newValueDict, controlValueDict)
            processExtras(myDB, dest_path, global_name, data)
            logger.info('Successfully processed: %s' % global_name)
            return True
        else:
            logger.error('Postprocessing for %s has failed.' % global_name)
            logger.error('Warning - Residual files remain in %s.fail' %
                         pp_path)
            try:
                os.rename(pp_path, pp_path + '.fail')
            except OSError:
                logger.debug("Unable to rename %s" % pp_path)
            return False
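
The dic mapping above is applied by formatter.replace_all. A minimal sketch of that helper, assuming plain sequential substitution (iteration order is not significant for these non-overlapping keys):

def replace_all(text, dic):
    # Apply every old -> new substitution in dic to text.
    for old, new in dic.items():
        text = text.replace(old, new)
    return text
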
Example #48
def processDir(force=False, reset=False):
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
        return False

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if force is False and len(snatched) == 0:
        logger.info('Nothing marked as snatched. Stopping postprocessor job.')
        common.schedule_job(action='Stop', target='processDir')
    elif len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
    else:
        logger.debug("Checking %s downloads for %s snatched files" % (len(downloads), len(snatched)))
        ppcount = 0
        for book in snatched:
            found = False
            for fname in downloads:
                if not fname.endswith('.fail'):  # has this failed before?
                    # this is to get round differences in torrent filenames.
                    # Torrents aren't always returned with the name we searched for
                    # there might be a better way...
                    if isinstance(fname, str):
                        matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                    else:
                        matchname = fname
                    if ' LL.(' in matchname:
                        matchname = matchname.split(' LL.(')[0]
                    matchtitle = book['NZBtitle']
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                    if match >= 95:
                        fname = matchname
                        if os.path.isfile(os.path.join(processpath, fname)):
                            # handle single file downloads here...
                            if formatter.is_valid_booktype(fname, booktype="book") \
                                or formatter.is_valid_booktype(fname, booktype="mag"):
                                dirname = os.path.join(processpath, os.path.splitext(fname)[0])
                                if not os.path.exists(dirname):
                                    try:
                                        os.makedirs(dirname)
                                    except OSError as why:
                                        logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror))
                                if os.path.exists(dirname):
                                    try:
                                        shutil.move(os.path.join(processpath, fname), os.path.join(dirname, fname))
                                        fname = os.path.splitext(fname)[0]
                                    except Exception as why:
                                        logger.debug("Failed to move file %s to %s, %s" % 
                                            (fname, dirname, str(why)))                                         
                        if os.path.isdir(os.path.join(processpath, fname)): 
                            pp_path = os.path.join(processpath, fname)
                            logger.debug('Found folder %s for %s' % (pp_path, book['NZBtitle']))
                            found = True
                            break
                    else:
                        logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle))
                else:
                    logger.debug('Skipping %s' % fname)
            if found:
                data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID'])
                if data:
                    authorname = data[0]['AuthorName']
                    bookname = data[0]['BookName']
                    if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                        logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                        lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

                    # Default destination path, should be allowed change per config file.
                    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = common.remove_accents(global_name)
                    # dest_path = authorname+'/'+bookname
                    # global_name = bookname + ' - ' + authorname
                    # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                    dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
                    dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                        lazylibrarian.SYS_ENCODING)
                else:
                    data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                    if data:
                        # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                        # files are downloading, there will be an error in post-processing, trying to go to the
                        # same directory.
                        mostrecentissue = data[0]['IssueDate']  # keep for processing issues arriving out of order
                        # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                        # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                        dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                               ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                        mag_name = formatter.latinToAscii(formatter.replace_all(book['BookID'], dic))
                        # book auxinfo is a cleaned date, eg 2015-01-01
                        dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                            '$IssueDate',
                            book['AuxInfo']).replace('$Title',
                                                     mag_name)
                        # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                        if lazylibrarian.MAG_RELATIVE:
                            if dest_path[0] not in '._':
                                dest_path = '_' + dest_path
                            dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                                lazylibrarian.SYS_ENCODING)
                        else:
                            dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                        authorname = None
                        bookname = None
                        global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                            '$Title', mag_name)
                        global_name = common.remove_accents(global_name)
                        # global_name = book['AuxInfo']+' - '+title
                    else:
                        logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                        continue
            else:
                logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
                continue

            # try:
            #    os.chmod(dest_path, 0777)
            # except Exception, e:
            #    logger.debug("Could not chmod post-process directory: " + str(dest_path))

            processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

            if processBook:
                logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
                # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
                controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Processed", "NZBDate": formatter.now()}  # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)

                if bookname is not None:  # it's a book, if None it's a magazine
                    processExtras(myDB, dest_path, global_name, data)
                else:
                    # update mags
                    controlValueDict = {"Title": book['BookID']}
                    if mostrecentissue > book['AuxInfo']:  # check this in case processing issues arriving out of order
                        newValueDict = {"LastAcquired": formatter.today(), "IssueStatus": "Open"}
                    else:
                        newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": formatter.today(),
                                        "IssueStatus": "Open"}
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # dest_path is where we put the magazine after processing, but we don't have the full filename
                    # so look for any "book" in that directory
                    dest_file = book_file(dest_path, booktype='mag')
                    controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                    newValueDict = {"IssueAcquired": formatter.today(),
                                    "IssueFile": dest_file,
                                    "IssueID": magazinescan.create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                    }
                    myDB.upsert("issues", newValueDict, controlValueDict)

                    # create a thumbnail cover for the new issue
                    magazinescan.create_cover(dest_file)

                logger.info('Successfully processed: %s' % global_name)
                ppcount = ppcount + 1
                notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now())
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s.fail' % pp_path)
                # at this point, as it failed we should move it or it will get postprocessed
                # again (and fail again)
                try:
                    os.rename(pp_path, pp_path + '.fail')
                except OSError:
                    logger.debug("Unable to rename %s" % pp_path)

        downloads = os.listdir(processpath)  # check in case we processed/deleted some above
        for directory in downloads:
            if "LL.(" in directory and not directory.endswith('.fail'):
                bookID = str(directory).split("LL.(")[1].split(")")[0]
                logger.debug("Book with id: " + str(bookID) + " is in downloads")
                pp_path = os.path.join(processpath, directory)

                if os.path.isfile(pp_path):
                    pp_path = os.path.join(processpath)

                if os.path.isdir(pp_path):
                    logger.debug('Found LL folder %s.' % pp_path)
                if import_book(pp_path, bookID):
                    ppcount = ppcount + 1

        if ppcount:
            logger.info('%s books/mags have been processed.' % ppcount)
        else:
            logger.info('No snatched books/mags have been found')
    if reset:
        common.schedule_job(action='Restart', target='processDir')
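
The folder matching in processDir hinges on fuzz.token_set_ratio after stripping the ' LL.(' suffix. A quick illustration of why that comparison tolerates reordered words (using the fuzzywuzzy package; names are examples only):

from fuzzywuzzy import fuzz

nzbtitle = 'Iain M. Banks - Excession LL.(12345)'
dirname = 'Excession - Iain M. Banks'
matchtitle = nzbtitle.split(' LL.(')[0]  # strip the LL.(bookid) marker first
print(fuzz.token_set_ratio(matchtitle, dirname))  # 100: same tokens, different order
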
def SABnzbd(title=None, nzburl=None, remove_data=False):

    if nzburl in ['delete', 'delhistory'] and title == 'unknown':
        res = '%s function unavailable in this version of sabnzbd, no nzo_ids' % nzburl
        logger.debug(res)
        return False, res

    hostname = lazylibrarian.CONFIG['SAB_HOST']
    port = check_int(lazylibrarian.CONFIG['SAB_PORT'], 0)
    if not hostname or not port:
        res = 'Invalid sabnzbd host or port, check your config'
        logger.error(res)
        return False, res

    if hostname.endswith('/'):
        hostname = hostname[:-1]
    if not hostname.startswith("http://") and not hostname.startswith(
            "https://"):
        hostname = 'http://' + hostname

    HOST = "%s:%s" % (hostname, port)

    if lazylibrarian.CONFIG['SAB_SUBDIR']:
        HOST = HOST + "/" + lazylibrarian.CONFIG['SAB_SUBDIR']

    params = {}
    if nzburl == 'auth' or nzburl == 'get_cats':
        # connection test, check auth mode or get_cats
        params['mode'] = nzburl
        params['output'] = 'json'
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        title = 'LL.(%s)' % nzburl
    elif nzburl == 'queue':
        params['mode'] = 'queue'
        params['limit'] = '100'
        params['output'] = 'json'
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        title = 'LL.(Queue)'
    elif nzburl == 'history':
        params['mode'] = 'history'
        params['limit'] = '100'
        params['output'] = 'json'
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        title = 'LL.(History)'
    elif nzburl == 'delete':
        # only deletes tasks if still in the queue, ie NOT completed tasks
        params['mode'] = 'queue'
        params['output'] = 'json'
        params['name'] = nzburl  # nzburl is 'delete' here, which is also the sab api action name
        params['value'] = title
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        if remove_data:
            params['del_files'] = 1
        title = 'LL.(Delete) ' + title
    elif nzburl == 'delhistory':
        params['mode'] = 'history'
        params['output'] = 'json'
        params['name'] = 'delete'
        params['value'] = title
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        if remove_data:
            params['del_files'] = 1
        title = 'LL.(DelHistory) ' + title
    else:
        params['mode'] = 'addurl'
        params['output'] = 'json'
        if nzburl:
            params['name'] = nzburl
        if title:
            params['nzbname'] = title
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        if lazylibrarian.CONFIG['SAB_CAT']:
            params['cat'] = lazylibrarian.CONFIG['SAB_CAT']
        if lazylibrarian.CONFIG['USENET_RETENTION']:
            params["maxage"] = lazylibrarian.CONFIG['USENET_RETENTION']

    # FUTURE-CODE
    # if lazylibrarian.SAB_PRIO:
    #     params["priority"] = lazylibrarian.SAB_PRIO
    # if lazylibrarian.SAB_PP:
    #     params["script"] = lazylibrarian.SAB_SCRIPT

    URL = HOST + "/api?" + urlencode(params)

    # log the full request url (it includes the api key) only when download-comms debug logging is enabled
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug('Request url for <a href="%s">SABnzbd</a>' % URL)
    proxies = proxyList()
    try:
        timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
        r = requests.get(URL, timeout=timeout, proxies=proxies)
        result = r.json()
    except requests.exceptions.Timeout:
        res = "Timeout connecting to SAB with URL: %s" % URL
        logger.error(res)
        return False, res
    except Exception as e:
        if hasattr(e, 'reason'):
            errmsg = e.reason
        elif hasattr(e, 'strerror'):
            errmsg = e.strerror
        else:
            errmsg = str(e)

        res = "Unable to connect to SAB with URL: %s, %s" % (URL, errmsg)
        logger.error(res)
        return False, res
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug("Result text from SAB: " + str(result))

    if title:
        title = unaccented_str(title)
        if title.startswith('LL.('):
            return result, ''
    if result['status'] is True:
        logger.info("%s sent to SAB successfully." % title)
        # sab versions earlier than 0.8.0 don't return nzo_ids
        if 'nzo_ids' in result:
            if result['nzo_ids']:  # check it's not empty
                return result['nzo_ids'][0], ''
        return 'unknown', ''
    elif result['status'] is False:
        res = "SAB returned Error: %s" % result['error']
        logger.error(res)
        return False, res
    else:
        res = "Unknown error: %s" % str(result)
        logger.error(res)
        return False, res
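
Two illustrative call patterns for the function above, based on how it is used elsewhere (urls and titles are examples only):

# connection test: 'auth' mode returns the parsed json response
result, err = SABnzbd(title=None, nzburl='auth')

# queue an nzb; returns the sab nzo_id (or 'unknown') and an error string
nzo_id, err = SABnzbd(title='Some Book LL.(12345)',
                      nzburl='http://indexer.example/getnzb?id=678')
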
def update():
    if lazylibrarian.CONFIG['INSTALL_TYPE'] == 'win':
        logmsg('info', 'Windows .exe updating not supported yet.')
        return False
    elif lazylibrarian.CONFIG['INSTALL_TYPE'] == 'package':
        logmsg('info', 'Please use your package manager to update')
        return False

    elif lazylibrarian.CONFIG['INSTALL_TYPE'] == 'git':
        branch = getCurrentGitBranch()

        _, _ = runGit('stash clear')
        output, err = runGit('pull origin ' + branch)

        if not output:
            logmsg('error', 'Couldn\'t download latest version')
            return False

        for line in output.split('\n'):
            # newer git prints "Already up to date." without hyphens
            if 'Already up-to-date' in line or 'Already up to date' in line:
                logmsg('info', 'No update available: ' + str(output))
                return False
            elif 'Aborting' in line or 'local changes' in line:
                logmsg('error', 'Unable to update: ' + str(output))
                return False

        # Update version.txt and timestamp
        updateVersionFile(lazylibrarian.CONFIG['LATEST_VERSION'])
        lazylibrarian.CONFIG['GIT_UPDATED'] = str(int(time.time()))
        return True

    elif lazylibrarian.CONFIG['INSTALL_TYPE'] == 'source':
        if 'gitlab' in lazylibrarian.CONFIG['GIT_HOST']:
            tar_download_url = 'https://%s/%s/%s/-/archive/%s/%s-%s.tar.gz' % (
                lazylibrarian.GITLAB_TOKEN, lazylibrarian.CONFIG['GIT_USER'],
                lazylibrarian.CONFIG['GIT_REPO'],
                lazylibrarian.CONFIG['GIT_BRANCH'],
                lazylibrarian.CONFIG['GIT_REPO'],
                lazylibrarian.CONFIG['GIT_BRANCH'])
        else:
            tar_download_url = 'https://%s/%s/%s/tarball/%s' % (
                lazylibrarian.CONFIG['GIT_HOST'],
                lazylibrarian.CONFIG['GIT_USER'],
                lazylibrarian.CONFIG['GIT_REPO'],
                lazylibrarian.CONFIG['GIT_BRANCH'])
        update_dir = os.path.join(lazylibrarian.PROG_DIR, 'update')

        try:
            logmsg('info', 'Downloading update from: ' + tar_download_url)
            headers = {'User-Agent': getUserAgent()}
            proxies = proxyList()
            timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
            r = requests.get(tar_download_url,
                             timeout=timeout,
                             headers=headers,
                             proxies=proxies)
        except requests.exceptions.Timeout:
            logmsg('error',
                   "Timeout retrieving new version from " + tar_download_url)
            return False
        except Exception as e:
            if hasattr(e, 'reason'):
                errmsg = e.reason
            else:
                errmsg = str(e)
            logmsg(
                'error', "Unable to retrieve new version from " +
                tar_download_url + ", can't update: %s" % errmsg)
            return False

        download_name = r.url.split('/')[-1]

        tar_download_path = os.path.join(lazylibrarian.PROG_DIR, download_name)

        # Save tar to disk
        with open(tar_download_path, 'wb') as f:
            f.write(r.content)

        # Extract the tar to update folder
        logmsg('info', 'Extracting file ' + tar_download_path)
        try:
            with tarfile.open(tar_download_path) as tar:
                tar.extractall(update_dir)
        except Exception as e:
            logger.error('Failed to unpack tarfile %s (%s): %s' %
                         (tar_download_path, type(e).__name__, str(e)))
            return False

        # Delete the tar.gz
        logmsg('info', 'Deleting file ' + tar_download_path)
        os.remove(tar_download_path)

        # Find update dir name
        update_dir_contents = [
            x for x in os.listdir(update_dir)
            if os.path.isdir(os.path.join(update_dir, x))
        ]
        if len(update_dir_contents) != 1:
            logmsg(
                'error', "Invalid update data, update failed: " +
                str(update_dir_contents))
            return False
        content_dir = os.path.join(update_dir, update_dir_contents[0])

        # walk temp folder and move files to main folder
        for rootdir, dirnames, filenames in os.walk(content_dir):
            rootdir = rootdir[len(content_dir) + 1:]
            for curfile in filenames:
                old_path = os.path.join(content_dir, rootdir, curfile)
                new_path = os.path.join(lazylibrarian.PROG_DIR, rootdir,
                                        curfile)

                if os.path.isfile(new_path):
                    os.remove(new_path)
                os.renames(old_path, new_path)

        # Update version.txt and timestamp
        updateVersionFile(lazylibrarian.CONFIG['LATEST_VERSION'])
        lazylibrarian.CONFIG['GIT_UPDATED'] = str(int(time.time()))
        return True

    else:
        logmsg('error', "Cannot perform update - Install Type not set")
        return False
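
# update() relies on helpers such as runGit() and getCurrentGitBranch() that
# are not shown in this example. Below is a minimal sketch of the shape
# runGit() is assumed to have, using subprocess; the project's real helper
# likely does more (git binary discovery, logging).

import subprocess

def runGit(args):
    # run 'git <args>' in the current directory and return (output, error);
    # assumes git is on PATH
    cmd = ['git'] + args.split()
    try:
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        output, _ = p.communicate()
        return output.decode('utf-8', 'replace').strip(), None
    except OSError as e:
        return None, str(e)
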
Example #51
0
    def find_results(self, searchterm=None, queue=None):
        """ GoogleBooks performs much better if we search for author OR title
            not both at once, so if searchterm is not isbn, two searches needed.
            Lazylibrarian searches use <ll> to separate title from author in searchterm
            If this token isn't present, it's an isbn or searchterm as supplied by user
        """
        try:
            myDB = database.DBConnection()
            resultlist = []
            # See if we should check ISBN field, otherwise ignore it
            api_strings = ['inauthor:', 'intitle:']
            if is_valid_isbn(searchterm):
                api_strings = ['isbn:']

            api_hits = 0

            ignored = 0
            total_count = 0
            no_author_count = 0
            title = ''
            authorname = ''

            if ' <ll> ' in searchterm:  # special token separates title from author
                title, authorname = searchterm.split(' <ll> ')

            fullterm = searchterm.replace(' <ll> ', ' ')
            logger.debug('Now searching Google Books API with searchterm: %s' %
                         fullterm)

            for api_value in api_strings:
                set_url = self.url
                if api_value == "isbn:":
                    set_url = set_url + quote(api_value + searchterm)
                elif api_value == 'intitle:':
                    searchterm = fullterm
                    if title:  # just search for title
                        # noinspection PyUnresolvedReferences
                        title = title.split(' (')[0]  # without any series info
                        searchterm = title
                    searchterm = searchterm.replace("'", "").replace(
                        '"', '').strip()  # and no quotes
                    if PY2:
                        searchterm = searchterm.encode(
                            lazylibrarian.SYS_ENCODING)
                    set_url = set_url + quote(api_value + '"' + searchterm +
                                              '"')
                elif api_value == 'inauthor:':
                    searchterm = fullterm
                    if authorname:
                        searchterm = authorname  # just search for author
                    searchterm = searchterm.strip()
                    if PY2:
                        searchterm = searchterm.encode(
                            lazylibrarian.SYS_ENCODING)
                    set_url = set_url + quote_plus(api_value + '"' +
                                                   searchterm + '"')

                startindex = 0
                resultcount = 0
                ignored = 0
                number_results = 1
                total_count = 0
                no_author_count = 0
                try:
                    while startindex < number_results:

                        self.params['startIndex'] = startindex
                        URL = set_url + '&' + urlencode(self.params)

                        try:
                            jsonresults, in_cache = gb_json_request(URL)
                            if jsonresults is None:
                                number_results = 0
                            else:
                                if not in_cache:
                                    api_hits += 1
                                number_results = jsonresults['totalItems']
                                logger.debug('Searching url: ' + URL)
                            if number_results == 0:
                                logger.warn('Found no results for %s with value: %s' %
                                            (api_value, searchterm))
                                break
                        except Exception as err:
                            if hasattr(err, 'reason'):
                                errmsg = err.reason
                            else:
                                errmsg = str(err)
                            logger.warn(
                                'Google Books API Error [%s]: Check your API key or wait a while'
                                % errmsg)
                            break

                        startindex += 40

                        for item in jsonresults['items']:
                            total_count += 1

                            book = bookdict(item)
                            if not book['author']:
                                logger.debug(
                                    'Skipped a result without authorfield.')
                                no_author_count += 1
                                continue

                            if not book['name']:
                                logger.debug('Skipped a result without title.')
                                continue

                            valid_langs = getList(
                                lazylibrarian.CONFIG['IMP_PREFLANG'])
                            if "All" not in valid_langs:  # don't care about languages, accept all
                                try:
                                    # skip if language is not in valid list -
                                    booklang = book['lang']
                                    if booklang not in valid_langs:
                                        logger.debug(
                                            'Skipped %s with language %s' %
                                            (book['name'], booklang))
                                        ignored += 1
                                        continue
                                except KeyError:
                                    ignored += 1
                                    logger.debug(
                                        'Skipped %s where no language is found'
                                        % book['name'])
                                    continue

                            if authorname:
                                author_fuzz = fuzz.ratio(
                                    book['author'], authorname)
                            else:
                                author_fuzz = fuzz.ratio(
                                    book['author'], fullterm)

                            if title:
                                book_fuzz = fuzz.token_set_ratio(
                                    book['name'], title)
                                # lose a point for each extra word in the fuzzy matches so we get the closest match
                                words = len(getList(book['name']))
                                words -= len(getList(title))
                                book_fuzz -= abs(words)
                            else:
                                book_fuzz = fuzz.token_set_ratio(
                                    book['name'], fullterm)

                            isbn_fuzz = 0
                            if is_valid_isbn(fullterm):
                                isbn_fuzz = 100

                            highest_fuzz = max((author_fuzz + book_fuzz) / 2,
                                               isbn_fuzz)

                            dic = {':': '.', '"': '', '\'': ''}
                            bookname = replace_all(book['name'], dic)

                            bookname = unaccented(bookname)
                            bookname = bookname.strip()  # strip whitespace

                            AuthorID = ''
                            if book['author']:
                                match = myDB.match(
                                    'SELECT AuthorID FROM authors WHERE AuthorName=?',
                                    (book['author'].replace('"', '""'), ))
                                if match:
                                    AuthorID = match['AuthorID']

                            resultlist.append({
                                'authorname': book['author'],
                                'authorid': AuthorID,
                                'bookid': item['id'],
                                'bookname': bookname,
                                'booksub': book['sub'],
                                'bookisbn': book['isbn'],
                                'bookpub': book['pub'],
                                'bookdate': book['date'],
                                'booklang': book['lang'],
                                'booklink': book['link'],
                                'bookrate': float(book['rate']),
                                'bookrate_count': book['rate_count'],
                                'bookimg': book['img'],
                                'bookpages': book['pages'],
                                'bookgenre': book['genre'],
                                'bookdesc': book['desc'],
                                'author_fuzz': author_fuzz,
                                'book_fuzz': book_fuzz,
                                'isbn_fuzz': isbn_fuzz,
                                'highest_fuzz': highest_fuzz,
                                'num_reviews': book['ratings']
                            })

                            resultcount += 1

                except KeyError:
                    break

                logger.debug(
                    "Returning %s result%s for (%s) with keyword: %s" %
                    (resultcount, plural(resultcount), api_value, searchterm))

            logger.debug("Found %s result%s" %
                         (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s" %
                         (ignored, plural(ignored)))
            logger.debug("Removed %s book%s with no author" %
                         (no_author_count, plural(no_author_count)))
            logger.debug(
                'The Google Books API was hit %s time%s for searchterm: %s' %
                (api_hits, plural(api_hits), fullterm))
            queue.put(resultlist)

        except Exception:
            logger.error('Unhandled exception in GB.find_results: %s' %
                         traceback.format_exc())
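
# find_results() is designed to run on a worker thread and hand its result
# list back on a queue. A hypothetical usage sketch follows; the wrapper class
# name GoogleBooks and its constructor are assumptions, since only the method
# body appears above.

import threading
try:
    import queue  # Python 3
except ImportError:
    import Queue as queue  # Python 2

q = queue.Queue()
searcher = GoogleBooks()  # assumed wrapper class exposing find_results()
t = threading.Thread(target=searcher.find_results,
                     args=('the hobbit <ll> j.r.r. tolkien', q))
t.start()
t.join()
for result in q.get():  # find_results puts a single list of dicts on the queue
    print(result['highest_fuzz'], result['authorname'], result['bookname'])
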
Example #52
0
def grsync(status, shelf):
    # noinspection PyBroadException
    try:
        shelf = shelf.lower()
        logger.info('Syncing %s to %s shelf' % (status, shelf))
        myDB = database.DBConnection()
        cmd = 'select bookid from books where status="%s"' % status
        if status == 'Open':
            cmd += ' or status="Have"'
        results = myDB.select(cmd)
        ll_list = []
        for terms in results:
            ll_list.append(terms['bookid'])

        GA = grauth()
        GR = None
        shelves = GA.get_shelf_list()
        found = False
        for item in shelves:  # type: dict
            if item['name'] == shelf:
                found = True
                break
        if not found:
            res, msg = GA.create_shelf(shelf=shelf)
            if not res:
                logger.debug("Unable to create shelf %s: %s" % (shelf, msg))
                return 0, 0
            else:
                logger.debug("Created new goodreads shelf: %s" % shelf)

        gr_shelf = GA.get_gr_shelf_contents(shelf=shelf)
        dstatus = status
        if dstatus == "Open":
            dstatus += "/Have"

        logger.info("There are %s %s books, %s books on goodreads %s shelf" %
                    (len(ll_list), dstatus, len(gr_shelf), shelf))

        # Sync method for WANTED:
        # Get results of last_sync (if any)
        # For each book in last_sync
        #    if not in ll_list, new deletion, remove from gr_shelf
        #    if not in gr_shelf, new deletion, remove from ll_list, mark Skipped
        # For each book in ll_list
        #    if not in last_sync, new addition, add to gr_shelf
        # For each book in gr_shelf
        #    if not in last sync, new addition, add to ll_list, mark Wanted
        #
        # save ll WANTED as last_sync

        # For HAVE/OPEN the method is the same, but only change status if HAVE, not OPEN

        cmd = 'select SyncList from sync where UserID="%s" and Label="%s"' % ("goodreads", shelf)
        res = myDB.match(cmd)
        last_sync = []
        shelf_changed = 0
        ll_changed = 0
        if res:
            last_sync = getList(res['SyncList'])

        added_to_shelf = list(set(gr_shelf) - set(last_sync))
        removed_from_shelf = list(set(last_sync) - set(gr_shelf))
        added_to_ll = list(set(ll_list) - set(last_sync))
        removed_from_ll = list(set(last_sync) - set(ll_list))

        logger.info("%s missing from lazylibrarian %s" % (len(removed_from_ll), shelf))
        for book in removed_from_ll:
            # first the deletions since last sync...
            try:
                res, content = GA.BookToList(book, shelf, action='remove')
            except Exception as e:
                logger.error("Error removing %s from %s: %s %s" % (book, shelf, type(e).__name__, str(e)))
                res = None
                content = ''
            if res:
                logger.debug("%10s removed from %s shelf" % (book, shelf))
                shelf_changed += 1
            else:
                logger.warn("Failed to remove %s from %s shelf: %s" % (book, shelf, content))

        logger.info("%s missing from goodreads %s" % (len(removed_from_shelf), shelf))
        for book in removed_from_shelf:
            # deleted from goodreads
            cmd = 'select Status from books where BookID="%s"' % book
            res = myDB.match(cmd)
            if not res:
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                res = myDB.match(cmd)
            if not res:
                logger.warn('Book %s not found in database' % book)
            else:
                if res['Status'] in ['Have', 'Wanted']:
                    myDB.action('UPDATE books SET Status="Skipped" WHERE BookID=?', (book,))
                    ll_changed += 1
                    logger.debug("%10s set to Skipped" % book)
                else:
                    logger.warn("Not removing %s, book is marked %s" % (book, res['Status']))

        # new additions to lazylibrarian
        logger.info("%s new in lazylibrarian %s" % (len(added_to_ll), shelf))
        for book in added_to_ll:
            try:
                res, content = GA.BookToList(book, shelf, action='add')
            except Exception as e:
                logger.error("Error adding %s to %s: %s %s" % (book, shelf, type(e).__name__, str(e)))
                res = None
                content = ''
            if res:
                logger.debug("%10s added to %s shelf" % (book, shelf))
                shelf_changed += 1
            else:
                logger.warn("Failed to add %s to %s shelf: %s" % (book, shelf, content))

        # new additions to goodreads shelf
        logger.info("%s new in goodreads %s" % (len(added_to_shelf), shelf))
        for book in added_to_shelf:
            cmd = 'select Status from books where BookID="%s"' % book
            res = myDB.match(cmd)
            if not res:
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                res = myDB.match(cmd)
            if not res:
                logger.warn('Book %s not found in database' % book)
            else:
                if status == 'Open':
                    if res['Status'] == 'Open':
                        logger.warn("Book %s is already marked Open" % book)
                    else:
                        myDB.action('UPDATE books SET Status="Have" WHERE BookID=?', (book,))
                        ll_changed += 1
                        logger.debug("%10s set to Have" % book)
                elif status == 'Wanted':
                    # if in "wanted" and already marked "Open/Have", optionally delete from "wanted"
                    # (depending on user prefs, to-read and wanted might not be the same thing)
                    if lazylibrarian.CONFIG['GR_UNIQUE'] and res['Status'] in ['Open', 'Have']:
                        try:
                            res, content = GA.BookToList(book, shelf, action='remove')
                        except Exception as e:
                            logger.error("Error removing %s from %s: %s %s" % (book, shelf, type(e).__name__, str(e)))
                            res = None
                            content = ''
                        if res:
                            logger.debug("%10s removed from %s shelf" % (book, shelf))
                            shelf_changed += 1
                        else:
                            logger.warn("Failed to remove %s from %s shelf: %s" % (book, shelf, content))
                    elif res['Status'] != 'Open':
                        myDB.action('UPDATE books SET Status="Wanted" WHERE BookID=?', (book,))
                        ll_changed += 1
                        logger.debug("%10s set to Wanted" % book)
                    else:
                        logger.warn("Not setting %s as Wanted, already marked Open" % book)

        # get new definitive list from ll
        cmd = 'select bookid from books where status="%s"' % status
        if status == 'Open':
            cmd += ' or status="Have"'
        results = myDB.select(cmd)
        ll_list = []
        for terms in results:
            ll_list.append(terms['bookid'])

        # store as comparison for next sync
        controlValueDict = {"UserID": "goodreads", "Label": shelf}
        newValueDict = {"Date": str(time.time()), "Synclist": ', '.join(ll_list)}
        myDB.upsert("sync", newValueDict, controlValueDict)

        logger.debug('Sync %s to %s shelf complete' % (status, shelf))
        return shelf_changed, ll_changed

    except Exception:
        logger.error('Unhandled exception in grsync: %s' % traceback.format_exc())
        return 0, 0
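
# The heart of grsync() is four set differences against the snapshot stored
# at the end of the previous run. The same arithmetic on toy data:

last_sync = {'book1', 'book2', 'book3'}    # snapshot saved by the previous sync
gr_shelf = {'book2', 'book3', 'book4'}     # goodreads shelf now
ll_list = {'book1', 'book3', 'book5'}      # lazylibrarian list now

added_to_shelf = gr_shelf - last_sync      # {'book4'}: new on goodreads, add to lazylibrarian
removed_from_shelf = last_sync - gr_shelf  # {'book1'}: deleted on goodreads, mark Skipped
added_to_ll = ll_list - last_sync          # {'book5'}: new in lazylibrarian, add to shelf
removed_from_ll = last_sync - ll_list      # {'book2'}: deleted locally, remove from shelf
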
Example #53
0
def search_rss_book(books=None, library=None):
    """
    books is a list of new books to add, or None for backlog search
    library is "eBook" or "AudioBook" or None to search all book types
    """
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if books is None:
                threading.currentThread().name = "SEARCHALLRSS"
            else:
                threading.currentThread().name = "SEARCHRSS"

        if not (lazylibrarian.USE_RSS()):
            logger.warn('RSS search is disabled')
            scheduleJob(action='Stop', target='search_rss_book')
            return

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverWishLists()
        new_books = 0
        if not wishproviders:
            logger.debug('No wishlists are set')
        else:
            # for each item in resultlist, add to database if necessary, and mark as wanted
            logger.debug('Processing %s item%s in wishlists' %
                         (len(resultlist), plural(len(resultlist))))
            for book in resultlist:
                # we get rss_author, rss_title, maybe rss_isbn, rss_bookid (goodreads bookid)
                # we can just use bookid if goodreads, or try isbn and name matching on author/title if googlebooks
                # not sure if anyone would use a goodreads wishlist if not using goodreads interface...
                if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and book[
                        'rss_bookid']:
                    bookmatch = myDB.match(
                        'select Status,BookName from books where bookid=?',
                        (book['rss_bookid'], ))
                    if bookmatch:
                        bookstatus = bookmatch['Status']
                        bookname = bookmatch['BookName']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(
                                'Found book %s, already marked as "%s"' %
                                (bookname, bookstatus))
                        else:  # skipped/ignored
                            logger.info('Found book %s, marking as "Wanted"' %
                                        bookname)
                            controlValueDict = {"BookID": book['rss_bookid']}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                            new_books += 1
                    else:
                        import_book(book['rss_bookid'])
                        new_books += 1
                else:
                    item = {}
                    results = None
                    item['Title'] = book['rss_title']
                    if book['rss_bookid']:
                        item['BookID'] = book['rss_bookid']
                    if book['rss_isbn']:
                        item['ISBN'] = book['rss_isbn']
                    bookmatch = finditem(item, book['rss_author'])
                    if bookmatch:  # it's already in the database
                        authorname = bookmatch['AuthorName']
                        bookname = bookmatch['BookName']
                        bookid = bookmatch['BookID']
                        bookstatus = bookmatch['Status']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(
                                'Found book %s by %s, already marked as "%s"' %
                                (bookname, authorname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(
                                'Found book %s by %s, marking as "Wanted"' %
                                (bookname, authorname))
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                            new_books += 1
                    else:  # not in database yet
                        if book['rss_isbn']:
                            results = search_for(book['rss_isbn'])
                        if results:
                            result = results[0]  # type: dict
                            if result['isbn_fuzz'] > check_int(
                                    lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                                logger.info(
                                    "Found (%s%%) %s: %s" %
                                    (result['isbn_fuzz'], result['authorname'],
                                     result['bookname']))
                                import_book(result['bookid'])
                                new_books += 1
                                bookmatch = True
                        if not results:
                            searchterm = "%s <ll> %s" % (
                                item['Title'],
                                formatAuthorName(book['rss_author']))
                            results = search_for(unaccented(searchterm))
                        if results:
                            result = results[0]  # type: dict
                            if result['author_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90) \
                                    and result['book_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                                logger.info(
                                    "Found (%s%% %s%%) %s: %s" %
                                    (result['author_fuzz'],
                                     result['book_fuzz'], result['authorname'],
                                     result['bookname']))
                                import_book(result['bookid'])
                                new_books += 1
                                bookmatch = True

                        if not bookmatch:
                            msg = "Skipping book %s by %s" % (
                                item['Title'], book['rss_author'])
                            if not results:
                                msg += ', No results returned'
                                logger.warn(msg)
                            else:
                                msg += ', No match found'
                                logger.warn(msg)
                                result = results[0]  # type: dict
                                msg = "Closest match (%s%% %s%%) %s: %s" % (
                                    result['author_fuzz'], result['book_fuzz'],
                                    result['authorname'], result['bookname'])
                                logger.warn(msg)
        if new_books:
            logger.info("Wishlist marked %s book%s as Wanted" %
                        (new_books, plural(new_books)))

        searchbooks = []
        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded, books.Status, AudioStatus '
            cmd += 'from books,authors WHERE (books.Status="Wanted" OR AudioStatus="Wanted") '
            cmd += 'and books.AuthorID = authors.AuthorID order by BookAdded desc'
            results = myDB.select(cmd)
            for terms in results:
                searchbooks.append(terms)
        else:
            # The user has added a new book
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub, books.Status, AudioStatus '
                cmd += 'from books,authors WHERE BookID=? AND books.AuthorID = authors.AuthorID'
                results = myDB.select(cmd, (book['bookid'], ))
                for terms in results:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            logger.debug("SearchRSS - No books to search for")
            return

        resultlist, nproviders = IterateOverRSSSites()
        if not nproviders and not wishproviders:
            logger.warn('No rss providers are available')
            return  # No point in continuing

        logger.info('RSS Searching for %i book%s' %
                    (len(searchbooks), plural(len(searchbooks))))

        searchlist = []
        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName']
            if searchbook['BookSub']:
                searchterm = searchterm + ': ' + searchbook['BookSub']

            if library is None or library == 'eBook':
                if searchbook['Status'] == "Wanted":
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="eBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"], ))
                    if snatched:
                        logger.warn(
                            'eBook %s %s already marked snatched in wanted table'
                            %
                            (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append({
                            "bookid": searchbook['BookID'],
                            "bookName": searchbook['BookName'],
                            "bookSub": searchbook['BookSub'],
                            "authorName": searchbook['AuthorName'],
                            "library": "eBook",
                            "searchterm": searchterm
                        })

            if library is None or library == 'AudioBook':
                if searchbook['AudioStatus'] == "Wanted":
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="AudioBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"], ))
                    if snatched:
                        logger.warn(
                            'AudioBook %s %s already marked snatched in wanted table'
                            %
                            (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append({
                            "bookid": searchbook['BookID'],
                            "bookName": searchbook['BookName'],
                            "bookSub": searchbook['BookSub'],
                            "authorName": searchbook['AuthorName'],
                            "library": "AudioBook",
                            "searchterm": searchterm
                        })

        rss_count = 0
        for book in searchlist:
            if book['library'] == 'AudioBook':
                searchtype = 'audio'
            else:
                searchtype = 'book'
            found = processResultList(resultlist, book, searchtype, 'rss')

            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in book['bookName']:  # anything to shorten?
                searchtype = 'short' + searchtype
                found = processResultList(resultlist, book, searchtype, 'rss')

            if not found:
                logger.info("NZB Searches for %s %s returned no results." %
                            (book['library'], book['searchterm']))
            if found > True:
                rss_count += 1

        logger.info("RSS Search for Wanted items complete, found %s book%s" %
                    (rss_count, plural(rss_count)))

    except Exception:
        logger.error('Unhandled exception in search_rss_book: %s' %
                     traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
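
# The import decisions above gate on fuzzy string scores against MATCH_RATIO
# (default 90). A standalone sketch of the same gate; the fuzz module used in
# these examples matches the fuzzywuzzy/thefuzz API, so this assumes thefuzz
# is installed.

from thefuzz import fuzz  # pip install thefuzz

MATCH_RATIO = 90  # same default as check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90)

author_fuzz = fuzz.ratio('j r r tolkien', 'j. r. r. tolkien')
book_fuzz = fuzz.token_set_ratio('The Hobbit', 'Hobbit, The: 75th Anniversary Edition')

if author_fuzz > MATCH_RATIO and book_fuzz > MATCH_RATIO:
    print('close enough - import the book')
else:
    print('scores %s%% / %s%% - skip' % (author_fuzz, book_fuzz))
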
Example #54
0
    def get_author_books(self,
                         authorid=None,
                         authorname=None,
                         bookstatus="Skipped",
                         audiostatus="Skipped",
                         entrystatus='Active',
                         refresh=False):
        # noinspection PyBroadException
        try:
            logger.debug('[%s] Now processing books with Google Books API' %
                         authorname)
            # google doesn't like accents in author names
            set_url = self.url + quote(
                'inauthor:"%s"' % unaccented_str(authorname))

            api_hits = 0
            gr_lang_hits = 0
            lt_lang_hits = 0
            gb_lang_change = 0
            cache_hits = 0
            not_cached = 0
            startindex = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            locked_count = 0
            book_ignore_count = 0
            total_count = 0
            number_results = 1

            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            # Author is loading
            myDB = database.DBConnection()
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            try:
                while startindex < number_results:

                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urlencode(self.params)

                    try:
                        jsonresults, in_cache = gb_json_request(
                            URL, useCache=not refresh)
                        if jsonresults is None:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                    except Exception as err:
                        if hasattr(err, 'reason'):
                            errmsg = err.reason
                        else:
                            errmsg = str(err)
                        logger.warn(
                            'Google Books API Error [%s]: Check your API key or wait a while'
                            % errmsg)
                        break

                    if number_results == 0:
                        logger.warn('Found no results for %s' % authorname)
                        break
                    else:
                        logger.debug('Found %s result%s for %s' %
                                     (number_results, plural(number_results),
                                      authorname))

                    startindex += 40

                    for item in jsonresults['items']:

                        total_count += 1
                        book = bookdict(item)
                        # skip if no author, no author is no book.
                        if not book['author']:
                            logger.debug(
                                'Skipped a result without authorfield.')
                            continue

                        isbnhead = ""
                        if len(book['isbn']) == 10:
                            isbnhead = book['isbn'][0:3]
                        elif len(book['isbn']) == 13:
                            isbnhead = book['isbn'][3:6]

                        booklang = book['lang']
                        # do we care about language?
                        if "All" not in valid_langs:
                            if book['isbn']:
                                # seems google lies to us, sometimes tells us books are in english when they are not
                                if booklang == "Unknown" or booklang == "en":
                                    googlelang = booklang
                                    match = False
                                    lang = myDB.match(
                                        'SELECT lang FROM languages where isbn=?',
                                        (isbnhead, ))
                                    if lang:
                                        booklang = lang['lang']
                                        cache_hits += 1
                                        logger.debug(
                                            "Found cached language [%s] for [%s]"
                                            % (booklang, isbnhead))
                                        match = True
                                    if not match:  # no match in cache, try lookup dict
                                        if isbnhead:
                                            if len(book['isbn']) == 13 and book['isbn'].startswith('979'):
                                                for lang in lazylibrarian.isbn_979_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_979_dict[lang]
                                                        logger.debug("ISBN979 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            elif len(book['isbn']) == 10 or \
                                                    (len(book['isbn']) == 13 and book['isbn'].startswith('978')):
                                                for lang in lazylibrarian.isbn_978_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_978_dict[lang]
                                                        logger.debug("ISBN978 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            if match:
                                                myDB.action('insert into languages values (?, ?)',
                                                            (isbnhead, booklang))

                                    if not match:
                                        booklang = thingLang(book['isbn'])
                                        lt_lang_hits += 1
                                        if booklang:
                                            match = True
                                            myDB.action(
                                                'insert into languages values (?, ?)',
                                                (isbnhead, booklang))

                                    if match:
                                        # We found a better language match
                                        if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]:
                                            # these are all english, may need to expand this list
                                            logger.debug("%s Google thinks [%s], we think [%s]" %
                                                         (book['name'], googlelang, booklang))
                                            gb_lang_change += 1
                                    else:  # No match anywhere, accept google language
                                        booklang = googlelang

                            # skip if language is in ignore list
                            if booklang not in valid_langs:
                                logger.debug('Skipped [%s] with language %s' %
                                             (book['name'], booklang))
                                ignored += 1
                                continue

                        ignorable = ['future', 'date', 'isbn']
                        if lazylibrarian.CONFIG['NO_LANG']:
                            ignorable.append('lang')
                        rejected = None
                        check_status = False
                        existing_book = None
                        bookname = book['name']
                        bookid = item['id']
                        if not bookname:
                            logger.debug(
                                'Rejecting bookid %s for %s, no bookname' %
                                (bookid, authorname))
                            rejected = 'name', 'No bookname'
                        else:
                            bookname = replace_all(unaccented(bookname),
                                                   {':': '.', '"': '', '\'': ''}).strip()
                            if re.match(r'[^\w-]', bookname):
                                # remove books with bad characters in title
                                logger.debug("[%s] removed book for bad characters" % bookname)
                                rejected = 'chars', 'Bad characters in bookname'

                        if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                            # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                            if book['date'] > today()[:len(book['date'])]:
                                logger.debug('Rejecting %s, future publication date %s' %
                                             (bookname, book['date']))
                                rejected = 'future', 'Future publication date [%s]' % book['date']

                        if not rejected and lazylibrarian.CONFIG['NO_PUBDATE']:
                            if not book['date']:
                                logger.debug('Rejecting %s, no publication date' % bookname)
                                rejected = 'date', 'No publication date'

                        if not rejected and lazylibrarian.CONFIG['NO_ISBN']:
                            if not isbnhead:
                                logger.debug('Rejecting %s, no isbn' % bookname)
                                rejected = 'isbn', 'No ISBN'

                        if not rejected:
                            cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                            cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                            match = myDB.match(cmd, (bookname.replace('"', '""'),
                                                     authorname.replace('"', '""')))
                            if match:
                                if match['BookID'] != bookid:
                                    # we have a different book with this author/title already
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                                 (match['BookID'], authorname, bookname, bookid))
                                    rejected = 'bookid', 'Got under different bookid %s' % bookid
                                    duplicates += 1

                        cmd = 'SELECT AuthorName,BookName,AudioStatus,books.Status FROM books,authors'
                        cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                        match = myDB.match(cmd, (bookid, ))
                        if match:  # we have a book with this bookid already
                            if bookname != match['BookName'] or authorname != match['AuthorName']:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                             (bookid, authorname, bookname,
                                              match['AuthorName'], match['BookName']))
                            else:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                             (bookid, authorname, bookname))
                                check_status = True
                            duplicates += 1
                            rejected = 'got', 'Already got this book in database'

                            # Make sure we don't reject books we have got
                            if match['Status'] in [
                                    'Open', 'Have'
                            ] or match['AudioStatus'] in ['Open', 'Have']:
                                rejected = None

                        if rejected and rejected[0] not in ignorable:
                            removedResults += 1
                        if check_status or rejected is None or (
                                lazylibrarian.CONFIG['IMP_IGNORE'] and
                                rejected[0] in ignorable):  # future/date/isbn (and lang if NO_LANG)

                            cmd = 'SELECT Status,AudioStatus,BookFile,AudioFile,Manual,BookAdded,BookName '
                            cmd += 'FROM books WHERE BookID=?'
                            existing = myDB.match(cmd, (bookid, ))
                            existing_book = existing  # so the added/updated counts below are accurate
                            if existing:
                                book_status = existing['Status']
                                audio_status = existing['AudioStatus']
                                if lazylibrarian.CONFIG['FOUND_STATUS'] == 'Open':
                                    if book_status == 'Have' and existing['BookFile']:
                                        book_status = 'Open'
                                    if audio_status == 'Have' and existing['AudioFile']:
                                        audio_status = 'Open'
                                locked = existing['Manual']
                                added = existing['BookAdded']
                                if locked is None:
                                    locked = False
                                elif locked.isdigit():
                                    locked = bool(int(locked))
                            else:
                                book_status = bookstatus  # new_book status, or new_author status
                                audio_status = audiostatus
                                added = today()
                                locked = False

                            if rejected:
                                reason = rejected[1]
                                if rejected[0] in ignorable:
                                    book_status = 'Ignored'
                                    audio_status = 'Ignored'
                                    book_ignore_count += 1
                            else:
                                reason = ''

                            if locked:
                                locked_count += 1
                            else:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorID": authorid,
                                    "BookName": bookname,
                                    "BookSub": book['sub'],
                                    "BookDesc": book['desc'],
                                    "BookIsbn": book['isbn'],
                                    "BookPub": book['pub'],
                                    "BookGenre": book['genre'],
                                    "BookImg": book['img'],
                                    "BookLink": book['link'],
                                    "BookRate": float(book['rate']),
                                    "BookPages": book['pages'],
                                    "BookDate": book['date'],
                                    "BookLang": booklang,
                                    "Status": book_status,
                                    "AudioStatus": audio_status,
                                    "BookAdded": added,
                                    "WorkID": '',
                                    "ScanResult": reason
                                }

                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                                logger.debug("Book found: " + bookname + " " +
                                             book['date'])
                                if 'nocover' in book['img'] or 'nophoto' in book['img']:
                                    # try to get a cover from another source
                                    workcover, source = getBookCover(bookid)
                                    if workcover:
                                        logger.debug(
                                            'Updated cover for %s using %s' %
                                            (bookname, source))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": workcover}
                                        myDB.upsert("books", newValueDict,
                                                    controlValueDict)

                                elif book['img'] and book['img'].startswith('http'):
                                    link, success, _ = cache_img(
                                        "book",
                                        bookid,
                                        book['img'],
                                        refresh=refresh)
                                    if success:
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": link}
                                        myDB.upsert("books", newValueDict,
                                                    controlValueDict)
                                    else:
                                        logger.debug(
                                            'Failed to cache image for %s' %
                                            book['img'])

                                serieslist = []
                                if book['series']:
                                    serieslist = [('', book['seriesNum'],
                                                   cleanName(unaccented(book['series']), '&/'))]
                                if lazylibrarian.CONFIG['ADD_SERIES']:
                                    newserieslist = getWorkSeries(bookid)
                                    if newserieslist:
                                        serieslist = newserieslist
                                        logger.debug(
                                            'Updated series: %s [%s]' %
                                            (bookid, serieslist))
                                    setSeries(serieslist, bookid)

                                new_status = setStatus(bookid, serieslist,
                                                       bookstatus)

                                if new_status != book_status:
                                    book_status = new_status

                                worklink = getWorkPage(bookid)
                                if worklink:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"WorkPage": worklink}
                                    myDB.upsert("books", newValueDict,
                                                controlValueDict)

                                if not existing_book:
                                    logger.debug(
                                        "[%s] Added book: %s [%s] status %s" %
                                        (authorname, bookname, booklang,
                                         book_status))
                                    added_count += 1
                                else:
                                    logger.debug(
                                        "[%s] Updated book: %s [%s] status %s"
                                        % (authorname, bookname, booklang,
                                           book_status))
                                    updated_count += 1
            except KeyError:
                pass

            deleteEmptySeries()
            logger.debug(
                '[%s] The Google Books API was hit %s time%s to populate book list'
                % (authorname, api_hits, plural(api_hits)))
            cmd = 'SELECT BookName, BookLink, BookDate, BookImg, BookID from books WHERE AuthorID=?'
            cmd += ' AND Status != "Ignored" order by BookDate DESC'
            lastbook = myDB.match(cmd, (authorid, ))

            if lastbook:  # maybe there are no books [remaining] for this author
                lastbookname = lastbook['BookName']
                lastbooklink = lastbook['BookLink']
                lastbookdate = lastbook['BookDate']
                lastbookid = lastbook['BookID']
                lastbookimg = lastbook['BookImg']
            else:
                lastbookname = ""
                lastbooklink = ""
                lastbookdate = ""
                lastbookid = ""
                lastbookimg = ""

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {
                "Status": entrystatus,
                "LastBook": lastbookname,
                "LastLink": lastbooklink,
                "LastDate": lastbookdate,
                "LastBookID": lastbookid,
                "LastBookImg": lastbookimg
            }

            myDB.upsert("authors", newValueDict, controlValueDict)
            resultcount = added_count + updated_count
            logger.debug("Found %s total book%s for author" %
                         (total_count, plural(total_count)))
            logger.debug("Found %s locked book%s" %
                         (locked_count, plural(locked_count)))
            logger.debug("Removed %s unwanted language result%s" %
                         (ignored, plural(ignored)))
            logger.debug("Removed %s incorrect/incomplete result%s" %
                         (removedResults, plural(removedResults)))
            logger.debug("Removed %s duplicate result%s" %
                         (duplicates, plural(duplicates)))
            logger.debug("Ignored %s book%s" %
                         (book_ignore_count, plural(book_ignore_count)))
            logger.debug("Imported/Updated %s book%s for author" %
                         (resultcount, plural(resultcount)))

            myDB.action(
                'insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                (authorname.replace('"', '""'), api_hits, gr_lang_hits,
                 lt_lang_hits, gb_lang_change, cache_hits, ignored,
                 removedResults, not_cached, duplicates))

            if refresh:
                logger.info(
                    "[%s] Book processing complete: Added %s book%s / Updated %s book%s"
                    % (authorname, added_count, plural(added_count),
                       updated_count, plural(updated_count)))
            else:
                logger.info(
                    "[%s] Book processing complete: Added %s book%s to the database"
                    % (authorname, added_count, plural(added_count)))

        except Exception:
            logger.error('Unhandled exception in GB.get_author_books: %s' %
                         traceback.format_exc())
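
The log messages above lean on a small plural() helper. A minimal sketch of its assumed contract (return "s" when the count needs a plural suffix, "" otherwise); the real helper lives elsewhere in LazyLibrarian:

def plural(count):
    # "1 book" but "0 books" / "2 books"
    if count == 1:
        return ""
    return "s"
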
Exemple #55
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for across the tor, nzb, torznab and rss providers
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if mags is None:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if mags is None:  # backlog search
            searchmags = myDB.select('SELECT Title, Regex, LastAcquired, IssueDate '
                                     'from magazines WHERE Status="Active"')
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select(
                    'SELECT Title, Regex, LastAcquired, IssueDate from magazines '
                    'WHERE Title=? AND Status="Active"', (magazine['bookid'], ))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        logger.debug("Removing old magazine search results")
        myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' %
                    (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']

            if not searchterm:
                dic = {
                    '...': '',
                    ' & ': ' ',
                    ' = ': ' ',
                    '?': '',
                    '$': 's',
                    ' + ': ' ',
                    '"': '',
                    ',': '',
                    '*': ''
                }
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)

                searchterm = re.sub(r'[.\-/]', ' ', searchterm)
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm})

        if not searchlist:
            logger.warn('There is nothing to search for. Mark some magazines as active.')

        for book in searchlist:

            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
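                    # NO_NZB_MSG holds the time of the last warning; 1200 seconds = 20 minutes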
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No nzb providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(
                    book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG,
                                 0) + 1200 < timenow:
                        logger.warn(
                            'No direct providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_DIRECT_MSG = timenow

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(
                    book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No tor providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_TOR_MSG = timenow

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders = IterateOverRSSSites()
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No rss providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_RSS_MSG = timenow

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        resultlist.append({
                            'bookid': book['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': item['tor_date'],  # may be a fake date; rss torrents return none, only rss nzb feeds supply one
                            'nzbsize': item['tor_size'],
                            'nzbmode': item['tor_type']
                        })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # fall back to the original title if unaccenting left nothing usable
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # strip quote characters from titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(nzbsize_temp, 1000)  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)  # bytes -> megabytes
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    results = myDB.match(
                        'SELECT * from magazines WHERE Title=?', (bookid, ))
                    if not results:
                        logger.debug(
                            'Magazine [%s] does not match search term [%s].' %
                            (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(
                            lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(
                                lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" %
                                             nzbtitle)
                                rejected = True

                        if not rejected:
                            dic = {
                                '.': ' ',
                                '-': ' ',
                                '/': ' ',
                                '+': ' ',
                                '_': ' ',
                                '(': '',
                                ')': ''
                            }
                            nzbtitle_formatted = replace_all(nzbtitle, dic).strip()
                            # Need to make sure that substrings of magazine titles don't get found
                            # (e.g. Maxim USA will find Maximum PC USA)
                            # remove extra spaces if they're in a row
                            if nzbtitle_formatted and nzbtitle_formatted[0] == '[' and nzbtitle_formatted[-1] == ']':
                                nzbtitle_formatted = nzbtitle_formatted[1:-1]
                            nzbtitle_exploded_temp = " ".join(nzbtitle_formatted.split())
                            nzbtitle_exploded = nzbtitle_exploded_temp.split(' ')

                            if ' ' in bookid:
                                bookid_exploded = bookid.split(' ')
                            else:
                                bookid_exploded = [bookid]

                            # check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015
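                            # whole-word matching below avoids substring false positives,
                            # e.g. searching "Maxim USA": "Maximum PC USA 2015 06" explodes to
                            # [maximum, pc, usa, ...] which lacks the word "maxim", so it is rejected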

                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = []
                                for word in nzbtitle_exploded:
                                    wlist.append(unaccented(word).lower())
                                for word in bookid_exploded:
                                    if unaccented(word).lower() not in wlist:
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug(
                                        "Magazine title match failed " +
                                        bookid + " for " + nzbtitle_formatted)
                                else:
                                    logger.debug("Magazine matched " + bookid +
                                                 " for " + nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" %
                                             len(nzbtitle_exploded))
                                rejected = True

                        if not rejected:
                            blocked = myDB.match(
                                'SELECT * from wanted WHERE NZBurl=? and Status="Failed"',
                                (nzburl, ))
                            if blocked:
                                logger.debug(
                                    "Rejecting %s, blacklisted at %s" %
                                    (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(
                                str(results['Reject']).lower())
                            reject_list += getList(
                                lazylibrarian.CONFIG['REJECT_MAGS'])
                            lower_title = unaccented(
                                nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Reject: %s' %
                                                 str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" %
                                                 (nzbtitle_formatted, word))
                                    break

                        regex_pass = 0
                        if not rejected:
                            # Magazine names have many different styles of date
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            # MonthName DD YYYY or MonthName DD, YYYY
                            # YYYY MM or YYYY MM DD
                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            # nn YYYY issue number without "Nr" before it
                            # issue and year as a single 6 digit string eg 222015
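                            # Worked examples for the parsing passes below (illustrative titles):
                            #   "The MagPi July 2015"      -> pass 1, newdatish "2015-07-01"
                            #   "Wired January 10 2016"    -> pass 2, newdatish "2016-01-10"
                            #   "Linux Format 2015 08"     -> pass 3, newdatish "2015-08-01"
                            #   "Custom PC Issue 144 2015" -> pass 4 (issue number with year)
                            #   "Custom PC Issue 144"      -> pass 5 (issue number only)
                            #   "Android Magazine 55 2015" -> pass 6 (bare issue number + year)
                            #   "SFX 222015"               -> pass 7 (issue 22, year 2015)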
                            newdatish = "none"
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            pos = 0
                            while pos < len(nzbtitle_exploded):
                                year = check_year(nzbtitle_exploded[pos])
                                if year and pos:
                                    month = month2num(nzbtitle_exploded[pos - 1])
                                    if month:
                                        if pos > 1:
                                            day = check_int(nzbtitle_exploded[pos - 2], 1)
                                            if day > 31:  # probably issue number nn
                                                day = 1
                                        else:
                                            day = 1
                                        newdatish = "%04d-%02d-%02d" % (year, month, day)
                                        try:
                                            _ = datetime.date(year, month, day)
                                            regex_pass = 1
                                            break
                                        except ValueError:
                                            regex_pass = 0
                                pos += 1

                            # MonthName DD YYYY or MonthName DD, YYYY
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and pos > 1:
                                        month = month2num(nzbtitle_exploded[pos - 2])
                                        if month:
                                            day = check_int(nzbtitle_exploded[pos - 1].rstrip(','), 1)
                                            try:
                                                _ = datetime.date(year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (year, month, day)
                                                regex_pass = 2
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # YYYY MM or YYYY MM DD
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and pos + 1 < len(nzbtitle_exploded):
                                        month = check_int(nzbtitle_exploded[pos + 1], 0)
                                        if month:
                                            if pos + 2 < len(nzbtitle_exploded):
                                                day = check_int(nzbtitle_exploded[pos + 2], 1)
                                            else:
                                                day = 1
                                            try:
                                                _ = datetime.date(year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (year, month, day)
                                                regex_pass = 3
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    if nzbtitle_exploded[pos].lower() in ["issue", "no", "nr", "vol"]:
                                        if pos + 1 < len(nzbtitle_exploded):
                                            issue = check_int(nzbtitle_exploded[pos + 1], 0)
                                            if issue:
                                                newdatish = str(issue)  # 4 == 04 == 004
                                                if pos + 2 < len(nzbtitle_exploded):
                                                    year = check_year(nzbtitle_exploded[pos + 2])
                                                    if year and year < int(datetime.date.today().year):
                                                        newdatish = '0'  # it's old
                                                    regex_pass = 4  # Issue/No/Nr/Vol nn, YYYY
                                                else:
                                                    regex_pass = 5  # Issue/No/Nr/Vol nn
                                                break
                                    pos += 1

                            # nn YYYY issue number without "Nr" before it
                            if not regex_pass:
                                pos = 1
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year:
                                        issue = check_int(nzbtitle_exploded[pos - 1], 0)
                                        if issue:
                                            newdatish = str(issue)  # 4 == 04 == 004
                                            regex_pass = 6
                                            if year < int(datetime.date.today().year):
                                                newdatish = '0'  # it's old
                                            break
                                    pos += 1

                            # issue and year as a single 6 digit string eg 222015
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    issue = nzbtitle_exploded[pos]
                                    if issue.isdigit() and len(issue) == 6:
                                        year = int(issue[2:])
                                        issue = int(issue[:2])
                                        newdatish = str(issue)  # 4 == 04 == 004
                                        regex_pass = 7
                                        if year < int(datetime.date.today().year):
                                            newdatish = '0'  # it's old
                                        break
                                    pos += 1

                            if not regex_pass:
                                logger.debug(
                                    'Magazine %s not in a recognised date format.'
                                    % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                regex_pass = 99

                        if rejected:
                            rejects += 1
                        else:
                            if lazylibrarian.LOGLEVEL > 2:
                                logger.debug("regex %s [%s] %s" %
                                             (regex_pass, nzbtitle_formatted,
                                              newdatish))
                            # wanted issues go into wanted table marked "Wanted"
                            #  the rest into pastissues table marked "Skipped"
                            insert_table = "pastissues"
                            insert_status = "Skipped"

                            control_date = results['IssueDate']
                            if control_date is None:  # we haven't got any copies of this magazine yet
                                # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                                # could perhaps calc differently for weekly, biweekly etc
                                # or for magazines with only an issue number, use zero

                                if str(newdatish).isdigit():
                                    logger.debug(
                                        'Magazine comparing issue numbers (%s)'
                                        % newdatish)
                                    control_date = 0
                                elif re.match(r'\d+-\d\d-\d\d', str(newdatish)):
                                    start_time = time.time()
                                    start_time -= int(lazylibrarian.CONFIG['MAG_AGE']) * 24 * 60 * 60  # MAG_AGE days in seconds
                                    if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                        start_time = 0
                                    control_date = time.strftime(
                                        "%Y-%m-%d", time.localtime(start_time))
                                    logger.debug(
                                        'Magazine date comparing to %s' %
                                        control_date)
                                else:
                                    logger.debug(
                                        'Magazine unable to find comparison type [%s]'
                                        % newdatish)
                                    control_date = 0

                            if str(control_date).isdigit() and str(newdatish).isdigit():
                                # for issue numbers, check if later than last one we have
                                comp_date = int(newdatish) - int(control_date)
                                newdatish = "%s" % newdatish
                                newdatish = newdatish.zfill(4)  # pad so we sort correctly, eg "0007" < "0012"
                            elif re.match(r'\d+-\d\d-\d\d', str(control_date)) and \
                                    re.match(r'\d+-\d\d-\d\d', str(newdatish)):
                                # only grab a copy if it's newer than the most recent we have,
                                # or newer than a month ago if we have none
                                comp_date = datecompare(
                                    newdatish, control_date)
                            else:
                                # invalid comparison of date and issue number
                                if re.match(r'\d+-\d\d-\d\d', str(control_date)):
                                    logger.debug(
                                        'Magazine %s failed: Expecting a date'
                                        % nzbtitle_formatted)
                                else:
                                    logger.debug(
                                        'Magazine %s failed: Expecting issue number'
                                        % nzbtitle_formatted)
                                bad_date += 1
                                newdatish = "1970-01-01"  # this is our fake date for ones we can't decipher
                                comp_date = 0

                            if comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + newdatish
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug(
                                        'This issue of %s is new, downloading'
                                        % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' %
                                                 len(issues))
                                    if lazylibrarian.LOGLEVEL > 2:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    insert_status = "Wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug(
                                        'This issue of %s is already flagged for download'
                                        % issue)
                            else:
                                if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                    logger.debug(
                                        'This issue of %s is old; skipping.' %
                                        nzbtitle_formatted)
                                    old_date += 1

                            # store only the _new_ matching results
                            #  Don't add a new entry if this issue has been found on an earlier search
                            #  and status has been user-set ( we only delete the "Skipped" ones )
                            #  In "wanted" table it might be already snatched/downloading/processing

                            mag_entry = myDB.match(
                                'SELECT * from %s WHERE NZBtitle=? and NZBprov=?'
                                % insert_table, (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug(
                                        '%s is already in %s marked %s' %
                                        (nzbtitle, insert_table,
                                         insert_status))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": newdatish,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict,
                                            controlValueDict)
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Added %s to %s marked %s' %
                                                 (nzbtitle, insert_table,
                                                  insert_status))

                msg = 'Found %i result%s for %s. %i new,' % (
                    total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (
                    old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch = TORDownloadMethod(magazine['bookid'],
                                                   magazine['nzbtitle'],
                                                   magazine['nzburl'],
                                                   'magazine')
                    else:
                        snatch = NZBDownloadMethod(magazine['bookid'],
                                                   magazine['nzbtitle'],
                                                   magazine['nzburl'],
                                                   'magazine')
                    if snatch:
                        logger.info(
                            'Downloading %s from %s' %
                            (magazine['nzbtitle'], magazine["nzbprov"]))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']),
                                       magazine["nzbprov"], now()))
                        custom_notify_snatch(magazine['bookid'])
                        scheduleJob(action='Start', target='processDir')

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' %
                     traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
def findBestResult(resultlist, book, searchtype, source):
    """ resultlist: collated results from search providers
        book:       the book we want to find
        searchtype: book, magazine, shortbook, audiobook etc.
        source:     nzb, tor, rss, direct
        return:     highest scoring match, or None if no match
    """
    try:
        myDB = database.DBConnection()
        dictrepl = {
            '...': '',
            '.': ' ',
            ' & ': ' ',
            ' = ': ' ',
            '?': '',
            '$': 's',
            ' + ': ' ',
            '"': '',
            ',': ' ',
            '*': '',
            '(': '',
            ')': '',
            '[': '',
            ']': '',
            '#': '',
            '0': '',
            '1': '',
            '2': '',
            '3': '',
            '4': '',
            '5': '',
            '6': '',
            '7': '',
            '8': '',
            '9': '',
            '\'': '',
            ':': '',
            '!': '',
            '-': ' ',
            r'\s\s': ' '  # literal two characters; kept from the source, replace_all does plain replacement
        }

        dic = {
            '...': '',
            '.': ' ',
            ' & ': ' ',
            ' = ': ' ',
            '?': '',
            '$': 's',
            ' + ': ' ',
            '"': '',
            ',': '',
            '*': '',
            ':': '.',
            ';': '',
            '\'': ''
        }

        if source == 'rss':
            author, title = get_searchterm(book, searchtype)
        else:
            author = unaccented_str(replace_all(book['authorName'], dic))
            title = unaccented_str(replace_all(book['bookName'], dic))

        if book['library'] == 'AudioBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXAUDIO'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINAUDIO'], 0)
            auxinfo = 'AudioBook'

        else:  # elif book['library'] == 'eBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXSIZE'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINSIZE'], 0)
            auxinfo = 'eBook'

        if source == 'nzb':
            prefix = 'nzb'
        else:  # rss and libgen return same names as torrents
            prefix = 'tor_'

        logger.debug('Searching %s %s results for best %s match' %
                     (len(resultlist), source, auxinfo))

        matches = []
        for res in resultlist:
            resultTitle = unaccented_str(replace_all(res[prefix + 'title'], dictrepl)).strip()
            resultTitle = re.sub(r"\s\s+", " ", resultTitle)  # remove extra whitespace
            Author_match = fuzz.token_set_ratio(author, resultTitle)
            Book_match = fuzz.token_set_ratio(title, resultTitle)
            logger.debug("%s author/book Match: %s/%s %s at %s" %
                         (source.upper(), Author_match, Book_match,
                          resultTitle, res[prefix + 'prov']))

            rejected = False

            url = res[prefix + 'url']
            if url is None:
                rejected = True
                logger.debug("Rejecting %s, no URL found" % resultTitle)

            if not rejected:
                already_failed = myDB.match(
                    'SELECT * from wanted WHERE NZBurl=? and Status="Failed"',
                    (url, ))
                if already_failed:
                    logger.debug("Rejecting %s, blacklisted at %s" %
                                 (resultTitle, already_failed['NZBprov']))
                    rejected = True

            if not rejected and not url.startswith('http') and not url.startswith('magnet'):
                rejected = True
                logger.debug("Rejecting %s, invalid URL [%s]" %
                             (resultTitle, url))

            if not rejected:
                for word in reject_list:
                    if word in getList(resultTitle.lower()) and word not in getList(author.lower()) \
                            and word not in getList(title.lower()):
                        rejected = True
                        logger.debug("Rejecting %s, contains %s" %
                                     (resultTitle, word))
                        break

            size_temp = check_int(res[prefix + 'size'], 1000)  # need to cater for when this is None (Issue 35)
            size = round(float(size_temp) / 1048576, 2)  # bytes -> megabytes

            if not rejected and maxsize and size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % resultTitle)

            if not rejected and minsize and size < minsize:
                rejected = True
                logger.debug("Rejecting %s, too small" % resultTitle)

            if not rejected:
                bookid = book['bookid']
                newTitle = (author + ' - ' + title + ' LL.(' + bookid + ')').strip()

                if source == 'nzb':
                    mode = res['nzbmode']  # nzb, torznab
                else:
                    mode = res['tor_type']  # torrent, magnet, nzb (from rss), direct

                controlValueDict = {"NZBurl": url}
                newValueDict = {
                    "NZBprov": res[prefix + 'prov'],
                    "BookID": bookid,
                    "NZBdate": now(),  # when we asked for it
                    "NZBsize": size,
                    "NZBtitle": newTitle,
                    "NZBmode": mode,
                    "AuxInfo": auxinfo,
                    "Status": "Skipped"
                }

                score = (Book_match + Author_match) / 2  # as a percentage
                # lose a point for each unwanted word in the title so we get the closest match
                # but for RSS ignore anything at the end in square braces [keywords, genres etc]
                if source == 'rss':
                    wordlist = getList(resultTitle.rsplit('[', 1)[0].lower())
                else:
                    wordlist = getList(resultTitle.lower())
                words = [x for x in wordlist if x not in getList(author.lower())]
                words = [x for x in words if x not in getList(title.lower())]
                typelist = ''

                if newValueDict['AuxInfo'] == 'eBook':
                    words = [x for x in words if x not in getList(lazylibrarian.CONFIG['EBOOK_TYPE'])]
                    typelist = getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
                elif newValueDict['AuxInfo'] == 'AudioBook':
                    words = [x for x in words if x not in getList(lazylibrarian.CONFIG['AUDIOBOOK_TYPE'])]
                    typelist = getList(lazylibrarian.CONFIG['AUDIOBOOK_TYPE'])
                score -= len(words)
                # prioritise titles that include the ebook types we want
                # add more points for booktypes nearer the left in the list
                # eg if epub, mobi, pdf  add 3 points if epub found, 2 for mobi, 1 for pdf
                booktypes = [x for x in wordlist if x in typelist]
                if booktypes:
                    typelist = list(reversed(typelist))
                    for item in booktypes:
                        for i in [i for i, x in enumerate(typelist) if x == item]:
                            score += i + 1
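                # e.g. with EBOOK_TYPE "epub, mobi, pdf" the reversed typelist is
                # [pdf, mobi, epub], so "epub" in the title adds 3 points, "mobi" 2, "pdf" 1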
                # score += len(booktypes)
                matches.append([
                    score, resultTitle, newValueDict, controlValueDict,
                    res['priority']
                ])

        if matches:
            highest = max(matches, key=lambda s: (s[0], s[4]))  # best score; ties broken by download priority
            score = highest[0]
            resultTitle = highest[1]
            newValueDict = highest[2]
            # controlValueDict = highest[3]
            dlpriority = highest[4]

            if score < int(lazylibrarian.CONFIG['MATCH_RATIO']):
                logger.info(
                    'Nearest match (%s%%): %s using %s search for %s %s' %
                    (score, resultTitle, searchtype, book['authorName'],
                     book['bookName']))
            else:
                logger.info(
                    'Best match (%s%%): %s using %s search, %s priority %s' %
                    (score, resultTitle, searchtype, newValueDict['NZBprov'],
                     dlpriority))
            return highest
        else:
            logger.debug("No %s found for [%s] using searchtype %s" %
                         (source, book["searchterm"], searchtype))
        return None
    except Exception:
        logger.error('Unhandled exception in findBestResult: %s' %
                     traceback.format_exc())
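
findBestResult scores each candidate with fuzz.token_set_ratio. A quick illustration using the fuzzywuzzy library (the titles are invented and exact scores can vary):

from fuzzywuzzy import fuzz

# token_set_ratio ignores word order and repeated words, so a release title
# that contains all of the author/book words still scores 100
print(fuzz.token_set_ratio("Iain M Banks", "Iain M Banks - Excession EPUB"))  # 100
print(fuzz.token_set_ratio("Excession", "Iain M Banks - Excession EPUB"))     # 100
print(fuzz.token_set_ratio("Excession", "Matter - Iain M Banks"))             # low
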
Exemple #57
0
    def _sendPushover(message=None,
                      event=None,
                      pushover_apitoken=None,
                      pushover_keys=None,
                      pushover_device=None,
                      notificationType=None,
                      method=None,
                      force=False):

        if not lazylibrarian.CONFIG['USE_PUSHOVER'] and not force:
            return False

        if pushover_apitoken is None:
            pushover_apitoken = lazylibrarian.CONFIG['PUSHOVER_APITOKEN']
        if pushover_keys is None:
            pushover_keys = lazylibrarian.CONFIG['PUSHOVER_KEYS']
        if pushover_device is None:
            pushover_device = lazylibrarian.CONFIG['PUSHOVER_DEVICE']
        if method is None:
            method = 'POST'
        if notificationType is None:
            testMessage = True
            uri = "/1/users/validate.json"
            logger.debug("Testing Pushover authentication and retrieving the device list.")
        else:
            testMessage = False
            uri = "/1/messages.json"
        logger.debug("Pushover event: " + str(event))
        logger.debug("Pushover message: " + str(message))
        logger.debug("Pushover api: " + str(pushover_apitoken))
        logger.debug("Pushover keys: " + str(pushover_keys))
        logger.debug("Pushover device: " + str(pushover_device))
        logger.debug("Pushover notification type: " + str(notificationType))

        http_handler = HTTPSConnection('api.pushover.net')

        if PY2:
            message = message.encode(lazylibrarian.SYS_ENCODING)
            event = event.encode(lazylibrarian.SYS_ENCODING)
        try:
            data = {
                'token': pushover_apitoken,
                'user': pushover_keys,
                'title': event,
                'message': message,
                'device': pushover_device,
                'priority': lazylibrarian.CONFIG['PUSHOVER_PRIORITY']
            }
            http_handler.request(
                method,
                uri,
                headers={'Content-type': "application/x-www-form-urlencoded"},
                body=urlencode(data))
        except Exception as e:
            logger.error(str(e))
            return False

        response = http_handler.getresponse()
        request_body = response.read()
        if not PY2:
            request_body = request_body.decode('utf-8')  # bytes -> str so the 'devices' check below works
        request_status = response.status
        logger.debug("Pushover Response: %s" % request_status)
        logger.debug("Pushover Reason: %s" % response.reason)

        if request_status == 200:
            if testMessage:
                logger.debug(request_body)
                if 'devices' in request_body:
                    return "Devices: %s" % request_body.split('[')[1].split(']')[0]
                else:
                    return request_body
            else:
                return True
        elif 400 <= request_status < 500:
            logger.error("Pushover request failed: %s" % str(request_body))
            return False
        else:
            logger.error("Pushover notification failed: %s" % request_status)
            return False
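
The same notification can be sent with the requests library instead of a raw HTTPSConnection; a rough sketch against Pushover's documented /1/messages.json endpoint, with placeholder credentials supplied by the caller:

import requests

def send_pushover(token, user, title, body):
    # POST the message; Pushover answers 200 on success, 4xx on bad input
    resp = requests.post('https://api.pushover.net/1/messages.json',
                         data={'token': token, 'user': user,
                               'title': title, 'message': body},
                         timeout=30)
    return resp.status_code == 200
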
Exemple #58
0
def _get_auth():
    logger.debug('Deluge: Authenticating...')
    global delugeweb_auth, delugeweb_url, headers
    delugeweb_auth = {}

    delugeweb_host = lazylibrarian.CONFIG['DELUGE_HOST']
    delugeweb_url_base = lazylibrarian.CONFIG['DELUGE_URL_BASE']
    delugeweb_port = check_int(lazylibrarian.CONFIG['DELUGE_PORT'], 0)
    if not delugeweb_host or not delugeweb_port:
        logger.error('Invalid delugeweb host or port, check your config')
        return None

    delugeweb_password = lazylibrarian.CONFIG['DELUGE_PASS']

    if not delugeweb_host.startswith(("http://", "https://")):
        delugeweb_host = 'http://%s' % delugeweb_host

    if delugeweb_host.endswith('/'):
        delugeweb_host = delugeweb_host[:-1]

    if delugeweb_url_base.endswith('/'):
        delugeweb_url_base = delugeweb_url_base[:-1]

    delugeweb_host = "%s:%s" % (delugeweb_host, delugeweb_port)

    delugeweb_url = delugeweb_host + delugeweb_url_base + '/json'

    post_data = json.dumps({
        "method": "auth.login",
        "params": [delugeweb_password],
        "id": 1
    })
    if PY2:
        post_data = post_data.encode(lazylibrarian.SYS_ENCODING)
    try:
        response = requests.post(delugeweb_url,
                                 data=post_data,
                                 cookies=delugeweb_auth,
                                 headers=headers)
        #                                  , verify=TORRENT_VERIFY_CERT)
    except Exception as err:
        logger.debug('Deluge %s: auth.login returned %s' %
                     (type(err).__name__, str(err)))
        delugeweb_auth = {}
        return None

    auth = json.loads(response.text)["result"]
    if auth is False:
        logger.debug('Deluge: auth.login returned False')
        delugeweb_auth = {}
        return None

    delugeweb_auth = response.cookies

    post_data = json.dumps({"method": "web.connected", "params": [], "id": 10})
    if PY2:
        post_data = post_data.encode(lazylibrarian.SYS_ENCODING)
    try:
        response = requests.post(delugeweb_url,
                                 data=post_data,
                                 cookies=delugeweb_auth,
                                 headers=headers)
        #                                  , verify=TORRENT_VERIFY_CERT)
    except Exception as err:
        logger.debug('Deluge %s: web.connected returned %s' %
                     (type(err).__name__, str(err)))
        delugeweb_auth = {}
        return None

    connected = json.loads(response.text)['result']

    if not connected:
        post_data = json.dumps({
            "method": "web.get_hosts",
            "params": [],
            "id": 11
        })
        if PY2:
            post_data = post_data.encode(lazylibrarian.SYS_ENCODING)
        try:
            response = requests.post(delugeweb_url,
                                     data=post_data,
                                     cookies=delugeweb_auth,
                                     headers=headers)
            #                                  , verify=TORRENT_VERIFY_CERT)
        except Exception as err:
            logger.debug('Deluge %s: web.get_hosts returned %s' %
                         (type(err).__name__, str(err)))
            delugeweb_auth = {}
            return None

        delugeweb_hosts = json.loads(response.text)['result']
        if len(delugeweb_hosts) == 0:
            logger.error('Deluge: WebUI does not contain daemons')
            delugeweb_auth = {}
            return None

        post_data = json.dumps({
            "method": "web.connect",
            "params": [delugeweb_hosts[0][0]],
            "id": 11
        })
        if PY2:
            post_data = post_data.encode(lazylibrarian.SYS_ENCODING)

        try:
            _ = requests.post(delugeweb_url,
                              data=post_data,
                              cookies=delugeweb_auth,
                              headers=headers)
            #                                  , verify=TORRENT_VERIFY_CERT)
        except Exception as err:
            logger.debug('Deluge %s: web.connect returned %s' %
                         (type(err).__name__, str(err)))
            delugeweb_auth = {}
            return None

        post_data = json.dumps({
            "method": "web.connected",
            "params": [],
            "id": 10
        })

        if PY2:
            post_data = post_data.encode(lazylibrarian.SYS_ENCODING)
        try:
            response = requests.post(delugeweb_url,
                                     data=post_data,
                                     cookies=delugeweb_auth,
                                     headers=headers)
            #                                  , verify=TORRENT_VERIFY_CERT)
        except Exception as err:
            logger.debug('Deluge %s: web.connected returned %s' %
                         (type(err).__name__, str(err)))
            delugeweb_auth = {}
            return None

        connected = json.loads(response.text)['result']

        if not connected:
            logger.error('Deluge: WebUI could not connect to daemon')
            delugeweb_auth = {}
            return None

    return auth
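
_get_auth repeats the same encode-and-post sequence for every RPC call. A possible refactor (not in the source) is a small helper that wraps Deluge's JSON-RPC envelope and reuses the session cookie captured by auth.login:

def _deluge_rpc(method, params, req_id):
    # post one JSON-RPC request to the Deluge WebUI and return its 'result',
    # or None if the request failed
    post_data = json.dumps({"method": method, "params": params, "id": req_id})
    if PY2:
        post_data = post_data.encode(lazylibrarian.SYS_ENCODING)
    try:
        response = requests.post(delugeweb_url, data=post_data,
                                 cookies=delugeweb_auth, headers=headers)
        return json.loads(response.text)['result']
    except Exception as err:
        logger.debug('Deluge %s: %s returned %s' %
                     (type(err).__name__, method, str(err)))
        return None
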
def processDir():
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug('Checking [%s] for files to post process' % processpath)

    # os.listdir raises OSError if the directory doesn't exist,
    # so catch it and fall through with nothing to process
    try:
        downloads = os.listdir(processpath)
    except OSError:
        logger.error('Could not access [%s] directory' % processpath)
        downloads = None

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if not snatched:
        logger.info('No books are snatched. Nothing to process.')
    elif not downloads:
        logger.info('No downloads found. Nothing to process.')
    else:
        ppcount = 0
        for book in snatched:
            if book['NZBtitle'] in downloads:
                pp_path = os.path.join(processpath, book['NZBtitle'])
                logger.info('Found folder %s.' % pp_path)

                data = myDB.select('SELECT * from books WHERE BookID=?',
                                   (book['BookID'],))
                if not data:
                    logger.warn('BookID %s not found in database' % book['BookID'])
                    continue
                for metadata in data:
                    authorname = metadata['AuthorName']
                    authorimg = metadata['AuthorLink']
                    bookname = metadata['BookName']
                    bookdesc = metadata['BookDesc']
                    bookisbn = metadata['BookIsbn']
                    bookrate = metadata['BookRate']
                    bookimg = metadata['BookImg']
                    bookpage = metadata['BookPages']
                    booklink = metadata['BookLink']
                    bookdate = metadata['BookDate']
                    booklang = metadata['BookLang']
                    bookpub = metadata['BookPub']

                dest_path = authorname + '/' + bookname
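                # strip characters that are unsafe or reserved in folder names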
                dic = {
                    '<': '',
                    '>': '',
                    '=': '',
                    '?': '',
                    '"': '',
                    ',': '',
                    '*': '',
                    ':': '',
                    ';': ''
                }
                dest_path = formatter.latinToAscii(
                    formatter.replace_all(dest_path, dic))
                dest_path = os.path.join(lazylibrarian.DESTINATION_DIR,
                                         dest_path).encode(
                                             lazylibrarian.SYS_ENCODING)

                processBook = processDestination(pp_path, dest_path,
                                                 authorname, bookname)

                if processBook:

                    ppcount = ppcount + 1

                    # Calibre auto-add needs the book in a single directory, not nested,
                    # so copy the file we just moved to dest_path into the auto-add folder.
                    processAutoAdd(dest_path)

                    # try image
                    processIMG(dest_path, bookimg)

                    # try metadata
                    processOPF(dest_path, authorname, bookname, bookisbn,
                               book['BookID'], bookpub, bookdate, bookdesc,
                               booklang)

                    # update nzbs
                    controlValueDict = {"NZBurl": book['NZBurl']}
                    newValueDict = {"Status": "Success"}
                    myDB.upsert("wanted", newValueDict, controlValueDict)

                    # update books
                    controlValueDict = {"BookID": book['BookID']}
                    newValueDict = {"Status": "Have"}
                    myDB.upsert("books", newValueDict, controlValueDict)

                    # update authors
                    query = 'SELECT COUNT(*) FROM books WHERE AuthorName=? AND Status="Have"'
                    countbooks = myDB.action(query, (authorname,)).fetchone()
                    havebooks = int(countbooks[0])
                    controlValueDict = {"AuthorName": authorname}
                    newValueDict = {"HaveBooks": havebooks}
                    myDB.upsert("authors", newValueDict, controlValueDict)

                    logger.info('Successfully processed: %s - %s' %
                                (authorname, bookname))
                else:
                    logger.error(
                        'Postprocessing for %s failed. Warning: AutoAdd will be repeated on the next run'
                        % bookname)
        if ppcount:
            logger.info('%s books downloaded and processed.' % ppcount)
    logger.debug(' - Completed all snatched/downloaded files')
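
For reference, formatter.replace_all as used above amounts to a straight multi-replace;
a minimal stand-in (the behaviour is an assumption based on how it is called):

def replace_all(text, dic):
    # apply every substitution in dic to text
    for bad, good in dic.items():
        text = text.replace(bad, good)
    return text

dic = {'<': '', '>': '', '=': '', '?': '', '"': '', ',': '', '*': '', ':': '', ';': ''}
print(replace_all('Miller, Frank/Batman: Year One', dic))
# -> 'Miller Frank/Batman Year One'  (the '/' between author and book is kept)
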
def magazineScan(title=None):
    lazylibrarian.MAG_UPDATE = 1
    onetitle = title

    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()

        mag_path = lazylibrarian.CONFIG['MAG_DEST_FOLDER']
        mag_path = mag_path.split('$')[0]

        if lazylibrarian.CONFIG['MAG_RELATIVE']:
            mag_path = os.path.join(lazylibrarian.DIRECTORY('eBook'), mag_path)
        if PY2:
            mag_path = mag_path.encode(lazylibrarian.SYS_ENCODING)

        if lazylibrarian.CONFIG['FULL_SCAN'] and not onetitle:
            mags = myDB.select('select * from Issues')
            # check all the issues are still there, delete entry if not
            for mag in mags:
                title = mag['Title']
                issuedate = mag['IssueDate']
                issuefile = mag['IssueFile']

                if issuefile and not os.path.isfile(issuefile):
                    myDB.action('DELETE from Issues where issuefile=?', (issuefile,))
                    logger.info('Issue %s - %s deleted as not found on disk' % (title, issuedate))
                    controlValueDict = {"Title": title}
                    newValueDict = {
                        "LastAcquired": None,  # clear magazine dates
                        "IssueDate": None,  # we will fill them in again later
                        "LatestCover": None,
                        "IssueStatus": "Skipped"  # assume there are no issues now
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    logger.debug('Magazine %s details reset' % title)

            mags = myDB.select('SELECT * from magazines')
            # now check the magazine titles and delete any with no issues
            for mag in mags:
                title = mag['Title']
                count = myDB.select('SELECT COUNT(Title) as counter FROM issues WHERE Title=?', (title,))
                issues = count[0]['counter']
                if not issues:
                    logger.debug('Magazine %s deleted as no issues found' % title)
                    myDB.action('DELETE from magazines WHERE Title=?', (title,))

        if onetitle:
            match = myDB.match('SELECT LatestCover from magazines where Title=?', (onetitle,))
            if match:
                mag_path = os.path.dirname(match['LatestCover'])

        logger.info(' Checking [%s] for magazines' % mag_path)

        # massage the MAG_DEST_FILE config parameter into something we can use
        # with regular expression matching: escape every character, then swap
        # the escaped $Title / $IssueDate tokens for named capture groups below
        matchString = ''
        for char in lazylibrarian.CONFIG['MAG_DEST_FILE']:
            matchString = matchString + '\\' + char
        booktype_list = getList(lazylibrarian.CONFIG['MAG_TYPE'])
        booktypes = '|'.join(booktype_list)
        match = matchString.replace("\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "(?P<title>.*?)") + r'\.(?:' + booktypes + ')'
        title_pattern = re.compile(match, re.VERBOSE)
        match = matchString.replace("\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "") + r'\.(?:' + booktypes + ')'
        date_pattern = re.compile(match, re.VERBOSE)
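        # e.g. MAG_DEST_FILE = '$Title - $IssueDate' escapes to
        #   \$\T\i\t\l\e\ \-\ \$\I\s\s\u\e\D\a\t\e
        # so title_pattern ends up as (?P<title>.*?)\ \-\ (?P<issuedate>.*?)\.(?:pdf|epub)
        # (assuming MAG_TYPE is 'pdf, epub')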

        # try to ensure startdir is str as os.walk can fail if it tries to convert a subdir or file
        # to utf-8 and fails (eg scandinavian characters in ascii 8bit)
        for rootdir, dirnames, filenames in os.walk(makeBytestr(mag_path)):
            rootdir = makeUnicode(rootdir)
            filenames = [makeUnicode(item) for item in filenames]
            for fname in filenames:
                # maybe not all magazines will be pdf?
                if is_valid_booktype(fname, booktype='mag'):
                    issuedate = ''
                    # noinspection PyBroadException
                    try:
                        match = title_pattern.match(fname)
                        if match:
                            title = match.group("title")
                            issuedate = match.group("issuedate")
                            if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                                logger.debug("Title pattern [%s][%s]" % (title, issuedate))
                            match = True
                        else:
                            logger.debug("Title pattern match failed for [%s]" % fname)
                    except Exception:
                        match = False

                    if not match:
                        # noinspection PyBroadException
                        try:
                            match = date_pattern.match(fname)
                            if match:
                                issuedate = match.group("issuedate")
                                title = os.path.basename(rootdir)
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                                    logger.debug("Date pattern [%s][%s]" % (title, issuedate))
                                match = True
                            else:
                                logger.debug("Date pattern match failed for [%s]" % fname)
                        except Exception:
                            match = False

                    if not match:
                        title = os.path.basename(rootdir)
                        issuedate = ''

                    dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '', '[': ' ', ']': ' ',
                           '#': '# '}
                    if issuedate:
                        exploded = replace_all(issuedate, dic).strip()
                        # remove extra spaces if they're in a row
                        exploded = " ".join(exploded.split())
                        exploded = exploded.split(' ')
                        regex_pass, issuedate, year = lazylibrarian.searchmag.get_issue_date(exploded)
                        if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                            logger.debug("Date regex [%s][%s][%s]" % (regex_pass, issuedate, year))
                        if not regex_pass:
                            issuedate = ''

                    if not issuedate:
                        exploded = replace_all(fname, dic).strip()
                        exploded = " ".join(exploded.split())
                        exploded = exploded.split(' ')
                        regex_pass, issuedate, year = lazylibrarian.searchmag.get_issue_date(exploded)
                        if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                            logger.debug("File regex [%s][%s][%s]" % (regex_pass, issuedate, year))
                        if not regex_pass:
                            issuedate = ''

                    if not issuedate:
                        logger.warn("Invalid name format for [%s]" % fname)
                        continue

                    issuefile = os.path.join(rootdir, fname)  # full path to issue.pdf
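                    # the file's modification time doubles as the acquisition date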
                    mtime = os.path.getmtime(issuefile)
                    iss_acquired = datetime.date.isoformat(datetime.date.fromtimestamp(mtime))

                    if lazylibrarian.CONFIG['MAG_RENAME']:
                        filedate = issuedate
                        if issuedate and issuedate.isdigit():
                            if len(issuedate) == 8:
                                if check_year(issuedate[:4]):
                                    filedate = 'Issue %d %s' % (int(issuedate[4:]), issuedate[:4])
                                else:
                                    filedate = 'Vol %d Iss %d' % (int(issuedate[:4]), int(issuedate[4:]))
                            elif len(issuedate) == 12:
                                filedate = 'Vol %d Iss %d %s' % (int(issuedate[4:8]), int(issuedate[8:]),
                                                                 issuedate[:4])
                            else:
                                filedate = str(issuedate).zfill(4)

                        extn = os.path.splitext(fname)[1]
                        newfname = lazylibrarian.CONFIG['MAG_DEST_FILE'].replace(
                            '$Title', title).replace('$IssueDate', filedate)
                        newfname = newfname + extn
                        if newfname and newfname != fname:
                            logger.debug("Rename %s -> %s" % (fname, newfname))
                            newissuefile = os.path.join(rootdir, newfname)
                            newissuefile = safe_move(issuefile, newissuefile)
                            # companion cover/metadata files share the basename, so move them too
                            # (splitext is safer than str.replace, which could match the
                            # extension string elsewhere in the path)
                            for ext in ['.jpg', '.opf']:
                                oldfile = os.path.splitext(issuefile)[0] + ext
                                if os.path.exists(oldfile):
                                    safe_move(oldfile, os.path.splitext(newissuefile)[0] + ext)
                            issuefile = newissuefile

                    logger.debug("Found %s Issue %s" % (title, issuedate))
                    controlValueDict = {"Title": title}

                    # is this magazine already in the database?
                    mag_entry = myDB.match(
                        'SELECT LastAcquired, IssueDate, MagazineAdded from magazines WHERE Title=?', (title,))
                    if not mag_entry:
                        # need to add a new magazine to the database
                        newValueDict = {
                            "Reject": None,
                            "Status": "Active",
                            "MagazineAdded": None,
                            "LastAcquired": None,
                            "LatestCover": None,
                            "IssueDate": None,
                            "IssueStatus": "Skipped",
                            "Regex": None
                        }
                        logger.debug("Adding magazine %s" % title)
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                        magissuedate = None
                        magazineadded = None
                        maglastacquired = None
                    else:
                        maglastacquired = mag_entry['LastAcquired']
                        magissuedate = mag_entry['IssueDate']
                        magazineadded = mag_entry['MagazineAdded']
                        magissuedate = str(magissuedate).zfill(4)

                    issuedate = str(issuedate).zfill(4)  # for sorting issue numbers
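                    # (zero-padding keeps bare issue numbers in string order: '0002' < '0010')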

                    # is this issue already in the database?
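                    # (ids are derived from "<title> <issuedate>", so a rescan regenerates the same id)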
                    issue_id = create_id("%s %s" % (title, issuedate))
                    iss_entry = myDB.match('SELECT Title,IssueFile from issues WHERE Title=? and IssueDate=?',
                                           (title, issuedate))
                    new_opf = False
                    if not iss_entry or iss_entry['IssueFile'] != issuefile:
                        new_opf = True  # new entry or name changed
                        if not iss_entry:
                            logger.debug("Adding issue %s %s" % (title, issuedate))
                        else:
                            logger.debug("Updating issue %s %s" % (title, issuedate))
                        controlValueDict = {"Title": title, "IssueDate": issuedate}
                        newValueDict = {
                            "IssueAcquired": iss_acquired,
                            "IssueID": issue_id,
                            "IssueFile": issuefile
                        }
                        myDB.upsert("Issues", newValueDict, controlValueDict)

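                    # drop an empty '.ll_ignore' marker (used elsewhere to flag folders as already handled)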
                    ignorefile = os.path.join(os.path.dirname(issuefile), '.ll_ignore')
                    with open(ignorefile, 'a'):
                        os.utime(ignorefile, None)

                    createMagCover(issuefile)
                    lazylibrarian.postprocess.processMAGOPF(issuefile, title, issuedate, issue_id, overwrite=new_opf)

                    # see if this issue's date values are useful
                    controlValueDict = {"Title": title}
                    if not mag_entry:  # new magazine, this is the only issue
                        newValueDict = {
                            "MagazineAdded": iss_acquired,
                            "LastAcquired": iss_acquired,
                            "LatestCover": os.path.splitext(issuefile)[0] + '.jpg',
                            "IssueDate": issuedate,
                            "IssueStatus": "Open"
                        }
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                    else:
                        # Set magazine_issuedate to issuedate of most recent issue we have
                        # Set latestcover to most recent issue cover
                        # Set magazine_added to acquired date of earliest issue we have
                        # Set magazine_lastacquired to acquired date of most recent issue we have
                        # acquired dates are read from magazine file timestamps
                        newValueDict = {"IssueStatus": "Open"}
                        if not magazineadded or iss_acquired < magazineadded:
                            newValueDict["MagazineAdded"] = iss_acquired
                        if not maglastacquired or iss_acquired > maglastacquired:
                            newValueDict["LastAcquired"] = iss_acquired
                        if not magissuedate or issuedate >= magissuedate:
                            newValueDict["IssueDate"] = issuedate
                            newValueDict["LatestCover"] = os.path.splitext(issuefile)[0] + '.jpg'
                        myDB.upsert("magazines", newValueDict, controlValueDict)

        if lazylibrarian.CONFIG['FULL_SCAN'] and not onetitle:
            magcount = myDB.match("select count(*) from magazines")
            isscount = myDB.match("select count(*) from issues")
            logger.info("Magazine scan complete, found %s magazine%s, %s issue%s" %
                        (magcount['count(*)'], plural(magcount['count(*)']),
                         isscount['count(*)'], plural(isscount['count(*)'])))
        else:
            logger.info("Magazine scan complete")
        lazylibrarian.MAG_UPDATE = 0

    except Exception:
        lazylibrarian.MAG_UPDATE = 0
        logger.error('Unhandled exception in magazineScan: %s' % traceback.format_exc())
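
The MAG_RENAME branch above maps three all-digit issuedate layouts onto readable
names. A minimal sketch of just that mapping; check_year below is a stand-in with
assumed behaviour for the helper used in the scan:

def check_year(text):
    # assumption: true if text looks like a plausible year
    return text.isdigit() and 1900 < int(text) < 2100

def format_filedate(issuedate):
    if len(issuedate) == 8:
        if check_year(issuedate[:4]):  # YYYYIIII
            return 'Issue %d %s' % (int(issuedate[4:]), issuedate[:4])
        return 'Vol %d Iss %d' % (int(issuedate[:4]), int(issuedate[4:]))  # VVVVIIII
    if len(issuedate) == 12:  # YYYYVVVVIIII
        return 'Vol %d Iss %d %s' % (int(issuedate[4:8]), int(issuedate[8:]), issuedate[:4])
    return issuedate.zfill(4)  # bare issue number, padded for sorting

# format_filedate('20240007') -> 'Issue 7 2024'
# format_filedate('00310004') -> 'Vol 31 Iss 4'
# format_filedate('7')        -> '0007'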