def NewzNabPlus(book=None, host=None, api_key=None, searchType=None, searchMode=None):
    """Query a NewzNab-compatible indexer and parse the XML response.

    Builds the /api query from the search-type structure, fetches it
    (optionally through the configured proxy) and parses it with
    ElementTree.  NOTE(review): this excerpt ends after parsing; the
    handling of `results` presumably continues below this view.
    """
    # logger.info('[NewzNabPlus] Searching term [%s] for author [%s] and title [%s] on host [%s] for a # [%s] item' % (book['searchterm'], book['authorName'], book['bookName'], host, searchType))
    logger.debug('[NewzNabPlus] searchType [%s] with Host [%s] mode [%s] using api [%s] for item [%s]' % (
        searchType, host, searchMode, api_key, str(book)))
    results = []
    params = ReturnSearchTypeStructure(api_key, book, searchType, searchMode)
    # default to http:// when the configured host has no scheme
    if not str(host)[:4] == "http":
        host = 'http://' + host
    URL = host + '/api?' + urllib.urlencode(params)
    try:
        request = urllib2.Request(URL)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
        request.add_header('User-Agent', common.USER_AGENT)
        # do we really want to cache this, new feeds/torrents are added all the time
        # if we do, call goodreads.get_request(request, expireafter)
        # where expireafter is max cache age in days (0 for non-cached, 7 for up to a week old, etc.
        # Default is 30 days)
        resp = urllib2.urlopen(request, timeout=90)
        try:
            data = ElementTree.parse(resp)
        except (urllib2.URLError, IOError, EOFError), e:
            logger.error('Error fetching data from %s: %s' % (host, e))
            data = None
    except Exception, e:
        # NOTE(review): reached for ANY failure building/opening the URL,
        # not only HTTP 403 -- the message text is misleading
        logger.error("Error 403 opening url %s" % e)
        data = None
def _sendNMA(nma_api=None, nma_priority=None, event=None, message=None, force=False): title = "LazyLibrarian" # suppress notifications if the notifier is disabled but the notify options are checked if not lazylibrarian.CONFIG['USE_NMA'] and not force: return False if nma_api is None: nma_api = lazylibrarian.CONFIG['NMA_APIKEY'] if nma_priority is None: nma_priority = lazylibrarian.CONFIG['NMA_PRIORITY'] logger.debug("NMA: title: " + title) logger.debug("NMA: event: " + event) logger.debug("NMA: message: " + message) batch = False p = pynma.PyNMA() keys = nma_api.split(',') p.addkey(keys) if len(keys) > 1: batch = True response = p.push(title, event, message, priority=nma_priority, batch_mode=batch) if not response[nma_api][u'code'] == u'200': logger.error(u"NMA: Could not send notification to NotifyMyAndroid") return False else: logger.debug(u"NMA: Success. NotifyMyAndroid returned : %s" % response[nma_api][u'code']) return True
def DownloadMethod(bookid=None, nzbprov=None, nzbtitle=None, nzburl=None):
    """Send an NZB to SABnzbd, or save it to the blackhole directory.

    NOTE(review): this excerpt ends after setting `download`; the status
    update presumably continues below this view.
    """
    myDB = database.DBConnection()
    if lazylibrarian.SAB_HOST and not lazylibrarian.BLACKHOLE:
        download = sabnzbd.SABnzbd(nzbtitle, nzburl)
    elif lazylibrarian.BLACKHOLE:
        try:
            nzbfile = urllib2.urlopen(nzburl, timeout=30).read()
        except urllib2.URLError, e:
            # NOTE(review): nzbfile stays unbound on failure, so the write
            # below raises NameError -- needs a fallback value or early return
            logger.warn('Error fetching nzb from url: ' + nzburl + ' %s' % e)
        nzbname = str.replace(nzbtitle, ' ', '_') + '.nzb'
        nzbpath = os.path.join(lazylibrarian.BLACKHOLEDIR, nzbname)
        try:
            f = open(nzbpath, 'w')
            f.write(nzbfile)
            f.close()
            logger.info('NZB file saved to: ' + nzbpath)
            download = True
        except Exception, e:
            logger.error('%s not writable, NZB not saved. Error: %s' % (nzbpath, e))
            download = False
def request_json(url, **kwargs):
    """
    Wrapper for `request_response', which will decode the response as JSON
    object and return the result, if no exceptions are raised.

    As an option, a validator callback can be given, which should return True
    if the result is valid.
    """
    validator = kwargs.pop("validator", None)
    response = request_response(url, **kwargs)
    if response is None:
        return
    try:
        decoded = response.json()
    except ValueError:
        logger.error("Response returned invalid JSON data")
    else:
        if validator and not validator(decoded):
            logger.error("JSON validation result failed")
        else:
            return decoded
    # Dump the offending response when verbose logging is enabled
    if lazylibrarian.VERBOSE:
        server_message(response)
def runGit(args):
    """Run a git command, trying each known git location, and return
    (output, err) from the first usable attempt."""
    git_locations = ['git']
    # the OS X git installer puts the binary here
    if platform.system().lower() == 'darwin':
        git_locations.append('/usr/local/git/bin/git')
    output = err = None
    for cur_git in git_locations:
        cmd = cur_git + ' ' + args
        try:
            logger.debug('(RunGit)Trying to execute: "' + cmd + '" with shell in ' + lazylibrarian.PROG_DIR)
            p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                                 shell=True, cwd=lazylibrarian.PROG_DIR)
            output, err = p.communicate()
            logger.debug('(RunGit)Git output: [%s]' % output)
        except OSError:
            logger.debug('(RunGit)Command ' + cmd + ' didn\'t work, couldn\'t find git')
            continue
        # the shell ran but the command wasn't actually git
        if 'not found' in output or "not recognized as an internal or external command" in output:
            logger.debug('(RunGit)Unable to find git with command ' + cmd)
            output = None
        elif 'fatal:' in output or err:
            logger.error('(RunGit)Git returned bad info. Are you sure this is a git installation?')
            output = None
        elif output:
            # usable output - stop trying other locations
            break
    return (output, err)
def fetchURL(URL, headers=None, retry=True):
    """ Return the result of fetching a URL and True if success
        Otherwise return error message and False
        Allow one retry on timeout by default"""
    request = urllib2.Request(URL)
    if lazylibrarian.PROXY_HOST:
        request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
    if headers is None:
        # some sites insist on having a user-agent, default is to add one
        # if you don't want any headers, send headers=[]
        request.add_header('User-Agent', USER_AGENT)
    else:
        for item in headers:
            request.add_header(item, headers[item])
    try:
        resp = urllib2.urlopen(request, timeout=30)
        # any 2xx code counts as success
        if str(resp.getcode()).startswith("2"):
            # (200 OK etc)
            try:
                result = resp.read()
            except socket.error as e:
                # connection dropped mid-read
                return str(e), False
            return result, True
        return str(resp.getcode()), False
    except socket.timeout as e:
        if not retry:
            logger.error(u"fetchURL: Timeout getting response from %s" % URL)
            return str(e), False
        logger.warn(u"fetchURL: retrying - got timeout on %s" % URL)
        # single recursive retry with retry disabled
        result, success = fetchURL(URL, headers=headers, retry=False)
        return result, success
    except (urllib2.HTTPError, urllib2.URLError, ssl.SSLError) as e:
        if hasattr(e, 'reason'):
            return e.reason, False
        return str(e), False
def setSeedRatio(result):
    """Enable stop-at-ratio and set the stop ratio for a torrent in Deluge.

    result is a dict with at least 'hash' and optionally 'ratio'.
    Returns True when no ratio is wanted or on success, False on error.
    """
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug('Deluge: Setting seed ratio')
    if not any(delugeweb_auth):
        _get_auth()
    timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
    try:
        ratio = None
        if result['ratio']:
            ratio = result['ratio']
        if not ratio:
            # no ratio requested - nothing to do
            return True
        # first switch on stop-at-ratio for this torrent...
        post_json = {"method": "core.set_torrent_stop_at_ratio", "params": [result['hash'], True], "id": 5}
        response = requests.post(delugeweb_url, json=post_json, cookies=delugeweb_auth,
                                 verify=deluge_verify_cert, headers=headers, timeout=timeout)
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            logger.debug('Status code: %s' % response.status_code)
            logger.debug(response.text)
        # ...then set the ratio value itself
        post_json = {"method": "core.set_torrent_stop_ratio", "params": [result['hash'], float(ratio)], "id": 6}
        response = requests.post(delugeweb_url, json=post_json, cookies=delugeweb_auth,
                                 verify=deluge_verify_cert, headers=headers, timeout=timeout)
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            logger.debug('Status code: %s' % response.status_code)
            logger.debug(response.text)
        return not response.json()['error']
    except Exception as err:
        logger.error('Deluge %s: Setting seedratio failed: %s' % (type(err).__name__, str(err)))
        return False
def getServer():
    """Build an XMLRPC proxy for the configured rTorrent host.

    Returns the ServerProxy on success, False when no host is configured,
    the server is unreachable, or it returns no version response.
    """
    host = lazylibrarian.CONFIG['RTORRENT_HOST']
    if not host:
        logger.error("rtorrent error: No host found, check your config")
        return False

    # normalise the url: ensure a scheme, drop a single trailing slash
    if not (host.startswith("http://") or host.startswith("https://")):
        host = 'http://' + host
    if host[-1] == '/':
        host = host[:-1]

    # splice optional credentials in between scheme and host
    user = lazylibrarian.CONFIG['RTORRENT_USER']
    if user:
        password = lazylibrarian.CONFIG['RTORRENT_PASS']
        scheme, _, rest = host.partition('://')
        host = '%s://%s:%s@%s' % (scheme, user, password, rest)

    try:
        socket.setdefaulttimeout(20)  # so we don't freeze if server is not there
        server = xmlrpc_client.ServerProxy(host)
        result = server.system.client_version()
        socket.setdefaulttimeout(None)  # reset timeout
        logger.debug("rTorrent client version = %s" % result)
    except Exception as e:
        socket.setdefaulttimeout(None)  # reset timeout if failed
        logger.error("xmlrpc_client error: %s" % repr(e))
        return False

    if not result:
        logger.warn('No response from rTorrent server')
        return False
    return server
def _add_torrent_url(result):
    """Add a torrent to Deluge by URL via the WebUI JSON-RPC.

    Stores the returned hash in result['hash'] and returns it,
    or False on error.
    """
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug('Deluge: Adding URL')
    if not any(delugeweb_auth):
        _get_auth()
    timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
    try:
        post_json = {"method": "core.add_torrent_url", "params": [result['url'], {}], "id": 32}
        response = requests.post(delugeweb_url, json=post_json, cookies=delugeweb_auth,
                                 verify=deluge_verify_cert, headers=headers, timeout=timeout)
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            logger.debug('Status code: %s' % response.status_code)
            logger.debug(response.text)
        result['hash'] = response.json()['result']
        msg = 'Deluge: Response was %s' % result['hash']
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            logger.debug(msg)
        # a None result means the daemon isn't connected to the WebUI
        if 'was None' in msg:
            logger.error('Deluge: Adding torrent URL failed: Is the WebUI running?')
        return response.json()['result']
    except Exception as err:
        logger.error('Deluge %s: Adding torrent URL failed: %s' % (type(err).__name__, str(err)))
        return False
def _add_torrent_file(result):
    """Add a torrent to Deluge from file contents via the WebUI JSON-RPC.

    result['content'] holds the raw torrent data; it is base64-encoded
    for transport.  Stores the returned hash in result['hash'] and returns
    it, or False on error.
    """
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug('Deluge: Adding file')
    if not any(delugeweb_auth):
        _get_auth()
    timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
    try:
        # content is torrent file contents that needs to be encoded to base64
        post_json = {"method": "core.add_torrent_file",
                     "params": [result['name'] + '.torrent', b64encode(result['content']), {}],
                     "id": 2}
        response = requests.post(delugeweb_url, json=post_json, cookies=delugeweb_auth,
                                 verify=deluge_verify_cert, headers=headers, timeout=timeout)
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            logger.debug('Status code: %s' % response.status_code)
            logger.debug(response.text)
        result['hash'] = response.json()['result']
        msg = 'Deluge: Response was %s' % result['hash']
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            logger.debug(msg)
        # a None result means the daemon isn't connected to the WebUI
        if 'was None' in msg:
            logger.error('Deluge: Adding torrent file failed: Is the WebUI running?')
        return response.json()['result']
    except Exception as err:
        logger.error('Deluge %s: Adding torrent file failed: %s' % (type(err).__name__, str(err)))
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            formatted_lines = traceback.format_exc().splitlines()
            logger.debug('; '.join(formatted_lines))
        return False
def getFolder(hash):
    """Return the folder a torrent is saved to, or None.

    Polls qBittorrent while the torrent is still in the incomplete
    (temp_path) directory, since magnets have no folder until metadata
    arrives.  NOTE: the parameter name shadows the builtin hash();
    kept as-is because callers may pass it by keyword.
    """
    logger.debug('getFolder(%s)' % hash)
    qbclient = qbittorrentclient()
    # Get Active Directory from settings
    settings = qbclient._get_settings()
    active_dir = settings['temp_path']
    completed_dir = settings['save_path']
    if not active_dir:
        logger.error(
            'Could not get "Keep incomplete torrents in:" directory from QBitTorrent settings, please ensure it is set')
        return None
    # Get Torrent Folder Name
    torrent_folder = qbclient.get_savepath(hash)
    # If there's no folder yet then it's probably a magnet, try until folder is populated
    if torrent_folder == active_dir or not torrent_folder:
        tries = 1
        # poll up to 10 times, 6s apart, waiting for metadata
        while (torrent_folder == active_dir or torrent_folder is None) and tries <= 10:
            tries += 1
            time.sleep(6)
            torrent_folder = qbclient.get_savepath(hash)
        if torrent_folder == active_dir or not torrent_folder:
            torrent_folder = qbclient.get_savepath(hash)
        return torrent_folder
    else:
        # normalise windows path separators before taking the basename
        if 'windows' not in platform.system().lower():
            torrent_folder = torrent_folder.replace('\\', '/')
        return os.path.basename(os.path.normpath(torrent_folder))
def export_CSV(search_dir=None, status="Wanted", library='eBook'): """ Write a csv file to the search_dir containing all books marked as "Wanted" """ # noinspection PyBroadException try: if not search_dir: msg = "Alternate Directory not configured" logger.warn(msg) return msg elif not os.path.isdir(search_dir): msg = "Alternate Directory [%s] not found" % search_dir logger.warn(msg) return msg elif not os.access(search_dir, os.W_OK | os.X_OK): msg = "Alternate Directory [%s] not writable" % search_dir logger.warn(msg) return msg csvFile = os.path.join(search_dir, "%s %s - %s.csv" % (status, library, now().replace(':', '-'))) myDB = database.DBConnection() cmd = 'SELECT BookID,AuthorName,BookName,BookIsbn,books.AuthorID FROM books,authors ' if library == 'eBook': cmd += 'WHERE books.Status=? and books.AuthorID = authors.AuthorID' else: cmd += 'WHERE AudioStatus=? and books.AuthorID = authors.AuthorID' find_status = myDB.select(cmd, (status,)) if not find_status: msg = "No %s marked as %s" % (library, status) logger.warn(msg) else: count = 0 if PY2: fmode = 'wb' else: fmode = 'w' with open(csvFile, fmode) as csvfile: csvwrite = writer(csvfile, delimiter=',', quotechar='"', quoting=QUOTE_MINIMAL) # write headers, change AuthorName BookName BookIsbn to match import csv names csvwrite.writerow(['BookID', 'Author', 'Title', 'ISBN', 'AuthorID']) for resulted in find_status: logger.debug("Exported CSV for %s %s" % (library, resulted['BookName'])) row = ([resulted['BookID'], resulted['AuthorName'], resulted['BookName'], resulted['BookIsbn'], resulted['AuthorID']]) if PY2: csvwrite.writerow([("%s" % s).encode(lazylibrarian.SYS_ENCODING) for s in row]) else: csvwrite.writerow([("%s" % s) for s in row]) count += 1 msg = "CSV exported %s %s%s to %s" % (count, library, plural(count), csvFile) logger.info(msg) return msg except Exception: msg = 'Unhandled exception in exportCSV: %s' % traceback.format_exc() logger.error(msg) return msg
def torrentAction(method, arguments):
    """Send an RPC call to the Transmission daemon.

    Normalises the configured host to the /transmission/rpc endpoint,
    obtains a session id (Transmission answers 409 with the id in a
    header), then posts the JSON request.

    Returns the decoded JSON response, or None on any failure.
    """
    host = lazylibrarian.TRANSMISSION_HOST
    username = lazylibrarian.TRANSMISSION_USER
    password = lazylibrarian.TRANSMISSION_PASS

    if not host.startswith("http"):
        host = "http://" + host

    if host.endswith("/"):
        host = host[:-1]

    # Fix the URL. We assume that the user does not point to the RPC endpoint,
    # so add it if it is missing.
    parts = list(urlparse.urlparse(host))

    if not parts[0] in ("http", "https"):
        parts[0] = "http"

    if not parts[2].endswith("/rpc"):
        parts[2] += "/transmission/rpc"

    host = urlparse.urlunparse(parts)

    # Retrieve session id
    auth = (username, password) if username and password else None
    response = request.request_response(host, auth=auth, whitelist_status_code=[401, 409])

    if response is None:
        logger.error("Error gettings Transmission session ID")
        return

    # Parse response
    session_id = None
    if response.status_code == 401:
        if auth:
            logger.error("Username and/or password not accepted by " "Transmission")
        else:
            logger.error("Transmission authorization required")
        return
    elif response.status_code == 409:
        session_id = response.headers["x-transmission-session-id"]

    # Fix: previously session_id was only bound in the 409 branch, so any
    # other status raised NameError below; now we fail with a clear error.
    if not session_id:
        logger.error("Expected a Session ID from Transmission")
        return

    # Prepare next request
    headers = {"x-transmission-session-id": session_id}
    data = {"method": method, "arguments": arguments}

    response = request.request_json(host, method="POST", data=json.dumps(data),
                                    headers=headers, auth=auth)
    # (removed leftover debug `print response` -- it wrote to stdout and is
    # not valid syntax on Python 3)
    if not response:
        logger.error("Error sending torrent to Transmission")
        return

    return response
def find_author_id(self):
    """Look up the GoodReads author id for self.name and return the
    author info list from get_author_info, or [] on failure.

    Fixes: the original passed the bare name string to urlencode()
    (TypeError -- urlencode needs a mapping; the author_url API takes the
    name in the URL path, so it is percent-quoted instead), continued
    after a fetch error with `sourcexml` unbound (NameError), and called
    logger.error with two positional arguments.
    """
    # URL-encode the author name into the request path, api params as query
    URL = 'http://www.goodreads.com/api/author_url/' + \
          urllib.parse.quote(self.name) + '?' + urllib.parse.urlencode(self.params)
    logger.info("Searching for author with name: %s" % self.name)

    authorlist = []
    # Parse XML Response
    try:
        sourcexml = ElementTree.parse(urllib.request.urlopen(URL, timeout=20))
    except (urllib.error.URLError, IOError, EOFError) as e:
        logger.error("Error fetching authorid: %s" % e)
        return authorlist

    rootxml = sourcexml.getroot()
    resultxml = rootxml.getiterator('author')

    # Parse XML Tree for Authors
    if not len(rootxml):
        logger.info('No authors found with name: %s' % self.name)
        return authorlist

    # Display authors to user
    authorid = None
    for author in resultxml:
        authorid = author.attrib.get("id")
        logger.info('Found author: %s with GoodReads-id: %s' % (author[0].text, authorid))
        time.sleep(1)  # be gentle with the GoodReads API rate limit

    if authorid:
        authorlist = self.get_author_info(authorid)
    return authorlist
def sendNZB(nzb): addToTop = False nzbgetXMLrpc = "%(username)s:%(password)s@%(host)s/xmlrpc" if lazylibrarian.NZBGET_HOST is None: logger.error(u"No NZBget host found in configuration. Please configure it.") return False if lazylibrarian.NZBGET_HOST.startswith("https://"): nzbgetXMLrpc = "https://" + nzbgetXMLrpc lazylibrarian.NZBGET_HOST.replace("https://", "", 1) else: nzbgetXMLrpc = "http://" + nzbgetXMLrpc lazylibrarian.NZBGET_HOST.replace("http://", "", 1) url = nzbgetXMLrpc % { "host": lazylibrarian.NZBGET_HOST, "username": lazylibrarian.NZBGET_USER, "password": lazylibrarian.NZBGET_PASS, } nzbGetRPC = xmlrpclib.ServerProxy(url) try: if nzbGetRPC.writelog("INFO", "lazylibrarian connected to drop of %s any moment now." % (nzb.name + ".nzb")): logger.debug(u"Successfully connected to NZBget") else: logger.info(u"Successfully connected to NZBget, but unable to send a message" % (nzb.name + ".nzb")) except httplib.socket.error, e: logger.error( u"Please check your NZBget host and port (if it is running). NZBget is not responding to this combination" ) return False
def action(self, query, args=None):
    """Execute a query against the database under the global db_lock,
    retrying up to five times when sqlite reports the file is locked or
    temporarily unavailable.  Returns None for an empty query.
    """
    with db_lock:
        if not query:
            return
        sqlResult = None
        attempt = 0
        while attempt < 5:
            try:
                if not args:
                    # logger.debug(self.filename+": "+query)
                    sqlResult = self.connection.execute(query)
                else:
                    # logger.debug(self.filename+": "+query+" with args "+str(args))
                    sqlResult = self.connection.execute(query, args)
                self.connection.commit()
                break
            except sqlite3.OperationalError, e:
                # transient lock/open failures: back off one second and retry
                if "unable to open database file" in e.message or "database is locked" in e.message:
                    logger.warn('Database Error: %s' % e)
                    attempt += 1
                    time.sleep(1)
                else:
                    logger.error('Database error: %s' % e)
                    raise
            except sqlite3.DatabaseError, e:
                # corruption or misuse - never retry
                logger.error('Fatal error executing %s :: %s' % (query, e))
                raise
def search_tor_book(books=None, mags=None):
    """Search torrent providers for wanted books.

    With books=None a full backlog search of all "Wanted" books is
    performed.  NOTE(review): this excerpt shows only the setup and
    cache-clearing part; the search loop presumably continues below.
    """
    if not(lazylibrarian.USE_TOR):
        logger.warn('Torrent search is disabled')
        return
    # rename this thread
    threading.currentThread().name = "SEARCHTORBOOKS"
    myDB = database.DBConnection()
    searchlist = []
    #searchlist1 = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select('SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"')

        # Clear cache
        providercache = os.path.join(lazylibrarian.DATADIR, ".ProviderCache")
        if os.path.exists(providercache):
            try:
                # drop and recreate rather than deleting entries one by one
                shutil.rmtree(providercache)
                os.mkdir(providercache)
            except OSError, e:
                logger.error('Failed to clear cache: ' + str(e))

        # Clearing throttling timeouts
        t = SimpleCache.ThrottlingProcessor()
        t.lastRequestTime.clear()
def NZBDownloadMethod(bookid=None, nzbprov=None, nzbtitle=None, nzburl=None):
    """Send an NZB to the enabled downloader (SABnzbd, NZBget or blackhole
    folder) and update the book/wanted status accordingly.

    Returns True when the NZB was handed over, False otherwise.

    Fixes: `download` was left unbound when the blackhole fetch failed
    (NameError at the final check), and the status updates built SQL by
    string interpolation from externally-sourced values (nzburl comes from
    an indexer feed) -- now bound parameters.
    """
    myDB = database.DBConnection()
    if (lazylibrarian.NZB_DOWNLOADER_SABNZBD and lazylibrarian.SAB_HOST) and not lazylibrarian.NZB_DOWNLOADER_BLACKHOLE:
        download = sabnzbd.SABnzbd(nzbtitle, nzburl)
    elif (lazylibrarian.NZB_DOWNLOADER_NZBGET and lazylibrarian.NZBGET_HOST) and not lazylibrarian.NZB_DOWNLOADER_BLACKHOLE:
        headers = {"User-Agent": USER_AGENT}
        data = request.request_content(url=nzburl, headers=headers)
        nzb = classes.NZBDataSearchResult()
        nzb.extraInfo.append(data)
        nzb.name = nzbtitle
        nzb.url = nzburl
        download = nzbget.sendNZB(nzb)
    elif lazylibrarian.NZB_DOWNLOADER_BLACKHOLE:
        try:
            req = urllib2.Request(nzburl)
            if lazylibrarian.PROXY_HOST:
                req.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
            req.add_header("User-Agent", USER_AGENT)
            nzbfile = urllib2.urlopen(req, timeout=90).read()
        except (urllib2.URLError, socket.timeout) as e:
            logger.warn("Error fetching nzb from url: %s, %s" % (nzburl, e))
            nzbfile = False

        if nzbfile:
            nzbname = str(nzbtitle) + ".nzb"
            nzbpath = os.path.join(lazylibrarian.NZB_BLACKHOLEDIR, nzbname)
            try:
                with open(nzbpath, "w") as f:
                    f.write(nzbfile)
                logger.debug("NZB file saved to: " + nzbpath)
                download = True
                # try:
                #     os.chmod(nzbpath, 0777)
                # except Exception, e:
                #     logger.error("Could not chmod path: " + str(nzbpath))
            except Exception as e:
                logger.error("%s not writable, NZB not saved. Error: %s" % (nzbpath, e))
                download = False
        else:
            # fix: previously fell through with `download` unbound
            download = False
    else:
        logger.warn("No NZB download method is enabled, check config.")
        return False

    if download:
        logger.debug("Nzbfile has been downloaded from " + str(nzburl))
        # bound parameters - nzburl/bookid come from external data
        myDB.action('UPDATE books SET status = "Snatched" WHERE BookID=?', (bookid,))
        myDB.action('UPDATE wanted SET status = "Snatched" WHERE NZBurl=?', (nzburl,))
        return True
    else:
        logger.error(u'Failed to download nzb @ <a href="%s">%s</a>' % (nzburl, nzbprov))
        myDB.action('UPDATE wanted SET status = "Failed" WHERE NZBurl=?', (nzburl,))
        return False
def goodreads_oauth2():
    """Second stage of GoodReads OAuth: exchange the request token from
    oAuth1 for an access token and store it in the config.

    Returns a status message string.
    """
    global request_token, consumer, token, client
    try:
        # need the request token produced by the oAuth1 step
        if request_token and 'oauth_token' in request_token and 'oauth_token_secret' in request_token:
            token = oauth.Token(request_token['oauth_token'], request_token['oauth_token_secret'])
        else:
            return "Unable to run oAuth2 - Have you run oAuth1?"
    except Exception as e:
        logger.error("Exception in oAuth2: %s %s" % (type(e).__name__, traceback.format_exc()))
        return "Unable to run oAuth2 - Have you run oAuth1?"

    access_token_url = '%s/oauth/access_token' % 'https://www.goodreads.com'
    client = oauth.Client(consumer, token)
    try:
        response, content = client.request(access_token_url, 'POST')
    except Exception as e:
        logger.error("Exception in oauth2 client.request: %s %s" % (type(e).__name__, traceback.format_exc()))
        return "Error in oauth2 client request: see error log"

    if not response['status'].startswith('2'):
        return 'Invalid response [%s] from %s' % (response['status'], access_token_url)

    access_token = dict(parse_qsl(content))
    # py3: parse_qsl on bytes gives bytes keys/values, decode them
    if not PY2:
        access_token = {key.decode("utf-8"): access_token[key].decode("utf-8") for key in access_token}
    # print access_token
    lazylibrarian.CONFIG['GR_OAUTH_TOKEN'] = access_token['oauth_token']
    lazylibrarian.CONFIG['GR_OAUTH_SECRET'] = access_token['oauth_token_secret']
    lazylibrarian.config_write('API')
    return "Authorisation complete"
def _get_credentials(self, key):
    """Exchange the stored Twitter request token plus the user-supplied
    verifier key for an access token, and persist it in the config.

    Returns True on success, False when Twitter rejects the request.
    """
    request_token = {
        "oauth_token": lazylibrarian.TWITTER_USERNAME,
        "oauth_token_secret": lazylibrarian.TWITTER_PASSWORD,
        "oauth_callback_confirmed": "true",
    }

    token = oauth.Token(request_token["oauth_token"], request_token["oauth_token_secret"])
    token.set_verifier(key)

    logger.info("Generating and signing request for an access token using key " + key)
    signature_method_hmac_sha1 = oauth.SignatureMethod_HMAC_SHA1()  # @UnusedVariable

    oauth_consumer = oauth.Consumer(key=self.consumer_key, secret=self.consumer_secret)
    logger.info("oauth_consumer: " + str(oauth_consumer))
    oauth_client = oauth.Client(oauth_consumer, token)
    logger.info("oauth_client: " + str(oauth_client))

    resp, content = oauth_client.request(self.ACCESS_TOKEN_URL, method="POST", body="oauth_verifier=%s" % key)
    logger.info("resp, content: " + str(resp) + "," + str(content))

    access_token = dict(parse_qsl(content))
    logger.info("access_token: " + str(access_token))
    logger.info("resp[status] = " + str(resp["status"]))

    # anything but HTTP 200 is a rejection
    if resp["status"] != "200":
        logger.error("The request for a token with did not succeed: " + str(resp["status"]))
        return False

    logger.info("Your Twitter Access Token key: %s" % access_token["oauth_token"])
    logger.info("Access Token secret: %s" % access_token["oauth_token_secret"])
    lazylibrarian.TWITTER_USERNAME = access_token["oauth_token"]
    lazylibrarian.TWITTER_PASSWORD = access_token["oauth_token_secret"]
    return True
def get_author_info(self, authorid=None, authorname=None, refresh=False):
    """Fetch author details from the GoodReads author/show API.

    Returns a dict of author fields, empty on any error.
    """
    URL = 'http://www.goodreads.com/author/show/' + authorid + '.xml?' + urllib.urlencode(self.params)
    author_dict = {}
    try:
        rootxml, in_cache = get_xml_request(URL)
    except Exception as e:
        logger.error("Error getting author info: %s" % e)
        return author_dict
    if rootxml is None:
        logger.debug("Error requesting author info")
        return author_dict

    resultxml = rootxml.find('author')
    # NOTE(review): if no <author> element exists, resultxml is None and
    # len(None) below raises TypeError -- confirm the API always returns one
    if not len(resultxml):
        logger.warn('No author found with ID: ' + authorid)
    else:
        logger.debug("[%s] Processing info for authorID: %s" % (authorname, authorid))

        # PAB added authorname to author_dict - this holds the intact name preferred by GR
        author_dict = {
            'authorid': resultxml[0].text,
            'authorlink': resultxml.find('link').text,
            'authorimg': resultxml.find('image_url').text,
            'authorborn': resultxml.find('born_at').text,
            'authordeath': resultxml.find('died_at').text,
            'totalbooks': resultxml.find('works_count').text,
            'authorname': authorname
        }
    return author_dict
def create_shelf(self, shelf='lazylibrarian'):
    """Create a shelf on GoodReads via the authorised OAuth client.

    Returns (True, '') on success, or (False, message) on failure.
    """
    global consumer, client, token, user_id
    if not lazylibrarian.CONFIG['GR_API'] or not lazylibrarian.CONFIG['GR_SECRET'] or not \
            lazylibrarian.CONFIG['GR_OAUTH_TOKEN'] or not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
        logger.warn("Goodreads create shelf error: Please authorise first")
        return False, 'Unauthorised'

    consumer = oauth.Consumer(key=str(lazylibrarian.CONFIG['GR_API']),
                              secret=str(lazylibrarian.CONFIG['GR_SECRET']))
    token = oauth.Token(lazylibrarian.CONFIG['GR_OAUTH_TOKEN'], lazylibrarian.CONFIG['GR_OAUTH_SECRET'])
    client = oauth.Client(consumer, token)
    user_id = self.getUserId()

    # could also pass [featured] [exclusive_flag] [sortable_flag] all default to False
    body = urlencode({'user_shelf[name]': shelf.lower()})
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    gr_api_sleep()

    try:
        response, content = client.request('%s/user_shelves.xml' % 'https://www.goodreads.com',
                                           'POST', body, headers)
    except Exception as e:
        logger.error("Exception in client.request: %s %s" % (type(e).__name__, traceback.format_exc()))
        return False, "Error in client.request: see error log"

    if not response['status'].startswith('2'):
        msg = 'Failure status: %s' % response['status']
        return False, msg
    return True, ''
def get_author_books(self, authorid=None, authorname=None, refresh=False):
    """Fetch the book list for an author from the GoodReads author/list API.

    Marks the author "Loading" in the database while fetching.
    NOTE(review): this excerpt ends after the initial fetch; processing of
    sourcexml presumably continues below this view.
    """
    # counters for cache/API statistics
    api_hits = 0
    gr_lang_hits = 0
    lt_lang_hits = 0
    gb_lang_change = 0
    cache_hits = 0
    not_cached = 0
    URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(self.params)

    # Artist is loading
    myDB = database.DBConnection()
    controlValueDict = {"AuthorID": authorid}
    newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    try:
        # Cache our request
        request = urllib2.Request(URL)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
        request.add_header('User-Agent', USER_AGENT)
        opener = urllib2.build_opener(SimpleCache.CacheHandler(".AuthorCache"),
                                      SimpleCache.ThrottlingProcessor(5))
        resp = opener.open(request)
        api_hits = api_hits + 1
        sourcexml = ElementTree.parse(resp)
    except Exception, e:
        logger.error("Error fetching author info: " + str(e))
def processAutoAdd(src_path=None):
    # Called to copy the book files to an auto add directory for the likes of Calibre which can't do nested dirs
    """Copy every file from src_path into the flat auto-add directory.

    Returns True when the copy loop completed, False when the auto-add
    directory is missing or the source could not be listed.

    Fix: the completion log referenced the loop variable `dstname`, which
    is unbound (NameError) when src_path contains no files; it now logs
    the destination directory instead.
    """
    autoadddir = lazylibrarian.IMP_AUTOADD
    logger.debug('AutoAdd - Attempt to copy from [%s] to [%s]' % (src_path, autoadddir))

    if not os.path.exists(autoadddir):
        logger.info('AutoAdd directory [%s] is missing or not set - cannot perform autoadd copy' % autoadddir)
        return False

    # Now try and copy all the book files into a single dir.
    try:
        names = os.listdir(src_path)
        # TODO : n files jpg, opf & book(s) should have same name
        # Caution - book may be pdf, mobi, epub or all 3.
        # for now simply copy all files, and let the autoadder sort it out
        for name in names:
            srcname = os.path.join(src_path, name)
            dstname = os.path.join(autoadddir, name)
            logger.debug('AutoAdd Copying named file [%s] as copy [%s] to [%s]' % (name, srcname, dstname))
            try:
                shutil.copy2(srcname, dstname)
            except (IOError, os.error) as why:
                # best-effort: log and keep copying the remaining files
                logger.error('AutoAdd - Failed to copy file because [%s] ' % str(why))
    except OSError as why:
        logger.error('AutoAdd - Failed because [%s]' % str(why))
        return False

    logger.info('Auto Add completed for [%s]' % autoadddir)
    return True
def NewzNabPlus(book=None, host=None, api_key=None, searchType=None, searchMode=None):
    """Query a NewzNab-compatible indexer through the caching/throttling
    opener and parse the XML response.

    NOTE(review): near-duplicate of the other NewzNabPlus implementation
    in this codebase (which fetches without the cache); this excerpt ends
    after parsing, result handling presumably continues below this view.
    """
    # logger.info('[NewzNabPlus] Searching term [%s] for author [%s] and title [%s] on host [%s] for a [%s] item' % (book['searchterm'], book['authorName'], book['bookName'], host, searchType))
    logger.debug('[NewzNabPlus] searchType [%s] with Host [%s] mode [%s] using api [%s] for item [%s]' %
                 (searchType, host, searchMode, api_key, str(book)))
    results = []

    params = ReturnSearchTypeStructure(api_key, book, searchType, searchMode)
    # default to http:// when the configured host has no scheme
    if not str(host)[:4] == "http":
        host = 'http://' + host
    URL = host + '/api?' + urllib.urlencode(params)
    try:
        request = urllib2.Request(URL)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
        request.add_header('User-Agent', common.USER_AGENT)
        # cache responses and throttle to one request per 5 seconds
        opener = urllib2.build_opener(SimpleCache.CacheHandler(".ProviderCache"),
                                      SimpleCache.ThrottlingProcessor(5))
        resp = opener.open(request)
        try:
            data = ElementTree.parse(resp)
        except (urllib2.URLError, IOError, EOFError), e:
            logger.warn('Error fetching data from %s: %s' % (host, e))
            data = None
    except Exception, e:
        # NOTE(review): reached for ANY failure opening the URL, not only 403
        logger.error("Error 403 opening url %s" % e)
        data = None
def getSeriesMembers(seriesID=None):
    """ Ask librarything or goodreads for details on all books in a series
        order, bookname, authorname, workid, authorid
        (workid and authorid are goodreads only)
        Return as a list of lists """
    results = []
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        params = {"format": "xml", "key": lazylibrarian.CONFIG['GR_API']}
        URL = 'https://www.goodreads.com/series/' + seriesID + '?' + urlencode(params)
        try:
            rootxml, in_cache = gr_xml_request(URL)
            if rootxml is None:
                logger.debug("Error requesting series %s" % seriesID)
                return []
        except Exception as e:
            logger.error("%s finding series %s: %s" % (type(e).__name__, seriesID, str(e)))
            return []
        works = rootxml.find('series/series_works')
        books = works.getiterator('series_work')
        if books is None:
            logger.warn('No books found for %s' % seriesID)
            return []
        for book in books:
            mydict = {}
            # map each output key to its element path inside <series_work>
            for mykey, location in [('order', 'user_position'),
                                    ('bookname', 'work/best_book/title'),
                                    ('authorname', 'work/best_book/author/name'),
                                    ('workid', 'work/id'),
                                    ('authorid', 'work/best_book/author/id')
                                    ]:
                if book.find(location) is not None:
                    mydict[mykey] = book.find(location).text
                else:
                    mydict[mykey] = ""
            results.append([mydict['order'], mydict['bookname'], mydict['authorname'],
                            mydict['workid'], mydict['authorid']])
    else:
        # librarything: scrape the "worksinseries" table out of the page html
        data = getBookWork(None, "SeriesPage", seriesID)
        if data:
            try:
                table = data.split('class="worksinseries"')[1].split('</table>')[0]
                rows = table.split('<tr')
                for row in rows:
                    if 'href=' in row:
                        booklink = row.split('href="')[1]
                        bookname = booklink.split('">')[1].split('<')[0]
                        # booklink = booklink.split('"')[0]
                        try:
                            authorlink = row.split('href="')[2]
                            authorname = authorlink.split('">')[1].split('<')[0]
                            # authorlink = authorlink.split('"')[0]
                            order = row.split('class="order">')[1].split('<')[0]
                            results.append([order, bookname, authorname, '', ''])
                        except IndexError:
                            logger.debug('Incomplete data in series table for series %s' % seriesID)
            except IndexError:
                if 'class="worksinseries"' in data:
                    # error parsing, or just no series data available?
                    logger.debug('Error in series table for series %s' % seriesID)
    return results
def processDestination(pp_path=None, dest_path=None, authorname=None, bookname=None): if not os.path.exists(dest_path): logger.debug('%s does not exist, so it\'s safe to create it' % dest_path) try: if lazylibrarian.DESTINATION_COPY: shutil.copytree(pp_path, dest_path) logger.info('Successfully copied %s to %s.' % (pp_path, dest_path)) else: shutil.move(pp_path, dest_path) logger.info('Successfully moved %s to %s.' % (pp_path, dest_path)) pp = True #try and rename the actual book file for file2 in os.listdir(dest_path): logger.debug('file extension: ' + str(file2).split('.')[-1]) if ((file2.lower().find(".jpg") <= 0) & (file2.lower().find(".opf") <= 0)): logger.debug('file: ' + str(file2)) os.rename(os.path.join(dest_path, file2), os.path.join(dest_path, bookname + '.' + str(file2).split('.')[-1])) try: os.chmod(dest_path, 0777); except Exception, e: logger.info("Could not chmod path: " + str(file2)); except OSError: logger.error('Could not create destination folder or rename the downloaded ebook. Check permissions of: ' + lazylibrarian.DESTINATION_DIR) pp = False else: pp = False return pp
def find_author_id(self, refresh=False): author = self.name # Goodreads doesn't like initials followed by spaces, # eg "M L Hamilton", needs "M. L. Hamilton" or "M.L.Hamilton" # but DOES need spaces if not initials eg "Tom.Holt" fails, but "Tom Holt" works if author[1] == ' ': author = author.replace(' ', '.') author = author.replace('..', '.') URL = 'http://www.goodreads.com/api/author_url/' + urllib.quote(author) + '?' + urllib.urlencode(self.params) logger.debug("Searching for author with name: %s" % author) authorlist = [] try: rootxml, in_cache = self.get_request(URL) except Exception as e: logger.error("Error finding authorid: " + str(e) + str(URL)) return authorlist resultxml = rootxml.getiterator('author') if not len(resultxml): logger.warn('No authors found with name: %s' % author) else: # In spite of how this looks, goodreads only returns one result, even if there are multiple matches # we just have to hope we get the right one. eg search for "James Lovelock" returns "James E. Lovelock" # who only has one book listed under googlebooks, the rest are under "James Lovelock" # goodreads has all his books under "James E. Lovelock". Can't come up with a good solution yet. # For now we'll have to let the user handle this by selecting/adding the author manually for author in resultxml: authorid = author.attrib.get("id") authorname = author[0].text authorlist = self.get_author_info(authorid, authorname, refresh) return authorlist
def _get_credentials(self, key):
    """Exchange a Twitter OAuth verifier key for an access token.

    On success stores the token pair in lazylibrarian.TWITTER_USERNAME /
    TWITTER_PASSWORD and returns True; returns False on a non-200 reply.
    """
    # Rebuild the request token saved during the authorisation step.
    request_token = {
        'oauth_token': lazylibrarian.TWITTER_USERNAME,
        'oauth_token_secret': lazylibrarian.TWITTER_PASSWORD,
        'oauth_callback_confirmed': 'true',
    }

    token = oauth.Token(request_token['oauth_token'], request_token['oauth_token_secret'])
    token.set_verifier(key)

    logger.info('Generating and signing request for an access token using key ' + key)

    signature_method_hmac_sha1 = oauth.SignatureMethod_HMAC_SHA1()  # @UnusedVariable
    oauth_consumer = oauth.Consumer(key=self.consumer_key, secret=self.consumer_secret)
    logger.info('oauth_consumer: ' + str(oauth_consumer))
    oauth_client = oauth.Client(oauth_consumer, token)
    logger.info('oauth_client: ' + str(oauth_client))

    resp, content = oauth_client.request(self.ACCESS_TOKEN_URL, method='POST',
                                         body='oauth_verifier=%s' % key)
    logger.info('resp, content: ' + str(resp) + ',' + str(content))

    access_token = dict(parse_qsl(content))
    logger.info('access_token: ' + str(access_token))
    logger.info('resp[status] = ' + str(resp['status']))

    # Guard clause: anything other than 200 is a failure.
    if resp['status'] != '200':
        logger.error('The request for a token with did not succeed: ' + str(resp['status']))
        return False

    logger.info('Your Twitter Access Token key: %s' % access_token['oauth_token'])
    logger.info('Access Token secret: %s' % access_token['oauth_token_secret'])
    lazylibrarian.TWITTER_USERNAME = access_token['oauth_token']
    lazylibrarian.TWITTER_PASSWORD = access_token['oauth_token_secret']
    return True
def get_shelf_list(self):
    """Fetch the user's Goodreads shelves via the OAuth-signed API.

    Returns a list of dicts: {'name', 'books', 'exclusive'} — one per shelf,
    or [] if the OAuth credentials are not configured.
    NOTE: stores the consumer/client/token/user_id in module globals so
    other goodreads sync helpers can reuse the authorised session.
    """
    global consumer, client, token, user_id
    if not lazylibrarian.CONFIG['GR_API'] or not lazylibrarian.CONFIG['GR_SECRET'] or not \
            lazylibrarian.CONFIG['GR_OAUTH_TOKEN'] or not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
        logger.warn("Goodreads get shelf error: Please authorise first")
        return []
    else:
        #
        # loop over each page of shelves
        #     loop over each shelf
        #         add shelf to list
        #
        consumer = oauth.Consumer(key=str(lazylibrarian.CONFIG['GR_API']),
                                  secret=str(lazylibrarian.CONFIG['GR_SECRET']))
        token = oauth.Token(lazylibrarian.CONFIG['GR_OAUTH_TOKEN'],
                            lazylibrarian.CONFIG['GR_OAUTH_SECRET'])
        client = oauth.Client(consumer, token)
        user_id = self.getUserId()

        current_page = 0
        shelves = []
        # page_shelves doubles as the loop condition: a page returning zero
        # shelves ends the pagination loop.
        page_shelves = 1
        while page_shelves:
            current_page = current_page + 1
            page_shelves = 0
            shelf_template = Template('${base}/shelf/list.xml?user_id=${user_id}&key=${key}&page=${page}')
            body = urlencode({})
            headers = {'Content-Type': 'application/x-www-form-urlencoded'}
            request_url = shelf_template.substitute(base='https://www.goodreads.com', user_id=user_id,
                                                    page=current_page, key=lazylibrarian.CONFIG['GR_API'])
            # rate-limit goodreads api calls
            gr_api_sleep()
            try:
                response, content = client.request(request_url, 'GET', body, headers)
            except Exception as e:
                logger.error("Exception in client.request: %s %s" % (type(e).__name__, traceback.format_exc()))
                # return whatever we collected so far rather than nothing
                return shelves

            if not response['status'].startswith('2'):
                logger.error('Failure status: %s for page %s' % (response['status'], current_page))
                if lazylibrarian.LOGLEVEL & lazylibrarian.log_grsync:
                    logger.debug(request_url)
            else:
                xmldoc = xml.dom.minidom.parseString(content)

                shelf_list = xmldoc.getElementsByTagName('shelves')[0]
                for item in shelf_list.getElementsByTagName('user_shelf'):
                    shelf_name = item.getElementsByTagName('name')[0].firstChild.nodeValue
                    shelf_count = item.getElementsByTagName('book_count')[0].firstChild.nodeValue
                    shelf_exclusive = item.getElementsByTagName('exclusive_flag')[0].firstChild.nodeValue

                    shelves.append({'name': shelf_name, 'books': shelf_count, 'exclusive': shelf_exclusive})
                    page_shelves += 1

                    if lazylibrarian.LOGLEVEL & lazylibrarian.log_grsync:
                        logger.debug('Shelf %s : %s: Exclusive %s' % (shelf_name, shelf_count, shelf_exclusive))

            if lazylibrarian.LOGLEVEL & lazylibrarian.log_grsync:
                logger.debug('Found %s shelves on page %s' % (page_shelves, current_page))

        logger.debug('Found %s shelves on %s page%s' % (len(shelves), current_page - 1, plural(current_page - 1)))
        # print shelves
        return shelves
def setTorrentLabel(result):
    """Apply the configured Deluge label to a newly added torrent.

    result: dict with at least 'hash' — the torrent to label.
    Uses the module-global delugeweb session (delugeweb_url, delugeweb_auth,
    headers), re-authenticating via _get_auth() if needed.
    Returns True when no label is configured or labelling succeeded,
    False when the Label plugin is missing or the RPC reported an error.
    """
    logger.debug('Deluge: Setting label')
    label = lazylibrarian.CONFIG['DELUGE_LABEL']

    if not any(delugeweb_auth):
        _get_auth()

    # Deluge labels may not contain spaces
    if ' ' in label:
        logger.error('Deluge: Invalid label. Label can\'t contain spaces - replacing with underscores')
        label = label.replace(' ', '_')
    if label:
        # check if label already exists and create it if not
        post_data = json.dumps({"method": 'label.get_labels',
                                "params": [],
                                "id": 3})
        if PY2:
            post_data = post_data.encode(lazylibrarian.SYS_ENCODING)
        response = requests.post(delugeweb_url, data=post_data, cookies=delugeweb_auth,
                                 headers=headers)
        # 'result' is None if the Label plugin is not loaded
        labels = json.loads(response.text)['result']

        if labels:
            if label not in labels:
                try:
                    logger.debug('Deluge: %s label doesn\'t exist in Deluge, let\'s add it' % label)
                    post_data = json.dumps({"method": 'label.add',
                                            "params": [label],
                                            "id": 4})
                    if PY2:
                        post_data = post_data.encode(lazylibrarian.SYS_ENCODING)
                    _ = requests.post(delugeweb_url, data=post_data, cookies=delugeweb_auth,
                                      headers=headers)
                    logger.debug('Deluge: %s label added to Deluge' % label)
                except Exception as err:
                    logger.error('Deluge %s: Setting label failed: %s' % (type(err).__name__, str(err)))
                    formatted_lines = traceback.format_exc().splitlines()
                    logger.error('; '.join(formatted_lines))

            # add label to torrent
            post_data = json.dumps({"method": 'label.set_torrent',
                                    "params": [result['hash'], label],
                                    "id": 5})
            if PY2:
                post_data = post_data.encode(lazylibrarian.SYS_ENCODING)
            response = requests.post(delugeweb_url, data=post_data, cookies=delugeweb_auth,
                                     headers=headers)
            logger.debug('Deluge: %s label added to torrent' % label)
            # Deluge returns {'error': None} on success
            return not json.loads(response.text)['error']
        else:
            logger.debug('Deluge: Label plugin not detected')
            return False
    else:
        logger.debug('Deluge: No Label set')
    return True
def downloadResult(match, book):
    """
    match: best result from search providers
    book: book we are downloading
    return: True if already snatched, False if failed to snatch,
            >True if we snatched it

    NOTE: the ">True" contract is implemented by returning True + True (== 2)
    so callers can distinguish "newly snatched" via `result > True`.
    On an unhandled exception the function logs and implicitly returns None.
    """
    try:
        myDB = database.DBConnection()

        resultTitle = match[1]
        newValueDict = match[2]
        controlValueDict = match[3]

        if book['library'] == 'AudioBook':
            auxinfo = 'AudioBook'
        else:  # elif book['library'] == 'eBook':
            auxinfo = 'eBook'

        # Has another thread/search already snatched this book?
        if auxinfo == 'eBook':
            snatchedbooks = myDB.match('SELECT BookID from books WHERE BookID=? and Status="Snatched"',
                                       (newValueDict["BookID"],))
        else:
            snatchedbooks = myDB.match('SELECT BookID from books WHERE BookID=? and AudioStatus="Snatched"',
                                       (newValueDict["BookID"],))

        if snatchedbooks:
            logger.debug('%s %s already marked snatched' % (book['authorName'], book['bookName']))
            return True  # someone else already found it
        else:
            myDB.upsert("wanted", newValueDict, controlValueDict)
            # dispatch to the download method matching the provider/mode
            if 'libgen' in newValueDict["NZBprov"]:  # for libgen we use direct download links
                snatch = DirectDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"],
                                              controlValueDict["NZBurl"], resultTitle, auxinfo)
            elif newValueDict['NZBmode'] in ["torznab", "torrent", "magnet"]:
                snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"],
                                           controlValueDict["NZBurl"], auxinfo)
            elif newValueDict['NZBmode'] == 'nzb':
                snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"],
                                           controlValueDict["NZBurl"], auxinfo)
            else:
                logger.error('Unhandled NZBmode [%s] for %s' % (newValueDict['NZBmode'],
                                                                controlValueDict["NZBurl"]))
                snatch = False

            if snatch:
                logger.info('Downloading %s %s from %s' %
                            (auxinfo, newValueDict["NZBtitle"], newValueDict["NZBprov"]))
                notify_snatch("%s %s from %s at %s" %
                              (auxinfo, newValueDict["NZBtitle"], newValueDict["NZBprov"], now()))
                custom_notify_snatch(newValueDict["BookID"])
                # at this point we could add NZBprov to the blocklist with a short timeout,
                # a second or two? This would implement a round-robin search system:
                # blocklist with an incremental counter.
                # If number of active providers == number blocklisted, so no unblocked
                # providers are left, either sleep for a while, or unblock the one with
                # the lowest counter.
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
        return False
    except Exception:
        logger.error('Unhandled exception in downloadResult: %s' % traceback.format_exc())
def getWorkSeries(bookID=None):
    """ Return the series names and numbers in series for the given id as a list of tuples
        For goodreads the id is a WorkID, for librarything it's a BookID
        Each tuple is (seriesid, seriesnum, seriesname); seriesid is '' for
        librarything results. Goodreads hits are also upserted into the
        'series' db table as a side effect. """
    myDB = database.DBConnection()
    serieslist = []
    if not bookID:
        logger.error("getWorkSeries - No bookID")
        return serieslist

    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        URL = "https://www.goodreads.com/work/"
        seriesurl = URL + bookID + "/series?format=xml&key=" + lazylibrarian.CONFIG['GR_API']

        rootxml, in_cache = gr_xml_request(seriesurl)
        if rootxml is None:
            logger.warn('Error getting XML for %s' % seriesurl)
        else:
            resultxml = rootxml.getiterator('series_work')
            for item in resultxml:
                try:
                    seriesname = item.find('./series/title').text
                    seriesname = seriesname.strip('\n').strip('\n').strip()
                    seriesid = item.find('./series/id').text
                    seriesnum = item.find('./user_position').text
                except (KeyError, AttributeError):
                    # skip entries missing title/id/position
                    continue
                if seriesname and seriesid:
                    seriesname = cleanName(unaccented(seriesname), '&/')
                    seriesnum = cleanName(unaccented(seriesnum))
                    serieslist.append((seriesid, seriesnum, seriesname))
                    # keep the local series table in sync with goodreads ids
                    match = myDB.match('SELECT SeriesID from series WHERE SeriesName=?',
                                       (seriesname,))
                    if not match:
                        myDB.action('INSERT INTO series VALUES (?, ?, ?, ?, ?)',
                                    (seriesid, seriesname, "Active", 0, 0))
                    elif match['SeriesID'] != seriesid:
                        myDB.action('UPDATE series SET SeriesID=? WHERE SeriesName=?',
                                    (seriesid, seriesname))
    else:
        # librarything: scrape the series section out of the cached workpage html
        work = getBookWork(bookID, "Series")
        if work:
            try:
                slist = work.split('<h3><b>Series:')[1].split('</h3>')[0].split('<a href="/series/')
                for item in slist[1:]:
                    try:
                        series = item.split('">')[1].split('</a>')[0]
                        # "Name (3)" -> name "Name", number "3"
                        if series and '(' in series:
                            seriesnum = series.split('(')[1].split(')')[0].strip()
                            series = series.split(' (')[0].strip()
                        else:
                            seriesnum = ''
                            series = series.strip()
                        seriesname = cleanName(unaccented(series), '&/')
                        seriesnum = cleanName(unaccented(seriesnum))
                        serieslist.append(('', seriesnum, seriesname))
                    except IndexError:
                        pass
            except IndexError:
                pass

    return serieslist
def getSeriesMembers(seriesID=None):
    """ Ask librarything or goodreads for details on all books in a series
        order, bookname, authorname, workid, authorid
        (workid and authorid are goodreads only)
        Return as a list of lists """
    results = []
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        params = {"format": "xml", "key": lazylibrarian.CONFIG['GR_API']}
        URL = 'https://www.goodreads.com/series/' + seriesID + '?' + urlencode(params)
        try:
            rootxml, in_cache = gr_xml_request(URL)
            if rootxml is None:
                logger.debug("Error requesting series %s" % seriesID)
                return []
        except Exception as e:
            logger.error("%s finding series %s: %s" % (type(e).__name__, seriesID, str(e)))
            return []

        works = rootxml.find('series/series_works')
        books = works.getiterator('series_work')
        if books is None:
            logger.warn('No books found for %s' % seriesID)
            return []
        for book in books:
            mydict = {}
            # map result keys to their xpath location in the goodreads reply;
            # missing elements become empty strings
            for mykey, location in [('order', 'user_position'),
                                    ('bookname', 'work/best_book/title'),
                                    ('authorname', 'work/best_book/author/name'),
                                    ('workid', 'work/id'),
                                    ('authorid', 'work/best_book/author/id')]:
                if book.find(location) is not None:
                    mydict[mykey] = book.find(location).text
                else:
                    mydict[mykey] = ""
            results.append([mydict['order'], mydict['bookname'], mydict['authorname'],
                            mydict['workid'], mydict['authorid']])
    else:
        # librarything: scrape the "works in series" table from the cached series page
        data = getBookWork(None, "SeriesPage", seriesID)
        if data:
            try:
                table = data.split('class="worksinseries"')[1].split('</table>')[0]
                rows = table.split('<tr')
                for row in rows:
                    if 'href=' in row:
                        booklink = row.split('href="')[1]
                        bookname = booklink.split('">')[1].split('<')[0]
                        # booklink = booklink.split('"')[0]
                        try:
                            authorlink = row.split('href="')[2]
                            authorname = authorlink.split('">')[1].split('<')[0]
                            # authorlink = authorlink.split('"')[0]
                            order = row.split('class="order">')[1].split('<')[0]
                            # no workid/authorid available from librarything
                            results.append([order, bookname, authorname, '', ''])
                        except IndexError:
                            logger.debug('Incomplete data in series table for series %s' % seriesID)
            except IndexError:
                if 'class="worksinseries"' in data:  # error parsing, or just no series data available?
                    logger.debug('Error in series table for series %s' % seriesID)
    return results
def getSeriesAuthors(seriesid):
    """ Get a list of authors contributing to a series
        and import those authors (and their books) into the database
        Return how many authors you added """
    myDB = database.DBConnection()
    result = myDB.match("select count(*) as counter from authors")
    # author count before import; the delta at the end is the return value
    start = int(result['counter'])
    result = myDB.match('select SeriesName from series where SeriesID=?', (seriesid,))
    seriesname = result['SeriesName']
    members = getSeriesMembers(seriesid)
    # characters stripped before fuzzy title matching (smart quotes and quotes)
    dic = {u'\u2018': "", u'\u2019': "", u'\u201c': '', u'\u201d': '', "'": "", '"': ''}
    if members:
        myDB = database.DBConnection()
        for member in members:
            # order = member[0]
            bookname = member[1]
            authorname = member[2]
            # workid = member[3]
            authorid = member[4]
            bookname = replace_all(bookname, dic)
            if not authorid:
                # goodreads gives us all the info we need, librarything/google doesn't
                # so fall back to a goodreads search by title+author to find the id
                base_url = 'https://www.goodreads.com/search.xml?q='
                params = {"key": lazylibrarian.CONFIG['GR_API']}
                searchname = bookname + ' ' + authorname
                searchname = cleanName(unaccented(searchname))
                if PY2:
                    searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                searchterm = quote_plus(searchname)
                set_url = base_url + searchterm + '&' + urlencode(params)
                try:
                    rootxml, in_cache = gr_xml_request(set_url)
                    if rootxml is None:
                        logger.warn('Error getting XML for %s' % searchname)
                    else:
                        resultxml = rootxml.getiterator('work')
                        for item in resultxml:
                            try:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                            except (KeyError, AttributeError):
                                booktitle = ""
                            # only accept a near-exact title match
                            book_fuzz = fuzz.token_set_ratio(booktitle, bookname)
                            if book_fuzz >= 98:
                                try:
                                    author = item.find('./best_book/author/name').text
                                except (KeyError, AttributeError):
                                    author = ""
                                # try:
                                #     workid = item.find('./work/id').text
                                # except (KeyError, AttributeError):
                                #     workid = ""
                                try:
                                    authorid = item.find('./best_book/author/id').text
                                except (KeyError, AttributeError):
                                    authorid = ""
                                logger.debug("Author Search found %s %s, authorid %s" %
                                             (author, booktitle, authorid))
                                break
                    if not authorid:  # try again with title only
                        searchname = cleanName(unaccented(bookname))
                        if PY2:
                            searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                        searchterm = quote_plus(searchname)
                        set_url = base_url + searchterm + '&' + urlencode(params)
                        rootxml, in_cache = gr_xml_request(set_url)
                        if rootxml is None:
                            logger.warn('Error getting XML for %s' % searchname)
                        else:
                            resultxml = rootxml.getiterator('work')
                            for item in resultxml:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                                book_fuzz = fuzz.token_set_ratio(booktitle, bookname)
                                if book_fuzz >= 98:
                                    try:
                                        author = item.find('./best_book/author/name').text
                                    except (KeyError, AttributeError):
                                        author = ""
                                    # try:
                                    #     workid = item.find('./work/id').text
                                    # except (KeyError, AttributeError):
                                    #     workid = ""
                                    try:
                                        authorid = item.find('./best_book/author/id').text
                                    except (KeyError, AttributeError):
                                        authorid = ""
                                    logger.debug("Title Search found %s %s, authorid %s" %
                                                 (author, booktitle, authorid))
                                    break
                    if not authorid:
                        logger.warn("GoodReads doesn't know about %s %s" % (authorname, bookname))
                except Exception as e:
                    logger.error("Error finding goodreads results: %s %s" % (type(e).__name__, str(e)))

            if authorid:
                # import the author (and their books); addAuthorToDB dedupes existing authors
                lazylibrarian.importer.addAuthorToDB(refresh=False, authorid=authorid)

    result = myDB.match("select count(*) as counter from authors")
    finish = int(result['counter'])
    newauth = finish - start
    logger.info("Added %s new author%s for %s" % (newauth, plural(newauth), seriesname))
    return newauth
def getBookAuthors(bookid):
    """ Get a list of authors contributing to a book
        from the goodreads bookpage or the librarything bookwork file
        Returns a list of dicts; goodreads entries have id/name/role,
        librarything entries have name/role/type/work/status. """
    authorlist = []
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        # ask the goodreads book page for its <authors> section
        params = {"key": lazylibrarian.CONFIG['GR_API']}
        URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urlencode(params)
        try:
            rootxml, in_cache = gr_xml_request(URL)
            if rootxml is None:
                logger.debug("Error requesting book %s" % bookid)
                return []
        except Exception as e:
            logger.error("%s finding book %s: %s" % (type(e).__name__, bookid, str(e)))
            return []

        anames = rootxml.find('book').find('authors').getiterator('author')
        if anames is None:
            logger.warn('No authors found for %s' % bookid)
            return []

        for node in anames:
            entry = {}
            id_elem = node.find('id')
            if id_elem is not None:
                entry['id'] = id_elem.text
            name_elem = node.find('name')
            if name_elem is not None:
                entry['name'] = name_elem.text
            role_elem = node.find('role')
            if role_elem is not None:
                # normalise a missing/None role to empty string
                entry['role'] = role_elem.text or ''
            if entry:
                authorlist.append(entry)
    else:
        # librarything: scrape the "other authors" table from the cached bookwork page
        data = getBookWork(bookid, "Authors")
        if data:
            try:
                data = data.split('otherauthors_container')[1].split('</table>')[0]
                data = data.split('<table')[1].split('>', 1)[1]
            except IndexError:
                data = ''

        authorlist = []
        if data and 'Work?' in data:
            try:
                # skip the two header rows, then pull the five cells per row
                for row in data.split('<tr')[2:]:
                    cells = row.split('<td>')
                    entry = {
                        'name': cells[1].split('">')[1].split('<')[0],
                        'role': cells[2].split('<')[0],
                        'type': cells[3].split('<')[0],
                        'work': cells[4].split('<')[0],
                        'status': cells[5].split('<')[0],
                    }
                    authorlist.append(entry)
            except IndexError:
                logger.debug('Error parsing authorlist for %s' % bookid)
    return authorlist
def getBookWork(bookID=None, reason=None, seriesID=None):
    """ return the contents of the LibraryThing workpage for the given bookid,
        or seriespage if seriesID given
        preferably from the cache. If not already cached cache the results
        Return None if no workpage/seriespage available

        reason is only used for log messages.
        Uses module globals ALLOW_NEW (master switch for fetching new pages)
        and LAST_NEW (timestamp of the last "disabled" nag message). """
    global ALLOW_NEW, LAST_NEW
    if not bookID and not seriesID:
        logger.error("getBookWork - No bookID or seriesID")
        return None

    if not reason:
        reason = ""

    myDB = database.DBConnection()
    # pick the right lookup and cache directory for book vs series
    if bookID:
        cmd = 'select BookName,AuthorName,BookISBN from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        cacheLocation = "WorkCache"
        item = myDB.match(cmd, (bookID,))
    else:
        cmd = 'select SeriesName from series where SeriesID=?'
        cacheLocation = "SeriesCache"
        item = myDB.match(cmd, (seriesID,))
    if item:
        cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation)
        if bookID:
            workfile = os.path.join(cacheLocation, str(bookID) + '.html')
        else:
            workfile = os.path.join(cacheLocation, str(seriesID) + '.html')

        # does the workpage need to expire? For now only expire if it was an error page
        # (small file) or a series page as librarything might get better info over time,
        # more series members etc
        if os.path.isfile(workfile):
            if seriesID or os.path.getsize(workfile) < 500:
                cache_modified_time = os.stat(workfile).st_mtime
                time_now = time.time()
                expiry = lazylibrarian.CONFIG['CACHE_AGE'] * 24 * 60 * 60  # expire cache after this many seconds
                if cache_modified_time < time_now - expiry:
                    # Cache entry is too old, delete it
                    if ALLOW_NEW:
                        os.remove(workfile)

        if os.path.isfile(workfile):
            # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
            lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
            if bookID:
                if reason:
                    logger.debug("getBookWork: Returning Cached entry for %s %s" % (bookID, reason))
                else:
                    logger.debug("getBookWork: Returning Cached workpage for %s" % bookID)
            else:
                logger.debug("getBookWork: Returning Cached seriespage for %s" % item['seriesName'])

            if PY2:
                with open(workfile, "r") as cachefile:
                    source = cachefile.read()
            else:
                # noinspection PyArgumentList
                with open(workfile, "r", errors="backslashreplace") as cachefile:
                    source = cachefile.read()
            return source
        else:
            lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
            if not ALLOW_NEW:
                # don't nag. Show message no more than every 12 hrs
                timenow = int(time.time())
                if check_int(LAST_NEW, 0) + 43200 < timenow:
                    logger.warn("New WhatWork is disabled")
                    LAST_NEW = timenow
                return None
            # build the librarything whatwork (book) or series page url
            if bookID:
                title = safe_unicode(item['BookName'])
                author = safe_unicode(item['AuthorName'])
                if PY2:
                    title = title.encode(lazylibrarian.SYS_ENCODING)
                    author = author.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/api/whatwork.php?author=%s&title=%s' % \
                      (quote_plus(author), quote_plus(title))
            else:
                seriesname = safe_unicode(item['seriesName'])
                if PY2:
                    seriesname = seriesname.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/series/%s' % quote_plus(seriesname)

            librarything_wait()
            result, success = fetchURL(URL)

            if bookID and success:
                # follow the <link> element in the whatwork reply to the real workpage
                # noinspection PyBroadException
                try:
                    workpage = result.split('<link>')[1].split('</link>')[0]
                    librarything_wait()
                    result, success = fetchURL(workpage)
                except Exception:
                    try:
                        errmsg = result.split('<error>')[1].split('</error>')[0]
                    except IndexError:
                        errmsg = "Unknown Error"
                    # if no workpage link, try isbn instead
                    if item['BookISBN']:
                        URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + item['BookISBN']
                        librarything_wait()
                        result, success = fetchURL(URL)
                        if success:
                            # noinspection PyBroadException
                            try:
                                workpage = result.split('<link>')[1].split('</link>')[0]
                                librarything_wait()
                                result, success = fetchURL(workpage)
                            except Exception:
                                # no workpage link found by isbn
                                try:
                                    errmsg = result.split('<error>')[1].split('</error>')[0]
                                except IndexError:
                                    errmsg = "Unknown Error"
                                # still cache if whatwork returned a result without a link,
                                # so we don't keep retrying
                                logger.debug("Librarything: [%s] for ISBN %s" % (errmsg, item['BookISBN']))
                                success = True
                    else:
                        # still cache if whatwork returned a result without a link,
                        # so we don't keep retrying
                        msg = "Librarything: [" + errmsg + "] for "
                        logger.debug(msg + item['AuthorName'] + ' ' + item['BookName'])
                        success = True

            if success:
                with open(workfile, "w") as cachefile:
                    cachefile.write(result)
                    if bookID:
                        logger.debug("getBookWork: Caching workpage for %s" % workfile)
                    else:
                        logger.debug("getBookWork: Caching series page for %s" % workfile)
                    # return None if we got an error page back
                    if '</request><error>' in result:
                        return None
                return result
            else:
                if bookID:
                    logger.debug("getBookWork: Unable to cache workpage, got %s" % result)
                else:
                    logger.debug("getBookWork: Unable to cache series page, got %s" % result)
            return None
    else:
        if bookID:
            logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        else:
            logger.debug('Get Book Work - Invalid seriesID [%s]' % seriesID)
        return None
def request_response(url, method="get", auto_raise=True, whitelist_status_code=None, **kwargs):
    """
    Convenient wrapper for `requests.get', which will capture the exceptions
    and log them. On success, the Response object is returned. In case of a
    exception, None is returned.

    Additionally, there is support for rate limiting. To use this feature,
    supply a tuple of (lock, request_limit). The lock is used to make sure no
    other request with the same lock is executed. The request limit is the
    minimal time between two requests (and so 1/request_limit is the number of
    requests per seconds).
    """
    # Convert whitelist_status_code to a list if needed
    if whitelist_status_code and not isinstance(whitelist_status_code, list):
        whitelist_status_code = [whitelist_status_code]

    # Disable verification of SSL certificates if requested. Note: this could
    # pose a security issue!
    # kwargs["verify"] = bool(lazylibrarian.VERIFY_SSL_CERT)

    # Map method to the request.XXX method. This is a simple hack, but it
    # allows requests to apply more magic per method. See lib/requests/api.py.
    request_method = getattr(requests, method.lower())

    try:
        # Request URL and wait for response
        # with lock:
        logger.debug("Requesting URL via %s method: %s" % (method.upper(), url))
        response = request_method(url, **kwargs)

        # If status code != OK, then raise exception, except if the status code
        # is white listed.
        if whitelist_status_code and auto_raise:
            if response.status_code not in whitelist_status_code:
                try:
                    response.raise_for_status()
                except Exception:
                    # was a bare "except:" which would also swallow/relog
                    # KeyboardInterrupt and SystemExit; narrowed, still re-raised
                    logger.debug("Response status code %d is not white "
                                 "listed, raised exception" % response.status_code)
                    raise
        elif auto_raise:
            response.raise_for_status()

        return response
    except requests.exceptions.SSLError as e:
        logger.error("SSL error raised during connection: %s" % e)
    except requests.ConnectionError:
        logger.error("Unable to connect to remote host. Check if the remote "
                     "host is up and running.")
    except requests.Timeout:
        logger.error("Request timed out. The remote host did not respond timely.")
    except requests.HTTPError as e:
        if e.response is not None:
            if e.response.status_code >= 500:
                cause = "remote server error"
            elif e.response.status_code >= 400:
                cause = "local client error"
            else:
                # I don't think we will end up here, but for completeness
                cause = "unknown"

            logger.error("Request raise HTTP error with status code %d (%s)." %
                         (e.response.status_code, cause))

            # Debug response
            server_message(e.response)
        else:
            logger.error("Request raised HTTP error.")
    except requests.RequestException as e:
        logger.error("Request raised exception: %s" % e)
            # tail of the KAT torrent-search parser -- the function header is
            # outside this chunk. Second 'links' entry appears to carry the
            # torrent url and its byte length (TODO confirm against feed format).
            url = item['links'][1]['href']
            size = int(item['links'][1]['length'])
            # only keep results with more than the configured minimum seeders
            if minimumseeders < int(seeders):
                results.append({
                    'bookid': book['bookid'],
                    'tor_prov': "KAT",
                    'tor_title': title,
                    'tor_url': url,
                    'tor_size': str(size),
                })
                logger.info('Found %s. Size: %s' % (title, size))
    except Exception, e:
        logger.error(u"An unknown error occurred in the KAT parser: %s" % e)

    return results


def UsenetCrawler(book=None, searchType=None):
    # Thin wrapper: delegate the search to NewzNabPlus using the
    # usenet-crawler host and api key from the global config.
    results = []
    #print book.keys()
    results = NewzNabPlus(book, lazylibrarian.USENETCRAWLER_HOST,
                          lazylibrarian.USENETCRAWLER_API, searchType)
    return results


def OLDUsenetCrawler(book=None):
def search_book(books=None, library=None):
    """
    books is a list of new books to add, or None for backlog search
    library is "eBook" or "AudioBook" or None to search all book types

    Searches every active provider mode (nzb/tor/direct/rss) for each wanted
    book, picks the best-scoring match and hands it to downloadResult().
    """
    # noinspection PyBroadException
    try:
        # give the worker thread a meaningful name for the log
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if books is None:
                threading.currentThread().name = "SEARCHALLBOOKS"
            else:
                threading.currentThread().name = "SEARCHBOOKS"

        myDB = database.DBConnection()
        searchlist = []
        searchbooks = []

        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded, books.Status, AudioStatus '
            cmd += 'from books,authors WHERE (books.Status="Wanted" OR AudioStatus="Wanted") '
            cmd += 'and books.AuthorID = authors.AuthorID order by BookAdded desc'
            results = myDB.select(cmd)
            for terms in results:
                searchbooks.append(terms)
        else:
            # The user has added a new book
            for book in books:
                if not book['bookid'] in ['booklang', 'library', 'ignored']:
                    cmd = 'SELECT BookID, AuthorName, BookName, BookSub, books.Status, AudioStatus '
                    cmd += 'from books,authors WHERE BookID=? AND books.AuthorID = authors.AuthorID'
                    results = myDB.select(cmd, (book['bookid'],))
                    if results:
                        for terms in results:
                            searchbooks.append(terms)
                    else:
                        logger.debug("SearchBooks - BookID %s is not in the database" % book['bookid'])

        if len(searchbooks) == 0:
            logger.debug("SearchBooks - No books to search for")
            return

        nprov = lazylibrarian.USE_NZB() + lazylibrarian.USE_TOR() + \
            lazylibrarian.USE_RSS() + lazylibrarian.USE_DIRECT()
        if nprov == 0:
            logger.debug("SearchBooks - No providers to search")
            return

        modelist = []
        if lazylibrarian.USE_NZB():
            modelist.append('nzb')
        if lazylibrarian.USE_TOR():
            modelist.append('tor')
        if lazylibrarian.USE_DIRECT():
            modelist.append('direct')
        if lazylibrarian.USE_RSS():
            modelist.append('rss')

        logger.info('Searching %s provider%s %s for %i book%s' %
                    (nprov, plural(nprov), str(modelist), len(searchbooks), plural(len(searchbooks))))

        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName']
            if searchbook['BookSub']:
                searchterm = searchterm + ': ' + searchbook['BookSub']

            if library is None or library == 'eBook':
                if searchbook['Status'] == "Wanted":
                    searchlist.append({
                        "bookid": searchbook['BookID'],
                        "bookName": searchbook['BookName'],
                        "bookSub": searchbook['BookSub'],
                        "authorName": searchbook['AuthorName'],
                        "library": "eBook",
                        "searchterm": searchterm})

            if library is None or library == 'AudioBook':
                if searchbook['AudioStatus'] == "Wanted":
                    searchlist.append({
                        "bookid": searchbook['BookID'],
                        "bookName": searchbook['BookName'],
                        "bookSub": searchbook['BookSub'],
                        "authorName": searchbook['AuthorName'],
                        "library": "AudioBook",
                        "searchterm": searchterm})

        # only get rss results once per run, as they are not search specific
        rss_resultlist = None
        if 'rss' in modelist:
            rss_resultlist, nprov = IterateOverRSSSites()
            if not nprov:
                modelist.remove('rss')

        book_count = 0
        for book in searchlist:
            matches = []
            # iterate a snapshot of modelist: the body removes dead modes from
            # modelist, and removing from the list being iterated would skip
            # the following mode for the current book
            for mode in list(modelist):
                # first attempt, try author/title in category "book"
                if book['library'] == 'AudioBook':
                    searchtype = 'audio'
                else:
                    searchtype = 'book'

                resultlist = None
                if mode == 'nzb':
                    resultlist, nprov = IterateOverNewzNabSites(book, searchtype)
                    if not nprov:
                        logger.debug("No active nzb providers found")
                        if 'nzb' in modelist:
                            modelist.remove('nzb')
                elif mode == 'tor':
                    resultlist, nprov = IterateOverTorrentSites(book, searchtype)
                    if not nprov:
                        logger.debug("No active tor providers found")
                        if 'tor' in modelist:
                            modelist.remove('tor')
                elif mode == 'direct':
                    resultlist, nprov = IterateOverDirectSites(book, searchtype)
                    if not nprov:
                        logger.debug("No active direct providers found")
                        if 'direct' in modelist:
                            modelist.remove('direct')
                elif mode == 'rss':
                    if rss_resultlist:
                        resultlist = rss_resultlist
                    else:
                        logger.debug("No active rss providers found")
                        if 'rss' in modelist:
                            modelist.remove('rss')

                if resultlist:
                    match = findBestResult(resultlist, book, searchtype, mode)
                else:
                    match = None

                # if you can't find the book, try author/title without any "(extended details, series etc)"
                if not goodEnough(match) and '(' in book['bookName']:
                    searchtype = 'short' + searchtype
                    if mode == 'nzb':
                        resultlist, nprov = IterateOverNewzNabSites(book, searchtype)
                        if not nprov:
                            logger.debug("No active nzb providers found")
                            if 'nzb' in modelist:
                                modelist.remove('nzb')
                    elif mode == 'tor':
                        resultlist, nprov = IterateOverTorrentSites(book, searchtype)
                        if not nprov:
                            logger.debug("No active tor providers found")
                            if 'tor' in modelist:
                                modelist.remove('tor')
                    elif mode == 'direct':
                        resultlist, nprov = IterateOverDirectSites(book, searchtype)
                        if not nprov:
                            logger.debug("No active direct providers found")
                            if 'direct' in modelist:
                                modelist.remove('direct')
                    elif mode == 'rss':
                        resultlist = rss_resultlist

                    if resultlist:
                        match = findBestResult(resultlist, book, searchtype, mode)
                    else:
                        match = None

                # if you can't find the book under "books", you might find under general search
                # general search is the same as booksearch for torrents and rss, no need to check again
                if not goodEnough(match):
                    searchtype = 'general'
                    if mode == 'nzb':
                        resultlist, nprov = IterateOverNewzNabSites(book, searchtype)
                        if not nprov:
                            logger.debug("No active nzb providers found")
                            if 'nzb' in modelist:
                                modelist.remove('nzb')
                        if resultlist:
                            match = findBestResult(resultlist, book, searchtype, mode)
                        else:
                            match = None

                # if still not found, try general search again without any "(extended details, series etc)"
                if not goodEnough(match) and '(' in book['searchterm']:
                    searchtype = 'shortgeneral'
                    if mode == 'nzb':
                        # capture nprov here: the original discarded it, so the
                        # "no providers" test below checked a stale value
                        resultlist, nprov = IterateOverNewzNabSites(book, searchtype)
                        if not nprov:
                            logger.debug("No active nzb providers found")
                            if 'nzb' in modelist:
                                modelist.remove('nzb')
                        if resultlist:
                            match = findBestResult(resultlist, book, searchtype, mode)
                        else:
                            match = None

                if not goodEnough(match):
                    logger.info("%s Searches for %s %s returned no results." %
                                (mode.upper(), book['library'], book['searchterm']))
                else:
                    logger.info("Found %s result: %s %s%%, %s priority %s" %
                                (mode.upper(), searchtype, match[0], match[2]['NZBprov'], match[4]))
                    matches.append(match)

            if matches:
                highest = max(matches, key=lambda s: (s[0], s[4]))  # sort on percentage and priority
                logger.info("Requesting %s download: %s%% %s: %s" %
                            (book['library'], highest[0], highest[2]['NZBprov'], highest[1]))
                if downloadResult(highest, book) > True:
                    book_count += 1  # we found it

        logger.info("Search for Wanted items complete, found %s book%s" %
                    (book_count, plural(book_count)))

    except Exception:
        logger.error('Unhandled exception in search_book: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
def getBookCover(bookID=None, src=None):
    """ Return link to a local file containing a book cover image
        for a bookid, and which source used.

        Try 1. Local file cached from goodreads/googlebooks when book was imported
            2. cover.jpg if we have the book
            3. LibraryThing cover image (if you have a dev key)
            4. LibraryThing whatwork (if available)
            5. Goodreads search (if book was imported from goodreads)
            6. Google isbn search (if google has a link to book for sale)
            7. Google images search (if lazylibrarian config allows)

        src = cache, cover, goodreads, librarything, whatwork, googleisbn, googleimage
        If src is empty the sources are tried in the order above; if src names a
        single source only that source is tried, and the cached copy is refreshed.
        Return (coverlink, sourcename), or (None, src) if no cover available.
    """
    if not bookID:
        logger.error("getBookCover- No bookID")
        return None, src
    if not src:
        src = ''
    logger.debug("Getting %s cover for %s" % (src, bookID))
    # noinspection PyBroadException
    try:
        cachedir = lazylibrarian.CACHEDIR
        coverfile = os.path.join(cachedir, "book", bookID + '.jpg')
        if not src or src == 'cache' or src == 'current':
            if os.path.isfile(coverfile):  # use cached image if there is one
                lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
                coverlink = 'cache/book/' + bookID + '.jpg'
                return coverlink, 'cache'
            elif src:
                lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
                return None, src

        myDB = database.DBConnection()
        if not src or src == 'cover':
            item = myDB.match('select BookFile from books where bookID=?', (bookID, ))
            if item:
                bookfile = item['BookFile']
                if bookfile:  # we may have a cover.jpg in the same folder
                    bookdir = os.path.dirname(bookfile)
                    coverimg = os.path.join(bookdir, "cover.jpg")
                    if os.path.isfile(coverimg):
                        if src:
                            # explicit request: cache under a source-specific name
                            coverfile = os.path.join(cachedir, "book", bookID + '_cover.jpg')
                            coverlink = 'cache/book/' + bookID + '_cover.jpg'
                            logger.debug("Caching cover.jpg for %s" % bookID)
                        else:
                            coverlink = 'cache/book/' + bookID + '.jpg'
                            logger.debug("Caching cover.jpg for %s" % coverfile)
                        _ = safe_copy(coverimg, coverfile)
                        return coverlink, src
            if src:
                logger.debug('No cover.jpg found for %s' % bookID)
                return None, src

        # see if librarything has a cover
        if not src or src == 'librarything':
            if lazylibrarian.CONFIG['LT_DEVKEY']:
                cmd = 'select BookISBN from books where bookID=?'
                item = myDB.match(cmd, (bookID, ))
                if item and item['BookISBN']:
                    img = 'https://www.librarything.com/devkey/%s/large/isbn/%s' % (
                        lazylibrarian.CONFIG['LT_DEVKEY'], item['BookISBN'])
                    if src:
                        coverlink, success, _ = cache_img("book", bookID + '_lt', img)
                    else:
                        coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                    # if librarything has no image they return a 1x1 gif
                    data = ''
                    coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                    if os.path.isfile(coverfile):
                        with open(coverfile, 'rb') as f:
                            data = f.read()
                    if len(data) < 50:
                        logger.debug('Got an empty librarything image for %s [%s]' % (bookID, coverlink))
                    elif success:
                        logger.debug("Caching librarything cover for %s" % bookID)
                        return coverlink, 'librarything'
                    else:
                        logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                else:
                    logger.debug("No isbn for %s" % bookID)
            if src:
                return None, src

        # see if librarything workpage has a cover
        if not src or src == 'whatwork':
            work = getBookWork(bookID, "Cover")
            if work:
                try:
                    img = work.split('workCoverImage')[1].split('="')[1].split('"')[0]
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_ww', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                        # if librarything has no image they return a 1x1 gif
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty whatwork image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching whatwork cover for %s" % bookID)
                            return coverlink, 'whatwork'
                        else:
                            logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                    else:
                        logger.debug("No image found in work page for %s" % bookID)
                except IndexError:
                    logger.debug('workCoverImage not found in work page for %s' % bookID)
                    # fall back to the og:image meta tag of the work page
                    try:
                        img = work.split('og:image')[1].split('="')[1].split('"')[0]
                        if img and img.startswith('http'):
                            if src:
                                coverlink, success, _ = cache_img("book", bookID + '_ww', img)
                            else:
                                coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                            # if librarything has no image they return a 1x1 gif
                            data = ''
                            coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                            if os.path.isfile(coverfile):
                                with open(coverfile, 'rb') as f:
                                    data = f.read()
                            if len(data) < 50:
                                logger.debug('Got an empty whatwork image for %s [%s]' % (bookID, coverlink))
                            # BUGFIX: was "if success:" which returned the empty
                            # 1x1 gif as a valid cover; now "elif" like the other branches
                            elif success:
                                logger.debug("Caching whatwork cover for %s" % bookID)
                                return coverlink, 'whatwork'
                            else:
                                logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                        else:
                            logger.debug("No image found in work page for %s" % bookID)
                    except IndexError:
                        logger.debug('og:image not found in work page for %s' % bookID)
            else:
                logger.debug('No work page for %s' % bookID)
            if src:
                return None, src

        cmd = 'select BookName,AuthorName,BookLink from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        item = myDB.match(cmd, (bookID, ))
        safeparams = ''
        booklink = ''
        if item:
            title = safe_unicode(item['BookName'])
            author = safe_unicode(item['AuthorName'])
            if PY2:
                title = title.encode(lazylibrarian.SYS_ENCODING)
                author = author.encode(lazylibrarian.SYS_ENCODING)
            booklink = item['BookLink']
            safeparams = quote_plus("%s %s" % (author, title))

        # try to get a cover from goodreads
        if not src or src == 'goodreads':
            if booklink and 'goodreads' in booklink:
                # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
                # and scrape the page for og:image
                # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/
                # 1388267702i/16304._UY475_SS475_.jpg"/>
                # to get the cover
                result, success = fetchURL(booklink)
                if success:
                    try:
                        img = result.split('id="coverImage"')[1].split('src="')[1].split('"')[0]
                    except IndexError:
                        try:
                            img = result.split('og:image')[1].split('="')[1].split('"')[0]
                        except IndexError:
                            img = None
                    if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                        if src == 'goodreads':
                            coverlink, success, _ = cache_img("book", bookID + '_gr', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty goodreads image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching goodreads cover for %s %s" %
                                         (item['AuthorName'], item['BookName']))
                            return coverlink, 'goodreads'
                        else:
                            logger.debug("Error getting goodreads image for %s, [%s]" % (img, coverlink))
                    else:
                        logger.debug("No image found in goodreads page for %s" % bookID)
                else:
                    logger.debug("Error getting goodreads page %s, [%s]" % (booklink, result))
            if src:
                return None, src

        if not src or src == 'googleisbn':
            # try a google isbn page search...
            # there is no image returned if google doesn't have a link for buying the book
            if safeparams:
                URL = "http://www.google.com/search?q=ISBN+" + safeparams
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('imgurl=')[1].split('&imgrefurl')[0]
                    except IndexError:
                        try:
                            img = result.split('img src="')[1].split('"')[0]
                        except IndexError:
                            img = None
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_gi', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty google image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching google isbn cover for %s %s" %
                                         (item['AuthorName'], item['BookName']))
                            return coverlink, 'google isbn'
                        else:
                            logger.debug("Error caching google image %s, [%s]" % (img, coverlink))
                    else:
                        logger.debug("No image found in google isbn page for %s" % bookID)
                else:
                    logger.debug("Failed to fetch url from google")
            else:
                logger.debug("No parameters for google isbn search for %s" % bookID)
            if src:
                return None, src

        # parenthesised for clarity; same precedence as the original expression
        if src == 'googleimage' or (not src and lazylibrarian.CONFIG['IMP_GOOGLEIMAGE']):
            # try a google image search...
            # tbm=isch search images
            # tbs=isz:l large images
            # ift:jpg jpeg file type
            if safeparams:
                URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
                img = None
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
                    except IndexError:
                        img = None
                if img and img.startswith('http'):
                    if src:
                        coverlink, success, _ = cache_img("book", bookID + '_gb', img)
                    else:
                        coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                    data = ''
                    coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                    if os.path.isfile(coverfile):
                        with open(coverfile, 'rb') as f:
                            data = f.read()
                    if len(data) < 50:
                        # message fixed: previously said "goodreads" in the google branch
                        logger.debug('Got an empty google image for %s [%s]' % (bookID, coverlink))
                    elif success:
                        logger.debug("Caching google search cover for %s %s" %
                                     (item['AuthorName'], item['BookName']))
                        return coverlink, 'google image'
                    else:
                        logger.debug("Error getting google image %s, [%s]" % (img, coverlink))
                else:
                    logger.debug("No image found in google page for %s" % bookID)
            else:
                logger.debug("No parameters for google image search for %s" % bookID)
            if src:
                return None, src

        logger.debug("No image found from any configured source")
        return None, src
    except Exception:
        logger.error('Unhandled exception in getBookCover: %s' % traceback.format_exc())
        return None, src
def update():
    """Update LazyLibrarian in place, depending on lazylibrarian.INSTALL_TYPE.

    'win'    - unsupported, just logs a message
    'git'    - "git stash clear" then "git pull origin <current branch>"
    'source' - download the branch tarball from github, unpack it over
               PROG_DIR, then write the new version to version.txt
    Returns None; outcomes are reported via the logger.
    """
    if lazylibrarian.INSTALL_TYPE == 'win':
        logger.debug('(update) Windows install - no update available')
        logger.info('(update) Windows .exe updating not supported yet.')

    elif lazylibrarian.INSTALL_TYPE == 'git':
        branch = getCurrentGitBranch()
        output, err = runGit('stash clear')
        output, err = runGit('pull origin ' + branch)

        if not output:
            logger.error('(update) Couldn\'t download latest version')

        # guard against runGit returning None as well as empty string
        for line in (output or '').split('\n'):
            if 'Already up-to-date.' in line:
                logger.info('(update) No update available, not updating')
                logger.info('(update) Output: ' + str(output))
            elif line.endswith('Aborting.'):
                logger.error('(update) Unable to update from git: ' + line)
                logger.info('(update) Output: ' + str(output))

    elif lazylibrarian.INSTALL_TYPE == 'source':
        # As this is a non GIT install, we assume that the comparison is
        # always to master (GIT_BRANCH), whatever CURRENT_BRANCH says.
        tar_download_url = 'https://github.com/%s/%s/tarball/%s' % (
            lazylibrarian.GIT_USER, lazylibrarian.GIT_REPO, lazylibrarian.GIT_BRANCH)
        update_dir = os.path.join(lazylibrarian.PROG_DIR, 'update')

        try:
            logger.info('(update) Downloading update from: ' + tar_download_url)
            data = urllib2.urlopen(tar_download_url)
        except (IOError, URLError):
            logger.error("(update) Unable to retrieve new version from " +
                         tar_download_url + ", can't update")
            return

        download_name = data.geturl().split('/')[-1]
        tar_download_path = os.path.join(lazylibrarian.PROG_DIR, download_name)

        # Save tar to disk; with-block guarantees the file is closed
        with open(tar_download_path, 'wb') as f:
            f.write(data.read())

        # Extract the tar to update folder
        logger.info('(update) Extracting file ' + tar_download_path)  # typo fixed: was "Extracing file"
        tar = tarfile.open(tar_download_path)
        tar.extractall(update_dir)
        tar.close()

        # Delete the tar.gz
        logger.info('(update) Deleting file ' + tar_download_path)
        os.remove(tar_download_path)

        # Find update dir name (the tarball unpacks into a single top folder)
        update_dir_contents = [
            x for x in os.listdir(update_dir)
            if os.path.isdir(os.path.join(update_dir, x))
        ]
        if len(update_dir_contents) != 1:
            logger.error(u"(update) Invalid update data, update failed: " + str(update_dir_contents))
            return
        content_dir = os.path.join(update_dir, update_dir_contents[0])

        # walk temp folder and move files to main folder
        for dirname, dirnames, filenames in os.walk(content_dir):
            dirname = dirname[len(content_dir) + 1:]
            for curfile in filenames:
                old_path = os.path.join(content_dir, dirname, curfile)
                new_path = os.path.join(lazylibrarian.PROG_DIR, dirname, curfile)

                if os.path.isfile(new_path):
                    os.remove(new_path)
                os.renames(old_path, new_path)

        # Update version.txt
        updateVersionFile(lazylibrarian.LATEST_VERSION)
    else:
        logger.error("(update) Cannot perform update - Install Type not set")
        return
def search_wishlist():
    """Scan all configured wishlist (RSS) providers and mark any books found
    as "Wanted" in the database, importing them first if they are not known.

    Runs as a scheduled job; returns None. If RSS search is disabled or no
    wishlist providers are configured, the job stops itself.
    """
    if not (lazylibrarian.USE_RSS()):
        logger.warn('RSS search is disabled')
        scheduleJob(action='Stop', target='search_wishlist')
        return
    try:
        # rename the worker thread for log readability, restored in finally
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            threading.currentThread().name = "SEARCHWISHLIST"

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverWishLists()
        new_books = 0
        if not wishproviders:
            logger.debug('No wishlists are set')
            scheduleJob(action='Stop', target='search_wishlist')
            return  # No point in continuing

        # for each item in resultlist, add to database if necessary, and mark as wanted
        logger.debug('Processing %s item%s in wishlists' % (len(resultlist), plural(len(resultlist))))
        for book in resultlist:
            # we get rss_author, rss_title, maybe rss_isbn, rss_bookid (goodreads bookid)
            # we can just use bookid if goodreads, or try isbn and name matching on author/title if googlebooks
            # not sure if anyone would use a goodreads wishlist if not using goodreads interface...
            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and book['rss_bookid']:
                # goodreads id available: direct lookup by bookid
                bookmatch = myDB.match('select Status,BookName from books where bookid=?',
                                       (book['rss_bookid'], ))
                if bookmatch:
                    bookstatus = bookmatch['Status']
                    bookname = bookmatch['BookName']
                    if bookstatus in ['Open', 'Wanted', 'Have']:
                        logger.info('Found book %s, already marked as "%s"' % (bookname, bookstatus))
                    else:  # skipped/ignored
                        logger.info('Found book %s, marking as "Wanted"' % bookname)
                        controlValueDict = {"BookID": book['rss_bookid']}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                else:
                    # unknown book: import it (import_book marks it Wanted)
                    import_book(book['rss_bookid'])
                    new_books += 1
            else:
                # no usable goodreads id: try to match on bookid/isbn/title+author
                item = {}
                results = None
                item['Title'] = book['rss_title']
                if book['rss_bookid']:
                    item['BookID'] = book['rss_bookid']
                if book['rss_isbn']:
                    item['ISBN'] = book['rss_isbn']
                bookmatch = finditem(item, book['rss_author'])
                if bookmatch:  # it's already in the database
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    bookstatus = bookmatch['Status']
                    if bookstatus in ['Open', 'Wanted', 'Have']:
                        logger.info('Found book %s by %s, already marked as "%s"' %
                                    (bookname, authorname, bookstatus))
                    else:  # skipped/ignored
                        logger.info('Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                else:  # not in database yet
                    # NOTE(review): from here on "bookmatch" is reused as a plain
                    # boolean flag (was a DB row above) — confusing but intentional
                    if book['rss_isbn']:
                        results = search_for(book['rss_isbn'])
                    if results:
                        result = results[0]  # type: dict
                        if result['isbn_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                            logger.info("Found (%s%%) %s: %s" %
                                        (result['isbn_fuzz'], result['authorname'], result['bookname']))
                            import_book(result['bookid'])
                            new_books += 1
                            bookmatch = True
                    if not results:
                        # isbn search failed or no isbn: fall back to author/title search
                        searchterm = "%s <ll> %s" % (item['Title'], formatAuthorName(book['rss_author']))
                        results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]  # type: dict
                        if result['author_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90) \
                                and result['book_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                            logger.info("Found (%s%% %s%%) %s: %s" %
                                        (result['author_fuzz'], result['book_fuzz'],
                                         result['authorname'], result['bookname']))
                            import_book(result['bookid'])
                            new_books += 1
                            bookmatch = True
                    if not bookmatch:
                        msg = "Skipping book %s by %s" % (item['Title'], book['rss_author'])
                        if not results:
                            msg += ', No results returned'
                            logger.warn(msg)
                        else:
                            msg += ', No match found'
                            logger.warn(msg)
                            # report the closest (rejected) candidate for diagnostics
                            result = results[0]  # type: dict
                            msg = "Closest match (%s%% %s%%) %s: %s" % (
                                result['author_fuzz'], result['book_fuzz'],
                                result['authorname'], result['bookname'])
                            logger.warn(msg)
        if new_books:
            logger.info("Wishlist marked %s book%s as Wanted" % (new_books, plural(new_books)))
    except Exception:
        logger.error('Unhandled exception in search_wishlist: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
def createMagCover(issuefile=None, refresh=False):
    """Create a <issuefile-basename>.jpg cover image next to a magazine issue.

    cbz/epub: extract the first plausible cover image from the zip.
    cbr:      same via the unrar library.
    pdf:      render page 1 via (in order) the external IMP_CONVERT program,
              ghostscript (windows), wand/PythonMagick/gs (other platforms).
    Falls back to copying data/images/nocover.jpg. Set refresh=True to
    regenerate an existing cover. Returns None.
    """
    if lazylibrarian.CONFIG['IMP_CONVERT'] == 'None':  # special flag to say "no covers required"
        return
    if issuefile is None or not os.path.isfile(issuefile):
        logger.debug('No issuefile %s' % issuefile)
        return

    base, extn = os.path.splitext(issuefile)
    if not extn:
        logger.debug('Unable to create cover for %s, no extension?' % issuefile)
        return

    coverfile = base + '.jpg'
    if os.path.isfile(coverfile):
        if refresh:
            os.remove(coverfile)
        else:
            logger.debug('Cover for %s exists' % issuefile)
            return  # quit if cover already exists and we didn't want to refresh

    logger.debug('Creating cover for %s' % issuefile)
    data = ''  # result from unzip or unrar
    extn = extn.lower()
    if extn in ['.cbz', '.epub']:
        try:
            data = zipfile.ZipFile(issuefile)
        except Exception as why:
            logger.error("Failed to read zip file %s, %s %s" %
                         (issuefile, type(why).__name__, str(why)))
            data = ''
    elif extn in ['.cbr']:
        try:
            # unrar will complain if the library isn't installed, needs to be compiled separately
            # see https://pypi.python.org/pypi/unrar/ for instructions
            # Download source from http://www.rarlab.com/rar_add.htm
            # note we need LIBRARY SOURCE not a binary package
            # make lib; sudo make install-lib; sudo ldconfig
            # lib.unrar should then be able to find libunrar.so
            from lib.unrar import rarfile
            data = rarfile.RarFile(issuefile)
        except Exception as why:
            logger.error("Failed to read rar file %s, %s %s" %
                         (issuefile, type(why).__name__, str(why)))
            data = ''
    if data:
        img = None
        try:
            for member in data.namelist():
                memlow = member.lower()
                # conventional cover names inside comic/epub archives
                if '-00.' in memlow or '000.' in memlow or 'cover.' in memlow:
                    if memlow.endswith('.jpg') or memlow.endswith('.jpeg'):
                        img = data.read(member)
                        break
            if img:
                with open(coverfile, 'wb') as f:
                    # BUGFIX: ZipFile.read/RarFile.read return bytes; the old code
                    # called img.encode() on python3, which fails (bytes has no
                    # encode method). Write the raw bytes on both python versions.
                    f.write(img)
                return
            else:
                logger.debug("Failed to find image in %s" % issuefile)
        except Exception as why:
            logger.error("Failed to extract image from %s, %s %s" %
                         (issuefile, type(why).__name__, str(why)))
    elif extn == '.pdf':
        generator = ""
        if len(lazylibrarian.CONFIG['IMP_CONVERT']):  # allow external convert to override libraries
            generator = "external program: %s" % lazylibrarian.CONFIG['IMP_CONVERT']
            if "gsconvert.py" in lazylibrarian.CONFIG['IMP_CONVERT']:
                msg = "Use of gsconvert.py is deprecated, equivalent functionality is now built in. "
                msg += "Support for gsconvert.py may be removed in a future release. See wiki for details."
                logger.warn(msg)
            converter = lazylibrarian.CONFIG['IMP_CONVERT']
            postfix = ''
            # if not os.path.isfile(converter):  # full path given, or just program_name?
            #     converter = os.path.join(os.getcwd(), lazylibrarian.CONFIG['IMP_CONVERT'])
            if 'convert' in converter and 'gs' not in converter:
                # tell imagemagick to only convert first page
                postfix = '[0]'
            try:
                params = [converter, '%s%s' % (issuefile, postfix), '%s' % coverfile]
                res = subprocess.check_output(params, stderr=subprocess.STDOUT)
                res = makeUnicode(res).strip()
                if res:
                    logger.debug('%s reports: %s' % (lazylibrarian.CONFIG['IMP_CONVERT'], res))
            except Exception as e:
                # logger.debug(params)
                logger.warn('External "convert" failed %s %s' % (type(e).__name__, str(e)))

        elif platform.system() == "Windows":
            # look for a local ghostscript exe, then on the PATH via "where"
            GS = os.path.join(os.getcwd(), "gswin64c.exe")
            generator = "local gswin64c"
            if not os.path.isfile(GS):
                GS = os.path.join(os.getcwd(), "gswin32c.exe")
                generator = "local gswin32c"
            if not os.path.isfile(GS):
                params = ["where", "gswin64c"]
                try:
                    GS = subprocess.check_output(params, stderr=subprocess.STDOUT)
                    GS = makeUnicode(GS).strip()
                    generator = "gswin64c"
                except Exception as e:
                    logger.debug("where gswin64c failed: %s %s" % (type(e).__name__, str(e)))
            if not os.path.isfile(GS):
                params = ["where", "gswin32c"]
                try:
                    GS = subprocess.check_output(params, stderr=subprocess.STDOUT)
                    GS = makeUnicode(GS).strip()
                    generator = "gswin32c"
                except Exception as e:
                    logger.debug("where gswin32c failed: %s %s" % (type(e).__name__, str(e)))
            if not os.path.isfile(GS):
                logger.debug("No gswin found")
                generator = "(no windows ghostscript found)"
            else:
                # noinspection PyBroadException
                try:
                    params = [GS, "--version"]
                    res = subprocess.check_output(params, stderr=subprocess.STDOUT)
                    res = makeUnicode(res).strip()
                    logger.debug("Found %s [%s] version %s" % (generator, GS, res))
                    generator = "%s version %s" % (generator, res)
                    issuefile = issuefile.split('[')[0]
                    params = [GS, "-sDEVICE=jpeg", "-dNOPAUSE", "-dBATCH", "-dSAFER",
                              "-dFirstPage=1", "-dLastPage=1", "-dUseCropBox",
                              "-sOutputFile=%s" % coverfile, issuefile]
                    res = subprocess.check_output(params, stderr=subprocess.STDOUT)
                    res = makeUnicode(res).strip()
                    if not os.path.isfile(coverfile):
                        logger.debug("Failed to create jpg: %s" % res)
                except Exception:  # as why:
                    logger.warn("Failed to create jpg for %s" % issuefile)
                    logger.debug('Exception in gswin create_cover: %s' % traceback.format_exc())
        else:  # not windows
            try:
                # noinspection PyUnresolvedReferences
                from wand.image import Image
                interface = "wand"
            except ImportError:
                try:
                    # No PythonMagick in python3
                    # noinspection PyUnresolvedReferences
                    import PythonMagick
                    interface = "pythonmagick"
                except ImportError:
                    interface = ""
            try:
                if interface == 'wand':
                    generator = "wand interface"
                    with Image(filename=issuefile + '[0]') as img:
                        img.save(filename=coverfile)
                elif interface == 'pythonmagick':
                    generator = "pythonmagick interface"
                    img = PythonMagick.Image()
                    # PythonMagick requires filenames to be bytestr, not unicode
                    if type(issuefile) is text_type:
                        issuefile = makeBytestr(issuefile)
                    if type(coverfile) is text_type:
                        coverfile = makeBytestr(coverfile)
                    img.read(issuefile + '[0]')
                    img.write(coverfile)
                else:
                    # no imaging library available: fall back to ghostscript
                    GS = os.path.join(os.getcwd(), "gs")
                    generator = "local gs"
                    if not os.path.isfile(GS):
                        GS = ""
                        params = ["which", "gs"]
                        try:
                            GS = subprocess.check_output(params, stderr=subprocess.STDOUT)
                            GS = makeUnicode(GS).strip()
                            generator = GS
                        except Exception as e:
                            logger.debug("which gs failed: %s %s" % (type(e).__name__, str(e)))
                    if not os.path.isfile(GS):
                        logger.debug("Cannot find gs")
                        generator = "(no gs found)"
                    else:
                        params = [GS, "--version"]
                        res = subprocess.check_output(params, stderr=subprocess.STDOUT)
                        res = makeUnicode(res).strip()
                        logger.debug("Found gs [%s] version %s" % (GS, res))
                        generator = "%s version %s" % (generator, res)
                        issuefile = issuefile.split('[')[0]
                        params = [GS, "-sDEVICE=jpeg", "-dNOPAUSE", "-dBATCH", "-dSAFER",
                                  "-dFirstPage=1", "-dLastPage=1", "-dUseCropBox",
                                  "-sOutputFile=%s" % coverfile, issuefile]
                        res = subprocess.check_output(params, stderr=subprocess.STDOUT)
                        res = makeUnicode(res).strip()
                        if not os.path.isfile(coverfile):
                            logger.debug("Failed to create jpg: %s" % res)
            except Exception as e:
                logger.warn("Unable to create cover for %s using %s %s" %
                            (issuefile, type(e).__name__, generator))
                logger.debug('Exception in create_cover: %s' % traceback.format_exc())

        if os.path.isfile(coverfile):
            setperm(coverfile)
            logger.debug("Created cover for %s using %s" % (issuefile, generator))
            return

    # if not recognised extension or cover creation failed
    try:
        coverfile = safe_copy(
            os.path.join(lazylibrarian.PROG_DIR, 'data/images/nocover.jpg'), coverfile)
        setperm(coverfile)
    except Exception as why:
        logger.error("Failed to copy nocover file, %s %s" % (type(why).__name__, str(why)))
    return
def magazineScan():
    """Scan MAG_DEST_FOLDER for magazine issues and sync them into the database.

    Optionally (FULL_SCAN) first removes DB entries for issues no longer on
    disk and magazines with no issues, then walks the folder matching
    filenames against patterns built from MAG_DEST_FILE, adding new issues
    and updating each magazine's added/acquired/latest-issue dates.
    Sets lazylibrarian.MAG_UPDATE around the scan; returns None.
    """
    lazylibrarian.MAG_UPDATE = 1
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()

        mag_path = lazylibrarian.CONFIG['MAG_DEST_FOLDER']
        # drop any $-substitution suffix, keep the fixed leading part of the path
        mag_path = mag_path.split('$')[0]

        if lazylibrarian.CONFIG['MAG_RELATIVE']:
            # relative layout lives under the eBook directory, prefixed '_' or '.'
            if mag_path[0] not in '._':
                mag_path = '_' + mag_path
            mag_path = os.path.join(lazylibrarian.DIRECTORY('eBook'), mag_path)
            mag_path = mag_path.encode(lazylibrarian.SYS_ENCODING)

        if lazylibrarian.CONFIG['FULL_SCAN']:
            mags = myDB.select('select * from Issues')
            # check all the issues are still there, delete entry if not
            for mag in mags:
                title = mag['Title']
                issuedate = mag['IssueDate']
                issuefile = mag['IssueFile']

                if issuefile and not os.path.isfile(issuefile):
                    myDB.action('DELETE from Issues where issuefile=?', (issuefile, ))
                    logger.info('Issue %s - %s deleted as not found on disk' % (title, issuedate))
                    controlValueDict = {"Title": title}
                    newValueDict = {
                        "LastAcquired": None,  # clear magazine dates
                        "IssueDate": None,  # we will fill them in again later
                        "LatestCover": None,
                        "IssueStatus": "Skipped"  # assume there are no issues now
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    logger.debug('Magazine %s details reset' % title)

            mags = myDB.select('SELECT * from magazines')
            # now check the magazine titles and delete any with no issues
            for mag in mags:
                title = mag['Title']
                count = myDB.select(
                    'SELECT COUNT(Title) as counter FROM issues WHERE Title=?', (title, ))
                issues = count[0]['counter']
                if not issues:
                    logger.debug('Magazine %s deleted as no issues found' % title)
                    myDB.action('DELETE from magazines WHERE Title=?', (title, ))

        logger.info(' Checking [%s] for magazines' % mag_path)

        matchString = ''
        for char in lazylibrarian.CONFIG['MAG_DEST_FILE']:
            matchString = matchString + '\\' + char
        # massage the MAG_DEST_FILE config parameter into something we can use
        # with regular expression matching
        booktypes = ''
        count = -1
        booktype_list = getList(lazylibrarian.CONFIG['MAG_TYPE'])
        for book_type in booktype_list:
            count += 1
            if count == 0:
                booktypes = book_type
            else:
                booktypes = booktypes + '|' + book_type
        # NOTE(review): '\.[' + booktypes + ']' builds a CHARACTER CLASS (e.g.
        # [pdf|epub]) that matches a single character, not the alternatives;
        # it only works because re.match is unanchored at the end — probably
        # should be '\.(' + booktypes + ')'. Left unchanged here.
        match = matchString.replace(
            "\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "(?P<title>.*?)") + '\.[' + booktypes + ']'
        title_pattern = re.compile(match, re.VERBOSE)
        match = matchString.replace(
            "\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "") + '\.[' + booktypes + ']'
        date_pattern = re.compile(match, re.VERBOSE)

        # try to ensure startdir is str as os.walk can fail if it tries to convert a subdir or file
        # to utf-8 and fails (eg scandinavian characters in ascii 8bit)
        if isinstance(mag_path, unicode):  # NOTE: py2-only name; py3 never hits this
            try:
                mag_path = mag_path.encode('ASCII')
            except UnicodeEncodeError:
                logger.debug('Unicode error converting %s' % repr(mag_path))

        for dirname, dirnames, filenames in os.walk(mag_path):
            for fname in filenames[:]:
                # maybe not all magazines will be pdf?
                if is_valid_booktype(fname, booktype='mag'):
                    issuedate = ''
                    # first try the full title+date pattern
                    # noinspection PyBroadException
                    try:
                        match = title_pattern.match(fname)
                        if match:
                            issuedate = match.group("issuedate")
                            title = match.group("title")
                            match = True
                        else:
                            match = False
                    except Exception:
                        match = False

                    if not match:
                        # fall back to date-only pattern, title from folder name
                        try:
                            match = date_pattern.match(fname)
                            if match:
                                issuedate = match.group("issuedate")
                                title = os.path.basename(dirname)
                            else:
                                logger.debug("Pattern match failed for [%s]" % fname)
                                continue
                        except Exception as e:
                            logger.debug("Invalid name format for [%s] %s %s" %
                                         (fname, type(e).__name__, str(e)))
                            continue

                    logger.debug("Found %s Issue %s" % (title, fname))
                    issuefile = os.path.join(dirname, fname)  # full path to issue.pdf
                    mtime = os.path.getmtime(issuefile)
                    iss_acquired = datetime.date.isoformat(datetime.date.fromtimestamp(mtime))

                    controlValueDict = {"Title": title}

                    # is this magazine already in the database?
                    mag_entry = myDB.match(
                        'SELECT LastAcquired, IssueDate, MagazineAdded from magazines WHERE Title=?',
                        (title, ))
                    if not mag_entry:
                        # need to add a new magazine to the database
                        newValueDict = {
                            "Reject": None,
                            "Status": "Active",
                            "MagazineAdded": None,
                            "LastAcquired": None,
                            "LatestCover": None,
                            "IssueDate": None,
                            "IssueStatus": "Skipped",
                            "Regex": None
                        }
                        logger.debug("Adding magazine %s" % title)
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                        magissuedate = None
                        magazineadded = None
                    else:
                        maglastacquired = mag_entry['LastAcquired']
                        magissuedate = mag_entry['IssueDate']
                        magazineadded = mag_entry['MagazineAdded']
                        magissuedate = str(magissuedate).zfill(4)

                    issuedate = str(issuedate).zfill(4)  # for sorting issue numbers

                    # is this issue already in the database?
                    controlValueDict = {"Title": title, "IssueDate": issuedate}
                    issue_id = create_id("%s %s" % (title, issuedate))
                    iss_entry = myDB.match(
                        'SELECT Title from issues WHERE Title=? and IssueDate=?',
                        (title, issuedate))
                    if not iss_entry:
                        newValueDict = {
                            "IssueAcquired": iss_acquired,
                            "IssueID": issue_id,
                            "IssueFile": issuefile
                        }
                        myDB.upsert("Issues", newValueDict, controlValueDict)
                        logger.debug("Adding issue %s %s" % (title, issuedate))
                        create_cover(issuefile)
                        lazylibrarian.postprocess.processMAGOPF(
                            issuefile, title, issuedate, issue_id)

                    # see if this issues date values are useful
                    controlValueDict = {"Title": title}
                    if not mag_entry:  # new magazine, this is the only issue
                        newValueDict = {
                            "MagazineAdded": iss_acquired,
                            "LastAcquired": iss_acquired,
                            "LatestCover": os.path.splitext(issuefile)[0] + '.jpg',
                            "IssueDate": issuedate,
                            "IssueStatus": "Open"
                        }
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                    else:
                        # Set magazine_issuedate to issuedate of most recent issue we have
                        # Set latestcover to most recent issue cover
                        # Set magazine_added to acquired date of earliest issue we have
                        # Set magazine_lastacquired to acquired date of most recent issue we have
                        # acquired dates are read from magazine file timestamps
                        newValueDict = {"IssueStatus": "Open"}
                        if not magazineadded or iss_acquired < magazineadded:
                            newValueDict["MagazineAdded"] = iss_acquired
                        if not maglastacquired or iss_acquired > maglastacquired:
                            newValueDict["LastAcquired"] = iss_acquired
                        if not magissuedate or issuedate >= magissuedate:
                            newValueDict["IssueDate"] = issuedate
                            newValueDict["LatestCover"] = os.path.splitext(issuefile)[0] + '.jpg'
                        myDB.upsert("magazines", newValueDict, controlValueDict)

        magcount = myDB.match("select count(*) from magazines")
        isscount = myDB.match("select count(*) from issues")
        logger.info("Magazine scan complete, found %s magazine%s, %s issue%s" %
                    (magcount['count(*)'], plural(magcount['count(*)']),
                     isscount['count(*)'], plural(isscount['count(*)'])))
        lazylibrarian.MAG_UPDATE = 0
    except Exception:
        lazylibrarian.MAG_UPDATE = 0
        logger.error('Unhandled exception in magazineScan: %s' % traceback.format_exc())
def search_rss_book(books=None, library=None):
    """
    Search the configured RSS feeds for wanted books.

    books   -- list of dicts with a 'bookid' key (newly added books),
               or None to run a backlog search over every Wanted book
    library -- "eBook", "AudioBook", or None to search both types

    Matches are handed to processResultList(); nothing is returned.
    """
    if not (lazylibrarian.USE_RSS()):
        logger.warn('RSS search is disabled')
        scheduleJob(action='Stop', target='search_rss_book')
        return
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:  # only rename anonymous worker threads
            if books is None:
                threading.currentThread().name = "SEARCHALLRSS"
            else:
                threading.currentThread().name = "SEARCHRSS"

        myDB = database.DBConnection()
        searchbooks = []
        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded, books.Status, AudioStatus '
            cmd += 'from books,authors WHERE (books.Status="Wanted" OR AudioStatus="Wanted") '
            cmd += 'and books.AuthorID = authors.AuthorID order by BookAdded desc'
            results = myDB.select(cmd)
            for terms in results:
                searchbooks.append(terms)
        else:
            # The user has added a new book
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub, books.Status, AudioStatus '
                cmd += 'from books,authors WHERE BookID=? AND books.AuthorID = authors.AuthorID'
                results = myDB.select(cmd, (book['bookid'], ))
                for terms in results:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            logger.debug("SearchRSS - No books to search for")
            return

        resultlist, nproviders = IterateOverRSSSites()
        if not nproviders:
            logger.warn('No rss providers are available')
            scheduleJob(action='Stop', target='search_rss_book')
            return  # No point in continuing

        logger.info('RSS Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks))))

        searchlist = []
        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName']
            if searchbook['BookSub']:
                searchterm = searchterm + ': ' + searchbook['BookSub']

            if library is None or library == 'eBook':
                if searchbook['Status'] == "Wanted":
                    # don't search again for anything already snatched
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="eBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"], ))
                    if snatched:
                        logger.warn('eBook %s %s already marked snatched in wanted table' %
                                    (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append({
                            "bookid": searchbook['BookID'],
                            "bookName": searchbook['BookName'],
                            "bookSub": searchbook['BookSub'],
                            "authorName": searchbook['AuthorName'],
                            "library": "eBook",
                            "searchterm": searchterm
                        })

            if library is None or library == 'AudioBook':
                if searchbook['AudioStatus'] == "Wanted":
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="AudioBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"], ))
                    if snatched:
                        logger.warn('AudioBook %s %s already marked snatched in wanted table' %
                                    (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append({
                            "bookid": searchbook['BookID'],
                            "bookName": searchbook['BookName'],
                            "bookSub": searchbook['BookSub'],
                            "authorName": searchbook['AuthorName'],
                            "library": "AudioBook",
                            "searchterm": searchterm
                        })

        rss_count = 0
        for book in searchlist:
            if book['library'] == 'AudioBook':
                searchtype = 'audio'
            else:
                searchtype = 'book'
            found = processResultList(resultlist, book, searchtype, 'rss')

            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in book['bookName']:  # anything to shorten?
                searchtype = 'short' + searchtype
                found = processResultList(resultlist, book, searchtype, 'rss')

            if not found:
                # BUGFIX: this is the RSS search path, message used to say "NZB Searches"
                logger.info("RSS Searches for %s %s returned no results." %
                            (book['library'], book['searchterm']))
            else:
                # BUGFIX: was "if found > 1:" which is never true for a boolean result,
                # so the completion message below always reported 0 books found
                rss_count += 1

        logger.info("RSS Search for Wanted items complete, found %s book%s" %
                    (rss_count, plural(rss_count)))

    except Exception:
        logger.error('Unhandled exception in search_rss_book: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
def import_book(pp_path=None, bookID=None):
    """
    Import a downloaded book from pp_path into the LazyLibrarian folder
    structure, given just the bookID.

    Separated into a function so we can import books from an alternate
    directory, eg:
        if import_book(source_directory, bookID):
            ppcount = ppcount + 1

    Returns True on success, False on failure or unknown bookID.
    """
    myDB = database.DBConnection()
    # BUGFIX: use a parameterized query (as the other lookups in this file do)
    # instead of interpolating bookID into the SQL string
    data = myDB.select('SELECT * from books WHERE BookID=?', (bookID, ))
    if not data:
        # previously fell off the end returning None; make the failure explicit
        logger.debug('Book %s not found in database' % bookID)
        return False

    authorname = data[0]['AuthorName']
    bookname = data[0]['BookName']

    # EBOOK_DEST_FOLDER is a template; forward slashes break it on windows
    if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
        logger.warn('Please check your EBOOK_DEST_FOLDER setting')
        lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace('$Title', bookname)
    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace('$Title', bookname)
    global_name = common.remove_accents(global_name)
    # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
    # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere?
    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
    dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
    dest_path = os.path.join(lazylibrarian.DESTINATION_DIR,
                             dest_path).encode(lazylibrarian.SYS_ENCODING)

    processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)
    if processBook:
        # update the wanted table so this download isn't processed again
        controlValueDict = {"BookID": bookID}
        newValueDict = {"Status": "Processed", "NZBDate": formatter.now()}  # say when we processed it
        myDB.upsert("wanted", newValueDict, controlValueDict)
        processExtras(myDB, dest_path, global_name, data)
        logger.info('Successfully processed: %s' % global_name)
        return True

    logger.error('Postprocessing for %s has failed.' % global_name)
    logger.error('Warning - Residual files remain in %s.fail' % pp_path)
    # mark the download as failed so we don't pick it up again next run
    try:
        os.rename(pp_path, pp_path + '.fail')
    except Exception:  # narrowed from a bare except: (no longer swallows SystemExit/KeyboardInterrupt)
        logger.debug("Unable to rename %s" % pp_path)
    return False
def processDir(force=False, reset=False):
    """
    Scan the download directory for completed downloads matching entries
    marked "Snatched" in the wanted table, and post-process each match
    (move into the library, update books/magazines/issues tables, notify).
    Finally, import any leftover "LL.(bookid)" folders.

    force -- process even if nothing is marked Snatched
    reset -- reschedule this job when finished
    """
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    # fall back to the current working directory if DOWNLOAD_DIR is unset/invalid
    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
        return False

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if force is False and len(snatched) == 0:
        logger.info('Nothing marked as snatched. Stopping postprocessor job.')
        common.schedule_job(action='Stop', target='processDir')
    elif len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
    else:
        logger.debug("Checking %s downloads for %s snatched files" % (len(downloads), len(snatched)))
        ppcount = 0
        for book in snatched:
            found = False
            for fname in downloads:
                if not fname.endswith('.fail'):  # has this failed before?
                    # this is to get round differences in torrent filenames.
                    # Torrents aren't always returned with the name we searched for
                    # there might be a better way...
                    # NOTE(review): str.decode implies Python 2 byte-strings here
                    if isinstance(fname, str):
                        matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                    else:
                        matchname = fname
                    # strip the " LL.(bookid)" suffix before fuzzy comparison
                    if ' LL.(' in matchname:
                        matchname = matchname.split(' LL.(')[0]
                    matchtitle = book['NZBtitle']
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                    if match >= 95:
                        fname = matchname
                        if os.path.isfile(os.path.join(processpath, fname)):
                            # handle single file downloads here...
                            # wrap a bare ebook/magazine file in a folder of the same name
                            # so the rest of the pipeline only deals with folders
                            if formatter.is_valid_booktype(fname, booktype="book") \
                                    or formatter.is_valid_booktype(fname, booktype="mag"):
                                dirname = os.path.join(processpath, os.path.splitext(fname)[0])
                                if not os.path.exists(dirname):
                                    try:
                                        os.makedirs(dirname)
                                    except OSError as why:
                                        logger.debug('Failed to create directory %s, %s' %
                                                     (dirname, why.strerror))
                                if os.path.exists(dirname):
                                    try:
                                        shutil.move(os.path.join(processpath, fname),
                                                    os.path.join(dirname, fname))
                                        fname = os.path.splitext(fname)[0]
                                    except Exception as why:
                                        logger.debug("Failed to move file %s to %s, %s" %
                                                     (fname, dirname, str(why)))
                        if os.path.isdir(os.path.join(processpath, fname)):
                            pp_path = os.path.join(processpath, fname)
                            logger.debug('Found folder %s for %s' % (pp_path, book['NZBtitle']))
                            found = True
                            break
                    else:
                        logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle))
                else:
                    logger.debug('Skipping %s' % fname)

            if found:
                # NOTE(review): BookID is interpolated into the SQL string — assumed to be an
                # internally generated id; a parameterized query would be safer
                data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID'])
                if data:
                    # it's an ebook
                    authorname = data[0]['AuthorName']
                    bookname = data[0]['BookName']
                    if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                        logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                        lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')
                    # Default destination path, should be allowed change per config file.
                    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = common.remove_accents(global_name)
                    # dest_path = authorname+'/'+bookname
                    # global_name = bookname + ' - ' + authorname
                    # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere?
                    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                    dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
                    dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                        lazylibrarian.SYS_ENCODING)
                else:
                    # not a book — try the magazines table (for magazines, wanted.BookID holds the Title)
                    data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                    if data:
                        # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                        # files are downloading, there will be an error in post-processing, trying to go to the
                        # same directory.
                        mostrecentissue = data[0]['IssueDate']  # keep for processing issues arriving out of order
                        # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                        # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere?
                        dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                               ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                        mag_name = formatter.latinToAscii(formatter.replace_all(book['BookID'], dic))
                        # book auxinfo is a cleaned date, eg 2015-01-01
                        dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                            '$IssueDate', book['AuxInfo']).replace('$Title', mag_name)
                        # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                        if lazylibrarian.MAG_RELATIVE:
                            # prefix keeps magazine folders grouped/sorted inside DESTINATION_DIR
                            if dest_path[0] not in '._':
                                dest_path = '_' + dest_path
                            dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                                lazylibrarian.SYS_ENCODING)
                        else:
                            dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                        # bookname None is used below to mean "this is a magazine"
                        authorname = None
                        bookname = None
                        global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                            '$Title', mag_name)
                        global_name = common.remove_accents(global_name)
                        # global_name = book['AuxInfo']+' - '+title
                    else:
                        logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                        continue
            else:
                logger.debug("Snatched %s %s is not in download directory" %
                             (book['NZBmode'], book['NZBtitle']))
                continue

            # try:
            #    os.chmod(dest_path, 0777)
            # except Exception, e:
            #    logger.debug("Could not chmod post-process directory: " + str(dest_path))

            processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

            if processBook:
                logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
                # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
                controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Processed", "NZBDate": formatter.now()}  # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)

                if bookname is not None:  # it's a book, if None it's a magazine
                    processExtras(myDB, dest_path, global_name, data)
                else:
                    # update mags
                    controlValueDict = {"Title": book['BookID']}
                    if mostrecentissue > book['AuxInfo']:  # check this in case processing issues arriving out of order
                        newValueDict = {"LastAcquired": formatter.today(), "IssueStatus": "Open"}
                    else:
                        newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": formatter.today(),
                                        "IssueStatus": "Open"}
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # dest_path is where we put the magazine after processing, but we don't have the full filename
                    # so look for any "book" in that directory
                    dest_file = book_file(dest_path, booktype='mag')
                    controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                    newValueDict = {"IssueAcquired": formatter.today(),
                                    "IssueFile": dest_file,
                                    "IssueID": magazinescan.create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                    }
                    myDB.upsert("issues", newValueDict, controlValueDict)
                    # create a thumbnail cover for the new issue
                    magazinescan.create_cover(dest_file)

                logger.info('Successfully processed: %s' % global_name)
                ppcount = ppcount + 1
                notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now())
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s.fail' % pp_path)
                # at this point, as it failed we should move it or it will get postprocessed
                # again (and fail again)
                try:
                    os.rename(pp_path, pp_path + '.fail')
                except:
                    logger.debug("Unable to rename %s" % pp_path)

        downloads = os.listdir(processpath)  # check in case we processed/deleted some above

        # second pass: import any remaining folders tagged with our "LL.(bookid)" marker
        for directory in downloads:
            if "LL.(" in directory and not directory.endswith('.fail'):
                bookID = str(directory).split("LL.(")[1].split(")")[0]
                logger.debug("Book with id: " + str(bookID) + " is in downloads")
                pp_path = os.path.join(processpath, directory)

                if os.path.isfile(pp_path):
                    # NOTE(review): this discards the filename and imports from the download
                    # root instead — looks suspicious, confirm intended behaviour
                    pp_path = os.path.join(processpath)

                if (os.path.isdir(pp_path)):
                    logger.debug('Found LL folder %s.' % pp_path)
                if import_book(pp_path, bookID):
                    ppcount = ppcount + 1

        if ppcount:
            logger.info('%s books/mags have been processed.' % ppcount)
        else:
            logger.info('No snatched books/mags have been found')
    if reset:
        common.schedule_job(action='Restart', target='processDir')
def SABnzbd(title=None, nzburl=None, remove_data=False):
    """
    Send a command to SABnzbd via its HTTP api.

    nzburl is either a special command keyword ('auth', 'get_cats', 'queue',
    'history', 'delete', 'delhistory') or the url of an nzb to enqueue;
    title is the nzb name (or the nzo_id/value for delete commands).
    remove_data also deletes downloaded files for delete/delhistory.

    Returns a (value, message) tuple: (result-or-nzo_id, '') on success,
    (False, errormessage) on failure.
    """
    if nzburl in ['delete', 'delhistory'] and title == 'unknown':
        res = '%s function unavailable in this version of sabnzbd, no nzo_ids' % nzburl
        logger.debug(res)
        return False, res

    hostname = lazylibrarian.CONFIG['SAB_HOST']
    port = check_int(lazylibrarian.CONFIG['SAB_PORT'], 0)
    if not hostname or not port:
        res = 'Invalid sabnzbd host or port, check your config'
        logger.error(res)
        return False, res

    if hostname.endswith('/'):
        hostname = hostname[:-1]
    if not hostname.startswith("http://") and not hostname.startswith("https://"):
        hostname = 'http://' + hostname

    HOST = "%s:%s" % (hostname, port)

    if lazylibrarian.CONFIG['SAB_SUBDIR']:
        HOST = HOST + "/" + lazylibrarian.CONFIG['SAB_SUBDIR']

    def _add_credentials(p):
        # every mode except the bare connection test sends the same optional credentials;
        # factored out of five duplicated blocks
        if lazylibrarian.CONFIG['SAB_USER']:
            p['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            p['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            p['apikey'] = lazylibrarian.CONFIG['SAB_API']

    params = {}
    if nzburl == 'auth' or nzburl == 'get_cats':
        # connection test, check auth mode or get_cats
        params['mode'] = nzburl
        params['output'] = 'json'
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        title = 'LL.(%s)' % nzburl
    elif nzburl == 'queue':
        params['mode'] = 'queue'
        params['limit'] = '100'
        params['output'] = 'json'
        _add_credentials(params)
        title = 'LL.(Queue)'
    elif nzburl == 'history':
        params['mode'] = 'history'
        params['limit'] = '100'
        params['output'] = 'json'
        _add_credentials(params)
        title = 'LL.(History)'
    elif nzburl == 'delete':
        # only deletes tasks if still in the queue, ie NOT completed tasks
        params['mode'] = 'queue'
        params['output'] = 'json'
        params['name'] = nzburl  # nzburl is the literal string 'delete' here
        params['value'] = title  # the nzo_id to remove
        _add_credentials(params)
        if remove_data:
            params['del_files'] = 1
        title = 'LL.(Delete) ' + title
    elif nzburl == 'delhistory':
        params['mode'] = 'history'
        params['output'] = 'json'
        params['name'] = 'delete'
        params['value'] = title
        _add_credentials(params)
        if remove_data:
            params['del_files'] = 1
        title = 'LL.(DelHistory) ' + title
    else:
        params['mode'] = 'addurl'
        params['output'] = 'json'
        if nzburl:
            params['name'] = nzburl
        if title:
            params['nzbname'] = title
        _add_credentials(params)
        if lazylibrarian.CONFIG['SAB_CAT']:
            params['cat'] = lazylibrarian.CONFIG['SAB_CAT']
        if lazylibrarian.CONFIG['USENET_RETENTION']:
            params["maxage"] = lazylibrarian.CONFIG['USENET_RETENTION']

    # FUTURE-CODE
    # if lazylibrarian.SAB_PRIO:
    #     params["priority"] = lazylibrarian.SAB_PRIO
    # if lazylibrarian.SAB_PP:
    #     params["script"] = lazylibrarian.SAB_SCRIPT

    URL = HOST + "/api?" + urlencode(params)

    # to debug because of api
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug('Request url for <a href="%s">SABnzbd</a>' % URL)

    proxies = proxyList()
    try:
        timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
        r = requests.get(URL, timeout=timeout, proxies=proxies)
        result = r.json()
    except requests.exceptions.Timeout:
        res = "Timeout connecting to SAB with URL: %s" % URL
        logger.error(res)
        return False, res
    except Exception as e:
        if hasattr(e, 'reason'):
            errmsg = e.reason
        elif hasattr(e, 'strerror'):
            errmsg = e.strerror
        else:
            errmsg = str(e)
        res = "Unable to connect to SAB with URL: %s, %s" % (URL, errmsg)
        logger.error(res)
        return False, res

    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug("Result text from SAB: " + str(result))

    if title:
        title = unaccented_str(title)
        if title.startswith('LL.('):
            # internal command (queue/history/etc) - hand back the raw result
            return result, ''

    if result['status'] is True:
        logger.info("%s sent to SAB successfully." % title)
        # sab versions earlier than 0.8.0 don't return nzo_ids
        if 'nzo_ids' in result:
            if result['nzo_ids']:  # check its not empty
                return result['nzo_ids'][0], ''
        # BUGFIX: was "return 'unknown'" - every other path returns a
        # (value, message) pair, so callers unpacking two values would crash
        return 'unknown', ''
    elif result['status'] is False:
        res = "SAB returned Error: %s" % result['error']
        logger.error(res)
        return False, res
    else:
        res = "Unknown error: %s" % str(result)
        logger.error(res)
        return False, res
def update():
    """
    Update the running installation according to CONFIG['INSTALL_TYPE']:
    'git' runs a git pull, 'source' downloads and unpacks a tarball over
    the program directory; 'win' and 'package' are refused.

    Returns True if an update was applied, False otherwise.
    """
    if lazylibrarian.CONFIG['INSTALL_TYPE'] == 'win':
        logmsg('info', 'Windows .exe updating not supported yet.')
        return False
    elif lazylibrarian.CONFIG['INSTALL_TYPE'] == 'package':
        logmsg('info', 'Please use your package manager to update')
        return False

    elif lazylibrarian.CONFIG['INSTALL_TYPE'] == 'git':
        branch = getCurrentGitBranch()

        # discard any stashed local changes before pulling
        _, _ = runGit('stash clear')
        output, err = runGit('pull origin ' + branch)

        if not output:
            logmsg('error', 'Couldn\'t download latest version')
            return False

        # scan git's output for known failure markers
        for line in output.split('\n'):
            if 'Already up-to-date.' in line:
                logmsg('info', 'No update available: ' + str(output))
                return False
            elif 'Aborting' in line or 'local changes' in line:
                logmsg('error', 'Unable to update: ' + str(output))
                return False

        # Update version.txt and timestamp
        updateVersionFile(lazylibrarian.CONFIG['LATEST_VERSION'])
        lazylibrarian.CONFIG['GIT_UPDATED'] = str(int(time.time()))
        return True

    elif lazylibrarian.CONFIG['INSTALL_TYPE'] == 'source':
        if 'gitlab' in lazylibrarian.CONFIG['GIT_HOST']:
            # NOTE(review): GITLAB_TOKEN is used as the host component of the url
            # here while the non-gitlab branch uses GIT_HOST — looks odd, confirm
            # GITLAB_TOKEN actually holds a host-like value
            tar_download_url = 'https://%s/%s/%s/-/archive/%s/%s-%s.tar.gz' % (
                lazylibrarian.GITLAB_TOKEN, lazylibrarian.CONFIG['GIT_USER'],
                lazylibrarian.CONFIG['GIT_REPO'], lazylibrarian.CONFIG['GIT_BRANCH'],
                lazylibrarian.CONFIG['GIT_REPO'], lazylibrarian.CONFIG['GIT_BRANCH'])
        else:
            tar_download_url = 'https://%s/%s/%s/tarball/%s' % (
                lazylibrarian.CONFIG['GIT_HOST'], lazylibrarian.CONFIG['GIT_USER'],
                lazylibrarian.CONFIG['GIT_REPO'], lazylibrarian.CONFIG['GIT_BRANCH'])
        update_dir = os.path.join(lazylibrarian.PROG_DIR, 'update')

        try:
            logmsg('info', 'Downloading update from: ' + tar_download_url)
            headers = {'User-Agent': getUserAgent()}
            proxies = proxyList()
            timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
            r = requests.get(tar_download_url, timeout=timeout, headers=headers, proxies=proxies)
        except requests.exceptions.Timeout:
            logmsg('error', "Timeout retrieving new version from " + tar_download_url)
            return False
        except Exception as e:
            if hasattr(e, 'reason'):
                errmsg = e.reason
            else:
                errmsg = str(e)
            logmsg('error',
                   "Unable to retrieve new version from " + tar_download_url +
                   ", can't update: %s" % errmsg)
            return False

        download_name = r.url.split('/')[-1]
        tar_download_path = os.path.join(lazylibrarian.PROG_DIR, download_name)

        # Save tar to disk
        with open(tar_download_path, 'wb') as f:
            f.write(r.content)

        # Extract the tar to update folder
        # NOTE(review): extractall with no member sanitization — a malicious
        # archive could write outside update_dir (path traversal); the url is
        # built from our own config so risk is low, but worth hardening
        logmsg('info', 'Extracting file ' + tar_download_path)
        try:
            with tarfile.open(tar_download_path) as tar:
                tar.extractall(update_dir)
        except Exception as e:
            # NOTE(review): format string prints the exception name where the
            # filename reads most naturally — arguments look transposed
            logger.error('Failed to unpack tarfile %s (%s): %s' %
                         (type(e).__name__, tar_download_path, str(e)))
            return False

        # Delete the tar.gz
        logmsg('info', 'Deleting file ' + tar_download_path)
        os.remove(tar_download_path)

        # Find update dir name (the tarball should unpack to exactly one folder)
        update_dir_contents = [
            x for x in os.listdir(update_dir)
            if os.path.isdir(os.path.join(update_dir, x))
        ]
        if len(update_dir_contents) != 1:
            logmsg('error',
                   "Invalid update data, update failed: " + str(update_dir_contents))
            return False
        content_dir = os.path.join(update_dir, update_dir_contents[0])

        # walk temp folder and move files to main folder
        for rootdir, dirnames, filenames in os.walk(content_dir):
            rootdir = rootdir[len(content_dir) + 1:]  # path relative to content_dir
            for curfile in filenames:
                old_path = os.path.join(content_dir, rootdir, curfile)
                new_path = os.path.join(lazylibrarian.PROG_DIR, rootdir, curfile)

                if os.path.isfile(new_path):
                    os.remove(new_path)
                os.renames(old_path, new_path)

        # Update version.txt and timestamp
        updateVersionFile(lazylibrarian.CONFIG['LATEST_VERSION'])
        lazylibrarian.CONFIG['GIT_UPDATED'] = str(int(time.time()))
        return True
    else:
        logmsg('error', "Cannot perform update - Install Type not set")
        return False
def find_results(self, searchterm=None, queue=None):
    """ GoogleBooks performs much better if we search for author OR title
        not both at once, so if searchterm is not isbn, two searches needed.
        Lazylibrarian searches use <ll> to separate title from author in searchterm
        If this token isn't present, it's an isbn or searchterm as supplied by user

        Results are scored with fuzzy matching and put on `queue` as a list
        of dicts; nothing is returned.
    """
    try:
        myDB = database.DBConnection()
        resultlist = []
        # See if we should check ISBN field, otherwise ignore it
        api_strings = ['inauthor:', 'intitle:']
        if is_valid_isbn(searchterm):
            api_strings = ['isbn:']

        api_hits = 0

        ignored = 0
        total_count = 0
        no_author_count = 0
        title = ''
        authorname = ''

        if ' <ll> ' in searchterm:  # special token separates title from author
            title, authorname = searchterm.split(' <ll> ')

        fullterm = searchterm.replace(' <ll> ', ' ')
        logger.debug('Now searching Google Books API with searchterm: %s' % fullterm)

        # one pass per query type; searchterm is rebound inside each branch
        for api_value in api_strings:
            set_url = self.url
            if api_value == "isbn:":
                set_url = set_url + quote(api_value + searchterm)
            elif api_value == 'intitle:':
                searchterm = fullterm
                if title:  # just search for title
                    # noinspection PyUnresolvedReferences
                    title = title.split(' (')[0]  # without any series info
                    searchterm = title
                searchterm = searchterm.replace("'", "").replace('"', '').strip()  # and no quotes
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
                set_url = set_url + quote(api_value + '"' + searchterm + '"')
            elif api_value == 'inauthor:':
                searchterm = fullterm
                if authorname:
                    searchterm = authorname  # just search for author
                searchterm = searchterm.strip()
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
                set_url = set_url + quote_plus(api_value + '"' + searchterm + '"')

            # pagination state; number_results is replaced by the API's totalItems
            startindex = 0
            resultcount = 0
            ignored = 0
            number_results = 1
            total_count = 0
            no_author_count = 0
            try:
                while startindex < number_results:
                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urlencode(self.params)

                    try:
                        jsonresults, in_cache = gb_json_request(URL)
                        if jsonresults is None:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                            logger.debug('Searching url: ' + URL)
                        if number_results == 0:
                            logger.warn('Found no results for %s with value: %s' %
                                        (api_value, searchterm))
                            break
                        else:
                            pass
                    except Exception as err:
                        if hasattr(err, 'reason'):
                            errmsg = err.reason
                        else:
                            errmsg = str(err)
                        logger.warn(
                            'Google Books API Error [%s]: Check your API key or wait a while' %
                            errmsg)
                        break

                    # advance by page size; 40 assumed to match self.params —
                    # TODO confirm maxResults
                    startindex += 40

                    for item in jsonresults['items']:
                        total_count += 1
                        book = bookdict(item)
                        if not book['author']:
                            logger.debug('Skipped a result without authorfield.')
                            no_author_count += 1
                            continue

                        if not book['name']:
                            logger.debug('Skipped a result without title.')
                            continue

                        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
                        if "All" not in valid_langs:  # don't care about languages, accept all
                            try:
                                # skip if language is not in valid list -
                                booklang = book['lang']
                                if booklang not in valid_langs:
                                    logger.debug('Skipped %s with language %s' %
                                                 (book['name'], booklang))
                                    ignored += 1
                                    continue
                            except KeyError:
                                ignored += 1
                                logger.debug('Skipped %s where no language is found' % book['name'])
                                continue

                        # fuzzy-score author and title against what we searched for
                        if authorname:
                            author_fuzz = fuzz.ratio(book['author'], authorname)
                        else:
                            author_fuzz = fuzz.ratio(book['author'], fullterm)

                        if title:
                            book_fuzz = fuzz.token_set_ratio(book['name'], title)
                            # lose a point for each extra word in the fuzzy matches so we get the closest match
                            words = len(getList(book['name']))
                            words -= len(getList(title))
                            book_fuzz -= abs(words)
                        else:
                            book_fuzz = fuzz.token_set_ratio(book['name'], fullterm)

                        isbn_fuzz = 0
                        if is_valid_isbn(fullterm):
                            isbn_fuzz = 100

                        highest_fuzz = max((author_fuzz + book_fuzz) / 2, isbn_fuzz)

                        dic = {':': '.', '"': '', '\'': ''}
                        bookname = replace_all(book['name'], dic)

                        bookname = unaccented(bookname)
                        bookname = bookname.strip()  # strip whitespace

                        # link to an existing author record if we have one
                        AuthorID = ''
                        if book['author']:
                            match = myDB.match(
                                'SELECT AuthorID FROM authors WHERE AuthorName=?',
                                (book['author'].replace('"', '""'), ))
                            if match:
                                AuthorID = match['AuthorID']

                        resultlist.append({
                            'authorname': book['author'],
                            'authorid': AuthorID,
                            'bookid': item['id'],
                            'bookname': bookname,
                            'booksub': book['sub'],
                            'bookisbn': book['isbn'],
                            'bookpub': book['pub'],
                            'bookdate': book['date'],
                            'booklang': book['lang'],
                            'booklink': book['link'],
                            'bookrate': float(book['rate']),
                            'bookrate_count': book['rate_count'],
                            'bookimg': book['img'],
                            'bookpages': book['pages'],
                            'bookgenre': book['genre'],
                            'bookdesc': book['desc'],
                            'author_fuzz': author_fuzz,
                            'book_fuzz': book_fuzz,
                            'isbn_fuzz': isbn_fuzz,
                            'highest_fuzz': highest_fuzz,
                            'num_reviews': book['ratings']
                        })

                        resultcount += 1

            except KeyError:
                # a response page with no 'items' (or missing fields) ends the search
                break

            logger.debug("Returning %s result%s for (%s) with keyword: %s" %
                         (resultcount, plural(resultcount), api_value, searchterm))

        logger.debug("Found %s result%s" % (total_count, plural(total_count)))
        logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
        logger.debug("Removed %s book%s with no author" %
                     (no_author_count, plural(no_author_count)))
        logger.debug('The Google Books API was hit %s time%s for searchterm: %s' %
                     (api_hits, plural(api_hits), fullterm))

        queue.put(resultlist)

    except Exception:
        logger.error('Unhandled exception in GB.find_results: %s' % traceback.format_exc())
def grsync(status, shelf):
    """
    Two-way sync between books with the given lazylibrarian `status`
    ('Wanted' or 'Open', the latter also including 'Have') and the named
    goodreads shelf. Differences against the last saved sync snapshot decide
    whether a change is a new addition or a deletion on either side.

    Returns (shelf_changed, ll_changed) counts, or (0, 0) on error.
    """
    # noinspection PyBroadException
    try:
        shelf = shelf.lower()
        logger.info('Syncing %s to %s shelf' % (status, shelf))
        myDB = database.DBConnection()
        # NOTE(review): status/shelf are interpolated into SQL strings here —
        # assumed to come from trusted config, not user input; verify
        cmd = 'select bookid from books where status="%s"' % status
        if status == 'Open':
            cmd += ' or status="Have"'
        results = myDB.select(cmd)
        ll_list = []
        for terms in results:
            ll_list.append(terms['bookid'])

        GA = grauth()
        GR = None  # lazily created GoodReads client, only if we must import books
        shelves = GA.get_shelf_list()
        found = False
        for item in shelves:  # type: dict
            if item['name'] == shelf:
                found = True
                break
        if not found:
            # shelf doesn't exist on goodreads yet, create it
            res, msg = GA.create_shelf(shelf=shelf)
            if not res:
                logger.debug("Unable to create shelf %s: %s" % (shelf, msg))
                return 0, 0
            else:
                logger.debug("Created new goodreads shelf: %s" % shelf)

        gr_shelf = GA.get_gr_shelf_contents(shelf=shelf)
        dstatus = status
        if dstatus == "Open":
            dstatus += "/Have"

        logger.info("There are %s %s books, %s books on goodreads %s shelf" %
                    (len(ll_list), dstatus, len(gr_shelf), shelf))

        # Sync method for WANTED:
        # Get results of last_sync (if any)
        # For each book in last_sync
        #    if not in ll_list, new deletion, remove from gr_shelf
        #    if not in gr_shelf, new deletion, remove from ll_list, mark Skipped
        # For each book in ll_list
        #    if not in last_sync, new addition, add to gr_shelf
        # For each book in gr_shelf
        #    if not in last sync, new addition, add to ll_list, mark Wanted
        #
        # save ll WANTED as last_sync

        # For HAVE/OPEN method is the same, but only change status if HAVE, not OPEN

        cmd = 'select SyncList from sync where UserID="%s" and Label="%s"' % ("goodreads", shelf)
        res = myDB.match(cmd)
        last_sync = []
        shelf_changed = 0
        ll_changed = 0
        if res:
            last_sync = getList(res['SyncList'])

        # set arithmetic against the snapshot decides direction of each change
        added_to_shelf = list(set(gr_shelf) - set(last_sync))
        removed_from_shelf = list(set(last_sync) - set(gr_shelf))
        added_to_ll = list(set(ll_list) - set(last_sync))
        removed_from_ll = list(set(last_sync) - set(ll_list))

        logger.info("%s missing from lazylibrarian %s" % (len(removed_from_ll), shelf))
        for book in removed_from_ll:
            # first the deletions since last sync...
            try:
                res, content = GA.BookToList(book, shelf, action='remove')
            except Exception as e:
                logger.error("Error removing %s from %s: %s %s" %
                             (book, shelf, type(e).__name__, str(e)))
                res = None
                content = ''
            if res:
                logger.debug("%10s removed from %s shelf" % (book, shelf))
                shelf_changed += 1
            else:
                logger.warn("Failed to remove %s from %s shelf: %s" % (book, shelf, content))

        logger.info("%s missing from goodreads %s" % (len(removed_from_shelf), shelf))
        for book in removed_from_shelf:  # deleted from goodreads
            cmd = 'select Status from books where BookID="%s"' % book
            res = myDB.match(cmd)
            if not res:
                # book unknown locally - import it first so we can set a status
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                res = myDB.match(cmd)
            if not res:
                logger.warn('Book %s not found in database' % book)
            else:
                if res['Status'] in ['Have', 'Wanted']:
                    myDB.action('UPDATE books SET Status="Skipped" WHERE BookID=?', (book,))
                    ll_changed += 1
                    logger.debug("%10s set to Skipped" % book)
                else:
                    logger.warn("Not removing %s, book is marked %s" % (book, res['Status']))

        # new additions to lazylibrarian
        logger.info("%s new in lazylibrarian %s" % (len(added_to_ll), shelf))
        for book in added_to_ll:
            try:
                res, content = GA.BookToList(book, shelf, action='add')
            except Exception as e:
                logger.error("Error adding %s to %s: %s %s" %
                             (book, shelf, type(e).__name__, str(e)))
                res = None
                content = ''
            if res:
                logger.debug("%10s added to %s shelf" % (book, shelf))
                shelf_changed += 1
            else:
                logger.warn("Failed to add %s to %s shelf: %s" % (book, shelf, content))

        # new additions to goodreads shelf
        logger.info("%s new in goodreads %s" % (len(added_to_shelf), shelf))
        for book in added_to_shelf:
            cmd = 'select Status from books where BookID="%s"' % book
            res = myDB.match(cmd)
            if not res:
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                res = myDB.match(cmd)
            if not res:
                logger.warn('Book %s not found in database' % book)
            else:
                if status == 'Open':
                    if res['Status'] == 'Open':
                        logger.warn("Book %s is already marked Open" % book)
                    else:
                        myDB.action('UPDATE books SET Status="Have" WHERE BookID=?', (book,))
                        ll_changed += 1
                        logger.debug("%10s set to Have" % book)
                elif status == 'Wanted':
                    # if in "wanted" and already marked "Open/Have", optionally delete from "wanted"
                    # (depending on user prefs, to-read and wanted might not be the same thing)
                    if lazylibrarian.CONFIG['GR_UNIQUE'] and res['Status'] in ['Open', 'Have']:
                        try:
                            res, content = GA.BookToList(book, shelf, action='remove')
                        except Exception as e:
                            logger.error("Error removing %s from %s: %s %s" %
                                         (book, shelf, type(e).__name__, str(e)))
                            res = None
                            content = ''
                        if res:
                            logger.debug("%10s removed from %s shelf" % (book, shelf))
                            shelf_changed += 1
                        else:
                            logger.warn("Failed to remove %s from %s shelf: %s" %
                                        (book, shelf, content))
                    elif res['Status'] != 'Open':
                        myDB.action('UPDATE books SET Status="Wanted" WHERE BookID=?', (book,))
                        ll_changed += 1
                        logger.debug("%10s set to Wanted" % book)
                    else:
                        logger.warn("Not setting %s as Wanted, already marked Open" % book)

        # get new definitive list from ll
        cmd = 'select bookid from books where status="%s"' % status
        if status == 'Open':
            cmd += ' or status="Have"'
        results = myDB.select(cmd)
        ll_list = []
        for terms in results:
            ll_list.append(terms['bookid'])

        # store as comparison for next sync
        controlValueDict = {"UserID": "goodreads", "Label": shelf}
        newValueDict = {"Date": str(time.time()), "Synclist": ', '.join(ll_list)}
        myDB.upsert("sync", newValueDict, controlValueDict)

        logger.debug('Sync %s to %s shelf complete' % (status, shelf))
        return shelf_changed, ll_changed

    except Exception:
        logger.error('Unhandled exception in grsync: %s' % traceback.format_exc())
        return 0, 0
def search_rss_book(books=None, library=None):
    """Search RSS feeds and wishlists for wanted books.

    books is a list of new books to add, or None for backlog search
    library is "eBook" or "AudioBook" or None to search all book types

    First processes any configured wishlists (marking matches as "Wanted"
    or importing new books), then builds a list of wanted books and runs
    each against the RSS provider results via processResultList().
    """
    try:
        # Rename the worker thread for log readability, but only if it still
        # has an auto-generated "Thread-n" name.
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if books is None:
                threading.currentThread().name = "SEARCHALLRSS"
            else:
                threading.currentThread().name = "SEARCHRSS"

        if not (lazylibrarian.USE_RSS()):
            logger.warn('RSS search is disabled')
            scheduleJob(action='Stop', target='search_rss_book')
            return

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverWishLists()
        new_books = 0
        if not wishproviders:
            logger.debug('No wishlists are set')
        else:
            # for each item in resultlist, add to database if necessary, and mark as wanted
            logger.debug('Processing %s item%s in wishlists' % (len(resultlist), plural(len(resultlist))))
            for book in resultlist:
                # we get rss_author, rss_title, maybe rss_isbn, rss_bookid (goodreads bookid)
                # we can just use bookid if goodreads, or try isbn and name matching on author/title if googlebooks
                # not sure if anyone would use a goodreads wishlist if not using goodreads interface...
                if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and book['rss_bookid']:
                    # GoodReads wishlist entry with a bookid: match directly on bookid.
                    bookmatch = myDB.match('select Status,BookName from books where bookid=?',
                                           (book['rss_bookid'],))
                    if bookmatch:
                        bookstatus = bookmatch['Status']
                        bookname = bookmatch['BookName']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info('Found book %s, already marked as "%s"' % (bookname, bookstatus))
                        else:  # skipped/ignored
                            logger.info('Found book %s, marking as "Wanted"' % bookname)
                            controlValueDict = {"BookID": book['rss_bookid']}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            new_books += 1
                    else:
                        # not in the database yet, import by goodreads bookid
                        import_book(book['rss_bookid'])
                        new_books += 1
                else:
                    # No usable bookid: fall back to isbn / author+title matching.
                    item = {}
                    results = None
                    item['Title'] = book['rss_title']
                    if book['rss_bookid']:
                        item['BookID'] = book['rss_bookid']
                    if book['rss_isbn']:
                        item['ISBN'] = book['rss_isbn']
                    bookmatch = finditem(item, book['rss_author'])
                    if bookmatch:  # it's already in the database
                        authorname = bookmatch['AuthorName']
                        bookname = bookmatch['BookName']
                        bookid = bookmatch['BookID']
                        bookstatus = bookmatch['Status']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info('Found book %s by %s, already marked as "%s"' %
                                        (bookname, authorname, bookstatus))
                        else:  # skipped/ignored
                            logger.info('Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            new_books += 1
                    else:  # not in database yet
                        # Try an isbn lookup first, then a fuzzy author/title search.
                        if book['rss_isbn']:
                            results = search_for(book['rss_isbn'])
                            if results:
                                result = results[0]  # type: dict
                                if result['isbn_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                                    logger.info("Found (%s%%) %s: %s" %
                                                (result['isbn_fuzz'], result['authorname'], result['bookname']))
                                    import_book(result['bookid'])
                                    new_books += 1
                                    bookmatch = True
                        if not results:
                            searchterm = "%s <ll> %s" % (item['Title'], formatAuthorName(book['rss_author']))
                            results = search_for(unaccented(searchterm))
                        if results:
                            result = results[0]  # type: dict
                            # both author and title must beat the configured match ratio
                            if result['author_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90) \
                                    and result['book_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                                logger.info("Found (%s%% %s%%) %s: %s" %
                                            (result['author_fuzz'], result['book_fuzz'],
                                             result['authorname'], result['bookname']))
                                import_book(result['bookid'])
                                new_books += 1
                                bookmatch = True
                        if not bookmatch:
                            msg = "Skipping book %s by %s" % (item['Title'], book['rss_author'])
                            if not results:
                                msg += ', No results returned'
                                logger.warn(msg)
                            else:
                                msg += ', No match found'
                                logger.warn(msg)
                                # log the closest (rejected) match to help the user tune MATCH_RATIO
                                result = results[0]  # type: dict
                                msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                            result['authorname'], result['bookname'])
                                logger.warn(msg)
        if new_books:
            logger.info("Wishlist marked %s book%s as Wanted" % (new_books, plural(new_books)))

        # Build the list of books to search the RSS providers for.
        searchbooks = []
        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded, books.Status, AudioStatus '
            cmd += 'from books,authors WHERE (books.Status="Wanted" OR AudioStatus="Wanted") '
            cmd += 'and books.AuthorID = authors.AuthorID order by BookAdded desc'
            results = myDB.select(cmd)
            for terms in results:
                searchbooks.append(terms)
        else:
            # The user has added a new book
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub, books.Status, AudioStatus '
                cmd += 'from books,authors WHERE BookID=? AND books.AuthorID = authors.AuthorID'
                results = myDB.select(cmd, (book['bookid'],))
                for terms in results:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            logger.debug("SearchRSS - No books to search for")
            return

        resultlist, nproviders = IterateOverRSSSites()
        if not nproviders and not wishproviders:
            logger.warn('No rss providers are available')
            return  # No point in continuing

        logger.info('RSS Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks))))

        searchlist = []
        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName']
            if searchbook['BookSub']:
                searchterm = searchterm + ': ' + searchbook['BookSub']

            # queue an eBook search unless one is already snatched for this book
            if library is None or library == 'eBook':
                if searchbook['Status'] == "Wanted":
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="eBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"],))
                    if snatched:
                        logger.warn('eBook %s %s already marked snatched in wanted table' %
                                    (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append({"bookid": searchbook['BookID'],
                                           "bookName": searchbook['BookName'],
                                           "bookSub": searchbook['BookSub'],
                                           "authorName": searchbook['AuthorName'],
                                           "library": "eBook",
                                           "searchterm": searchterm})

            # same again for the audiobook edition
            if library is None or library == 'AudioBook':
                if searchbook['AudioStatus'] == "Wanted":
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="AudioBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"],))
                    if snatched:
                        logger.warn('AudioBook %s %s already marked snatched in wanted table' %
                                    (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append({"bookid": searchbook['BookID'],
                                           "bookName": searchbook['BookName'],
                                           "bookSub": searchbook['BookSub'],
                                           "authorName": searchbook['AuthorName'],
                                           "library": "AudioBook",
                                           "searchterm": searchterm})

        rss_count = 0
        for book in searchlist:
            if book['library'] == 'AudioBook':
                searchtype = 'audio'
            else:
                searchtype = 'book'
            found = processResultList(resultlist, book, searchtype, 'rss')

            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in book['bookName']:  # anything to shorten?
                searchtype = 'short' + searchtype
                found = processResultList(resultlist, book, searchtype, 'rss')

            if not found:
                # NOTE(review): message says "NZB Searches" but this is the RSS search
                # path — looks like a copy/paste from the nzb searcher; confirm intent.
                logger.info("NZB Searches for %s %s returned no results." % (book['library'], book['searchterm']))
            # NOTE(review): 'found > True' presumably distinguishes a snatched result
            # (value above True/1) from merely found — confirm processResultList's
            # return values before simplifying this comparison.
            if found > True:
                rss_count += 1

        logger.info("RSS Search for Wanted items complete, found %s book%s" % (rss_count, plural(rss_count)))

    except Exception:
        logger.error('Unhandled exception in search_rss_book: %s' % traceback.format_exc())
    finally:
        # always restore the shared thread name
        threading.currentThread().name = "WEBSERVER"
def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped",
                     audiostatus="Skipped", entrystatus='Active', refresh=False):
    """Fetch and import all books for an author from the Google Books API.

    Pages through the API results (40 at a time via startIndex), filters by
    preferred language, applies the configured rejection rules (future date,
    missing date/isbn, bad characters, duplicates), and upserts accepted
    books into the 'books' table. Finally updates the author row with its
    latest book and writes a row of counters to the 'stats' table.

    authorid/authorname identify the author; bookstatus/audiostatus are the
    statuses given to newly added books; entrystatus is the author's status
    once loading finishes; refresh bypasses the API response cache.
    """
    # noinspection PyBroadException
    try:
        logger.debug('[%s] Now processing books with Google Books API' % authorname)
        # google doesnt like accents in author names
        set_url = self.url + quote('inauthor:"%s"' % unaccented_str(authorname))
        # counters reported in the stats table and the summary log lines
        api_hits = 0
        gr_lang_hits = 0
        lt_lang_hits = 0
        gb_lang_change = 0
        cache_hits = 0
        not_cached = 0
        startindex = 0
        removedResults = 0
        duplicates = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        locked_count = 0
        book_ignore_count = 0
        total_count = 0
        number_results = 1

        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        # Artist is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)

        try:
            # page through results; number_results is updated from the first reply
            while startindex < number_results:
                self.params['startIndex'] = startindex
                URL = set_url + '&' + urlencode(self.params)

                try:
                    jsonresults, in_cache = gb_json_request(URL, useCache=not refresh)
                    if jsonresults is None:
                        number_results = 0
                    else:
                        if not in_cache:
                            api_hits += 1
                        number_results = jsonresults['totalItems']
                except Exception as err:
                    if hasattr(err, 'reason'):
                        errmsg = err.reason
                    else:
                        errmsg = str(err)
                    logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                    break

                if number_results == 0:
                    logger.warn('Found no results for %s' % authorname)
                    break
                else:
                    logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname))

                startindex += 40  # google books page size

                for item in jsonresults['items']:
                    total_count += 1
                    book = bookdict(item)
                    # skip if no author, no author is no book.
                    if not book['author']:
                        logger.debug('Skipped a result without authorfield.')
                        continue

                    # isbnhead is the isbn language/region prefix used for lookups
                    isbnhead = ""
                    if len(book['isbn']) == 10:
                        isbnhead = book['isbn'][0:3]
                    elif len(book['isbn']) == 13:
                        isbnhead = book['isbn'][3:6]

                    booklang = book['lang']
                    # do we care about language?
                    if "All" not in valid_langs:
                        if book['isbn']:
                            # seems google lies to us, sometimes tells us books are in english when they are not
                            if booklang == "Unknown" or booklang == "en":
                                googlelang = booklang
                                match = False
                                # first try our own cached isbn->language table
                                lang = myDB.match('SELECT lang FROM languages where isbn=?', (isbnhead,))
                                if lang:
                                    booklang = lang['lang']
                                    cache_hits += 1
                                    logger.debug("Found cached language [%s] for [%s]" % (booklang, isbnhead))
                                    match = True
                                if not match:  # no match in cache, try lookup dict
                                    if isbnhead:
                                        if len(book['isbn']) == 13 and book['isbn'].startswith('979'):
                                            for lang in lazylibrarian.isbn_979_dict:
                                                if isbnhead.startswith(lang):
                                                    booklang = lazylibrarian.isbn_979_dict[lang]
                                                    logger.debug("ISBN979 returned %s for %s" % (booklang, isbnhead))
                                                    match = True
                                                    break
                                        elif (len(book['isbn']) == 10) or \
                                                (len(book['isbn']) == 13 and book['isbn'].startswith('978')):
                                            for lang in lazylibrarian.isbn_978_dict:
                                                if isbnhead.startswith(lang):
                                                    booklang = lazylibrarian.isbn_978_dict[lang]
                                                    # NOTE(review): log says ISBN979 in the 978 branch —
                                                    # looks like a copy/paste; confirm before changing.
                                                    logger.debug("ISBN979 returned %s for %s" % (booklang, isbnhead))
                                                    match = True
                                                    break
                                        if match:
                                            myDB.action('insert into languages values (?, ?)',
                                                        (isbnhead, booklang))
                                if not match:
                                    # last resort: external library-thing style lookup
                                    booklang = thingLang(book['isbn'])
                                    lt_lang_hits += 1
                                    if booklang:
                                        match = True
                                        myDB.action('insert into languages values (?, ?)', (isbnhead, booklang))

                                if match:
                                    # We found a better language match
                                    if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]:
                                        # these are all english, may need to expand this list
                                        logger.debug("%s Google thinks [%s], we think [%s]" %
                                                     (book['name'], googlelang, booklang))
                                        gb_lang_change += 1
                                else:  # No match anywhere, accept google language
                                    booklang = googlelang

                        # skip if language is in ignore list
                        if booklang not in valid_langs:
                            logger.debug('Skipped [%s] with language %s' % (book['name'], booklang))
                            ignored += 1
                            continue

                    # rejection reasons in 'ignorable' may still be imported as
                    # "Ignored" when IMP_IGNORE is set (see below)
                    ignorable = ['future', 'date', 'isbn']
                    if lazylibrarian.CONFIG['NO_LANG']:
                        ignorable.append('lang')

                    rejected = None  # (code, human-readable reason) when set
                    check_status = False
                    existing_book = None
                    bookname = book['name']
                    bookid = item['id']
                    if not bookname:
                        logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorname))
                        rejected = 'name', 'No bookname'
                    else:
                        bookname = replace_all(unaccented(bookname), {':': '.', '"': '', '\'': ''}).strip()
                        if re.match('[^\w-]', bookname):  # remove books with bad characters in title
                            logger.debug("[%s] removed book for bad characters" % bookname)
                            rejected = 'chars', 'Bad characters in bookname'

                    if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                        # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                        if book['date'] > today()[:len(book['date'])]:
                            logger.debug('Rejecting %s, future publication date %s' % (bookname, book['date']))
                            rejected = 'future', 'Future publication date [%s]' % book['date']

                    if not rejected and lazylibrarian.CONFIG['NO_PUBDATE']:
                        if not book['date']:
                            logger.debug('Rejecting %s, no publication date' % bookname)
                            rejected = 'date', 'No publication date'

                    if not rejected and lazylibrarian.CONFIG['NO_ISBN']:
                        if not isbnhead:
                            logger.debug('Rejecting %s, no isbn' % bookname)
                            rejected = 'isbn', 'No ISBN'

                    if not rejected:
                        # same author/title under a different bookid is a duplicate
                        cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                        cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                        match = myDB.match(cmd, (bookname.replace('"', '""'), authorname.replace('"', '""')))
                        if match:
                            if match['BookID'] != bookid:
                                # we have a different book with this author/title already
                                logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                             (match['BookID'], authorname, bookname, bookid))
                                rejected = 'bookid', 'Got under different bookid %s' % bookid
                                duplicates += 1

                    cmd = 'SELECT AuthorName,BookName,AudioStatus,books.Status FROM books,authors'
                    cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                    match = myDB.match(cmd, (bookid,))
                    if match:
                        # we have a book with this bookid already
                        if bookname != match['BookName'] or authorname != match['AuthorName']:
                            logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                         (bookid, authorname, bookname, match['AuthorName'], match['BookName']))
                        else:
                            logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                         (bookid, authorname, bookname))
                            check_status = True
                        duplicates += 1
                        rejected = 'got', 'Already got this book in database'
                        # Make sure we don't reject books we have got
                        if match['Status'] in ['Open', 'Have'] or match['AudioStatus'] in ['Open', 'Have']:
                            rejected = None

                    if rejected and rejected[0] not in ignorable:
                        removedResults += 1
                    if check_status or rejected is None or (
                            lazylibrarian.CONFIG['IMP_IGNORE'] and rejected[0] in ignorable):  # dates, isbn
                        # Work out the status and added-date for the upsert,
                        # preserving existing statuses and the Manual lock flag.
                        cmd = 'SELECT Status,AudioStatus,BookFile,AudioFile,Manual,BookAdded,BookName '
                        cmd += 'FROM books WHERE BookID=?'
                        existing = myDB.match(cmd, (bookid,))
                        if existing:
                            book_status = existing['Status']
                            audio_status = existing['AudioStatus']
                            if lazylibrarian.CONFIG['FOUND_STATUS'] == 'Open':
                                if book_status == 'Have' and existing['BookFile']:
                                    book_status = 'Open'
                                if audio_status == 'Have' and existing['AudioFile']:
                                    audio_status = 'Open'
                            locked = existing['Manual']
                            added = existing['BookAdded']
                            if locked is None:
                                locked = False
                            elif locked.isdigit():
                                locked = bool(int(locked))
                        else:
                            book_status = bookstatus  # new_book status, or new_author status
                            audio_status = audiostatus
                            added = today()
                            locked = False

                        if rejected:
                            reason = rejected[1]
                            if rejected[0] in ignorable:
                                book_status = 'Ignored'
                                audio_status = 'Ignored'
                                book_ignore_count += 1
                        else:
                            reason = ''

                        if locked:
                            # user has manually locked this entry, don't touch it
                            locked_count += 1
                        else:
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {
                                "AuthorID": authorid,
                                "BookName": bookname,
                                "BookSub": book['sub'],
                                "BookDesc": book['desc'],
                                "BookIsbn": book['isbn'],
                                "BookPub": book['pub'],
                                "BookGenre": book['genre'],
                                "BookImg": book['img'],
                                "BookLink": book['link'],
                                "BookRate": float(book['rate']),
                                "BookPages": book['pages'],
                                "BookDate": book['date'],
                                "BookLang": booklang,
                                "Status": book_status,
                                "AudioStatus": audio_status,
                                "BookAdded": added,
                                "WorkID": '',
                                "ScanResult": reason
                            }

                            myDB.upsert("books", newValueDict, controlValueDict)
                            logger.debug("Book found: " + bookname + " " + book['date'])

                            if 'nocover' in book['img'] or 'nophoto' in book['img']:
                                # try to get a cover from another source
                                workcover, source = getBookCover(bookid)
                                if workcover:
                                    logger.debug('Updated cover for %s using %s' % (bookname, source))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": workcover}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            elif book['img'] and book['img'].startswith('http'):
                                # cache google's cover image locally
                                link, success, _ = cache_img("book", bookid, book['img'], refresh=refresh)
                                if success:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": link}
                                    myDB.upsert("books", newValueDict, controlValueDict)
                                else:
                                    logger.debug('Failed to cache image for %s' % book['img'])

                            serieslist = []
                            if book['series']:
                                serieslist = [('', book['seriesNum'],
                                               cleanName(unaccented(book['series']), '&/'))]
                            if lazylibrarian.CONFIG['ADD_SERIES']:
                                newserieslist = getWorkSeries(bookid)
                                if newserieslist:
                                    serieslist = newserieslist
                                    logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
                            setSeries(serieslist, bookid)

                            new_status = setStatus(bookid, serieslist, bookstatus)

                            if not new_status == book_status:
                                book_status = new_status

                            worklink = getWorkPage(bookid)
                            if worklink:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"WorkPage": worklink}
                                myDB.upsert("books", newValueDict, controlValueDict)

                            # NOTE(review): existing_book is initialised to None and never
                            # reassigned, so this always logs/counts "Added" — possibly
                            # meant to test 'existing' from the DB lookup above; confirm.
                            if not existing_book:
                                logger.debug("[%s] Added book: %s [%s] status %s" %
                                             (authorname, bookname, booklang, book_status))
                                added_count += 1
                            else:
                                logger.debug("[%s] Updated book: %s [%s] status %s" %
                                             (authorname, bookname, booklang, book_status))
                                updated_count += 1
        except KeyError:
            # a malformed API item aborts the paging loop silently
            pass

        deleteEmptySeries()
        logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' %
                     (authorname, api_hits, plural(api_hits)))
        # refresh the author's "last book" summary fields from the newest entry
        cmd = 'SELECT BookName, BookLink, BookDate, BookImg, BookID from books WHERE AuthorID=?'
        cmd += ' AND Status != "Ignored" order by BookDate DESC'
        lastbook = myDB.match(cmd, (authorid,))

        if lastbook:  # maybe there are no books [remaining] for this author
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
            lastbookid = lastbook['BookID']
            lastbookimg = lastbook['BookImg']
        else:
            lastbookname = ""
            lastbooklink = ""
            lastbookdate = ""
            lastbookid = ""
            lastbookimg = ""

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": entrystatus,
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate,
            "LastBookID": lastbookid,
            "LastBookImg": lastbookimg
        }
        myDB.upsert("authors", newValueDict, controlValueDict)

        resultcount = added_count + updated_count
        logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
        logger.debug("Found %s locked book%s" % (locked_count, plural(locked_count)))
        logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
        logger.debug("Removed %s incorrect/incomplete result%s" % (removedResults, plural(removedResults)))
        logger.debug("Removed %s duplicate result%s" % (duplicates, plural(duplicates)))
        logger.debug("Ignored %s book%s" % (book_ignore_count, plural(book_ignore_count)))
        logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount)))

        # record the run counters; doubled quotes escape " for sqlite
        myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                    (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits,
                     gb_lang_change, cache_hits, ignored, removedResults, not_cached, duplicates))

        if refresh:
            logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                        (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
        else:
            logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                        (authorname, added_count, plural(added_count)))

    except Exception:
        logger.error('Unhandled exception in GB.get_author_books: %s' % traceback.format_exc())
def search_magazines(mags=None, reset=False):
    """Search all enabled providers for wanted magazine issues.

    mags is a list of magazines to search ({'bookid': Title, ...}), or None
    for a backlog search of every Active magazine. reset restarts the
    scheduled job when done.

    Results from nzb, direct, torrent and rss providers are normalised to a
    common nzb-style dict, filtered by size/title/reject-word rules, their
    issue date or number parsed from the release title, and new issues are
    queued in the 'wanted' table and snatched; the rest go to 'pastissues'.
    """
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    # noinspection PyBroadException
    try:
        # rename auto-named worker threads for log readability
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if mags is None:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if mags is None:  # backlog search
            searchmags = myDB.select('SELECT Title, Regex, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"')
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select('SELECT Title, Regex, LastAcquired, IssueDate from magazines \
                                          WHERE Title=? AND Status="Active"', (magazine['bookid'],))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        logger.debug("Removing old magazine search results")
        myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']

            if not searchterm:
                # no user-supplied regex: derive a search term from the title
                dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                       ',': '', '*': ''}
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)

                searchterm = re.sub('[.\-/]', ' ', searchterm)
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm})

        if not searchlist:
            logger.warn('There is nothing to search for. Mark some magazines as active.')

        for book in searchlist:
            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn('No nzb providers are available. Check config and blocklist')
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow:
                        logger.warn('No direct providers are available. Check config and blocklist')
                        lazylibrarian.NO_DIRECT_MSG = timenow

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn('No tor providers are available. Check config and blocklist')
                        lazylibrarian.NO_TOR_MSG = timenow

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders = IterateOverRSSSites()
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn('No rss providers are available. Check config and blocklist')
                        lazylibrarian.NO_RSS_MSG = timenow

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        resultlist.append({
                            'bookid': book['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': item['tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                            'nzbsize': item['tor_size'],
                            'nzbmode': item['tor_type']
                        })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                # per-magazine counters for the summary message
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []   # issues we will actually download
                issues = []    # "bookid,datish" keys seen so far, to skip dupes
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(nzbsize_temp, 1000)  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)  # bytes -> MB
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    results = myDB.match('SELECT * from magazines WHERE Title=?', (bookid,))
                    if not results:
                        logger.debug('Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        # size limits (MB) from config, 0 disables the check
                        maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" % nzbtitle)
                                rejected = True

                        if not rejected:
                            # normalise the release title into space-separated words
                            dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': ''}
                            nzbtitle_formatted = replace_all(nzbtitle, dic).strip()
                            # Need to make sure that substrings of magazine titles don't get found
                            # (e.g. Maxim USA will find Maximum PC USA)
                            # remove extra spaces if they're in a row
                            if nzbtitle_formatted and nzbtitle_formatted[0] == '[' and nzbtitle_formatted[-1] == ']':
                                nzbtitle_formatted = nzbtitle_formatted[1:-1]
                            nzbtitle_exploded_temp = " ".join(nzbtitle_formatted.split())
                            nzbtitle_exploded = nzbtitle_exploded_temp.split(' ')

                            if ' ' in bookid:
                                bookid_exploded = bookid.split(' ')
                            else:
                                bookid_exploded = [bookid]

                            # check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015
                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = []
                                for word in nzbtitle_exploded:
                                    wlist.append(unaccented(word).lower())
                                for word in bookid_exploded:
                                    if unaccented(word).lower() not in wlist:
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug("Magazine title match failed " + bookid + " for " + nzbtitle_formatted)
                                else:
                                    logger.debug("Magazine matched " + bookid + " for " + nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" % len(nzbtitle_exploded))
                                rejected = True

                        if not rejected:
                            # skip urls that previously failed to download
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            # reject-word list: per-magazine plus global config
                            reject_list = getList(str(results['Reject']).lower())
                            reject_list += getList(lazylibrarian.CONFIG['REJECT_MAGS'])
                            lower_title = unaccented(nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Reject: %s' % str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                                for word in reject_list:
                                    # reject-word only counts if it isn't part of the magazine name itself
                                    if word in lower_title and word not in lower_bookid:
                                        rejected = True
                                        logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                                        break

                        regex_pass = 0
                        if not rejected:
                            # Magazine names have many different styles of date
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            # MonthName DD YYYY or MonthName DD, YYYY
                            # YYYY MM or YYYY MM DD
                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            # nn YYYY issue number without "Nr" before it
                            # issue and year as a single 6 digit string eg 222015
                            newdatish = "none"
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            pos = 0
                            while pos < len(nzbtitle_exploded):
                                year = check_year(nzbtitle_exploded[pos])
                                if year and pos:
                                    month = month2num(nzbtitle_exploded[pos - 1])
                                    if month:
                                        if pos - 1:
                                            day = check_int(nzbtitle_exploded[pos - 2], 1)
                                            if day > 31:  # probably issue number nn
                                                day = 1
                                        else:
                                            day = 1
                                        newdatish = "%04d-%02d-%02d" % (year, month, day)
                                        try:
                                            # validate it's a real calendar date
                                            _ = datetime.date(year, month, day)
                                            regex_pass = 1
                                            break
                                        except ValueError:
                                            regex_pass = 0
                                pos += 1

                            # MonthName DD YYYY or MonthName DD, YYYY
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and (pos - 1):
                                        month = month2num(nzbtitle_exploded[pos - 2])
                                        if month:
                                            day = check_int(nzbtitle_exploded[pos - 1].rstrip(','), 1)
                                            try:
                                                _ = datetime.date(year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (year, month, day)
                                                regex_pass = 2
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # YYYY MM or YYYY MM DD
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and pos + 1 < len(nzbtitle_exploded):
                                        month = check_int(nzbtitle_exploded[pos + 1], 0)
                                        if month:
                                            if pos + 2 < len(nzbtitle_exploded):
                                                day = check_int(nzbtitle_exploded[pos + 2], 1)
                                            else:
                                                day = 1
                                            try:
                                                _ = datetime.date(year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (year, month, day)
                                                regex_pass = 3
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    if nzbtitle_exploded[pos].lower() in ["issue", "no", "nr", "vol"]:
                                        if pos + 1 < len(nzbtitle_exploded):
                                            issue = check_int(nzbtitle_exploded[pos + 1], 0)
                                            if issue:
                                                newdatish = str(issue)  # 4 == 04 == 004
                                                if pos + 2 < len(nzbtitle_exploded):
                                                    year = check_year(nzbtitle_exploded[pos + 2])
                                                    if year and year < int(datetime.date.today().year):
                                                        newdatish = '0'  # it's old
                                                    regex_pass = 4  # Issue/No/Nr/Vol nn, YYYY
                                                else:
                                                    regex_pass = 5  # Issue/No/Nr/Vol nn
                                                break
                                    pos += 1

                            # nn YYYY issue number without "Nr" before it
                            if not regex_pass:
                                pos = 1
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year:
                                        issue = check_int(nzbtitle_exploded[pos - 1], 0)
                                        if issue:
                                            newdatish = str(issue)  # 4 == 04 == 004
                                            regex_pass = 6
                                            if year < int(datetime.date.today().year):
                                                newdatish = '0'  # it's old
                                            break
                                    pos += 1

                            # issue and year as a single 6 digit string eg 222015
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    issue = nzbtitle_exploded[pos]
                                    if issue.isdigit() and len(issue) == 6:
                                        year = int(issue[2:])
                                        issue = int(issue[:2])
                                        newdatish = str(issue)  # 4 == 04 == 004
                                        regex_pass = 7
                                        if year < int(datetime.date.today().year):
                                            newdatish = '0'  # it's old
                                        break
                                    pos += 1

                            if not regex_pass:
                                logger.debug('Magazine %s not in a recognised date format.' % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                regex_pass = 99

                        if rejected:
                            rejects += 1
                        else:
                            if lazylibrarian.LOGLEVEL > 2:
                                logger.debug("regex %s [%s] %s" % (regex_pass, nzbtitle_formatted, newdatish))
                            # wanted issues go into wanted table marked "Wanted"
                            # the rest into pastissues table marked "Skipped"
                            insert_table = "pastissues"
                            insert_status = "Skipped"

                            control_date = results['IssueDate']
                            if control_date is None:  # we haven't got any copies of this magazine yet
                                # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                                # could perhaps calc differently for weekly, biweekly etc
                                # or for magazines with only an issue number, use zero

                                if str(newdatish).isdigit():
                                    logger.debug('Magazine comparing issue numbers (%s)' % newdatish)
                                    control_date = 0
                                elif re.match('\d+-\d\d-\d\d', str(newdatish)):
                                    start_time = time.time()
                                    start_time -= int(lazylibrarian.CONFIG['MAG_AGE']) * 24 * 60 * 60  # number of seconds in days
                                    if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                        start_time = 0
                                    control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))
                                    logger.debug('Magazine date comparing to %s' % control_date)
                                else:
                                    logger.debug('Magazine unable to find comparison type [%s]' % newdatish)
                                    control_date = 0

                            # comp_date > 0 means "newer than what we have"
                            if str(control_date).isdigit() and str(newdatish).isdigit():
                                # for issue numbers, check if later than last one we have
                                comp_date = int(newdatish) - int(control_date)
                                newdatish = "%s" % newdatish
                                newdatish = newdatish.zfill(4)  # pad so we sort correctly
                            elif re.match('\d+-\d\d-\d\d', str(control_date)) and \
                                    re.match('\d+-\d\d-\d\d', str(newdatish)):
                                # only grab a copy if it's newer than the most recent we have,
                                # or newer than a month ago if we have none
                                comp_date = datecompare(newdatish, control_date)
                            else:
                                # invalid comparison of date and issue number
                                if re.match('\d+-\d\d-\d\d', str(control_date)):
                                    logger.debug('Magazine %s failed: Expecting a date' % nzbtitle_formatted)
                                else:
                                    logger.debug('Magazine %s failed: Expecting issue number' % nzbtitle_formatted)
                                bad_date += 1
                                newdatish = "1970-01-01"  # this is our fake date for ones we can't decipher
                                comp_date = 0

                            if comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + newdatish
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' % len(issues))
                                    if lazylibrarian.LOGLEVEL > 2:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    insert_status = "Wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug('This issue of %s is already flagged for download' % issue)
                            else:
                                if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                    logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                                    old_date += 1

                            # store only the _new_ matching results
                            # Don't add a new entry if this issue has been found on an earlier search
                            # and status has been user-set ( we only delete the "Skipped" ones )
                            # In "wanted" table it might be already snatched/downloading/processing
                            mag_entry = myDB.match('SELECT * from %s WHERE NZBtitle=? and NZBprov=?' % insert_table,
                                                   (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('%s is already in %s marked %s' %
                                                 (nzbtitle, insert_table, insert_status))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": newdatish,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict, controlValueDict)
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Added %s to %s marked %s' % (nzbtitle, insert_table, insert_status))

                msg = 'Found %i result%s for %s. %i new,' % (total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                # snatch everything we queued for this magazine
                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch = TORDownloadMethod(magazine['bookid'], magazine['nzbtitle'],
                                                   magazine['nzburl'], 'magazine')
                    else:
                        snatch = NZBDownloadMethod(magazine['bookid'], magazine['nzbtitle'],
                                                   magazine['nzburl'], 'magazine')
                    if snatch:
                        logger.info('Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"]))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now()))
                        custom_notify_snatch(magazine['bookid'])
                        scheduleJob(action='Start', target='processDir')

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc())
    finally:
        # always restore the shared thread name
        threading.currentThread().name = "WEBSERVER"
def findBestResult(resultlist, book, searchtype, source):
    """ Score collated search results against the wanted book and pick the best one.

        resultlist: collated results from search providers
        book:       the book we want to find (dict-like row: bookid, authorName, bookName, library, searchterm)
        searchtype: book, magazine, shortbook, audiobook etc.
        source:     nzb, tor, rss, direct
        return:     highest scoring match (list: [score, title, newValueDict, controlValueDict, priority]),
                    or None if no match
    """
    try:
        myDB = database.DBConnection()
        # replace_all() substitutions applied to each RESULT title before fuzzy matching;
        # digits and most punctuation are stripped so version/part numbers don't skew the match.
        # NOTE(review): the '\s\s' key is a literal two-character backslash-s pair, not a
        # whitespace regex — replace_all does plain string replacement; confirm intended.
        dictrepl = {
            '...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
            ' + ': ' ', '"': '', ',': ' ', '*': '', '(': '', ')': '', '[': '',
            ']': '', '#': '', '0': '', '1': '', '2': '', '3': '', '4': '',
            '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '',
            '!': '', '-': ' ', '\s\s': ' '
        }
        # milder substitutions applied to the WANTED author/title (keeps digits)
        dic = {
            '...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
            ' + ': ' ', '"': '', ',': '', '*': '', ':': '.', ';': '', '\'': ''
        }

        if source == 'rss':
            # rss entries carry their own search terms
            author, title = get_searchterm(book, searchtype)
        else:
            author = unaccented_str(replace_all(book['authorName'], dic))
            title = unaccented_str(replace_all(book['bookName'], dic))

        # pick the reject-word list and size limits for the library type we are matching
        if book['library'] == 'AudioBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXAUDIO'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINAUDIO'], 0)
            auxinfo = 'AudioBook'
        else:  # elif book['library'] == 'eBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXSIZE'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINSIZE'], 0)
            auxinfo = 'eBook'

        # result dicts are keyed 'nzbtitle'/'nzburl'/... or 'tor_title'/'tor_url'/...
        if source == 'nzb':
            prefix = 'nzb'
        else:  # rss and libgen return same names as torrents
            prefix = 'tor_'

        logger.debug('Searching %s %s results for best %s match' % (len(resultlist), source, auxinfo))

        matches = []
        for res in resultlist:
            resultTitle = unaccented_str(replace_all(res[prefix + 'title'], dictrepl)).strip()
            resultTitle = re.sub(r"\s\s+", " ", resultTitle)  # remove extra whitespace
            Author_match = fuzz.token_set_ratio(author, resultTitle)
            Book_match = fuzz.token_set_ratio(title, resultTitle)
            logger.debug("%s author/book Match: %s/%s %s at %s" %
                         (source.upper(), Author_match, Book_match, resultTitle, res[prefix + 'prov']))

            rejected = False

            url = res[prefix + 'url']
            if url is None:
                rejected = True
                logger.debug("Rejecting %s, no URL found" % resultTitle)

            if not rejected:
                # skip anything we previously snatched and that failed to download
                already_failed = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (url,))
                if already_failed:
                    logger.debug("Rejecting %s, blacklisted at %s" % (resultTitle, already_failed['NZBprov']))
                    rejected = True

            if not rejected and not url.startswith('http') and not url.startswith('magnet'):
                rejected = True
                logger.debug("Rejecting %s, invalid URL [%s]" % (resultTitle, url))

            if not rejected:
                # reject words only count if they are not part of the wanted author/title
                for word in reject_list:
                    if word in getList(resultTitle.lower()) and word not in getList(author.lower()) \
                            and word not in getList(title.lower()):
                        rejected = True
                        logger.debug("Rejecting %s, contains %s" % (resultTitle, word))
                        break

            size_temp = check_int(res[prefix + 'size'], 1000)  # Need to cater for when this is NONE (Issue 35)
            size = round(float(size_temp) / 1048576, 2)  # bytes -> megabytes

            if not rejected and maxsize and size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % resultTitle)

            if not rejected and minsize and size < minsize:
                rejected = True
                logger.debug("Rejecting %s, too small" % resultTitle)

            if not rejected:
                bookid = book['bookid']
                # download name carries the bookid so postprocessing can identify it
                newTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()

                if source == 'nzb':
                    mode = res['nzbmode']  # nzb, torznab
                else:
                    mode = res['tor_type']  # torrent, magnet, nzb(from rss), direct

                controlValueDict = {"NZBurl": url}
                newValueDict = {
                    "NZBprov": res[prefix + 'prov'],
                    "BookID": bookid,
                    "NZBdate": now(),  # when we asked for it
                    "NZBsize": size,
                    "NZBtitle": newTitle,
                    "NZBmode": mode,
                    "AuxInfo": auxinfo,
                    "Status": "Skipped"
                }

                score = (Book_match + Author_match) / 2  # as a percentage
                # lose a point for each unwanted word in the title so we get the closest match
                # but for RSS ignore anything at the end in square braces [keywords, genres etc]
                if source == 'rss':
                    wordlist = getList(resultTitle.rsplit('[', 1)[0].lower())
                else:
                    wordlist = getList(resultTitle.lower())
                words = [x for x in wordlist if x not in getList(author.lower())]
                words = [x for x in words if x not in getList(title.lower())]
                typelist = ''

                if newValueDict['AuxInfo'] == 'eBook':
                    words = [x for x in words if x not in getList(lazylibrarian.CONFIG['EBOOK_TYPE'])]
                    typelist = getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
                elif newValueDict['AuxInfo'] == 'AudioBook':
                    words = [x for x in words if x not in getList(lazylibrarian.CONFIG['AUDIOBOOK_TYPE'])]
                    typelist = getList(lazylibrarian.CONFIG['AUDIOBOOK_TYPE'])
                score -= len(words)

                # prioritise titles that include the ebook types we want
                # add more points for booktypes nearer the left in the list
                # eg if epub, mobi, pdf add 3 points if epub found, 2 for mobi, 1 for pdf
                booktypes = [x for x in wordlist if x in typelist]
                if booktypes:
                    typelist = list(reversed(typelist))
                    for item in booktypes:
                        for i in [i for i, x in enumerate(typelist) if x == item]:
                            score += i + 1
                # score += len(booktypes)
                matches.append([score, resultTitle, newValueDict, controlValueDict, res['priority']])

        if matches:
            # best score first; provider priority breaks ties
            highest = max(matches, key=lambda s: (s[0], s[4]))
            score = highest[0]
            resultTitle = highest[1]
            newValueDict = highest[2]
            # controlValueDict = highest[3]
            dlpriority = highest[4]

            if score < int(lazylibrarian.CONFIG['MATCH_RATIO']):
                # below the configured match threshold; caller decides what to do with it
                logger.info('Nearest match (%s%%): %s using %s search for %s %s' %
                            (score, resultTitle, searchtype, book['authorName'], book['bookName']))
            else:
                logger.info('Best match (%s%%): %s using %s search, %s priority %s' %
                            (score, resultTitle, searchtype, newValueDict['NZBprov'], dlpriority))
            return highest
        else:
            logger.debug("No %s found for [%s] using searchtype %s" % (source, book["searchterm"], searchtype))
        return None
    except Exception:
        # top-level guard: log full traceback, implicitly returns None
        logger.error('Unhandled exception in findBestResult: %s' % traceback.format_exc())
def _sendPushover(message=None, event=None, pushover_apitoken=None, pushover_keys=None,
                  pushover_device=None, notificationType=None, method=None, force=False):
    """Send a notification to Pushover, or test the Pushover credentials.

    message:          notification body text
    event:            notification title line
    pushover_apitoken/pushover_keys/pushover_device:
                      override the configured values when supplied
    notificationType: None means "test authentication and list devices"
                      instead of sending a message
    method:           HTTP method, defaults to 'POST'
    force:            send even when USE_PUSHOVER is disabled (test button)
    Returns True on success, False on failure; for a test request returns
    the response body (or a "Devices: ..." summary string).
    """
    # suppress notifications if the notifier is disabled but the notify options are checked
    if not lazylibrarian.CONFIG['USE_PUSHOVER'] and not force:
        return False

    if pushover_apitoken is None:
        pushover_apitoken = lazylibrarian.CONFIG['PUSHOVER_APITOKEN']
    if pushover_keys is None:
        pushover_keys = lazylibrarian.CONFIG['PUSHOVER_KEYS']
    if pushover_device is None:
        pushover_device = lazylibrarian.CONFIG['PUSHOVER_DEVICE']
    if method is None:
        method = 'POST'

    if notificationType is None:
        testMessage = True
        uri = "/1/users/validate.json"
        logger.debug("Testing Pushover authentication and retrieving the device list.")
    else:
        testMessage = False
        uri = "/1/messages.json"

    logger.debug("Pushover event: " + str(event))
    logger.debug("Pushover message: " + str(message))
    logger.debug("Pushover api: " + str(pushover_apitoken))
    logger.debug("Pushover keys: " + str(pushover_keys))
    logger.debug("Pushover device: " + str(pushover_device))
    logger.debug("Pushover notification type: " + str(notificationType))

    http_handler = HTTPSConnection('api.pushover.net')

    if PY2:
        message = message.encode(lazylibrarian.SYS_ENCODING)
        event = event.encode(lazylibrarian.SYS_ENCODING)
    try:
        data = {
            'token': pushover_apitoken,
            'user': pushover_keys,
            'title': event,
            'message': message,
            'device': pushover_device,
            'priority': lazylibrarian.CONFIG['PUSHOVER_PRIORITY']
        }
        http_handler.request(
            method,
            uri,
            headers={'Content-type': "application/x-www-form-urlencoded"},
            body=urlencode(data))
        # getresponse() can also fail on transport errors, so keep it inside the try
        response = http_handler.getresponse()
    except Exception as e:
        logger.error(str(e))
        return False

    request_body = response.read()
    request_status = response.status
    # http.client returns bytes on Python 3; the substring checks and
    # split() below need str, so decode (py2 str is already bytes-like)
    if not PY2 and isinstance(request_body, bytes):
        request_body = request_body.decode('utf-8', 'replace')
    logger.debug("Pushover Response: %s" % request_status)
    logger.debug("Pushover Reason: %s" % response.reason)

    if request_status == 200:
        if testMessage:
            logger.debug(request_body)
            if 'devices' in request_body:
                # extract the device list from the json response for display
                return "Devices: %s" % request_body.split('[')[1].split(']')[0]
            else:
                return request_body
        else:
            return True
    elif 400 <= request_status < 500:
        logger.error("Pushover request failed: %s" % str(request_body))
        return False
    else:
        logger.error("Pushover notification failed: %s" % request_status)
        return False
def _get_auth():
    """Authenticate with the Deluge WebUI and make sure it is connected to a daemon.

    Sets the module globals delugeweb_auth (session cookies) and delugeweb_url.
    Returns the auth.login result on success, None on any failure (and clears
    delugeweb_auth so a later retry starts from scratch).
    """
    logger.debug('Deluge: Authenticating...')
    global delugeweb_auth, delugeweb_url, headers
    delugeweb_auth = {}

    delugeweb_host = lazylibrarian.CONFIG['DELUGE_HOST']
    delugeweb_url_base = lazylibrarian.CONFIG['DELUGE_URL_BASE']
    delugeweb_port = check_int(lazylibrarian.CONFIG['DELUGE_PORT'], 0)
    if not delugeweb_host or not delugeweb_port:
        logger.error('Invalid delugeweb host or port, check your config')
        return None

    delugeweb_password = lazylibrarian.CONFIG['DELUGE_PASS']

    # normalise host/base into a single http(s)://host:port/base/json endpoint
    if not delugeweb_host.startswith("http://") and not delugeweb_host.startswith("https://"):
        delugeweb_host = 'http://%s' % delugeweb_host
    if delugeweb_host.endswith('/'):
        delugeweb_host = delugeweb_host[:-1]
    if delugeweb_url_base.endswith('/'):
        delugeweb_url_base = delugeweb_url_base[:-1]
    delugeweb_host = "%s:%s" % (delugeweb_host, delugeweb_port)
    delugeweb_url = delugeweb_host + delugeweb_url_base + '/json'

    def _rpc(method, params, req_id):
        """POST one JSON-RPC call to the WebUI; return the response, or None on transport error."""
        post_data = json.dumps({"method": method, "params": params, "id": req_id})
        if PY2:
            post_data = post_data.encode(lazylibrarian.SYS_ENCODING)
        try:
            return requests.post(delugeweb_url, data=post_data, cookies=delugeweb_auth,
                                 headers=headers)  # , verify=TORRENT_VERIFY_CERT)
        except Exception as err:
            logger.debug('Deluge %s: %s returned %s' % (type(err).__name__, method, str(err)))
            return None

    # log in; the WebUI returns result=False for a bad password
    response = _rpc("auth.login", [delugeweb_password], 1)
    if response is None:
        delugeweb_auth = {}
        return None
    auth = json.loads(response.text)["result"]
    if auth is False:
        logger.debug('Deluge: auth.login returned False')
        delugeweb_auth = {}
        return None
    delugeweb_auth = response.cookies

    # is the WebUI already connected to a daemon?
    response = _rpc("web.connected", [], 10)
    if response is None:
        delugeweb_auth = {}
        return None
    connected = json.loads(response.text)['result']

    if not connected:
        # not connected: pick the first known daemon host and connect to it
        response = _rpc("web.get_hosts", [], 11)
        if response is None:
            delugeweb_auth = {}
            return None
        delugeweb_hosts = json.loads(response.text)['result']
        if len(delugeweb_hosts) == 0:
            logger.error('Deluge: WebUI does not contain daemons')
            delugeweb_auth = {}
            return None

        if _rpc("web.connect", [delugeweb_hosts[0][0]], 11) is None:
            delugeweb_auth = {}
            return None

        # re-check that the connect attempt actually worked
        response = _rpc("web.connected", [], 10)
        if response is None:
            delugeweb_auth = {}
            return None
        connected = json.loads(response.text)['result']
        if not connected:
            logger.error('Deluge: WebUI could not connect to daemon')
            delugeweb_auth = {}
            return None

    return auth
def processDir():
    """Post-process snatched downloads found in the download directory.

    Matches folders in DOWNLOAD_DIR against wanted items with Status="Snatched",
    moves/copies the book to its destination, writes image/opf metadata, and
    updates the wanted/books/authors tables. No return value; progress is logged.
    """
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"
    processpath = lazylibrarian.DOWNLOAD_DIR
    logger.debug(' Checking [%s] for files to post process' % processpath)

    #TODO - try exception on os.listdir - it throws debug level
    #exception if dir doesn't exist - bloody hard to catch
    try:
        downloads = os.listdir(processpath)
    except OSError:
        logger.error('Could not access [%s] directory ' % processpath)
        # previously fell through with 'downloads' unbound, raising NameError
        # below; None triggers the "No downloads are found" branch instead
        downloads = None

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if snatched is None:
        logger.info('No books are snatched. Nothing to process.')
    elif downloads is None:
        logger.info('No downloads are found. Nothing to process.')
    else:
        ppcount = 0
        for book in snatched:
            if book['NZBtitle'] in downloads:
                pp_path = os.path.join(processpath, book['NZBtitle'])
                logger.info('Found folder %s.' % pp_path)

                # parameterized to avoid SQL injection via titles/ids pulled from providers
                data = myDB.select('SELECT * from books WHERE BookID=?', (book['BookID'],))
                for metadata in data:
                    authorname = metadata['AuthorName']
                    authorimg = metadata['AuthorLink']
                    bookname = metadata['BookName']
                    bookdesc = metadata['BookDesc']
                    bookisbn = metadata['BookIsbn']
                    bookrate = metadata['BookRate']
                    bookimg = metadata['BookImg']
                    bookpage = metadata['BookPages']
                    booklink = metadata['BookLink']
                    bookdate = metadata['BookDate']
                    booklang = metadata['BookLang']
                    bookpub = metadata['BookPub']

                    dest_path = authorname + '/' + bookname
                    # strip characters that are illegal or awkward in file paths
                    dic = {'<': '', '>': '', '=': '', '?': '', '"': '', ',': '', '*': '', ':': '', ';': ''}
                    dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
                    dest_path = os.path.join(lazylibrarian.DESTINATION_DIR,
                                             dest_path).encode(lazylibrarian.SYS_ENCODING)

                    processBook = processDestination(pp_path, dest_path, authorname, bookname)

                    if processBook:
                        ppcount = ppcount + 1
                        # If you use auto add by Calibre you need the book in a single directory, not nested
                        #So take the file you Copied/Moved to Dest_path and copy it to a Calibre auto add folder.
                        processAutoAdd(dest_path)
                        # try image
                        processIMG(dest_path, bookimg)
                        # try metadata
                        processOPF(dest_path, authorname, bookname, bookisbn, book['BookID'],
                                   bookpub, bookdate, bookdesc, booklang)

                        #update nzbs
                        controlValueDict = {"NZBurl": book['NZBurl']}
                        newValueDict = {"Status": "Success"}
                        myDB.upsert("wanted", newValueDict, controlValueDict)

                        #update books
                        controlValueDict = {"BookID": book['BookID']}
                        newValueDict = {"Status": "Have"}
                        myDB.upsert("books", newValueDict, controlValueDict)

                        #update authors
                        query = 'SELECT COUNT(*) FROM books WHERE AuthorName=? AND Status="Have"'
                        countbooks = myDB.action(query, (authorname,)).fetchone()
                        havebooks = int(countbooks[0])
                        controlValueDict = {"AuthorName": authorname}
                        newValueDict = {"HaveBooks": havebooks}
                        myDB.upsert("authors", newValueDict, controlValueDict)

                        logger.info('Successfully processed: %s - %s' % (authorname, bookname))
                    else:
                        logger.error('Postprocessing for %s has failed. Warning - AutoAdd will be repeated'
                                     % bookname)
        if ppcount:
            logger.info('%s books are downloaded and processed.' % ppcount)

    logger.debug(' - Completed all snatched/downloaded files')
def magazineScan(title=None):
    """Scan the magazine destination folder and sync its contents with the database.

    title: when given, only the folder holding that magazine's latest cover is
           scanned; otherwise the whole MAG_DEST_FOLDER tree is walked.
    With FULL_SCAN enabled (and no single title), stale issue rows and
    magazines with no remaining issues are removed first.
    Sets lazylibrarian.MAG_UPDATE as a busy flag for the duration of the scan.
    """
    lazylibrarian.MAG_UPDATE = 1
    onetitle = title
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        # MAG_DEST_FOLDER may contain $-substitution tokens; keep only the fixed prefix
        mag_path = lazylibrarian.CONFIG['MAG_DEST_FOLDER']
        mag_path = mag_path.split('$')[0]

        if lazylibrarian.CONFIG['MAG_RELATIVE']:
            mag_path = os.path.join(lazylibrarian.DIRECTORY('eBook'), mag_path)
        if PY2:
            mag_path = mag_path.encode(lazylibrarian.SYS_ENCODING)

        if lazylibrarian.CONFIG['FULL_SCAN'] and not onetitle:
            mags = myDB.select('select * from Issues')
            # check all the issues are still there, delete entry if not
            for mag in mags:
                title = mag['Title']
                issuedate = mag['IssueDate']
                issuefile = mag['IssueFile']

                if issuefile and not os.path.isfile(issuefile):
                    myDB.action('DELETE from Issues where issuefile=?', (issuefile,))
                    logger.info('Issue %s - %s deleted as not found on disk' % (title, issuedate))
                    controlValueDict = {"Title": title}
                    newValueDict = {
                        "LastAcquired": None,  # clear magazine dates
                        "IssueDate": None,  # we will fill them in again later
                        "LatestCover": None,
                        "IssueStatus": "Skipped"  # assume there are no issues now
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    logger.debug('Magazine %s details reset' % title)

            mags = myDB.select('SELECT * from magazines')
            # now check the magazine titles and delete any with no issues
            for mag in mags:
                title = mag['Title']
                count = myDB.select('SELECT COUNT(Title) as counter FROM issues WHERE Title=?', (title,))
                issues = count[0]['counter']
                if not issues:
                    logger.debug('Magazine %s deleted as no issues found' % title)
                    myDB.action('DELETE from magazines WHERE Title=?', (title,))

        if onetitle:
            # single-title rescan: use the directory of that magazine's latest cover
            match = myDB.match('SELECT LatestCover from magazines where Title=?', (onetitle,))
            if match:
                mag_path = os.path.dirname(match['LatestCover'])
        logger.info(' Checking [%s] for magazines' % mag_path)

        # backslash-escape every character of MAG_DEST_FILE so it can be used literally
        matchString = ''
        for char in lazylibrarian.CONFIG['MAG_DEST_FILE']:
            matchString = matchString + '\\' + char
        # massage the MAG_DEST_FILE config parameter into something we can use
        # with regular expression matching
        booktypes = ''
        count = -1
        booktype_list = getList(lazylibrarian.CONFIG['MAG_TYPE'])
        for book_type in booktype_list:
            count += 1
            if count == 0:
                booktypes = book_type
            else:
                booktypes = booktypes + '|' + book_type
        # NOTE(review): '|'-joined booktypes are placed inside a [...] character class,
        # which matches single characters rather than whole extensions — confirm intended
        match = matchString.replace("\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "(?P<title>.*?)") + '\.[' + booktypes + ']'
        title_pattern = re.compile(match, re.VERBOSE)
        match = matchString.replace("\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "") + '\.[' + booktypes + ']'
        date_pattern = re.compile(match, re.VERBOSE)

        # try to ensure startdir is str as os.walk can fail if it tries to convert a subdir or file
        # to utf-8 and fails (eg scandinavian characters in ascii 8bit)
        for rootdir, dirnames, filenames in os.walk(makeBytestr(mag_path)):
            rootdir = makeUnicode(rootdir)
            filenames = [makeUnicode(item) for item in filenames]
            for fname in filenames:
                # maybe not all magazines will be pdf?
                if is_valid_booktype(fname, booktype='mag'):
                    issuedate = ''
                    # first try the full pattern with both $Title and $IssueDate groups
                    # noinspection PyBroadException
                    try:
                        match = title_pattern.match(fname)
                        if match:
                            title = match.group("title")
                            issuedate = match.group("issuedate")
                            if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                                logger.debug("Title pattern [%s][%s]" % (title, issuedate))
                            match = True
                        else:
                            logger.debug("Title pattern match failed for [%s]" % fname)
                    except Exception:
                        match = False

                    if not match:
                        # fall back to date-only pattern, taking the title from the folder name
                        # noinspection PyBroadException
                        try:
                            match = date_pattern.match(fname)
                            if match:
                                issuedate = match.group("issuedate")
                                title = os.path.basename(rootdir)
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                                    logger.debug("Date pattern [%s][%s]" % (title, issuedate))
                                match = True
                            else:
                                logger.debug("Date pattern match failed for [%s]" % fname)
                        except Exception:
                            match = False

                    if not match:
                        title = os.path.basename(rootdir)
                        issuedate = ''

                    # separators normalised to spaces before trying to parse a date out of the name
                    dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '',
                           '[': ' ', ']': ' ', '#': '# '}

                    if issuedate:
                        exploded = replace_all(issuedate, dic).strip()
                        # remove extra spaces if they're in a row
                        exploded = " ".join(exploded.split())
                        exploded = exploded.split(' ')
                        regex_pass, issuedate, year = lazylibrarian.searchmag.get_issue_date(exploded)
                        if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                            logger.debug("Date regex [%s][%s][%s]" % (regex_pass, issuedate, year))
                        if not regex_pass:
                            issuedate = ''

                    if not issuedate:
                        # no usable date from the pattern group, try the whole filename
                        exploded = replace_all(fname, dic).strip()
                        exploded = " ".join(exploded.split())
                        exploded = exploded.split(' ')
                        regex_pass, issuedate, year = lazylibrarian.searchmag.get_issue_date(exploded)
                        if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                            logger.debug("File regex [%s][%s][%s]" % (regex_pass, issuedate, year))
                        if not regex_pass:
                            issuedate = ''

                    if not issuedate:
                        logger.warn("Invalid name format for [%s]" % fname)
                        continue

                    issuefile = os.path.join(rootdir, fname)  # full path to issue.pdf
                    mtime = os.path.getmtime(issuefile)
                    iss_acquired = datetime.date.isoformat(datetime.date.fromtimestamp(mtime))

                    if lazylibrarian.CONFIG['MAG_RENAME']:
                        # build a human-readable issue label from the numeric issuedate
                        # (8 digits = year+issue or vol+iss, 12 digits = year+vol+iss)
                        filedate = issuedate
                        if issuedate and issuedate.isdigit():
                            if len(issuedate) == 8:
                                if check_year(issuedate[:4]):
                                    filedate = 'Issue %d %s' % (int(issuedate[4:]), issuedate[:4])
                                else:
                                    filedate = 'Vol %d Iss %d' % (int(issuedate[:4]), int(issuedate[4:]))
                            elif len(issuedate) == 12:
                                filedate = 'Vol %d Iss %d %s' % (int(issuedate[4:8]), int(issuedate[8:]),
                                                                 issuedate[:4])
                            else:
                                filedate = str(issuedate).zfill(4)

                        extn = os.path.splitext(fname)[1]
                        newfname = lazylibrarian.CONFIG['MAG_DEST_FILE'].replace('$Title', title).replace(
                            '$IssueDate', filedate)
                        newfname = newfname + extn
                        if newfname and newfname != fname:
                            logger.debug("Rename %s -> %s" % (fname, newfname))
                            newissuefile = os.path.join(rootdir, newfname)
                            newissuefile = safe_move(issuefile, newissuefile)
                            # move any sidecar cover/metadata files along with the issue
                            if os.path.exists(issuefile.replace(extn, '.jpg')):
                                safe_move(issuefile.replace(extn, '.jpg'), newissuefile.replace(extn, '.jpg'))
                            if os.path.exists(issuefile.replace(extn, '.opf')):
                                safe_move(issuefile.replace(extn, '.opf'), newissuefile.replace(extn, '.opf'))
                            issuefile = newissuefile

                    logger.debug("Found %s Issue %s" % (title, issuedate))
                    controlValueDict = {"Title": title}

                    # is this magazine already in the database?
                    mag_entry = myDB.match(
                        'SELECT LastAcquired, IssueDate, MagazineAdded from magazines WHERE Title=?', (title,))
                    if not mag_entry:
                        # need to add a new magazine to the database
                        newValueDict = {
                            "Reject": None,
                            "Status": "Active",
                            "MagazineAdded": None,
                            "LastAcquired": None,
                            "LatestCover": None,
                            "IssueDate": None,
                            "IssueStatus": "Skipped",
                            "Regex": None
                        }
                        logger.debug("Adding magazine %s" % title)
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                        magissuedate = None
                        magazineadded = None
                        maglastacquired = None
                    else:
                        maglastacquired = mag_entry['LastAcquired']
                        magissuedate = mag_entry['IssueDate']
                        magazineadded = mag_entry['MagazineAdded']
                        magissuedate = str(magissuedate).zfill(4)

                    issuedate = str(issuedate).zfill(4)  # for sorting issue numbers

                    # is this issue already in the database?
                    issue_id = create_id("%s %s" % (title, issuedate))
                    iss_entry = myDB.match('SELECT Title,IssueFile from issues WHERE Title=? and IssueDate=?',
                                           (title, issuedate))
                    new_opf = False
                    if not iss_entry or iss_entry['IssueFile'] != issuefile:
                        new_opf = True  # new entry or name changed
                        if not iss_entry:
                            logger.debug("Adding issue %s %s" % (title, issuedate))
                        else:
                            logger.debug("Updating issue %s %s" % (title, issuedate))
                        controlValueDict = {"Title": title, "IssueDate": issuedate}
                        newValueDict = {
                            "IssueAcquired": iss_acquired,
                            "IssueID": issue_id,
                            "IssueFile": issuefile
                        }
                        myDB.upsert("Issues", newValueDict, controlValueDict)

                    # touch a marker file so other scanners leave this directory alone
                    ignorefile = os.path.join(os.path.dirname(issuefile), '.ll_ignore')
                    with open(ignorefile, 'a'):
                        os.utime(ignorefile, None)

                    createMagCover(issuefile)
                    lazylibrarian.postprocess.processMAGOPF(issuefile, title, issuedate, issue_id,
                                                            overwrite=new_opf)

                    # see if this issues date values are useful
                    controlValueDict = {"Title": title}
                    if not mag_entry:
                        # new magazine, this is the only issue
                        newValueDict = {
                            "MagazineAdded": iss_acquired,
                            "LastAcquired": iss_acquired,
                            "LatestCover": os.path.splitext(issuefile)[0] + '.jpg',
                            "IssueDate": issuedate,
                            "IssueStatus": "Open"
                        }
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                    else:
                        # Set magazine_issuedate to issuedate of most recent issue we have
                        # Set latestcover to most recent issue cover
                        # Set magazine_added to acquired date of earliest issue we have
                        # Set magazine_lastacquired to acquired date of most recent issue we have
                        # acquired dates are read from magazine file timestamps
                        newValueDict = {"IssueStatus": "Open"}
                        if not magazineadded or iss_acquired < magazineadded:
                            newValueDict["MagazineAdded"] = iss_acquired
                        if not maglastacquired or iss_acquired > maglastacquired:
                            newValueDict["LastAcquired"] = iss_acquired
                        if not magissuedate or issuedate >= magissuedate:
                            newValueDict["IssueDate"] = issuedate
                            newValueDict["LatestCover"] = os.path.splitext(issuefile)[0] + '.jpg'
                        myDB.upsert("magazines", newValueDict, controlValueDict)

        if lazylibrarian.CONFIG['FULL_SCAN'] and not onetitle:
            magcount = myDB.match("select count(*) from magazines")
            isscount = myDB.match("select count(*) from issues")
            logger.info("Magazine scan complete, found %s magazine%s, %s issue%s" %
                        (magcount['count(*)'], plural(magcount['count(*)']),
                         isscount['count(*)'], plural(isscount['count(*)'])))
        else:
            logger.info("Magazine scan complete")
        lazylibrarian.MAG_UPDATE = 0

    except Exception:
        # always clear the busy flag, even on error
        lazylibrarian.MAG_UPDATE = 0
        logger.error('Unhandled exception in magazineScan: %s' % traceback.format_exc())