def getProgress(hashid):
    """Look up download progress for a torrent in qBittorrent.

    Returns (progress_percent, state) for the matching hash,
    (-1, '') if the hash is unknown, or False if login failed
    (note the inconsistent failure type — callers must handle both).
    """
    logger.debug('getProgress(%s)' % hashid)
    hashid = hashid.lower()
    client = qbittorrentclient()
    if not len(client.cookiejar):
        logger.debug("Failed to login to qBittorrent")
        return False
    # noinspection PyProtectedMember
    torrents = client._get_list()
    for torrent in torrents or []:
        if torrent['hash'].lower() != hashid:
            continue
        state = torrent['state'] if 'state' in torrent else ''
        progress = 0
        if 'progress' in torrent:
            try:
                progress = int(100 * float(torrent['progress']))
            except ValueError:
                progress = 0
        return progress, state
    return -1, ''
def SABnzbd(title=None, nzburl=None):
    """Queue an NZB url with SABnzbd via its web API.

    Returns True if SABnzbd accepted the NZB, False on any failure.
    'title' is accepted for interface compatibility but not used here.
    """
    # Changes https to http
    HOST = lazylibrarian.SAB_HOST + ":" + lazylibrarian.SAB_PORT
    if not str(HOST)[:4] == "http":
        HOST = "http://" + HOST

    params = {}
    # Login for user
    params["mode"] = "addurl"
    params["name"] = nzburl
    # Checks that all are defined and nothing is missing
    if lazylibrarian.SAB_USER:
        params["ma_username"] = lazylibrarian.SAB_USER
    if lazylibrarian.SAB_PASS:
        params["ma_password"] = lazylibrarian.SAB_PASS
    if lazylibrarian.SAB_API:
        params["apikey"] = lazylibrarian.SAB_API
    if lazylibrarian.SAB_CAT:
        params["cat"] = lazylibrarian.SAB_CAT
    if lazylibrarian.USENET_RETENTION:
        params["maxage"] = lazylibrarian.USENET_RETENTION

    ## FUTURE-CODE
    # if lazylibrarian.SAB_PRIO:
    #     params["priority"] = lazylibrarian.SAB_PRIO
    # if lazylibrarian.SAB_PP:
    #     params["script"] = lazylibrarian.SAB_SCRIPT

    # Encodes parameters
    URL = HOST + "/api?" + urllib.parse.urlencode(params)

    # to debug because of api
    logger.debug('Request url for <a href="%s">SABnzbd</a>' % URL)
    try:
        request = urllib.request.urlopen(URL)
    except (EOFError, IOError) as e:
        # BUGFIX: previously logged undefined name 'url' (NameError in the handler)
        logger.error("Unable to connect to SAB with URL: %s" % URL)
        return False
    except httplib.InvalidURL as e:
        logger.error("Invalid SAB host, check your config. Current host: %s" % HOST)
        return False

    result = request.read().strip()
    if not result:
        # BUGFIX: previously called undefined 'log.error'
        logger.error("SABnzbd didn't return anything.")
        return False

    logger.debug("Result text from SAB: " + result)
    if result == "ok":
        logger.info("NZB sent to SAB successfully.")
        return True
    elif result == "Missing authentication":
        logger.error("Incorrect username/password.")
        return False
    else:
        logger.error("Unknown error: " + result)
        return False
def addKeyword(self, type=None, title=None, frequency=None, **args):
    """Add a magazine search entry and start background searches.

    Only acts when type == 'magazine': redirects back to the config
    page if no title was given, otherwise upserts the magazine as
    Active/Wanted, launches NZB/torrent search threads, and redirects
    to the magazines page.
    """
    myDB = database.DBConnection()
    if type == 'magazine':
        if len(title) == 0:
            raise cherrypy.HTTPRedirect("config")
        # Create or refresh the magazine entry
        controlValueDict = {"Title": title}
        newValueDict = {
            "Frequency": frequency,
            "Regex": None,
            "Status": "Active",
            "MagazineAdded": formatter.today(),
            "IssueStatus": "Wanted"
        }
        myDB.upsert("magazines", newValueDict, controlValueDict)
        mags = [{"bookid": title}]
        books = False
        if lazylibrarian.USE_NZB:
            threading.Thread(target=search_nzb_book, args=[books, mags]).start()
        if lazylibrarian.USE_TOR:
            threading.Thread(target=search_tor_book, args=[books, mags]).start()
        logger.debug("Searching for magazine with title: " + str(title))
        raise cherrypy.HTTPRedirect("magazines")
def cache_img(img_type, img_ID, img_url, refresh=False):
    """
    Cache the image from the given filename or URL in the local images cache
    linked to the id, return the link to the cached file, success, was_in_cache
    or error message, False, False if failed to cache
    """
    if img_type not in ['book', 'author', 'magazine']:
        logger.error('Internal error in cache_img, img_type = [%s]' % img_type)
        img_type = 'book'

    cachefile = os.path.join(lazylibrarian.CACHEDIR, img_type, img_ID + '.jpg')
    link = 'cache/%s/%s.jpg' % (img_type, img_ID)

    # Reuse an existing cached copy unless a refresh was requested
    if not refresh and os.path.isfile(cachefile):
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_cache:
            logger.debug("Cached %s image exists %s" % (img_type, cachefile))
        return link, True, True

    if not img_url.startswith('http'):
        # Local file source: copy straight into the cache
        try:
            shutil.copyfile(img_url, cachefile)
        except Exception as e:
            logger.error("%s copying image to %s, %s" % (type(e).__name__, cachefile, str(e)))
            return str(e), False, False
        return link, True, True

    # Remote source: download then write to the cache
    result, success = fetchURL(img_url, raw=True)
    if not success:
        return result, False, False
    try:
        with open(cachefile, 'wb') as img:
            img.write(result)
    except Exception as e:
        logger.error("%s writing image to %s, %s" % (type(e).__name__, cachefile, str(e)))
        return str(e), False, False
    return link, True, False
def NewzNabPlus(book=None, host=None, api_key=None, searchType=None, searchMode=None):
    """Query a newznab-compatible provider and parse the XML response.

    Builds the query parameters for the given searchType, fetches the
    feed via a caching/throttled urllib2 opener, and parses it with
    ElementTree into 'data' (None on any fetch/parse failure).
    NOTE(review): 'results' is initialised but never used in this view;
    the function appears to continue beyond the visible span.
    """
    # logger.info('[NewzNabPlus] Searching term [%s] for author [%s] and title [%s] on host [%s] for a [%s] item' % (book['searchterm'], book['authorName'], book['bookName'], host, searchType))
    logger.debug('[NewzNabPlus] searchType [%s] with Host [%s] mode [%s] using api [%s] for item [%s]' %
                 (searchType, host, searchMode, api_key, str(book)))
    results = []

    params = ReturnSearchTypeStructure(api_key, book, searchType, searchMode)

    # Ensure the host has a scheme before building the request URL
    if not str(host)[:4] == "http":
        host = 'http://' + host

    URL = host + '/api?' + urllib.urlencode(params)

    try:
        request = urllib2.Request(URL)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
        request.add_header('User-Agent', common.USER_AGENT)
        # Cache responses on disk and throttle requests (5s) to be polite to the API
        opener = urllib2.build_opener(SimpleCache.CacheHandler(".ProviderCache"),
                                      SimpleCache.ThrottlingProcessor(5))
        resp = opener.open(request)

        try:
            data = ElementTree.parse(resp)
        except (urllib2.URLError, IOError, EOFError), e:
            # Python 2 except syntax — this module is py2-only
            logger.warn('Error fetching data from %s: %s' % (host, e))
            data = None

    except Exception, e:
        logger.error("Error 403 opening url %s" % e)
        data = None
def _moveBook(self, **kwargs):
    """Move one book (kwargs['id']) to a different author (kwargs['toid']).

    Validates both records exist, re-links the book, refreshes the
    book totals of the old and new author, and stores a status or
    error message in self.data.
    """
    if 'id' not in kwargs:
        self.data = 'Missing parameter: id'
        return
    if 'toid' not in kwargs:
        self.data = 'Missing parameter: toid'
        return
    try:
        myDB = database.DBConnection()
        # BUGFIX: parameterised queries — the previous string-formatted SQL
        # was vulnerable to injection/quoting errors from user-supplied ids
        authordata = myDB.match(
            'SELECT AuthorName, AuthorLink from authors WHERE AuthorID=?', (kwargs['toid'],))
        if not authordata:
            self.data = "No destination author [%s] in the database" % kwargs['toid']
        else:
            bookdata = myDB.match(
                'SELECT AuthorID, BookName from books where BookID=?', (kwargs['id'],))
            if not bookdata:
                self.data = "No bookid [%s] in the database" % kwargs['id']
            else:
                controlValueDict = {'BookID': kwargs['id']}
                newValueDict = {
                    'AuthorID': kwargs['toid'],
                    'AuthorName': authordata[0],
                    'AuthorLink': authordata[1]
                }
                myDB.upsert("books", newValueDict, controlValueDict)
                update_totals(bookdata[0])  # we moved from here
                update_totals(kwargs['toid'])  # to here
                self.data = "Moved book [%s] to [%s]" % (bookdata[1], authordata[0])
                logger.debug(self.data)
    except Exception as e:
        self.data = str(e)
def _moveBooks(self, **kwargs):
    """Move every book by author 'fromname' to author 'toname'.

    Updates the books' author fields in bulk, refreshes both authors'
    totals, and stores a status or error message in self.data.
    """
    if 'fromname' not in kwargs:
        self.data = 'Missing parameter: fromname'
        return
    if 'toname' not in kwargs:
        self.data = 'Missing parameter: toname'
        return
    try:
        myDB = database.DBConnection()
        # BUGFIX: parameterised queries — previous string-formatted SQL
        # was open to injection/quoting errors from user-supplied names
        fromhere = myDB.select(
            'SELECT bookid,authorid from books where authorname=?', (kwargs['fromname'],))
        tohere = myDB.match(
            'SELECT authorid, authorlink from authors where authorname=?', (kwargs['toname'],))
        if not len(fromhere):
            self.data = "No books by [%s] in the database" % kwargs['fromname']
        else:
            if not tohere:
                self.data = "No destination author [%s] in the database" % kwargs['toname']
            else:
                myDB.action(
                    'UPDATE books SET authorid=?, authorname=?, authorlink=? where authorname=?',
                    (tohere[0], kwargs['toname'], tohere[1], kwargs['fromname']))
                self.data = "Moved %s books from %s to %s" % (
                    len(fromhere), kwargs['fromname'], kwargs['toname'])
                update_totals(fromhere[0][1])  # we moved from here
                update_totals(tohere[0])  # to here
                logger.debug(self.data)
    except Exception as e:
        self.data = str(e)
def setSeedRatio(result):
    """Tell Deluge to stop the torrent in result['hash'] at result['ratio'].

    Issues core.set_torrent_stop_at_ratio then core.set_torrent_stop_ratio.
    Returns True if no ratio was requested or the final call reported no
    error, False on failure.
    """
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug('Deluge: Setting seed ratio')
    if not any(delugeweb_auth):
        _get_auth()

    timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
    try:
        ratio = result['ratio']
        if not ratio:
            return True

        # Two RPC calls: enable stop-at-ratio, then set the ratio value
        calls = (
            (5, "core.set_torrent_stop_at_ratio", [result['hash'], True]),
            (6, "core.set_torrent_stop_ratio", [result['hash'], float(ratio)]),
        )
        response = None
        for req_id, method, params in calls:
            payload = {"method": method, "params": params, "id": req_id}
            response = requests.post(delugeweb_url, json=payload, cookies=delugeweb_auth,
                                     verify=deluge_verify_cert, headers=headers, timeout=timeout)
            if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
                logger.debug('Status code: %s' % response.status_code)
                logger.debug(response.text)
        return not response.json()['error']
    except Exception as err:
        logger.error('Deluge %s: Setting seedratio failed: %s' % (type(err).__name__, str(err)))
        return False
def getServer():
    """Build an XML-RPC proxy for the configured rTorrent host.

    Embeds user:password credentials in the URL when configured, probes
    the server with system.client_version, and returns the ServerProxy
    on success or False when unconfigured/unreachable/unresponsive.
    """
    host = lazylibrarian.CONFIG['RTORRENT_HOST']
    if not host:
        logger.error("rtorrent error: No host found, check your config")
        return False
    if not host.startswith("http://") and not host.startswith("https://"):
        host = 'http://' + host
    if host.endswith('/'):
        host = host[:-1]

    if lazylibrarian.CONFIG['RTORRENT_USER']:
        # Embed basic-auth credentials into the URL
        scheme, _, rest = host.partition('://')
        host = '%s://%s:%s@%s' % (scheme,
                                  lazylibrarian.CONFIG['RTORRENT_USER'],
                                  lazylibrarian.CONFIG['RTORRENT_PASS'],
                                  rest)
    try:
        socket.setdefaulttimeout(20)  # so we don't freeze if server is not there
        server = xmlrpc_client.ServerProxy(host)
        version = server.system.client_version()
        socket.setdefaulttimeout(None)  # reset timeout
        logger.debug("rTorrent client version = %s" % version)
    except Exception as e:
        socket.setdefaulttimeout(None)  # reset timeout if failed
        logger.error("xmlrpc_client error: %s" % repr(e))
        return False
    if version:
        return server
    logger.warn('No response from rTorrent server')
    return False
def getTorrentFiles(torrentid):
    """Return the Deluge file list for a torrent, or '' if unavailable."""
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug('Deluge: Get torrent files')
    status = getTorrentStatus(torrentid, "files")  # type: dict
    if not status:
        return ''
    return status['result']['files']
def _add_torrent_file(result):
    """Upload a .torrent file's contents to Deluge (core.add_torrent_file).

    Stores the hash Deluge returns in result['hash'] and returns it,
    or False on any failure.
    """
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug('Deluge: Adding file')
    if not any(delugeweb_auth):
        _get_auth()

    timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
    try:
        # The torrent file contents must be base64 encoded for the deluge API
        payload = {"method": "core.add_torrent_file",
                   "params": [result['name'] + '.torrent', b64encode(result['content']), {}],
                   "id": 2}
        response = requests.post(delugeweb_url, json=payload, cookies=delugeweb_auth,
                                 verify=deluge_verify_cert, headers=headers, timeout=timeout)
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            logger.debug('Status code: %s' % response.status_code)
            logger.debug(response.text)

        retval = response.json()['result']
        result['hash'] = retval
        msg = 'Deluge: Response was %s' % retval
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            logger.debug(msg)
        if 'was None' in msg:
            logger.error('Deluge: Adding torrent file failed: Is the WebUI running?')
        return retval
    except Exception as err:
        logger.error('Deluge %s: Adding torrent file failed: %s' % (type(err).__name__, str(err)))
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            logger.debug('; '.join(traceback.format_exc().splitlines()))
        return False
def export_CSV(search_dir=None, status="Wanted", library='eBook'): """ Write a csv file to the search_dir containing all books marked as "Wanted" """ # noinspection PyBroadException try: if not search_dir: msg = "Alternate Directory not configured" logger.warn(msg) return msg elif not os.path.isdir(search_dir): msg = "Alternate Directory [%s] not found" % search_dir logger.warn(msg) return msg elif not os.access(search_dir, os.W_OK | os.X_OK): msg = "Alternate Directory [%s] not writable" % search_dir logger.warn(msg) return msg csvFile = os.path.join(search_dir, "%s %s - %s.csv" % (status, library, now().replace(':', '-'))) myDB = database.DBConnection() cmd = 'SELECT BookID,AuthorName,BookName,BookIsbn,books.AuthorID FROM books,authors ' if library == 'eBook': cmd += 'WHERE books.Status=? and books.AuthorID = authors.AuthorID' else: cmd += 'WHERE AudioStatus=? and books.AuthorID = authors.AuthorID' find_status = myDB.select(cmd, (status,)) if not find_status: msg = "No %s marked as %s" % (library, status) logger.warn(msg) else: count = 0 if PY2: fmode = 'wb' else: fmode = 'w' with open(csvFile, fmode) as csvfile: csvwrite = writer(csvfile, delimiter=',', quotechar='"', quoting=QUOTE_MINIMAL) # write headers, change AuthorName BookName BookIsbn to match import csv names csvwrite.writerow(['BookID', 'Author', 'Title', 'ISBN', 'AuthorID']) for resulted in find_status: logger.debug("Exported CSV for %s %s" % (library, resulted['BookName'])) row = ([resulted['BookID'], resulted['AuthorName'], resulted['BookName'], resulted['BookIsbn'], resulted['AuthorID']]) if PY2: csvwrite.writerow([("%s" % s).encode(lazylibrarian.SYS_ENCODING) for s in row]) else: csvwrite.writerow([("%s" % s) for s in row]) count += 1 msg = "CSV exported %s %s%s to %s" % (count, library, plural(count), csvFile) logger.info(msg) return msg except Exception: msg = 'Unhandled exception in exportCSV: %s' % traceback.format_exc() logger.error(msg) return msg
def getTorrentFolder(torrentid):
    """Return the Deluge folder name for a torrent, or '' if unavailable."""
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug('Deluge: Get torrent folder name')
    status = getTorrentStatus(torrentid, "name")  # type: dict
    if not status:
        return ''
    return status['result']['name']
def IterateOverNewzNabSites(book=None, searchType=None):
    """
    Purpose of this function is to read the config file, and loop through all active
    NewsNab+ sites and return the compiled results list from all sites back to the caller
    We get called with book[] and searchType of "book", "mag", "general" etc

    Returns (resultslist, providers) where providers is the number of
    enabled sites that were queried.
    """
    resultslist = []
    providers = 0
    # BUGFIX: removed unused database.DBConnection() local (dead code)

    for provider in lazylibrarian.NEWZNAB_PROV:
        if provider['ENABLED']:
            provider = get_capabilities(provider)
            providers += 1
            logger.debug('[IterateOverNewzNabSites] - %s' % provider['HOST'])
            resultslist += NewzNabPlus(book, provider, searchType, "nzb")

    for provider in lazylibrarian.TORZNAB_PROV:
        if provider['ENABLED']:
            provider = get_capabilities(provider)
            providers += 1
            logger.debug('[IterateOverTorzNabSites] - %s' % provider['HOST'])
            resultslist += NewzNabPlus(book, provider, searchType, "torznab")

    return resultslist, providers
def getFolder(hash):
    """Return the folder (basename) qBittorrent is using for this torrent.

    Returns None if the incomplete-downloads directory is not configured,
    or the raw save path while the torrent is still in the temp dir.
    NOTE(review): 'completed_dir' is read but never used here.
    """
    logger.debug('getFolder(%s)' % hash)
    qbclient = qbittorrentclient()

    # Get Active Directory from settings
    settings = qbclient._get_settings()
    active_dir = settings['temp_path']
    completed_dir = settings['save_path']

    if not active_dir:
        logger.error(
            'Could not get "Keep incomplete torrents in:" directory from QBitTorrent settings, please ensure it is set')
        return None

    # Get Torrent Folder Name
    torrent_folder = qbclient.get_savepath(hash)

    # If there's no folder yet then it's probably a magnet, try until folder is populated
    if torrent_folder == active_dir or not torrent_folder:
        tries = 1
        # poll for up to ~60s (10 tries x 6s) waiting for the magnet to resolve
        while (torrent_folder == active_dir or torrent_folder is None) and tries <= 10:
            tries += 1
            time.sleep(6)
            torrent_folder = qbclient.get_savepath(hash)

        if torrent_folder == active_dir or not torrent_folder:
            torrent_folder = qbclient.get_savepath(hash)
        # still unresolved: return whatever qBittorrent reports (may be the temp dir)
        return torrent_folder
    else:
        # presumably normalises Windows-style paths reported by a remote
        # qBittorrent when LazyLibrarian runs on a POSIX host — TODO confirm
        if 'windows' not in platform.system().lower():
            torrent_folder = torrent_folder.replace('\\', '/')
        return os.path.basename(os.path.normpath(torrent_folder))
def sendNZB(nzb):
    """Connect to NZBGet over XML-RPC and verify the connection by
    writing a log message announcing the incoming NZB.

    Returns False if the host is unconfigured or unreachable.
    """
    addToTop = False
    nzbgetXMLrpc = "%(username)s:%(password)s@%(host)s/xmlrpc"

    if lazylibrarian.NZBGET_HOST is None:
        logger.error(u"No NZBget host found in configuration. Please configure it.")
        return False

    # BUGFIX: str.replace returns a new string — the old code discarded the
    # result, so the scheme was never stripped and the final url contained
    # e.g. "http://http://host/xmlrpc". Strip into a local instead.
    host = lazylibrarian.NZBGET_HOST
    if host.startswith("https://"):
        nzbgetXMLrpc = "https://" + nzbgetXMLrpc
        host = host.replace("https://", "", 1)
    else:
        nzbgetXMLrpc = "http://" + nzbgetXMLrpc
        host = host.replace("http://", "", 1)

    url = nzbgetXMLrpc % {
        "host": host,
        "username": lazylibrarian.NZBGET_USER,
        "password": lazylibrarian.NZBGET_PASS,
    }

    nzbGetRPC = xmlrpclib.ServerProxy(url)
    try:
        if nzbGetRPC.writelog("INFO", "lazylibrarian connected to drop of %s any moment now." % (nzb.name + ".nzb")):
            logger.debug(u"Successfully connected to NZBget")
        else:
            logger.info(u"Successfully connected to NZBget, but unable to send a message" % (nzb.name + ".nzb"))
    except httplib.socket.error as e:
        logger.error(
            u"Please check your NZBget host and port (if it is running). NZBget is not responding to this combination"
        )
        return False
def export_CSV(search_dir=None, status="Wanted"):
    """ Write a csv file to the search_dir containing all books marked as "Wanted" """
    if not search_dir or os.path.isdir(search_dir) is False:
        logger.warn("Please check Alternate Directory setting")
        return False

    csvFile = os.path.join(search_dir, "%s - %s.csv" % (status, now().replace(':', '-')))
    myDB = database.DBConnection()
    rows = myDB.select('SELECT * FROM books WHERE Status = "%s"' % status)
    if not rows:
        logger.warn(u"No books marked as %s" % status)
        return

    count = 0
    with open(csvFile, 'wb') as out:
        csvwrite = csv.writer(out, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        # write headers, change AuthorName BookName BookIsbn to match import csv names (Author, Title, ISBN10)
        csvwrite.writerow(['BookID', 'Author', 'Title', 'ISBN', 'AuthorID'])
        for entry in rows:
            logger.debug(u"Exported CSV for book %s" % entry['BookName'])
            fields = [entry['BookID'], entry['AuthorName'], entry['BookName'],
                      entry['BookIsbn'], entry['AuthorID']]
            csvwrite.writerow([("%s" % f).encode(lazylibrarian.SYS_ENCODING) for f in fields])
            count += 1
    logger.info(u"CSV exported %s book%s to %s" % (count, plural(count), csvFile))
def search_nzb_book(books=None, mags=None):
    """Search enabled NZB providers for wanted books/magazines.

    When books is None this is a backlog search: collect every book
    marked Wanted, clear the on-disk provider cache and reset the
    request-throttling timers before searching.
    """
    if not lazylibrarian.USE_NZB:
        logger.debug('NZB Search is disabled')
        return
    # rename this thread
    threading.currentThread().name = "SEARCHNZBBOOKS"
    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select('SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"')

        # Clear cache
        providercache = os.path.join(lazylibrarian.DATADIR, ".ProviderCache")
        if os.path.exists(providercache):
            try:
                shutil.rmtree(providercache)
                os.mkdir(providercache)
            except OSError as e:
                # BUGFIX: modernised py2-only 'except OSError, e' syntax
                logger.info('Failed to clear cache: ' + str(e))

        # Clearing throttling timeouts
        t = SimpleCache.ThrottlingProcessor()
        t.lastRequestTime.clear()
def NZBDownloadMethod(bookid=None, nzbprov=None, nzbtitle=None, nzburl=None):
    """Hand an NZB to the configured downloader (SABnzbd, NZBGet or
    blackhole directory) and update book/wanted status in the database.

    Returns True if the NZB was successfully handed off, False otherwise.
    """
    myDB = database.DBConnection()
    # BUGFIX: 'download' was undefined (NameError) when the blackhole
    # fetch failed, because only the success path assigned it
    download = False
    if (lazylibrarian.NZB_DOWNLOADER_SABNZBD and lazylibrarian.SAB_HOST) \
            and not lazylibrarian.NZB_DOWNLOADER_BLACKHOLE:
        download = sabnzbd.SABnzbd(nzbtitle, nzburl)
    elif (lazylibrarian.NZB_DOWNLOADER_NZBGET and lazylibrarian.NZBGET_HOST) \
            and not lazylibrarian.NZB_DOWNLOADER_BLACKHOLE:
        headers = {"User-Agent": USER_AGENT}
        data = request.request_content(url=nzburl, headers=headers)
        nzb = classes.NZBDataSearchResult()
        nzb.extraInfo.append(data)
        nzb.name = nzbtitle
        nzb.url = nzburl
        download = nzbget.sendNZB(nzb)
    elif lazylibrarian.NZB_DOWNLOADER_BLACKHOLE:
        try:
            req = urllib2.Request(nzburl)
            if lazylibrarian.PROXY_HOST:
                req.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
            req.add_header("User-Agent", USER_AGENT)
            nzbfile = urllib2.urlopen(req, timeout=90).read()
        except (urllib2.URLError, socket.timeout) as e:
            logger.warn("Error fetching nzb from url: %s, %s" % (nzburl, e))
            nzbfile = False

        if nzbfile:
            nzbname = str(nzbtitle) + ".nzb"
            nzbpath = os.path.join(lazylibrarian.NZB_BLACKHOLEDIR, nzbname)
            try:
                with open(nzbpath, "w") as f:
                    f.write(nzbfile)
                logger.debug("NZB file saved to: " + nzbpath)
                download = True
            except Exception as e:
                logger.error("%s not writable, NZB not saved. Error: %s" % (nzbpath, e))
                download = False
    else:
        logger.warn("No NZB download method is enabled, check config.")
        return False

    if download:
        logger.debug("Nzbfile has been downloaded from " + str(nzburl))
        # NOTE(review): string-formatted SQL — consider parameterised
        # queries if the DB layer supports them, since nzburl is external data
        myDB.action('UPDATE books SET status = "Snatched" WHERE BookID="%s"' % bookid)
        myDB.action('UPDATE wanted SET status = "Snatched" WHERE NZBurl="%s"' % nzburl)
        return True
    else:
        logger.error(u'Failed to download nzb @ <a href="%s">%s</a>' % (nzburl, nzbprov))
        myDB.action('UPDATE wanted SET status = "Failed" WHERE NZBurl="%s"' % nzburl)
        return False
def NZBMatrix(book=None):
    """Search the NZBMatrix RSS API (ebook subcategory 36) for the
    book's search term and parse the response into 'data' (None on error).
    """
    results = []
    # BUGFIX: the old EBOOK_TYPE test built byte-identical params in both
    # branches, so the condition was dead code — collapsed to one dict
    params = {
        "page": "download",
        "username": lazylibrarian.NZBMATRIX_USER,
        "apikey": lazylibrarian.NZBMATRIX_API,
        "subcat": 36,
        "age": lazylibrarian.USENET_RETENTION,
        "term": book['searchterm']
    }
    logger.debug('Searching for: ' + book['searchterm'])

    URL = "http://rss.nzbmatrix.com/rss.php?" + urllib.urlencode(params)

    # to debug because of api
    logger.debug(u'Parsing results from <a href="%s">NZBMatrix</a>' % (URL))

    try:
        data = ElementTree.parse(urllib2.urlopen(URL, timeout=30))
    except (urllib2.URLError, IOError, EOFError) as e:
        logger.warn('Error fetching data from NZBMatrix: %s' % e)
        data = None
def markBooks(self, AuthorName=None, action=None, **args):
    """Set 'action' as the Status of every BookID in args, refresh the
    author's Have count, and launch a search thread for books newly
    marked Wanted. Redirects to the author page when AuthorName given.
    """
    myDB = database.DBConnection()
    for bookid in args:
        # ouch dirty workaround...
        if not bookid == 'book_table_length':
            controlValueDict = {'BookID': bookid}
            newValueDict = {'Status': action}
            myDB.upsert("books", newValueDict, controlValueDict)
            logger.debug('Status set to %s for BookID: %s' % (action, bookid))

    # update authors needs to be updated every time a book is marked differently
    # BUGFIX: parameterised the author name — the old string-formatted SQL
    # broke on names containing quotes and was open to injection
    countbooks = myDB.action(
        'SELECT COUNT(*) FROM books WHERE AuthorName=? AND (Status="Have" OR Status="Open")',
        (AuthorName,)).fetchone()
    havebooks = int(countbooks[0])
    controlValueDict = {"AuthorName": AuthorName}
    newValueDict = {"HaveBooks": havebooks}
    myDB.upsert("authors", newValueDict, controlValueDict)

    # start searchthreads
    books = []
    for bookid in args:
        # ouch dirty workaround...
        if not bookid == 'book_table_length':
            if action == 'Wanted':
                books.append({"bookid": bookid})
    threading.Thread(target=searchbook, args=[books]).start()
    if AuthorName:
        raise cherrypy.HTTPRedirect("authorPage?AuthorName=%s" % AuthorName)
def processAutoAdd(src_path=None):
    # Called to copy the book files to an auto add directory for the likes
    # of Calibre which can't do nested dirs
    # Returns True on success, False if the target dir is missing, the
    # source is empty, or the copy fails.
    autoadddir = lazylibrarian.IMP_AUTOADD
    logger.debug('AutoAdd - Attempt to copy from [%s] to [%s]' % (src_path, autoadddir))

    if not os.path.exists(autoadddir):
        logger.error('AutoAdd directory [%s] is missing or not set - cannot perform autoadd copy' % autoadddir)
        return False

    # Now try and copy all the book files into a single dir.
    try:
        names = os.listdir(src_path)
        # BUGFIX: with an empty source dir 'dstname' in the completion log
        # below was never assigned, raising NameError — guard early instead
        if not names:
            logger.warn('AutoAdd - No files found in [%s]' % src_path)
            return False
        # Caution - book may be pdf, mobi, epub or all 3.
        # for now simply copy all files, and let the autoadder sort it out
        for name in names:
            srcname = os.path.join(src_path, name)
            dstname = os.path.join(autoadddir, name)
            logger.debug('AutoAdd Copying named file [%s] as copy [%s] to [%s]' % (name, srcname, dstname))
            try:
                shutil.copy2(srcname, dstname)
            except (IOError, os.error) as why:
                logger.error('AutoAdd - Failed to copy file because [%s] ' % str(why))
    except OSError as why:
        logger.error('AutoAdd - Failed because [%s]' % str(why))
        return False

    logger.info('Auto Add completed for [%s]' % dstname)
    return True
def db_v5(myDB, upgradelog):
    """Schema upgrade step 5: zero-pad IssueDate values shorter than four
    characters in both the issues and magazines tables, logging progress
    to lazylibrarian.UPDATE_MSG and the upgrade log file.
    """
    # First pass: fix short issue numbers in the issues table
    issues = myDB.select(
        'SELECT IssueID,IssueDate from issues WHERE length(IssueDate) < 4 and length(IssueDate) > 0')
    if issues:
        lazylibrarian.UPDATE_MSG = 'Updating issues table to hold 4 digit issue numbers'
        upgradelog.write("%s v5: %s\n" % (time.ctime(), lazylibrarian.UPDATE_MSG))
        logger.debug(lazylibrarian.UPDATE_MSG)
        tot = len(issues)
        cnt = 0
        for issue in issues:
            cnt += 1
            lazylibrarian.UPDATE_MSG = 'Updating issues table 4 digits: %s of %s' % (cnt, tot)
            issueid = issue['IssueID']
            issuedate = str(issue['IssueDate'])
            # zfill pads with leading zeros, e.g. '42' -> '0042'
            issuedate = issuedate.zfill(4)
            myDB.action('UPDATE issues SET IssueDate=? WHERE IssueID=?', (issuedate, issueid))
        upgradelog.write("%s v5: %s\n" % (time.ctime(), lazylibrarian.UPDATE_MSG))

    # Second pass: the same fix for the magazines table (keyed by Title)
    mags = myDB.select(
        'SELECT Title,IssueDate from magazines WHERE length(IssueDate) < 4 and length(IssueDate) > 0')
    if mags:
        lazylibrarian.UPDATE_MSG = 'Updating magazines table to 4 digits'
        upgradelog.write("%s v5: %s\n" % (time.ctime(), lazylibrarian.UPDATE_MSG))
        logger.debug(lazylibrarian.UPDATE_MSG)
        tot = len(mags)
        cnt = 0
        for mag in mags:
            cnt += 1
            lazylibrarian.UPDATE_MSG = 'Updating magazines table to 4 digits: %s of %s' % (cnt, tot)
            title = mag['Title']
            issuedate = str(mag['IssueDate'])
            issuedate = issuedate.zfill(4)
            myDB.action('UPDATE magazines SET IssueDate=? WHERE Title=?', (issuedate, title))
        upgradelog.write("%s v5: %s\n" % (time.ctime(), lazylibrarian.UPDATE_MSG))
    upgradelog.write("%s v5: complete\n" % time.ctime())
def get_author_info(self, authorid=None, authorname=None, refresh=False):
    """Fetch author details from the GoodReads author/show API.

    Returns a dict of author fields, or an empty dict on any failure.
    'refresh' is accepted for interface compatibility but unused here.
    """
    URL = 'http://www.goodreads.com/author/show/' + authorid + '.xml?' + urllib.urlencode(self.params)
    author_dict = {}

    try:
        rootxml, in_cache = get_xml_request(URL)
    except Exception as e:
        logger.error("Error getting author info: %s" % e)
        return author_dict
    if rootxml is None:
        logger.debug("Error requesting author info")
        return author_dict

    resultxml = rootxml.find('author')
    # BUGFIX: find() returns None when the element is absent; the old
    # bare len(resultxml) raised TypeError instead of logging "no author"
    if resultxml is None or not len(resultxml):
        logger.warn('No author found with ID: ' + authorid)
    else:
        logger.debug("[%s] Processing info for authorID: %s" % (authorname, authorid))
        # PAB added authorname to author_dict - this holds the intact name preferred by GR
        author_dict = {
            'authorid': resultxml[0].text,
            'authorlink': resultxml.find('link').text,
            'authorimg': resultxml.find('image_url').text,
            'authorborn': resultxml.find('born_at').text,
            'authordeath': resultxml.find('died_at').text,
            'totalbooks': resultxml.find('works_count').text,
            'authorname': authorname
        }
    return author_dict
def db_v4(myDB, upgradelog):
    """Schema upgrade step 4: add a 'duplicates' column to the stats table
    if it is not already present, then mark the step complete in the log.
    """
    if not has_column(myDB, "stats", "duplicates"):
        msg = 'Updating stats table to hold duplicates'
        lazylibrarian.UPDATE_MSG = msg
        upgradelog.write("%s v4: %s\n" % (time.ctime(), msg))
        logger.debug(msg)
        myDB.action('ALTER TABLE stats ADD COLUMN duplicates INT')
    upgradelog.write("%s v4: complete\n" % time.ctime())
def OLDUsenetCrawler(book=None):
    """Query the usenet-crawler book API for the given book and parse the
    XML response into 'data' (None on any fetch error).
    """
    HOST = lazylibrarian.USENETCRAWLER_HOST
    results = []
    # BUGFIX: removed stray py2 'print book.keys()' debug statement
    logger.info('UsenetCrawler: Searching term [%s] for author [%s] and title [%s]' % (
        book['searchterm'], book['authorName'], book['bookName']))

    params = {
        "apikey": lazylibrarian.USENETCRAWLER_API,
        "t": "book",
        "title": book['bookName'],
        "author": book['authorName']
    }
    # sample request
    # http://www.usenet-crawler.com/api?apikey=7xxxxxxxxxxxxxyyyyyyyyyyyyyyzzz4&t=book&author=Daniel
    logger.debug("%s" % params)

    if not str(HOST)[:4] == "http":
        HOST = 'http://' + HOST

    URL = HOST + '/api?' + urllib.urlencode(params)
    logger.debug('UsenetCrawler: searching on [%s] ' % URL)

    data = None
    try:
        data = ElementTree.parse(urllib2.urlopen(URL, timeout=30))
    except (urllib2.URLError, IOError, EOFError) as e:
        # BUGFIX: was 'logger.Error' — AttributeError whenever the fetch failed
        logger.error('Error fetching data from %s: %s' % (HOST, e))
        data = None
def removeTorrent(torrentid, remove_data=False):
    """Remove a finished torrent from Transmission, optionally deleting
    its data.

    Returns True when the torrent was removed (or already gone),
    False when it is still seeding or on error.
    """
    response, _ = torrentAction(
        'torrent-get', {'ids': [torrentid], 'fields': ['isFinished', 'name']})  # type: dict
    if not response:
        return False

    try:
        torrent = response['arguments']['torrents'][0]
        name = torrent['name']
        if not torrent['isFinished']:
            logger.debug('%s has not finished seeding yet, torrent will not be removed' % name)
            return False
        logger.debug('%s has finished seeding, removing torrent and data' % name)
        arguments = {'ids': [torrentid]}
        if remove_data:
            arguments['delete-local-data'] = True
        torrentAction('torrent-remove', arguments)
        return True
    except IndexError:
        # no torrents, already removed?
        return True
    except Exception as e:
        logger.warn('Unable to remove torrent %s, %s %s' % (torrentid, type(e).__name__, str(e)))
        return False
def db_v13(myDB, upgradelog):
    """Schema upgrade step 13: add a 'Manual' column to the authors table
    if it is not already present, then mark the step complete in the log.
    """
    if not has_column(myDB, "authors", "Manual"):
        msg = 'Updating authors table to hold Manual setting'
        lazylibrarian.UPDATE_MSG = msg
        upgradelog.write("%s v13: %s\n" % (time.ctime(), msg))
        logger.debug(msg)
        myDB.action('ALTER TABLE authors ADD COLUMN Manual TEXT')
    upgradelog.write("%s v13: complete\n" % time.ctime())
def ReturnSearchTypeStructure(api_key, book, searchType):
    """Build the newznab query-parameter dict for the given search type.

    'book' uses the t=book API with author/title, 'mag' a category-limited
    t=search on the search term, anything else a plain t=search.
    """
    if searchType == "book":
        params = {
            "t": "book",
            "apikey": api_key,
            "title": common.removeDisallowedFilenameChars(book['bookName']),
            "author": common.removeDisallowedFilenameChars(book['authorName']),
            "cat": 7020,  # 7020=ebook
        }
    elif searchType == "mag":
        params = {
            "t": "search",
            "apikey": api_key,
            "cat": "7000,7010,7020",  # 7000=Other,7010=Misc,7020 Ebook
            "q": common.removeDisallowedFilenameChars(book['searchterm']),
            "extended": 1,
        }
    else:
        params = {
            "t": "search",
            "apikey": api_key,
            # "cat": 7020,
            "q": book['searchterm'],
            "extended": 1,
        }
    # BUGFIX: log tag was missing its opening bracket ('NewzNabPlus]')
    logger.debug('[NewzNabPlus] - Search parameters set to ' + str(params))
    return params
def exportCSV(search_dir=None, status="Wanted"):
    """ Write a csv file to the search_dir containing all books marked as "Wanted" """
    if not search_dir:
        logger.warn("Alternate Directory must not be empty")
        return False

    csvFile = os.path.join(search_dir, "%s - %s.csv" % (status, formatter.now()))
    myDB = database.DBConnection()
    rows = myDB.select('SELECT * FROM books WHERE Status = "%s"' % status)
    if not rows:
        logger.warn("No books marked as %s" % status)
        return

    with open(csvFile, 'wb') as out:
        csvwrite = csv.writer(out, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        # write headers, change AuthorName BookName BookIsbn to match import csv names (Author, Title, ISBN10)
        csvwrite.writerow(['BookID', 'Author', 'Title', 'ISBN', 'AuthorID'])
        for entry in rows:
            logger.debug("Exported CSV for book %s" % entry['BookName'].encode('utf-8'))
            fields = [entry['BookID'], entry['AuthorName'], entry['BookName'],
                      entry['BookIsbn'], entry['AuthorID']]
            csvwrite.writerow([("%s" % f).encode('utf-8') for f in fields])
    logger.info("CSV exported to %s" % csvFile)
def NZBDownloadMethod(bookid=None, nzbprov=None, nzbtitle=None, nzburl=None):
    """Hand an NZB to SABnzbd, NZBGet or the blackhole directory and
    update book/wanted status in the database.

    Returns True if the NZB was successfully handed off, False otherwise.
    """
    myDB = database.DBConnection()
    # BUGFIX: 'download' was undefined (NameError) when the blackhole
    # fetch failed, because only the success path assigned it
    download = False
    if lazylibrarian.SAB_HOST and not lazylibrarian.NZB_DOWNLOADER_BLACKHOLE:
        download = sabnzbd.SABnzbd(nzbtitle, nzburl)
    elif lazylibrarian.NZBGET_HOST and not lazylibrarian.NZB_DOWNLOADER_BLACKHOLE:
        headers = {'User-Agent': USER_AGENT}
        data = request.request_content(url=nzburl, headers=headers)
        nzb = classes.NZBDataSearchResult()
        nzb.extraInfo.append(data)
        nzb.name = nzbtitle
        nzb.url = nzburl
        download = nzbget.sendNZB(nzb)
    elif lazylibrarian.NZB_DOWNLOADER_BLACKHOLE:
        try:
            req = urllib2.Request(nzburl)
            if lazylibrarian.PROXY_HOST:
                req.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
            req.add_header('User-Agent', USER_AGENT)
            nzbfile = urllib2.urlopen(req, timeout=90).read()
        except urllib2.URLError as e:
            logger.warn('Error fetching nzb from url: ' + nzburl + ' %s' % e.reason)
            nzbfile = False

        if nzbfile:
            nzbname = str(nzbtitle) + '.nzb'
            nzbpath = os.path.join(lazylibrarian.NZB_BLACKHOLEDIR, nzbname)
            try:
                with open(nzbpath, 'w') as f:
                    f.write(nzbfile)
                logger.debug('NZB file saved to: ' + nzbpath)
                download = True
            except Exception as e:
                logger.error('%s not writable, NZB not saved. Error: %s' % (nzbpath, e))
                download = False
    else:
        logger.warn('No NZB download method is enabled, check config.')
        return False

    if download:
        logger.debug('Nzbfile has been downloaded from ' + str(nzburl))
        # NOTE(review): string-formatted SQL — consider parameterised
        # queries if the DB layer supports them, since nzburl is external data
        myDB.action('UPDATE books SET status = "Snatched" WHERE BookID="%s"' % bookid)
        myDB.action('UPDATE wanted SET status = "Snatched" WHERE NZBurl="%s"' % nzburl)
        return True
    else:
        logger.error(u'Failed to download nzb @ <a href="%s">%s</a>' % (nzburl, nzbprov))
        myDB.action('UPDATE wanted SET status = "Failed" WHERE NZBurl="%s"' % nzburl)
        return False
def getSeriesAuthors(seriesid):
    """ Get a list of authors contributing to a series
        and import those authors (and their books) into the database
        Return how many authors you added """
    myDB = database.DBConnection()
    # author count before we start, so we can report how many were added
    result = myDB.match("select count(*) as counter from authors")
    start = int(result['counter'])
    result = myDB.match('select SeriesName from series where SeriesID=?', (seriesid,))
    seriesname = result['SeriesName']
    members, api_hits = getSeriesMembers(seriesid, seriesname)
    # strip curly quotes and plain quotes before fuzzy-matching titles
    dic = {u'\u2018': "", u'\u2019': "", u'\u201c': '', u'\u201d': '', "'": "", '"': ''}
    if members:
        myDB = database.DBConnection()
        for member in members:
            # member tuple layout: order, bookname, authorname, workid, authorid, pubyear
            # order = member[0]
            bookname = member[1]
            authorname = member[2]
            # workid = member[3]
            authorid = member[4]
            # pubyear = member[5]
            bookname = replace_all(bookname, dic)
            if not authorid:
                # goodreads gives us all the info we need, librarything/google doesn't
                base_url = 'https://www.goodreads.com/search.xml?q='
                params = {"key": lazylibrarian.CONFIG['GR_API']}
                # first pass: search goodreads with "title author"
                searchname = bookname + ' ' + authorname
                searchname = cleanName(unaccented(searchname))
                if PY2:
                    searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                searchterm = quote_plus(searchname)
                set_url = base_url + searchterm + '&' + urlencode(params)
                try:
                    rootxml, in_cache = gr_xml_request(set_url)
                    if not in_cache:
                        api_hits += 1
                    if rootxml is None:
                        logger.warn('Error getting XML for %s' % searchname)
                    else:
                        resultxml = rootxml.getiterator('work')
                        for item in resultxml:
                            try:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                            except (KeyError, AttributeError):
                                booktitle = ""
                            # near-exact title match required (>= 98) before we trust the authorid
                            book_fuzz = fuzz.token_set_ratio(booktitle, bookname)
                            if book_fuzz >= 98:
                                try:
                                    author = item.find('./best_book/author/name').text
                                except (KeyError, AttributeError):
                                    author = ""
                                # try:
                                #     workid = item.find('./work/id').text
                                # except (KeyError, AttributeError):
                                #     workid = ""
                                try:
                                    authorid = item.find('./best_book/author/id').text
                                except (KeyError, AttributeError):
                                    authorid = ""
                                logger.debug("Author Search found %s %s, authorid %s" %
                                             (author, booktitle, authorid))
                                break
                    if not authorid:  # try again with title only
                        # second pass: drop the author name from the query
                        searchname = cleanName(unaccented(bookname))
                        if PY2:
                            searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                        searchterm = quote_plus(searchname)
                        set_url = base_url + searchterm + '&' + urlencode(params)
                        rootxml, in_cache = gr_xml_request(set_url)
                        if not in_cache:
                            api_hits += 1
                        if rootxml is None:
                            logger.warn('Error getting XML for %s' % searchname)
                        else:
                            resultxml = rootxml.getiterator('work')
                            for item in resultxml:
                                # NOTE(review): unlike the first pass, this title lookup is not
                                # wrapped in try/except; an item with no title would raise
                                # AttributeError and be caught by the outer handler below
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                                book_fuzz = fuzz.token_set_ratio(booktitle, bookname)
                                if book_fuzz >= 98:
                                    try:
                                        author = item.find('./best_book/author/name').text
                                    except (KeyError, AttributeError):
                                        author = ""
                                    # try:
                                    #     workid = item.find('./work/id').text
                                    # except (KeyError, AttributeError):
                                    #     workid = ""
                                    try:
                                        authorid = item.find('./best_book/author/id').text
                                    except (KeyError, AttributeError):
                                        authorid = ""
                                    logger.debug("Title Search found %s %s, authorid %s" %
                                                 (author, booktitle, authorid))
                                    break
                    if not authorid:
                        logger.warn("GoodReads doesn't know about %s %s" % (authorname, bookname))
                except Exception as e:
                    logger.error("Error finding goodreads results: %s %s" % (type(e).__name__, str(e)))
            if authorid:
                # import the author (and their books); refresh=False as this is a new add
                lazylibrarian.importer.addAuthorToDB(refresh=False, authorid=authorid)
    # author count after imports; difference is how many we added
    result = myDB.match("select count(*) as counter from authors")
    finish = int(result['counter'])
    newauth = finish - start
    logger.info("Added %s new author%s for %s" % (newauth, plural(newauth), seriesname))
    return newauth
def getBookWork(bookID=None, reason=None, seriesID=None):
    """ return the contents of the LibraryThing workpage for the given bookid,
        or seriespage if seriesID given
        preferably from the cache. If not already cached cache the results
        Return None if no workpage/seriespage available """
    global ALLOW_NEW, LAST_NEW
    if not bookID and not seriesID:
        logger.error("getBookWork - No bookID or seriesID")
        return None
    if not reason:
        reason = ""
    myDB = database.DBConnection()
    # look up the details needed to build the LibraryThing query, and choose
    # which on-disk cache directory applies (books vs series)
    if bookID:
        cmd = 'select BookName,AuthorName,BookISBN from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        cacheLocation = "WorkCache"
        item = myDB.match(cmd, (bookID,))
    else:
        cmd = 'select SeriesName from series where SeriesID=?'
        cacheLocation = "SeriesCache"
        item = myDB.match(cmd, (seriesID,))
    if item:
        cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation)
        if bookID:
            workfile = os.path.join(cacheLocation, str(bookID) + '.html')
        else:
            workfile = os.path.join(cacheLocation, str(seriesID) + '.html')
        # does the workpage need to expire? For now only expire if it was an error page
        # (small file) or a series page as librarything might get better info over time, more series members etc
        if os.path.isfile(workfile):
            if seriesID or os.path.getsize(workfile) < 500:
                cache_modified_time = os.stat(workfile).st_mtime
                time_now = time.time()
                expiry = lazylibrarian.CONFIG['CACHE_AGE'] * 24 * 60 * 60  # expire cache after this many seconds
                if cache_modified_time < time_now - expiry:
                    # Cache entry is too old, delete it
                    if ALLOW_NEW:
                        os.remove(workfile)
        if os.path.isfile(workfile):
            # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
            lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
            if bookID:
                if reason:
                    logger.debug("getBookWork: Returning Cached entry for %s %s" % (bookID, reason))
                else:
                    logger.debug("getBookWork: Returning Cached workpage for %s" % bookID)
            else:
                logger.debug("getBookWork: Returning Cached seriespage for %s" % item['seriesName'])
            if PY2:
                with open(workfile, "r") as cachefile:
                    source = cachefile.read()
            else:
                # py3: tolerate any stray bytes in the cached html rather than raising
                # noinspection PyArgumentList
                with open(workfile, "r", errors="backslashreplace") as cachefile:
                    source = cachefile.read()
            return source
        else:
            # not cached: fetch from librarything (if new lookups are allowed)
            lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
            if not ALLOW_NEW:
                # don't nag. Show message no more than every 12 hrs
                timenow = int(time.time())
                if check_int(LAST_NEW, 0) + 43200 < timenow:
                    logger.warn("New WhatWork is disabled")
                    LAST_NEW = timenow
                return None
            if bookID:
                title = safe_unicode(item['BookName'])
                author = safe_unicode(item['AuthorName'])
                if PY2:
                    title = title.encode(lazylibrarian.SYS_ENCODING)
                    author = author.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/api/whatwork.php?author=%s&title=%s' % \
                      (quote_plus(author), quote_plus(title))
            else:
                seriesname = safe_unicode(item['seriesName'])
                if PY2:
                    seriesname = seriesname.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/series/%s' % quote_plus(seriesname)
            # librarything_wait() rate-limits our requests to librarything
            librarything_wait()
            result, success = fetchURL(URL)
            if bookID and success:
                # follow the <link> in the whatwork response to the actual workpage
                # noinspection PyBroadException
                try:
                    workpage = result.split('<link>')[1].split('</link>')[0]
                    librarything_wait()
                    result, success = fetchURL(workpage)
                except Exception:
                    try:
                        errmsg = result.split('<error>')[1].split('</error>')[0]
                    except IndexError:
                        errmsg = "Unknown Error"
                    # if no workpage link, try isbn instead
                    if item['BookISBN']:
                        URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + item['BookISBN']
                        librarything_wait()
                        result, success = fetchURL(URL)
                        if success:
                            # noinspection PyBroadException
                            try:
                                workpage = result.split('<link>')[1].split('</link>')[0]
                                librarything_wait()
                                result, success = fetchURL(workpage)
                            except Exception:
                                # no workpage link found by isbn
                                try:
                                    errmsg = result.split('<error>')[1].split('</error>')[0]
                                except IndexError:
                                    errmsg = "Unknown Error"
                                # still cache if whatwork returned a result without a link, so we don't keep retrying
                                logger.debug("Librarything: [%s] for ISBN %s" % (errmsg, item['BookISBN']))
                                success = True
                    else:
                        # still cache if whatwork returned a result without a link, so we don't keep retrying
                        msg = "Librarything: [" + errmsg + "] for "
                        logger.debug(msg + item['AuthorName'] + ' ' + item['BookName'])
                        success = True
            if success:
                with open(workfile, "w") as cachefile:
                    cachefile.write(result)
                    if bookID:
                        logger.debug("getBookWork: Caching workpage for %s" % workfile)
                    else:
                        logger.debug("getBookWork: Caching series page for %s" % workfile)
                    # return None if we got an error page back
                    if '</request><error>' in result:
                        return None
                return result
            else:
                if bookID:
                    logger.debug("getBookWork: Unable to cache workpage, got %s" % result)
                else:
                    logger.debug("getBookWork: Unable to cache series page, got %s" % result)
            return None
    else:
        if bookID:
            logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        else:
            logger.debug('Get Book Work - Invalid seriesID [%s]' % seriesID)
        return None
def setWorkID(books=None):
    """ Set the goodreads workid for any books that don't already have one
        books is a comma separated list of bookids or if empty, select from database
        Paginate requests to reduce api hits """
    myDB = database.DBConnection()
    pages = []
    if books:
        # caller supplied an already comma separated list; treat it as one page
        page = books
        pages.append(page)
    else:
        cmd = "select BookID,BookName from books where WorkID='' or WorkID is null"
        books = myDB.select(cmd)
        if books:
            counter = 0
            logger.debug('Setting WorkID for %s book%s' % (len(books), plural(len(books))))
            page = ''
            # batch bookids into comma separated pages of 50 per api request
            for book in books:
                bookid = book['BookID']
                if not bookid:
                    logger.debug("No bookid for %s" % book['BookName'])
                else:
                    if page:
                        page = page + ','
                    page = page + bookid
                    counter += 1
                    if counter == 50:
                        counter = 0
                        pages.append(page)
                        page = ''
            if page:  # remainder of the last partial page
                pages.append(page)
    counter = 0  # now counts how many workids we actually store
    params = {"key": lazylibrarian.CONFIG['GR_API']}
    for page in pages:
        URL = 'https://www.goodreads.com/book/id_to_work_id/' + page + '?' + urlencode(params)
        try:
            # useCache=False: we explicitly want fresh ids, not a cached response
            rootxml, in_cache = gr_xml_request(URL, useCache=False)
            if rootxml is None:
                logger.debug("Error requesting id_to_work_id page")
            else:
                resultxml = rootxml.find('work-ids')
                if len(resultxml):
                    ids = resultxml.getiterator('item')
                    books = getList(page)
                    cnt = 0
                    # goodreads returns work ids in the same order as the bookids we sent
                    for item in ids:
                        workid = item.text
                        if not workid:
                            logger.debug("No workid returned for %s" % books[cnt])
                        else:
                            counter += 1
                            controlValueDict = {"BookID": books[cnt]}
                            newValueDict = {"WorkID": workid}
                            myDB.upsert("books", newValueDict, controlValueDict)
                        cnt += 1
        except Exception as e:
            logger.error("%s parsing id_to_work_id page: %s" % (type(e).__name__, str(e)))
    msg = 'Updated %s id%s' % (counter, plural(counter))
    logger.debug("setWorkID complete: " + msg)
    return msg
def setSeries(serieslist=None, bookid=None, authorid=None, workid=None):
    """ set series details in series/member tables from the supplied dict
        and a displayable summary in book table
        serieslist is a tuple (SeriesID, SeriesNum, SeriesName)
        Return how many api hits and the original publication date if known """
    myDB = database.DBConnection()
    api_hits = 0
    originalpubdate = ''
    if bookid:
        # delete any old series-member entries
        myDB.action('DELETE from member WHERE BookID=?', (bookid,))
        for item in serieslist:
            match = myDB.match('SELECT SeriesID from series where SeriesName=? COLLATE NOCASE',
                               (item[2],))
            if match:
                seriesid = match['SeriesID']
                members, _api_hits = getSeriesMembers(seriesid, item[2])
                api_hits += _api_hits
            else:
                # new series, need to set status and get SeriesID
                if item[0]:
                    seriesid = item[0]
                    members, _api_hits = getSeriesMembers(seriesid, item[2])
                    api_hits += _api_hits
                else:
                    # no seriesid so generate it (row count + 1)
                    cnt = myDB.match("select count(*) as counter from series")
                    res = check_int(cnt['counter'], 0)
                    seriesid = str(res + 1)
                    members = []
                myDB.action('INSERT into series VALUES (?, ?, ?, ?, ?)',
                            (seriesid, item[2], "Active", 0, 0), suppress='UNIQUE')
            if not workid or not authorid:
                # fill in whichever ids the caller didn't supply
                book = myDB.match('SELECT AuthorID,WorkID from books where BookID=?', (bookid,))
                if book:
                    authorid = book['AuthorID']
                    workid = book['WorkID']
            if seriesid and authorid and workid:
                # if this work is a known member with a plausible publication year,
                # record it as the book's original publication date
                for member in members:
                    if member[3] == workid:
                        if check_year(member[5], past=1800, future=0):
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"BookDate": member[5], "OriginalPubDate": member[5]}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            originalpubdate = member[5]
                        break
                controlValueDict = {"BookID": bookid, "SeriesID": seriesid}
                newValueDict = {"SeriesNum": item[1], "WorkID": workid}
                myDB.upsert("member", newValueDict, controlValueDict)
                myDB.action('INSERT INTO seriesauthors ("SeriesID", "AuthorID") VALUES (?, ?)',
                            (seriesid, authorid), suppress='UNIQUE')
            else:
                if not authorid:
                    logger.debug('Unable to set series for book %s, no authorid' % bookid)
                elif not workid:
                    logger.debug('Unable to set series for book %s, no workid' % bookid)
                elif not seriesid:
                    logger.debug('Unable to set series for book %s, no seriesid' % bookid)
                return api_hits, originalpubdate
        # build the human readable "Series 1<br>Other Series 2" summary for the book table
        series = ''
        for item in serieslist:
            newseries = "%s %s" % (item[2], item[1])
            # FIX: str.strip() returns a new string, the original discarded the
            # result, leaving a trailing space when SeriesNum (item[1]) is empty
            newseries = newseries.strip()
            if series and newseries:
                series += '<br>'
            series += newseries
        myDB.action('UPDATE books SET SeriesDisplay=? WHERE BookID=?', (series, bookid))
    return api_hits, originalpubdate
def build_monthtable():
    """ Build the table of long/short month names used to recognise magazine issue
        dates. Row 0 holds locale names (each locale appears twice: once for the
        long-name column set, once for the abbreviation set); rows 1-12 hold the
        corresponding month names. Loads monthnames.json if present, falls back to
        a built-in en_GB table, then appends month names for the current locale and
        any extra locales listed in CONFIG['IMP_MONTHLANG']. Returns the table. """
    table = []
    json_file = os.path.join(DATADIR, 'monthnames.json')
    if os.path.isfile(json_file):
        try:
            with open(json_file) as json_data:
                table = json.load(json_data)
            mlist = ''
            # list alternate entries as each language is in twice (long and short month names)
            for item in table[0][::2]:
                mlist += item + ' '
            logger.debug('Loaded monthnames.json : %s' % mlist)
        except Exception as e:
            logger.error('Failed to load monthnames.json, %s' % str(e))
    if not table:
        # Default Month names table to hold long/short month names for multiple languages
        # which we can match against magazine issues
        table = [
            ['en_GB.UTF-8', 'en_GB.UTF-8'],
            ['january', 'jan'],
            ['february', 'feb'],
            ['march', 'mar'],
            ['april', 'apr'],
            ['may', 'may'],
            ['june', 'jun'],
            ['july', 'jul'],
            ['august', 'aug'],
            ['september', 'sep'],
            ['october', 'oct'],
            ['november', 'nov'],
            ['december', 'dec']
        ]
    if len(getList(CONFIG['IMP_MONTHLANG'])) == 0:  # any extra languages wanted?
        return table
    try:
        current_locale = locale.setlocale(locale.LC_ALL, '')  # read current state.
        # getdefaultlocale() doesnt seem to work as expected on windows, returns 'None'
        logger.debug('Current locale is %s' % current_locale)
    except locale.Error as e:
        logger.debug("Error getting current locale : %s" % str(e))
        return table
    lang = str(current_locale)
    # check not already loaded, also all english variants and 'C' use the same month names
    if lang in table[0] or ((lang.startswith('en_') or lang == 'C') and 'en_' in str(table[0])):
        logger.debug('Month names for %s already loaded' % lang)
    else:
        # current locale is already active, so calendar gives us its month names directly
        logger.debug('Loading month names for %s' % lang)
        table[0].append(lang)
        for f in range(1, 13):
            table[f].append(unaccented(calendar.month_name[f]).lower())
        table[0].append(lang)
        for f in range(1, 13):
            table[f].append(unaccented(calendar.month_abbr[f]).lower().strip('.'))
        logger.info("Added month names for locale [%s], %s, %s ..." % (
            lang, table[1][len(table[1]) - 2], table[1][len(table[1]) - 1]))
    for lang in getList(CONFIG['IMP_MONTHLANG']):
        try:
            if lang in table[0] or ((lang.startswith('en_') or lang == 'C') and 'en_' in str(table[0])):
                logger.debug('Month names for %s already loaded' % lang)
            else:
                # switch locale so calendar produces this language's month names
                locale.setlocale(locale.LC_ALL, lang)
                logger.debug('Loading month names for %s' % lang)
                table[0].append(lang)
                for f in range(1, 13):
                    table[f].append(unaccented(calendar.month_name[f]).lower())
                table[0].append(lang)
                for f in range(1, 13):
                    table[f].append(unaccented(calendar.month_abbr[f]).lower().strip('.'))
                locale.setlocale(locale.LC_ALL, current_locale)  # restore entry state
                logger.info("Added month names for locale [%s], %s, %s ..." % (
                    lang, table[1][len(table[1]) - 2], table[1][len(table[1]) - 1]))
        except Exception as e:
            locale.setlocale(locale.LC_ALL, current_locale)  # restore entry state
            logger.warn("Unable to load requested locale [%s] %s" % (lang, str(e)))
            try:
                # suggest any installed locales for the same base language
                # NOTE(review): on py3 check_output returns bytes, so a_locale is bytes
                # while wanted_lang is str — startswith would raise TypeError; confirm
                # which interpreter this runs under
                wanted_lang = lang.split('_')[0]
                params = ['locale', '-a']
                all_locales = subprocess.check_output(params).split()
                locale_list = []
                for a_locale in all_locales:
                    if a_locale.startswith(wanted_lang):
                        locale_list.append(a_locale)
                if locale_list:
                    logger.warn("Found these alternatives: " + str(locale_list))
                else:
                    logger.warn("Unable to find an alternative")
            except Exception as e:
                logger.warn("Unable to get a list of alternatives, %s" % str(e))
            logger.info("Set locale back to entry state %s" % current_locale)
    #with open(json_file, 'w') as f:
    #    json.dump(table, f)
    return table
def config_write():
    """ Write the in-memory CONFIG dict and the newznab/torznab/rss provider
        lists back to the ini file. Writes CONFIGFILE.new first, then rotates
        the old file to .bak, so a failed write cannot corrupt the live config. """
    for key in CONFIG_DEFINITIONS.keys():
        item_type, section, default = CONFIG_DEFINITIONS[key]
        check_section(section)
        value = CONFIG[key]
        # path/name values are stored encoded; word lists are also lowercased
        if key in ['LOGDIR', 'DESTINATION_DIR', 'ALTERNATE_DIR', 'DOWLOAD_DIR',
                   'EBOOK_DEST_FILE', 'EBOOK_DEST_FOLDER', 'MAG_DEST_FILE', 'MAG_DEST_FOLDER']:
            value = value.encode(SYS_ENCODING)
        elif key in ['REJECT_WORDS', 'MAG_TYPE', 'EBOOK_TYPE']:
            value = value.encode(SYS_ENCODING).lower()
        CFG.set(section, key.lower(), value)
    # sanity check for typos...
    for key in CONFIG.keys():
        if key not in CONFIG_DEFINITIONS.keys():
            logger.warn('Unsaved config key: %s' % key)
    for provider in NEWZNAB_PROV:
        check_section(provider['NAME'])
        CFG.set(provider['NAME'], 'ENABLED', provider['ENABLED'])
        oldprovider = check_setting('str', provider['NAME'], 'HOST', '', log=False)
        CFG.set(provider['NAME'], 'HOST', provider['HOST'])
        CFG.set(provider['NAME'], 'API', provider['API'])
        CFG.set(provider['NAME'], 'GENERALSEARCH', provider['GENERALSEARCH'])
        CFG.set(provider['NAME'], 'BOOKSEARCH', provider['BOOKSEARCH'])
        CFG.set(provider['NAME'], 'MAGSEARCH', provider['MAGSEARCH'])
        CFG.set(provider['NAME'], 'BOOKCAT', provider['BOOKCAT'])
        CFG.set(provider['NAME'], 'MAGCAT', provider['MAGCAT'])
        CFG.set(provider['NAME'], 'EXTENDED', provider['EXTENDED'])
        if provider['HOST'] == oldprovider:
            CFG.set(provider['NAME'], 'UPDATED', provider['UPDATED'])
            CFG.set(provider['NAME'], 'MANUAL', provider['MANUAL'])
        else:
            # host changed: clear the updated/manual flags so the provider is re-probed
            logger.debug('Reset %s as provider changed' % provider['NAME'])
            CFG.set(provider['NAME'], 'UPDATED', '')
            CFG.set(provider['NAME'], 'MANUAL', False)
    add_newz_slot()
    #
    for provider in TORZNAB_PROV:
        check_section(provider['NAME'])
        CFG.set(provider['NAME'], 'ENABLED', provider['ENABLED'])
        oldprovider = check_setting('str', provider['NAME'], 'HOST', '', log=False)
        CFG.set(provider['NAME'], 'HOST', provider['HOST'])
        CFG.set(provider['NAME'], 'API', provider['API'])
        CFG.set(provider['NAME'], 'GENERALSEARCH', provider['GENERALSEARCH'])
        CFG.set(provider['NAME'], 'BOOKSEARCH', provider['BOOKSEARCH'])
        CFG.set(provider['NAME'], 'MAGSEARCH', provider['MAGSEARCH'])
        CFG.set(provider['NAME'], 'BOOKCAT', provider['BOOKCAT'])
        CFG.set(provider['NAME'], 'MAGCAT', provider['MAGCAT'])
        CFG.set(provider['NAME'], 'EXTENDED', provider['EXTENDED'])
        if provider['HOST'] == oldprovider:
            CFG.set(provider['NAME'], 'UPDATED', provider['UPDATED'])
            CFG.set(provider['NAME'], 'MANUAL', provider['MANUAL'])
        else:
            logger.debug('Reset %s as provider changed' % provider['NAME'])
            CFG.set(provider['NAME'], 'UPDATED', '')
            CFG.set(provider['NAME'], 'MANUAL', False)
    add_torz_slot()
    #
    for provider in RSS_PROV:
        check_section(provider['NAME'])
        CFG.set(provider['NAME'], 'ENABLED', provider['ENABLED'])
        CFG.set(provider['NAME'], 'HOST', provider['HOST'])
    add_rss_slot()
    with open(CONFIGFILE + '.new', 'wb') as configfile:
        CFG.write(configfile)
    try:
        os.remove(CONFIGFILE + '.bak')
    except OSError as e:
        # FIX: was "e.errno is not 2" — identity comparison against an int literal
        # only works by CPython small-int caching accident; use != for value compare
        if e.errno != 2:  # doesn't exist is ok
            logger.debug('{} {}{} {}'.format('Error deleting backup file:', CONFIGFILE, '.bak', e.strerror))
    try:
        os.rename(CONFIGFILE, CONFIGFILE + '.bak')
    except OSError as e:
        if e.errno != 2:  # doesn't exist is ok as wouldn't exist until first save
            logger.debug('{} {} {}'.format('Unable to backup config file:', CONFIGFILE, e.strerror))
    try:
        os.rename(CONFIGFILE + '.new', CONFIGFILE)
    except OSError as e:
        logger.debug('{} {} {}'.format('Unable to create new config file:', CONFIGFILE, e.strerror))
def processResultList(resultlist, book, searchtype):
    """ Fuzzy-match the nzb results in resultlist against the wanted book,
        record the first acceptable match in the wanted table and snatch it.

        resultlist: provider results, each with nzbtitle/nzburl/nzbprov/nzbdate/nzbsize/nzbmode
        book:       dict with bookid/authorName/bookName/searchterm
        searchtype: label for logging ("book", "shortbook", "general", "author")
        Returns True if an nzb was snatched, False if nothing matched. """
    myDB = database.DBConnection()
    # characters stripped/replaced from result titles before fuzzy matching
    dictrepl = {
        '...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
        ' + ': ' ', '"': '', ',': ' ', '*': '', '(': '', ')': '', '[': '',
        ']': '', '#': '', '0': '', '1': '', '2': '', '3': '', '4': '',
        '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '',
        '!': '', '-': ' ', '\s\s': ' '
    }
    # ' the ': ' ', ' a ': ' ', ' and ': ' ',
    # ' to ': ' ', ' of ': ' ', ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}
    # lighter cleanup applied to the author/title we are searching for
    dic = {
        '...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
        ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': ''
    }
    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)
    for nzb in resultlist:
        nzbTitle = formatter.latinToAscii(
            formatter.replace_all(nzb['nzbtitle'], dictrepl)).strip()
        nzbTitle = re.sub(r"\s\s+", " ", nzbTitle)  # remove extra whitespace
        author = formatter.latinToAscii(
            formatter.replace_all(book['authorName'], dic))
        title = formatter.latinToAscii(
            formatter.replace_all(book['bookName'], dic))
        # nzbTitle_match = fuzz.token_set_ratio(book['searchterm'], nzbTitle)
        # logger.debug(u"NZB Title sort Match %: " + str(nzbTitle_match) + " for " + nzbTitle)
        # author and title are matched independently against the result title
        nzbAuthor_match = fuzz.token_set_ratio(author, nzbTitle)
        nzbBook_match = fuzz.token_set_ratio(title, nzbTitle)
        logger.debug(u"NZB author/book Match: %s/%s for %s" %
                     (nzbAuthor_match, nzbBook_match, nzbTitle))
        rejected = False
        # reject words only count if they are not part of the wanted author/title
        for word in reject_list:
            if word in nzbTitle.lower() and not word in author.lower() and not word in title.lower():
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (nzbTitle, word))
                break
        if (nzbAuthor_match >= match_ratio and nzbBook_match >= match_ratio and not rejected):
            logger.debug(u'Found NZB: %s using %s search' % (nzb['nzbtitle'], searchtype))
            bookid = book['bookid']
            # rename to our canonical "Author - Title LL.(bookid)" form for the downloader
            nzbTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()
            nzburl = nzb['nzburl']
            nzbprov = nzb['nzbprov']
            nzbdate_temp = nzb['nzbdate']
            nzbsize_temp = nzb['nzbsize']
            # Need to cater for when this is NONE (Issue 35)
            if nzbsize_temp is None:
                nzbsize_temp = 1000
            nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + ' MB'
            nzbdate = formatter.nzbdate2format(nzbdate_temp)
            nzbmode = nzb['nzbmode']
            controlValueDict = {"NZBurl": nzburl}
            newValueDict = {
                "NZBprov": nzbprov,
                "BookID": bookid,
                "NZBdate": formatter.now(),  # when we asked for it
                "NZBsize": nzbsize,
                "NZBtitle": nzbTitle,
                "NZBmode": nzbmode,
                "Status": "Skipped"
            }
            myDB.upsert("wanted", newValueDict, controlValueDict)
            # don't snatch again if a previous result for this book already snatched it
            snatchedbooks = myDB.action(
                'SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                bookid).fetchone()
            if not snatchedbooks:
                if nzbmode == "torznab":
                    snatch = TORDownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
                else:
                    snatch = NZBDownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
                if snatch:
                    notifiers.notify_snatch(nzbTitle + ' at ' + formatter.now())
                    # kick off the postprocessor to pick up the download
                    common.schedule_job(action='Start', target='processDir')
                    return True
    logger.debug("No nzb's found for " +
                 (book["authorName"] + ' ' + book['bookName']).strip() +
                 " using searchtype " + searchtype)
    return False
def create_id(issuename=None):
    """ Return a sha1 hex digest identifying a magazine issue name.

        issuename: issue name/date string (text or bytes)
        Returns the 40 character hex digest.

        hashlib requires bytes, so unicode text is encoded to utf-8 first;
        byte strings (py2 str) pass through unchanged, so existing ids are
        unaffected. """
    data = issuename
    if isinstance(data, type(u'')):
        # sha1 needs bytes; encode unicode text consistently
        data = data.encode('utf-8')
    hashID = sha1(data).hexdigest()
    logger.debug('Issue %s Hash: %s' % (issuename, hashID))
    return hashID
def search_nzb_book(books=None, reset=False):
    """ Search the configured newznab/torznab providers for wanted books.

        books: list of dicts with 'bookid' to search for, or None to search
               everything marked Wanted (backlog search)
        reset: when True, reschedule this job via the common scheduler on exit
        Tries progressively broader searches per book: book, shortbook
        (title with any "(...)" suffix removed), general, then author. """
    if not lazylibrarian.USE_NZB():
        logger.warn('No NEWZNAB/TORZNAB providers set, check config')
        return
    # rename this thread
    threading.currentThread().name = "SEARCHNZBBOOKS"
    myDB = database.DBConnection()
    searchlist = []
    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select(
            'SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"')
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            searchbook = myDB.select(
                'SELECT BookID, AuthorName, BookName from books WHERE BookID="%s" \
                AND Status="Wanted"' % book['bookid'])
            for terms in searchbook:
                searchbooks.append(terms)
    if len(searchbooks) == 0:
        logger.debug("NZB search requested for no books")
        return
    elif len(searchbooks) == 1:
        logger.info('NZB Searching for one book')
    else:
        logger.info('NZB Searching for %i books' % len(searchbooks))
    # build a cleaned searchterm per book
    for searchbook in searchbooks:
        bookid = searchbook[0]
        author = searchbook[1]
        book = searchbook[2]
        dic = {
            '...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
            ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': ''
        }
        dicSearchFormatting = {'.': ' +', ' + ': ' '}
        author = formatter.latinToAscii(formatter.replace_all(author, dic))
        book = formatter.latinToAscii(formatter.replace_all(book, dic))
        if '(' in book:  # may have title (series/extended info)
            book = book.split('(')[0]
        # TRY SEARCH TERM just using author name and book
        author = formatter.latinToAscii(
            formatter.replace_all(author, dicSearchFormatting))
        searchterm = author + ' ' + book
        searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode('utf-8')
        searchterm = re.sub(r'\(.*?\)', '', searchterm).encode('utf-8')
        searchterm = re.sub(r"\s\s+", " ", searchterm)  # strip any double white space
        searchlist.append({
            "bookid": bookid,
            "bookName": searchbook[2],
            "authorName": searchbook[1],
            "searchterm": searchterm.strip()
        })
    if not lazylibrarian.SAB_HOST and not lazylibrarian.NZB_DOWNLOADER_BLACKHOLE and not \
            lazylibrarian.NZBGET_HOST:
        logger.warn(
            'No download method is set, use SABnzbd/NZBGet or blackhole, check config')
    nzb_count = 0
    for book in searchlist:
        # first attempt, try author/title in category "book"
        resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'book')
        if not nproviders:
            logger.warn('No NewzNab or TorzNab providers are set, check config')
            return  # no point in continuing
        found = processResultList(resultlist, book, "book")
        # if you can't find the book, try author/title without any "(extended details, series etc)"
        if not found and '(' in book['bookName']:
            resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'shortbook')
            found = processResultList(resultlist, book, "shortbook")
        # if you can't find the book under "books", you might find under general search
        if not found:
            resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'general')
            found = processResultList(resultlist, book, "general")
        # if you still can't find the book, try with author only
        if not found:
            resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'author')
            found = processResultList(resultlist, book, "author")
        if not found:
            logger.debug("NZB Searches returned no results. Adding book %s to queue." %
                         book['searchterm'])
        else:
            nzb_count = nzb_count + 1
    if nzb_count == 1:
        logger.info("NZBSearch for Wanted items complete, found %s book" % nzb_count)
    else:
        logger.info("NZBSearch for Wanted items complete, found %s books" % nzb_count)
    if reset:  # idiom fix: was "reset == True"
        common.schedule_job(action='Restart', target='search_nzb_book')
def update():
    """ Update the LazyLibrarian installation in place.

        'win'    installs: not supported, just logged.
        'git'    installs: git stash clear + pull origin <current branch>.
        'source' installs: download the github tarball, extract to PROG_DIR/update,
                 then move the files over the running install and bump version.txt. """
    if lazylibrarian.INSTALL_TYPE == 'win':
        logger.debug('(update) Windows install - no update available')
        logger.info('(update) Windows .exe updating not supported yet.')
    elif lazylibrarian.INSTALL_TYPE == 'git':
        branch = getCurrentGitBranch()
        output, err = runGit('stash clear')
        output, err = runGit('pull origin ' + branch)
        if not output:
            logger.error('(update) Couldn\'t download latest version')
        for line in output.split('\n'):
            if 'Already up-to-date.' in line:
                logger.info('(update) No update available, not updating')
                logger.info('(update) Output: ' + str(output))
            elif line.endswith('Aborting.'):
                logger.error('(update) Unable to update from git: ' + line)
                logger.info('(update) Output: ' + str(output))
    elif lazylibrarian.INSTALL_TYPE == 'source':
        # As this is a non GIT install, we assume that the comparison is
        # always to master.
        branch = lazylibrarian.CURRENT_BRANCH
        tar_download_url = 'https://github.com/%s/%s/tarball/%s' % (
            lazylibrarian.GIT_USER, lazylibrarian.GIT_REPO, lazylibrarian.GIT_BRANCH)
        update_dir = os.path.join(lazylibrarian.PROG_DIR, 'update')
        # version_path = os.path.join(lazylibrarian.PROG_DIR, 'version.txt')
        try:
            logger.info('(update) Downloading update from: ' + tar_download_url)
            data = urllib2.urlopen(tar_download_url, timeout=30)
        except (IOError, urllib2.URLError, socket.timeout) as e:
            logger.error(
                "(update) Unable to retrieve new version from " +
                tar_download_url + ", can't update: %s" % e)
            return
        download_name = data.geturl().split('/')[-1]
        tar_download_path = os.path.join(lazylibrarian.PROG_DIR, download_name)
        # Save tar to disk — with-statement ensures the handle is closed even on error
        with open(tar_download_path, 'wb') as f:
            f.write(data.read())
        # Extract the tar to update folder
        logger.info('(update) Extracting file' + tar_download_path)
        with tarfile.open(tar_download_path) as tar:
            tar.extractall(update_dir)
        # Delete the tar.gz
        logger.info('(update) Deleting file' + tar_download_path)
        os.remove(tar_download_path)
        # Find update dir name (the tarball contains a single top-level directory)
        update_dir_contents = [x for x in os.listdir(update_dir)
                               if os.path.isdir(os.path.join(update_dir, x))]
        if len(update_dir_contents) != 1:
            logger.error(u"(update) Invalid update data, update failed: " + str(update_dir_contents))
            return
        content_dir = os.path.join(update_dir, update_dir_contents[0])
        # walk temp folder and move files to main folder
        for dirname, dirnames, filenames in os.walk(content_dir):
            dirname = dirname[len(content_dir) + 1:]
            for curfile in filenames:
                old_path = os.path.join(content_dir, dirname, curfile)
                new_path = os.path.join(lazylibrarian.PROG_DIR, dirname, curfile)
                if os.path.isfile(new_path):
                    os.remove(new_path)
                os.renames(old_path, new_path)
        # Update version.txt
        updateVersionFile(lazylibrarian.LATEST_VERSION)
    else:
        logger.error("(update) Cannot perform update - Install Type not set")
        return
def magazineScan(thread=None):
    """Scan MAG_DEST_FOLDER for magazine issue files and sync the database.

    With FULL_SCAN enabled, first removes Issues rows whose files are gone
    (resetting the parent magazine's dates) and deletes magazines left with
    no issues.  Then walks the magazine folder, matches filenames against a
    regex built from the MAG_DEST_FILE template, upserts magazines/issues,
    creates covers, and finally recalculates each magazine's added/acquired/
    issue dates from the issues found on disk.

    :param thread: if None, this is running as its own thread and the
                   current thread is renamed "MAGAZINESCAN".
    """
    # rename this thread
    if thread is None:
        threading.currentThread().name = "MAGAZINESCAN"

    myDB = database.DBConnection()

    mag_path = lazylibrarian.MAG_DEST_FOLDER
    # keep only the fixed prefix of the template, up to the first $token
    if '$' in mag_path:
        mag_path = mag_path.split('$')[0]

    if lazylibrarian.MAG_RELATIVE:
        # relative paths live under DESTINATION_DIR and are prefixed with
        # '_' (or '.') so they sort away from author folders
        if mag_path[0] not in '._':
            mag_path = '_' + mag_path
        mag_path = os.path.join(lazylibrarian.DESTINATION_DIR, mag_path).encode(lazylibrarian.SYS_ENCODING)
    else:
        mag_path = mag_path.encode(lazylibrarian.SYS_ENCODING)

    if lazylibrarian.FULL_SCAN:
        mags = myDB.select('select * from Issues')
        # check all the issues are still there, delete entry if not
        # NOTE(review): SQL here is built by string interpolation; a title
        # containing a double quote would break the statement - the rest of
        # this function shares the same pattern.
        for mag in mags:
            title = mag['Title']
            issuedate = mag['IssueDate']
            issuefile = mag['IssueFile']

            if issuefile and not os.path.isfile(issuefile):
                myDB.action('DELETE from Issues where issuefile="%s"' % issuefile)
                logger.info('Issue %s - %s deleted as not found on disk' % (title, issuedate))
                controlValueDict = {"Title": title}
                newValueDict = {
                    "LastAcquired": None,       # clear magazine dates
                    "IssueDate": None,          # we will fill them in again later
                    "IssueStatus": "Skipped"    # assume there are no issues now
                }
                myDB.upsert("magazines", newValueDict, controlValueDict)
                logger.debug('Magazine %s details reset' % title)

        mags = myDB.select('SELECT * from magazines')
        # now check the magazine titles and delete any with no issues
        for mag in mags:
            title = mag['Title']
            count = myDB.select(
                'SELECT COUNT(Title) as counter FROM issues WHERE Title="%s"' % title)
            issues = count[0]['counter']
            if not issues:
                logger.debug('Magazine %s deleted as no issues found' % title)
                myDB.action('DELETE from magazines WHERE Title="%s"' % title)

    logger.info(' Checking [%s] for magazines' % mag_path)

    # backslash-escape every template character so it matches literally
    # (this also makes re.VERBOSE below harmless for embedded spaces)
    matchString = ''
    for char in lazylibrarian.MAG_DEST_FILE:
        matchString = matchString + '\\' + char
    # massage the MAG_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ''
    count = -1
    booktype_list = formatter.getList(lazylibrarian.MAG_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + '|' + book_type
    # NOTE(review): '[' + booktypes + ']' builds a character CLASS, not an
    # alternation - e.g. "pdf|epub" becomes [pdf|epub], matching any single
    # one of those characters.  '(' + booktypes + ')' looks intended; confirm
    # before changing, as pattern.match is unanchored at the end.
    matchString = matchString.replace(
        "\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
        "\\$\\T\\i\\t\\l\\e", "(?P<title>.*?)") + '\.[' + booktypes + ']'
    pattern = re.compile(matchString, re.VERBOSE)

    for dirname, dirnames, filenames in os.walk(mag_path):
        for fname in filenames[:]:
            # maybe not all magazines will be pdf?
            if formatter.is_valid_booktype(fname, booktype='mag'):
                try:
                    match = pattern.match(fname)
                    if match:
                        issuedate = match.group("issuedate")
                        title = match.group("title")
                        #print issuedate
                        #print title
                    else:
                        logger.debug("Pattern match failed for [%s]" % fname)
                        continue
                    #title = fname.split('-')[3]
                    #title = title.split('.')[-2]
                    #title = title.strip()
                    #issuedate = fname.split(' ')[0]
                # NOTE(review): bare except - also hides programming errors,
                # not just malformed names
                except:
                    logger.debug("Invalid name format for [%s]" % fname)
                    continue

                logger.debug("Found Issue %s" % fname)

                issuefile = os.path.join(dirname, fname)  # full path to issue.pdf
                mtime = os.path.getmtime(issuefile)
                # acquisition date taken from the file's modification time
                iss_acquired = datetime.date.isoformat(
                    datetime.date.fromtimestamp(mtime))

                # magazines : Title, Frequency, Regex, Status, MagazineAdded, LastAcquired, IssueDate, IssueStatus
                # issues    : Title, IssueAcquired, IssueDate, IssueFile

                controlValueDict = {"Title": title}

                # is this magazine already in the database?
                mag_entry = myDB.select(
                    'SELECT * from magazines WHERE Title="%s"' % title)
                if not mag_entry:
                    # need to add a new magazine to the database
                    newValueDict = {
                        "Frequency": None,  # unused currently
                        "Regex": None,
                        "Status": "Active",
                        "MagazineAdded": None,
                        "LastAcquired": None,
                        "IssueDate": None,
                        "IssueStatus": "Skipped"
                    }
                    logger.debug("Adding magazine %s" % title)
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # NOTE(review): 'lastacquired' looks like a typo for
                    # 'maglastacquired' (read in the else-branch below); it is
                    # harmless only because the magazineadded-is-None path
                    # never reads maglastacquired - confirm.
                    lastacquired = None
                    magissuedate = None
                    magazineadded = None
                else:
                    maglastacquired = mag_entry[0]['LastAcquired']
                    magissuedate = mag_entry[0]['IssueDate']
                    magazineadded = mag_entry[0]['MagazineAdded']

                # is this issue already in the database?
                controlValueDict = {"Title": title, "IssueDate": issuedate}
                issue_id = create_id("%s %s" % (title, issuedate))
                iss_entry = myDB.select(
                    'SELECT * from issues WHERE Title="%s" and IssueDate="%s"' %
                    (title, issuedate))
                if not iss_entry:
                    newValueDict = {
                        "IssueAcquired": iss_acquired,
                        "IssueID": issue_id,
                        "IssueFile": issuefile
                    }
                    logger.debug("Adding issue %s %s" % (title, issuedate))
                else:
                    # don't really need to do this each time
                    newValueDict = {"IssueID": issue_id}
                myDB.upsert("Issues", newValueDict, controlValueDict)

                create_cover(issuefile)

                # see if this issues date values are useful
                # if its a new magazine, magazineadded,magissuedate,lastacquired are all None
                # if magazineadded is NOT None, but the others are, we've deleted one or more issues
                # so the most recent dates may be wrong and need to be updated.
                # Set magazine_issuedate to issuedate of most recent issue we have
                # Set magazine_added to acquired date of earliest issue we have
                # Set magazine_lastacquired to acquired date of most recent issue we have
                # acquired dates are read from magazine file timestamps
                if magazineadded is None:  # new magazine, this might be the only issue
                    controlValueDict = {"Title": title}
                    newValueDict = {
                        "MagazineAdded": iss_acquired,
                        "LastAcquired": iss_acquired,
                        "IssueDate": issuedate,
                        "IssueStatus": "Open"
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                else:
                    if iss_acquired < magazineadded:
                        controlValueDict = {"Title": title}
                        newValueDict = {"MagazineAdded": iss_acquired}
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                    if maglastacquired is None or iss_acquired > maglastacquired:
                        controlValueDict = {"Title": title}
                        newValueDict = {"LastAcquired": iss_acquired}
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                    if magissuedate is None or issuedate > magissuedate:
                        controlValueDict = {"Title": title}
                        newValueDict = {"IssueDate": issuedate}
                        myDB.upsert("magazines", newValueDict, controlValueDict)

    magcount = myDB.action("select count(*) from magazines").fetchone()
    isscount = myDB.action("select count(*) from issues").fetchone()
    logger.info("Magazine scan complete, found %s magazines, %s issues" %
                (magcount['count(*)'], isscount['count(*)']))
def get_author_books(self, authorid=None, authorname=None, refresh=False):
    """Fetch every book for a GoodReads author id and upsert into 'books'.

    Marks the author row 'Loading', then walks the paged author/list API.
    For each book: determine the language (cached isbn prefix -> LibraryThing
    -> GoodReads book page, skipped entirely if IMP_PREFLANG contains 'All'),
    drop books in unwanted languages or with bad characters in the title,
    upsert the rest, and backfill cover / series / work-page details.

    :param authorid:   GoodReads author id (string, used in the URL)
    :param authorname: display name, used only for logging
    :param refresh:    accepted for interface compatibility; unused here
    :return: books_dict - the empty list on error paths and, in this code
             path, on normal completion too (nothing is appended to it).
    """
    api_hits = 0
    gr_lang_hits = 0
    lt_lang_hits = 0
    gb_lang_change = 0
    cache_hits = 0
    not_cached = 0
    URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(
        self.params)

    # mark the author as loading while we work
    myDB = database.DBConnection()
    controlValueDict = {"AuthorID": authorid}
    newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    books_dict = []
    try:
        rootxml, in_cache = self.get_request(URL)
    except Exception as e:
        logger.error("Error fetching author books: %s" % e)
        return books_dict
    if rootxml is None:
        logger.debug("Error requesting author books")
        return books_dict
    if not in_cache:
        api_hits = api_hits + 1

    resultxml = rootxml.getiterator('book')
    valid_langs = [valid_lang.strip()
                   for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]

    if not len(resultxml):
        logger.warn('[%s] No books found for author with ID: %s' %
                    (authorname, authorid))
    else:
        logger.debug("[%s] Now processing books with GoodReads API" % authorname)
        resultsCount = 0
        removedResults = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0
        logger.debug(u"url " + URL)
        authorNameResult = rootxml.find('./author/name').text
        logger.debug(u"author name " + authorNameResult)
        loopCount = 1

        while resultxml is not None:
            for book in resultxml:
                total_count = total_count + 1

                if book.find('publication_year').text is None:
                    pubyear = "0000"
                else:
                    pubyear = book.find('publication_year').text

                try:
                    bookimg = book.find('image_url').text
                    if 'nocover' in bookimg:
                        bookimg = 'images/nocover.png'
                # BUGFIX: was "except KeyError, AttributeError:" - Python 2
                # comma syntax which only caught KeyError (binding it to the
                # name AttributeError) and is a SyntaxError on Python 3.
                except (KeyError, AttributeError):
                    bookimg = 'images/nocover.png'

                # Language lookup strategy: ISBN prefixes (digits 0-2 of a
                # 10-digit ISBN, 3-5 of a 13-digit one) encode the region/
                # language, so prefix->lang pairs are cached in the
                # 'languages' table (created in init.py), including Unknown
                # results so we never re-ask.  On a cache miss we try
                # LibraryThing (1 req/sec, 1000/day), then the GoodReads book
                # page (1 req/sec).  If IMP_PREFLANG contains "All" the whole
                # lookup is skipped - faster, but foreign editions get in.
                bookLanguage = "Unknown"
                find_field = "id"
                isbn = ""
                isbnhead = ""
                if "All" not in valid_langs:  # do we care about language
                    if book.find('isbn').text is not None:
                        find_field = "isbn"
                        isbn = book.find('isbn').text
                        isbnhead = isbn[0:3]
                    else:
                        if book.find('isbn13').text is not None:
                            find_field = "isbn13"
                            isbn = book.find('isbn13').text
                            isbnhead = isbn[3:6]

                    if find_field != 'id':  # isbn or isbn13 found
                        match = myDB.action(
                            'SELECT lang FROM languages where isbn = "%s"' %
                            (isbnhead)).fetchone()
                        if match:
                            bookLanguage = match['lang']
                            cache_hits = cache_hits + 1
                            logger.debug(
                                "Found cached language [%s] for %s [%s]" %
                                (bookLanguage, find_field, isbnhead))
                        else:
                            # no match in cache, try searching librarything for a language code using the isbn
                            # if no language found, librarything return value is "invalid" or "unknown"
                            # returns plain text, not xml
                            BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + isbn
                            try:
                                bookwork.librarything_wait()
                                resp = urllib2.urlopen(BOOK_URL, timeout=30).read()
                                lt_lang_hits = lt_lang_hits + 1
                                logger.debug(
                                    "LibraryThing reports language [%s] for %s" %
                                    (resp, isbnhead))
                                if resp == 'invalid' or resp == 'unknown':
                                    find_field = "id"  # reset the field to force search on goodreads
                                else:
                                    bookLanguage = resp  # found a language code
                                    myDB.action(
                                        'insert into languages values ("%s", "%s")' %
                                        (isbnhead, bookLanguage))
                                    logger.debug(u"LT language: " + bookLanguage)
                            except Exception as e:
                                find_field = "id"  # reset the field to search on goodreads
                                logger.error(
                                    "Error finding LT language result: %s" % e)

                    if find_field == 'id':
                        # no earlier match, we'll have to search the goodreads api
                        try:
                            if book.find(find_field).text is not None:
                                BOOK_URL = 'http://www.goodreads.com/book/show?id=' + \
                                    book.find(find_field).text + '&' + urllib.urlencode(self.params)
                                logger.debug(u"Book URL: " + BOOK_URL)
                                try:
                                    # respect the 1-request-per-second GR rule
                                    time_now = int(time.time())
                                    if time_now <= lazylibrarian.LAST_GOODREADS:
                                        time.sleep(1)
                                    BOOK_rootxml, in_cache = self.get_request(
                                        BOOK_URL)
                                    if BOOK_rootxml is None:
                                        logger.debug(
                                            'Error requesting book language code'
                                        )
                                        bookLanguage = ""
                                    else:
                                        if not in_cache:
                                            # only update last_goodreads if the result wasn't found in the cache
                                            lazylibrarian.LAST_GOODREADS = time_now
                                        bookLanguage = BOOK_rootxml.find(
                                            './book/language_code').text
                                except Exception as e:
                                    logger.error(
                                        "Error finding book results: %s" % e)
                                if not in_cache:
                                    gr_lang_hits = gr_lang_hits + 1
                                if not bookLanguage:
                                    bookLanguage = "Unknown"
                                if isbnhead != "":
                                    # cache the answer against the isbn prefix
                                    myDB.action(
                                        'insert into languages values ("%s", "%s")' %
                                        (isbnhead, bookLanguage))
                                    logger.debug(
                                        "GoodReads reports language [%s] for %s" %
                                        (bookLanguage, isbnhead))
                                else:
                                    # GR didn't give an isbn so we can't cache it,
                                    # just use the language for this one book
                                    not_cached = not_cached + 1
                                logger.debug(u"GR language: " + bookLanguage)
                            else:
                                logger.debug(
                                    "No %s provided for [%s]" %
                                    (find_field, book.find('title').text))
                                # continue
                        except Exception as e:
                            logger.debug(u"An error has occured: %s" % e)

                    if bookLanguage not in valid_langs:
                        logger.debug('Skipped a book with language %s' %
                                     bookLanguage)
                        ignored = ignored + 1
                        continue

                bookname = book.find('title').text
                bookid = book.find('id').text
                bookdesc = book.find('description').text
                bookisbn = book.find('isbn').text
                bookpub = book.find('publisher').text
                booklink = book.find('link').text
                bookrate = float(book.find('average_rating').text)
                bookpages = book.find('num_pages').text
                series, seriesNum = formatter.bookSeries(bookname)

                find_book_status = myDB.select(
                    'SELECT * FROM books WHERE BookID = "%s"' % bookid)
                if find_book_status:
                    for resulted in find_book_status:
                        book_status = resulted['Status']
                else:
                    book_status = lazylibrarian.NEWBOOK_STATUS

                bookname = bookname.replace(':', '').replace('"', '').replace(
                    "'", "")
                bookname = unidecode(u'%s' % bookname)
                bookname = bookname.strip()  # strip whitespace

                if not (re.match('[^\w-]', bookname)
                        ):  # remove books with bad characters in title
                    if book_status != "Ignored":
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {
                            "AuthorName": authorNameResult,
                            "AuthorID": authorid,
                            "AuthorLink": None,
                            "BookName": bookname,
                            "BookSub": None,
                            "BookDesc": bookdesc,
                            "BookIsbn": bookisbn,
                            "BookPub": bookpub,
                            "BookGenre": None,
                            "BookImg": bookimg,
                            "BookLink": booklink,
                            "BookRate": bookrate,
                            "BookPages": bookpages,
                            "BookDate": pubyear,
                            "BookLang": bookLanguage,
                            "Status": book_status,
                            "BookAdded": formatter.today(),
                            "Series": series,
                            "SeriesNum": seriesNum
                        }
                        resultsCount = resultsCount + 1
                        myDB.upsert("books", newValueDict, controlValueDict)
                        logger.debug(u"Book found: " +
                                     book.find('title').text + " " + pubyear)

                        if 'nocover' in bookimg or 'nophoto' in bookimg:
                            # try to get a cover from librarything
                            workcover = bookwork.getBookCover(bookid)
                            if workcover:
                                logger.debug(u'Updated cover for %s to %s' %
                                             (bookname, workcover))
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"BookImg": workcover}
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                        elif bookimg.startswith('http'):
                            link = bookwork.cache_cover(bookid, bookimg)
                            if link is not None:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"BookImg": link}
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)

                        if seriesNum is None:  # was "== None"
                            # try to get series info from librarything
                            series, seriesNum = bookwork.getWorkSeries(bookid)
                            if seriesNum:
                                logger.debug(u'Updated series: %s [%s]' %
                                             (series, seriesNum))
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "Series": series,
                                    "SeriesNum": seriesNum
                                }
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)

                        worklink = bookwork.getWorkPage(bookid)
                        if worklink:
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"WorkPage": worklink}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)

                        if not find_book_status:
                            logger.debug(u"[%s] Added book: %s" %
                                         (authorname, bookname))
                            added_count = added_count + 1
                        else:
                            logger.debug(u"[%s] Updated book: %s" %
                                         (authorname, bookname))
                            updated_count = updated_count + 1
                    else:
                        book_ignore_count = book_ignore_count + 1
                else:
                    logger.debug(u"removed result [" + bookname +
                                 "] for bad characters")
                    removedResults = removedResults + 1

            # fetch the next page of results; stop when a page is empty
            loopCount = loopCount + 1
            URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + \
                urllib.urlencode(self.params) + '&page=' + str(loopCount)
            resultxml = None
            try:
                rootxml, in_cache = self.get_request(URL)
                if rootxml is None:
                    logger.debug('Error requesting next page of results')
                else:
                    resultxml = rootxml.getiterator('book')
                    if not in_cache:
                        api_hits = api_hits + 1
            except Exception as e:
                resultxml = None
                logger.error("Error finding next page of results: %s" % e)

            if resultxml is not None:
                if all(False for book in resultxml):  # returns True if iterator is empty
                    resultxml = None

    # consistent with the early-return error paths (the original fell off
    # the end here, returning None)
    return books_dict
def create_cover(issuefile=None):
    """Create a .jpg thumbnail cover next to a magazine issue file.

    Takes page [0] of the issue using, in order of preference: the external
    convert program named in IMP_CONVERT, the wand library, or PythonMagick.
    Does nothing when IMP_CONVERT is the special flag 'None' (covers not
    wanted), when the file has no extension, or when the cover already
    exists.  Failures are logged, never raised.
    """
    if lazylibrarian.IMP_CONVERT == 'None':  # special flag to say "no covers required"
        return

    # create a thumbnail cover if there isn't one
    # BUGFIX: previously built the cover name with issuefile.replace(extn,
    # '.jpg'), which replaced EVERY occurrence of the extension text in the
    # path; splitext only swaps the final extension.
    base, extn = os.path.splitext(issuefile)
    if not extn:
        logger.debug('Unable to create cover for %s, no extension?' % issuefile)
        return
    coverfile = base + '.jpg'
    if os.path.isfile(coverfile):
        return

    converter = lazylibrarian.MAGICK
    if len(lazylibrarian.IMP_CONVERT):  # external convert overrides the libraries
        converter = lazylibrarian.IMP_CONVERT
    logger.debug("Creating cover for %s using %s" % (issuefile, converter))
    try:
        # No PythonMagick in python3, hence allow wand, but more complicated
        # to install - try to use external imagemagick convert?
        # should work on win/mac/linux as long as imagemagick is installed
        # and config points to external "convert" program
        if len(lazylibrarian.IMP_CONVERT):
            try:
                params = [
                    lazylibrarian.IMP_CONVERT, issuefile + '[0]', coverfile
                ]
                res = subprocess.check_output(params, stderr=subprocess.STDOUT)
                if res:
                    logger.debug('%s reports: %s' %
                                 (lazylibrarian.IMP_CONVERT, res))
            except subprocess.CalledProcessError as e:
                logger.debug(params)
                logger.debug('ImageMagick "convert" failed %s' % e.output)
        elif lazylibrarian.MAGICK == 'wand':
            with Image(filename=issuefile + '[0]') as img:
                img.save(filename=coverfile)
        elif lazylibrarian.MAGICK == 'pythonmagick':
            img = PythonMagick.Image()
            img.read(issuefile + '[0]')
            img.write(coverfile)
    # BUGFIX: was a bare "except:" that hid the reason and always blamed
    # lazylibrarian.MAGICK even when the external converter was in use
    except Exception as e:
        logger.debug("Unable to create cover for %s using %s: %s" %
                     (issuefile, converter, e))
class GoodReads: # http://www.goodreads.com/api/ def __init__(self, name=None): self.name = name.encode('utf-8') # self.type = type if not lazylibrarian.GR_API: logger.warn('No Goodreads API key, check config') self.params = {"key": lazylibrarian.GR_API} def get_request(self, my_url): request = urllib2.Request(my_url) if lazylibrarian.PROXY_HOST: request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE) request.add_header('User-Agent', USER_AGENT) # Original simplecache # opener = urllib.request.build_opener(SimpleCache.CacheHandler(".AuthorCache"), # SimpleCache.ThrottlingProcessor(5)) # resp = opener.open(request) # Simplified simplecache, no throttling, no headers as we dont use them, added cache expiry # we can simply cache the xml with... # hashfilename = hash url # if hashfilename exists, return its contents # if not, urllib2.urlopen() # store the xml # return the xml, and whether it was found in the cache # Need to expire the cache entries, or we won't search for anything new # default to 30 days for now. Authors dont write that quickly. 
# cacheLocation = "XMLCache" expireafter = lazylibrarian.CACHE_AGE cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation) if not os.path.exists(cacheLocation): os.mkdir(cacheLocation) myhash = md5.new(request.get_full_url()).hexdigest() valid_cache = False hashname = cacheLocation + os.sep + myhash + ".xml" if os.path.isfile(hashname): cache_modified_time = os.stat(hashname).st_mtime time_now = time.time() if cache_modified_time < time_now - ( expireafter * 24 * 60 * 60): # expire after this many seconds # Cache is old, delete entry os.remove(hashname) else: valid_cache = True if valid_cache: lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1 logger.debug(u"CacheHandler: Returning CACHED response for %s" % request.get_full_url()) with open(hashname, "r") as cachefile: source_xml = cachefile.read() else: lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1 try: resp = urllib2.urlopen(request, timeout=30) # don't get stuck except socket.timeout as e: logger.warn(u"Retrying - got timeout on %s" % my_url) try: resp = urllib2.urlopen(request, timeout=30) # don't get stuck except (urllib2.URLError, socket.timeout) as e: logger.error(u"Error getting response for %s: %s" % (my_url, e)) return None, False except urllib2.URLError as e: logger.error(u"URLError getting response for %s: %s" % (my_url, e)) return None, False if str(resp.getcode()).startswith("2"): # (200 OK etc) logger.debug(u"CacheHandler: Caching response for %s" % my_url) try: source_xml = resp.read() # .decode('utf-8') except socket.error as e: logger.error(u"Error reading xml: %s" % e) return None, False with open(hashname, "w") as cachefile: cachefile.write(source_xml) else: logger.warn(u"Got error response for %s: %s" % (my_url, resp.getcode())) return None, False root = ElementTree.fromstring(source_xml) return root, valid_cache def find_results(self, authorname=None, queue=None): threading.currentThread().name = "GR-SEARCH" resultlist = [] api_hits = 0 # Goodreads doesn't 
like initials followed by spaces, # eg "M L Hamilton", needs "M. L. Hamilton" or "M.L.Hamilton" # but DOES need spaces if not initials eg "Tom.Holt" fails, but "Tom Holt" works if authorname[1] == ' ': authorname = authorname.replace(' ', '.') authorname = authorname.replace('..', '.') url = urllib.quote_plus(authorname.encode('utf-8')) set_url = 'http://www.goodreads.com/search.xml?q=' + url + '&' + urllib.urlencode( self.params) logger.debug('Now searching GoodReads API with keyword: ' + authorname) logger.debug('Searching for %s at: %s' % (authorname, set_url)) try: try: rootxml, in_cache = self.get_request(set_url) except Exception as e: logger.error("Error finding results: %s" % e) return if not len(rootxml): logger.debug("Error requesting results") return resultxml = rootxml.getiterator('work') resultcount = 0 for author in resultxml: bookdate = "0001-01-01" if (author.find('original_publication_year').text is None): bookdate = "0000" else: bookdate = author.find('original_publication_year').text authorNameResult = author.find('./best_book/author/name').text booksub = "" bookpub = "" booklang = "Unknown" try: bookimg = author.find('./best_book/image_url').text if (bookimg == 'http://www.goodreads.com/assets/nocover/111x148.png' ): bookimg = 'images/nocover.png' except KeyError: bookimg = 'images/nocover.png' except AttributeError: bookimg = 'images/nocover.png' try: bookrate = author.find('average_rating').text except KeyError: bookrate = 0 bookpages = '0' bookgenre = '' bookdesc = '' bookisbn = '' booklink = 'http://www.goodreads.com/book/show/' + author.find( './best_book/id').text if (author.find('./best_book/title').text is None): bookTitle = "" else: bookTitle = author.find('./best_book/title').text author_fuzz = fuzz.token_set_ratio(authorNameResult, authorname) book_fuzz = fuzz.token_set_ratio(bookTitle, authorname) try: isbn_check = int(authorname[:-1]) if (len(str(isbn_check)) == 9) or (len(str(isbn_check)) == 12): isbn_fuzz = int(100) else: 
isbn_fuzz = int(0) except: isbn_fuzz = int(0) highest_fuzz = max(author_fuzz, book_fuzz, isbn_fuzz) bookid = author.find('./best_book/id').text resultlist.append({ 'authorname': author.find('./best_book/author/name').text, 'bookid': bookid, 'authorid': author.find('./best_book/author/id').text, 'bookname': bookTitle.encode("ascii", "ignore"), 'booksub': None, 'bookisbn': bookisbn, 'bookpub': bookpub, 'bookdate': bookdate, 'booklang': booklang, 'booklink': booklink, 'bookrate': float(bookrate), 'bookimg': bookimg, 'bookpages': bookpages, 'bookgenre': bookgenre, 'bookdesc': bookdesc, 'author_fuzz': author_fuzz, 'book_fuzz': book_fuzz, 'isbn_fuzz': isbn_fuzz, 'highest_fuzz': highest_fuzz, 'num_reviews': float(bookrate) }) resultcount = resultcount + 1 except urllib2.HTTPError as err: if err.code == 404: logger.error('Received a 404 error when searching for author') if err.code == 403: logger.warn('Access to api is denied: usage exceeded') else: logger.error( 'An unexpected error has occurred when searching for an author' ) logger.debug('Found %s results with keyword: %s' % (resultcount, authorname)) logger.debug('The GoodReads API was hit %s times for keyword %s' % (str(api_hits), authorname)) queue.put(resultlist) def find_author_id(self, refresh=False): author = self.name # Goodreads doesn't like initials followed by spaces, # eg "M L Hamilton", needs "M. L. Hamilton" or "M.L.Hamilton" # but DOES need spaces if not initials eg "Tom.Holt" fails, but "Tom Holt" works if author[1] == ' ': author = author.replace(' ', '.') author = author.replace('..', '.') URL = 'http://www.goodreads.com/api/author_url/' + urllib.quote( author) + '?' 
+ urllib.urlencode(self.params) logger.debug("Searching for author with name: %s" % author) authorlist = [] try: rootxml, in_cache = self.get_request(URL) except Exception as e: logger.error("Error finding authorid: %s, %s" % (e, URL)) return authorlist if rootxml is None: logger.debug("Error requesting authorid") return authorlist resultxml = rootxml.getiterator('author') if not len(resultxml): logger.warn('No authors found with name: %s' % author) else: # In spite of how this looks, goodreads only returns one result, even if there are multiple matches # we just have to hope we get the right one. eg search for "James Lovelock" returns "James E. Lovelock" # who only has one book listed under googlebooks, the rest are under "James Lovelock" # goodreads has all his books under "James E. Lovelock". Can't come up with a good solution yet. # For now we'll have to let the user handle this by selecting/adding the author manually for author in resultxml: authorid = author.attrib.get("id") authorname = author[0].text authorlist = self.get_author_info(authorid, authorname, refresh) return authorlist def get_author_info(self, authorid=None, authorname=None, refresh=False): URL = 'http://www.goodreads.com/author/show/' + authorid + '.xml?' 
+ urllib.urlencode( self.params) author_dict = {} try: rootxml, in_cache = self.get_request(URL) except Exception as e: logger.error("Error getting author info: %s" % e) return author_dict if rootxml is None: logger.debug("Error requesting author info") return author_dict resultxml = rootxml.find('author') if not len(resultxml): logger.warn('No author found with ID: ' + authorid) else: logger.debug("[%s] Processing info for authorID: %s" % (authorname, authorid)) # PAB added authorname to author_dict - this holds the intact name preferred by GR author_dict = { 'authorid': resultxml[0].text, 'authorlink': resultxml.find('link').text, 'authorimg': resultxml.find('image_url').text, 'authorborn': resultxml.find('born_at').text, 'authordeath': resultxml.find('died_at').text, 'totalbooks': resultxml.find('works_count').text, 'authorname': authorname } return author_dict def get_author_books(self, authorid=None, authorname=None, refresh=False): api_hits = 0 gr_lang_hits = 0 lt_lang_hits = 0 gb_lang_change = 0 cache_hits = 0 not_cached = 0 URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' 
+ urllib.urlencode( self.params) # Artist is loading myDB = database.DBConnection() controlValueDict = {"AuthorID": authorid} newValueDict = {"Status": "Loading"} myDB.upsert("authors", newValueDict, controlValueDict) books_dict = [] try: rootxml, in_cache = self.get_request(URL) except Exception as e: logger.error("Error fetching author books: %s" % e) return books_dict if rootxml is None: logger.debug("Error requesting author books") return books_dict if not in_cache: api_hits = api_hits + 1 resultxml = rootxml.getiterator('book') valid_langs = ([ valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',') ]) if not len(resultxml): logger.warn('[%s] No books found for author with ID: %s' % (authorname, authorid)) else: logger.debug("[%s] Now processing books with GoodReads API" % authorname) resultsCount = 0 removedResults = 0 ignored = 0 added_count = 0 updated_count = 0 book_ignore_count = 0 total_count = 0 logger.debug(u"url " + URL) authorNameResult = rootxml.find('./author/name').text logger.debug(u"author name " + authorNameResult) loopCount = 1 while resultxml is not None: for book in resultxml: total_count = total_count + 1 if (book.find('publication_year').text is None): pubyear = "0000" else: pubyear = book.find('publication_year').text try: bookimg = book.find('image_url').text if ('nocover' in bookimg): bookimg = 'images/nocover.png' except KeyError, AttributeError: bookimg = 'images/nocover.png' # PAB this next section tries to get the book language using the isbn13 to look it up. If no isbn13 we skip the # book entirely, rather than including it with an "Unknown" language. Changed this so we can still include the book # with language set to "Unknown". There is a setting in config.ini to allow or skip books with "Unknown" language # if you really don't want to include them. # Not all GR books have isbn13 filled in, but all have a GR bookid, which we've already got, so use that. 
# Also, with GR API rules we can only call the API once per second, which slows us down a lot when all we want # is to get the language. We sleep for one second per book that GR knows about for each author you have in your # library. The libraryThing API has the same 1 second restriction, and is limited to 1000 hits per day, but has # fewer books with unknown language. To get around this and speed up the process, see if we already have a book # in the database with a similar start to the ISBN. The way ISBNs work, digits 3-5 of a 13 char ISBN or digits 0-2 # of a 10 digit ISBN indicate the region/language so if two books have the same 3 digit isbn code, they _should_ # be the same language. # I ran a simple python script on my library of 1500 books, and these codes were 100% correct on matching book # languages, no mis-matches. It did result in a small number of books with "unknown" language being wrongly matched # but most "unknown" were matched to the correct language. # We could look up ISBNs we already know about in the database, but this only holds books in the languages we want # to keep, which reduces the number of cache hits, so we create a new database table, holding ALL results including # the ISBNs for languages we don't want and books we reject. # The new table is created (if not exists) in init.py so by the time we get here there is an existing table. # If we haven't an already matching partial ISBN, look up language code from libraryThing # "http://www.librarything.com/api/thingLang.php?isbn=1234567890" # If you find a matching language, add it to the database. If "unknown" or "invalid", try GR as maybe GR can # provide a match. # If both LT and GR return unknown, add isbn to db as "unknown". No point in repeatedly asking LT for a code # it's told you it doesn't know. # As an extra option, if language includes "All" in config.ini, we can skip this whole section and process # everything much faster by not querying for language at all. 
# It does mean we include a lot of unwanted foreign translations in the database, but it's _much_ faster. bookLanguage = "Unknown" find_field = "id" isbn = "" isbnhead = "" if "All" not in valid_langs: # do we care about language if (book.find('isbn').text is not None): find_field = "isbn" isbn = book.find('isbn').text isbnhead = isbn[0:3] else: if (book.find('isbn13').text is not None): find_field = "isbn13" isbn = book.find('isbn13').text isbnhead = isbn[3:6] if (find_field != 'id'): # isbn or isbn13 found match = myDB.action( 'SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone() if (match): bookLanguage = match['lang'] cache_hits = cache_hits + 1 logger.debug( "Found cached language [%s] for %s [%s]" % (bookLanguage, find_field, isbnhead)) else: # no match in cache, try searching librarything for a language code using the isbn # if no language found, librarything return value is "invalid" or "unknown" # returns plain text, not xml BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + isbn try: bookwork.librarything_wait() resp = urllib2.urlopen(BOOK_URL, timeout=30).read() lt_lang_hits = lt_lang_hits + 1 logger.debug( "LibraryThing reports language [%s] for %s" % (resp, isbnhead)) if (resp == 'invalid' or resp == 'unknown'): find_field = "id" # reset the field to force search on goodreads else: bookLanguage = resp # found a language code myDB.action( 'insert into languages values ("%s", "%s")' % (isbnhead, bookLanguage)) logger.debug(u"LT language: " + bookLanguage) except Exception as e: find_field = "id" # reset the field to search on goodreads logger.error( "Error finding LT language result: %s" % e) if (find_field == 'id'): # [or bookLanguage == "Unknown"] no earlier match, we'll have to search the goodreads api try: if (book.find(find_field).text is not None): BOOK_URL = 'http://www.goodreads.com/book/show?id=' + \ book.find(find_field).text + '&' + urllib.urlencode(self.params) logger.debug(u"Book URL: " + BOOK_URL) try: 
time_now = int(time.time()) if time_now <= lazylibrarian.LAST_GOODREADS: time.sleep(1) BOOK_rootxml, in_cache = self.get_request( BOOK_URL) if BOOK_rootxml is None: logger.debug( 'Error requesting book language code' ) bookLanguage = "" else: if not in_cache: # only update last_goodreads if the result wasn't found in the cache lazylibrarian.LAST_GOODREADS = time_now bookLanguage = BOOK_rootxml.find( './book/language_code').text except Exception as e: logger.error( "Error finding book results: %s" % e) if not in_cache: gr_lang_hits = gr_lang_hits + 1 if not bookLanguage: bookLanguage = "Unknown" if (isbnhead != ""): # GR didn't give an isbn so we can't cache it, just use language for this book myDB.action( 'insert into languages values ("%s", "%s")' % (isbnhead, bookLanguage)) logger.debug( "GoodReads reports language [%s] for %s" % (bookLanguage, isbnhead)) else: not_cached = not_cached + 1 logger.debug(u"GR language: " + bookLanguage) else: logger.debug( "No %s provided for [%s]" % (find_field, book.find('title').text)) # continue except Exception as e: logger.debug(u"An error has occured: %s" % e) if bookLanguage not in valid_langs: logger.debug('Skipped a book with language %s' % bookLanguage) ignored = ignored + 1 continue bookname = book.find('title').text bookid = book.find('id').text bookdesc = book.find('description').text bookisbn = book.find('isbn').text bookpub = book.find('publisher').text booklink = book.find('link').text bookrate = float(book.find('average_rating').text) bookpages = book.find('num_pages').text series, seriesNum = formatter.bookSeries(bookname) find_book_status = myDB.select( 'SELECT * FROM books WHERE BookID = "%s"' % bookid) if find_book_status: for resulted in find_book_status: book_status = resulted['Status'] else: book_status = lazylibrarian.NEWBOOK_STATUS bookname = bookname.replace(':', '').replace('"', '').replace( "'", "") bookname = unidecode(u'%s' % bookname) bookname = bookname.strip() # strip whitespace if not 
(re.match('[^\w-]', bookname) ): # remove books with bad characters in title if book_status != "Ignored": controlValueDict = {"BookID": bookid} newValueDict = { "AuthorName": authorNameResult, "AuthorID": authorid, "AuthorLink": None, "BookName": bookname, "BookSub": None, "BookDesc": bookdesc, "BookIsbn": bookisbn, "BookPub": bookpub, "BookGenre": None, "BookImg": bookimg, "BookLink": booklink, "BookRate": bookrate, "BookPages": bookpages, "BookDate": pubyear, "BookLang": bookLanguage, "Status": book_status, "BookAdded": formatter.today(), "Series": series, "SeriesNum": seriesNum } resultsCount = resultsCount + 1 myDB.upsert("books", newValueDict, controlValueDict) logger.debug(u"Book found: " + book.find('title').text + " " + pubyear) if 'nocover' in bookimg or 'nophoto' in bookimg: # try to get a cover from librarything workcover = bookwork.getBookCover(bookid) if workcover: logger.debug( u'Updated cover for %s to %s' % (bookname, workcover)) controlValueDict = {"BookID": bookid} newValueDict = {"BookImg": workcover} myDB.upsert("books", newValueDict, controlValueDict) elif bookimg.startswith('http'): link = bookwork.cache_cover(bookid, bookimg) if link is not None: controlValueDict = {"BookID": bookid} newValueDict = {"BookImg": link} myDB.upsert("books", newValueDict, controlValueDict) if seriesNum == None: # try to get series info from librarything series, seriesNum = bookwork.getWorkSeries( bookid) if seriesNum: logger.debug(u'Updated series: %s [%s]' % (series, seriesNum)) controlValueDict = {"BookID": bookid} newValueDict = { "Series": series, "SeriesNum": seriesNum } myDB.upsert("books", newValueDict, controlValueDict) worklink = bookwork.getWorkPage(bookid) if worklink: controlValueDict = {"BookID": bookid} newValueDict = {"WorkPage": worklink} myDB.upsert("books", newValueDict, controlValueDict) if not find_book_status: logger.debug(u"[%s] Added book: %s" % (authorname, bookname)) added_count = added_count + 1 else: logger.debug(u"[%s] Updated book: %s" 
% (authorname, bookname)) updated_count = updated_count + 1 else: book_ignore_count = book_ignore_count + 1 else: logger.debug(u"removed result [" + bookname + "] for bad characters") removedResults = removedResults + 1 loopCount = loopCount + 1 URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + \ urllib.urlencode(self.params) + '&page=' + str(loopCount) resultxml = None try: rootxml, in_cache = self.get_request(URL) if rootxml is None: logger.debug('Error requesting next page of results') else: resultxml = rootxml.getiterator('book') if not in_cache: api_hits = api_hits + 1 except Exception as e: resultxml = None logger.error("Error finding next page of results: %s" % e) if resultxml is not None: if all(False for book in resultxml): # returns True if iterator is empty resultxml = None lastbook = myDB.action( 'SELECT BookName, BookLink, BookDate from books WHERE AuthorID="%s" \ AND Status != "Ignored" order by BookDate DESC' % authorid).fetchone() if lastbook: lastbookname = lastbook['BookName'] lastbooklink = lastbook['BookLink'] lastbookdate = lastbook['BookDate'] else: lastbookname = None lastbooklink = None lastbookdate = None controlValueDict = {"AuthorID": authorid} newValueDict = { "Status": "Active", "LastBook": lastbookname, "LastLink": lastbooklink, "LastDate": lastbookdate } myDB.upsert("authors", newValueDict, controlValueDict) # This is here because GoodReads sometimes has several entries with the same BookID! 
modified_count = added_count + updated_count logger.debug("Found %s total books for author" % total_count) logger.debug("Removed %s bad language results for author" % ignored) logger.debug("Removed %s bad character results for author" % removedResults) logger.debug("Ignored %s books by author marked as Ignored" % book_ignore_count) logger.debug("Imported/Updated %s books for author" % modified_count) myDB.action( 'insert into stats values ("%s", %i, %i, %i, %i, %i, %i, %i, %i)' % (authorname, api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change, cache_hits, ignored, removedResults, not_cached)) if refresh: logger.info( "[%s] Book processing complete: Added %s books / Updated %s books" % (authorname, str(added_count), str(updated_count))) else: logger.info( "[%s] Book processing complete: Added %s books to the database" % (authorname, str(added_count))) return books_dict
def find_book(self, bookid=None, queue=None): threading.currentThread().name = "GR-ADD-BOOK" myDB = database.DBConnection() URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode( self.params) try: rootxml, in_cache = self.get_request(URL) if rootxml is None: logger.debug("Error requesting book") return except Exception as e: logger.error("Error finding book: %s" % e) return bookLanguage = rootxml.find('./book/language_code').text bookname = rootxml.find('./book/title').text if not bookLanguage: bookLanguage = "Unknown" # # PAB user has said they want this book, don't block for bad language, just warn # valid_langs = ([ valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',') ]) if bookLanguage not in valid_langs: logger.debug('Book %s language does not match preference' % bookname) if (rootxml.find('./book/publication_year').text is None): bookdate = "0000" else: bookdate = rootxml.find('./book/publication_year').text try: bookimg = rootxml.find('./book/img_url').text if (bookimg == 'http://www.goodreads.com/assets/nocover/111x148.png'): bookimg = 'images/nocover.png' except KeyError: bookimg = 'images/nocover.png' except AttributeError: bookimg = 'images/nocover.png' authorname = rootxml.find('./book/authors/author/name').text bookdesc = rootxml.find('./book/description').text bookisbn = rootxml.find('./book/isbn').text bookpub = rootxml.find('./book/publisher').text booklink = rootxml.find('./book/link').text bookrate = float(rootxml.find('./book/average_rating').text) bookpages = rootxml.find('.book/num_pages').text name = authorname GR = GoodReads(name) author = GR.find_author_id() if author: AuthorID = author['authorid'] result = re.search(r"\(([\S\s]+),? 
#(\d+\.?-?\d{0,})", bookname) if result: series = result.group(1) if series[-1] == ',': series = series[:-1] seriesNum = result.group(2) else: series = None seriesNum = None bookname = bookname.replace(':', '').replace('"', '').replace("'", "") bookname = unidecode(u'%s' % bookname) bookname = bookname.strip() # strip whitespace controlValueDict = {"BookID": bookid} newValueDict = { "AuthorName": authorname, "AuthorID": AuthorID, "AuthorLink": None, "BookName": bookname, "BookSub": None, "BookDesc": bookdesc, "BookIsbn": bookisbn, "BookPub": bookpub, "BookGenre": None, "BookImg": bookimg, "BookLink": booklink, "BookRate": bookrate, "BookPages": bookpages, "BookDate": bookdate, "BookLang": bookLanguage, "Status": "Wanted", "BookAdded": formatter.today(), "Series": series, "SeriesNum": seriesNum } myDB.upsert("books", newValueDict, controlValueDict) logger.debug("%s added to the books database" % bookname) if 'nocover' in bookimg or 'nophoto' in bookimg: # try to get a cover from librarything workcover = bookwork.getBookCover(bookid) if workcover: logger.debug(u'Updated cover for %s to %s' % (bookname, workcover)) controlValueDict = {"BookID": bookid} newValueDict = {"BookImg": workcover} myDB.upsert("books", newValueDict, controlValueDict) elif bookimg.startswith('http'): link = bookwork.cache_cover(bookid, bookimg) if link is not None: controlValueDict = {"BookID": bookid} newValueDict = {"BookImg": link} myDB.upsert("books", newValueDict, controlValueDict) if seriesNum == None: # try to get series info from librarything series, seriesNum = bookwork.getWorkSeries(bookid) if seriesNum: logger.debug(u'Updated series: %s [%s]' % (series, seriesNum)) controlValueDict = {"BookID": bookid} newValueDict = {"Series": series, "SeriesNum": seriesNum} myDB.upsert("books", newValueDict, controlValueDict) worklink = bookwork.getWorkPage(bookid) if worklink: controlValueDict = {"BookID": bookid} newValueDict = {"WorkPage": worklink} myDB.upsert("books", newValueDict, 
controlValueDict)
def search_book(books=None, library=None):
    """
    books is a list of new books to add, or None for backlog search
    library is "eBook" or "AudioBook" or None to search all book types

    For each wanted book, queries each enabled provider type (nzb/tor/
    direct/rss) at increasingly general search types until a good-enough
    match is found, then requests download of the best match.
    """
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if books is None:
                threading.currentThread().name = "SEARCHALLBOOKS"
            else:
                threading.currentThread().name = "SEARCHBOOKS"

        myDB = database.DBConnection()
        searchlist = []
        searchbooks = []

        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded, books.Status, AudioStatus '
            cmd += 'from books,authors WHERE (books.Status="Wanted" OR AudioStatus="Wanted") '
            cmd += 'and books.AuthorID = authors.AuthorID order by BookAdded desc'
            results = myDB.select(cmd)
            for terms in results:
                searchbooks.append(terms)
        else:
            # The user has added a new book
            for book in books:
                if not book['bookid'] in ['booklang', 'library', 'ignored']:
                    cmd = 'SELECT BookID, AuthorName, BookName, BookSub, books.Status, AudioStatus '
                    cmd += 'from books,authors WHERE BookID=? AND books.AuthorID = authors.AuthorID'
                    results = myDB.select(cmd, (book['bookid'], ))
                    if results:
                        for terms in results:
                            searchbooks.append(terms)
                    else:
                        logger.debug(
                            "SearchBooks - BookID %s is not in the database" %
                            book['bookid'])

        if len(searchbooks) == 0:
            logger.debug("SearchBooks - No books to search for")
            return

        nprov = lazylibrarian.USE_NZB() + lazylibrarian.USE_TOR(
        ) + lazylibrarian.USE_RSS() + lazylibrarian.USE_DIRECT()
        if nprov == 0:
            logger.debug("SearchBooks - No providers to search")
            return

        modelist = []
        if lazylibrarian.USE_NZB():
            modelist.append('nzb')
        if lazylibrarian.USE_TOR():
            modelist.append('tor')
        if lazylibrarian.USE_DIRECT():
            modelist.append('direct')
        if lazylibrarian.USE_RSS():
            modelist.append('rss')

        logger.info('Searching %s provider%s %s for %i book%s' %
                    (nprov, plural(nprov), str(modelist), len(searchbooks),
                     plural(len(searchbooks))))

        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = ''
            if searchbook['AuthorName']:
                searchterm = searchbook['AuthorName']
            else:
                logger.warn("No AuthorName for %s" % searchbook['BookID'])
            if searchbook['BookName']:
                if len(searchterm):
                    searchterm += ' '
                searchterm += searchbook['BookName']
            else:
                logger.warn("No BookName for %s" % searchbook['BookID'])
            if searchbook['BookSub']:
                if len(searchterm):
                    searchterm += ': '
                searchterm += searchbook['BookSub']

            if library is None or library == 'eBook':
                if searchbook['Status'] == "Wanted":  # not just audiobook wanted
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="eBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"], ))
                    if snatched:
                        logger.warn(
                            'eBook %s %s already marked snatched in wanted table'
                            % (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append({
                            "bookid": searchbook['BookID'],
                            "bookName": searchbook['BookName'],
                            "bookSub": searchbook['BookSub'],
                            "authorName": searchbook['AuthorName'],
                            "library": "eBook",
                            "searchterm": searchterm
                        })

            if library is None or library == 'AudioBook':
                if searchbook['AudioStatus'] == "Wanted":  # in case we just wanted eBook
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="AudioBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"], ))
                    if snatched:
                        logger.warn(
                            'AudioBook %s %s already marked snatched in wanted table'
                            % (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append({
                            "bookid": searchbook['BookID'],
                            "bookName": searchbook['BookName'],
                            "bookSub": searchbook['BookSub'],
                            "authorName": searchbook['AuthorName'],
                            "library": "AudioBook",
                            "searchterm": searchterm
                        })

        # only get rss results once per run, as they are not search specific
        rss_resultlist = None
        if 'rss' in modelist:
            rss_resultlist, nprov = IterateOverRSSSites()
            if not nprov:
                # don't nag. Show warning message no more than every 20 mins
                timenow = int(time.time())
                if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                    logger.warn(
                        'No rss providers are available. Check config and blocklist'
                    )
                    lazylibrarian.NO_RSS_MSG = timenow
                modelist.remove('rss')

        book_count = 0
        for book in searchlist:
            matches = []
            for mode in modelist:
                # first attempt, try author/title in category "book"
                if book['library'] == 'AudioBook':
                    searchtype = 'audio'
                else:
                    searchtype = 'book'
                resultlist = None
                if mode == 'nzb' and 'nzb' in modelist:
                    resultlist, nprov = IterateOverNewzNabSites(
                        book, searchtype)
                    if not nprov:
                        # don't nag. Show warning message no more than every 20 mins
                        timenow = int(time.time())
                        if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                            logger.warn(
                                'No nzb providers are available. Check config and blocklist'
                            )
                            lazylibrarian.NO_NZB_MSG = timenow
                        modelist.remove('nzb')
                elif mode == 'tor' and 'tor' in modelist:
                    resultlist, nprov = IterateOverTorrentSites(
                        book, searchtype)
                    if not nprov:
                        # don't nag. Show warning message no more than every 20 mins
                        timenow = int(time.time())
                        if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                            logger.warn(
                                'No tor providers are available. Check config and blocklist'
                            )
                            lazylibrarian.NO_TOR_MSG = timenow
                        modelist.remove('tor')
                elif mode == 'direct' and 'direct' in modelist:
                    resultlist, nprov = IterateOverDirectSites(
                        book, searchtype)
                    if not nprov:
                        # don't nag. Show warning message no more than every 20 mins
                        timenow = int(time.time())
                        if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow:
                            logger.warn(
                                'No direct providers are available. Check config and blocklist'
                            )
                            lazylibrarian.NO_DIRECT_MSG = timenow
                        modelist.remove('direct')
                elif mode == 'rss' and 'rss' in modelist:
                    if rss_resultlist:
                        resultlist = rss_resultlist
                    else:
                        logger.debug("No active rss providers found")
                        modelist.remove('rss')

                if resultlist:
                    match = findBestResult(resultlist, book, searchtype, mode)
                else:
                    match = None

                # if you can't find the book, try author/title without any "(extended details, series etc)"
                if not goodEnough(match) and '(' in book['bookName']:
                    searchtype = 'short' + searchtype
                    if mode == 'nzb' and 'nzb' in modelist:
                        resultlist, nprov = IterateOverNewzNabSites(
                            book, searchtype)
                        if not nprov:
                            # don't nag. Show warning message no more than every 20 mins
                            timenow = int(time.time())
                            if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                                logger.warn(
                                    'No nzb providers are available. Check config and blocklist'
                                )
                                lazylibrarian.NO_NZB_MSG = timenow
                            modelist.remove('nzb')
                    elif mode == 'tor' and 'tor' in modelist:
                        resultlist, nprov = IterateOverTorrentSites(
                            book, searchtype)
                        if not nprov:
                            # don't nag. Show warning message no more than every 20 mins
                            timenow = int(time.time())
                            if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                                logger.warn(
                                    'No tor providers are available. Check config and blocklist'
                                )
                                lazylibrarian.NO_TOR_MSG = timenow
                            modelist.remove('tor')
                    elif mode == 'direct' and 'direct' in modelist:
                        resultlist, nprov = IterateOverDirectSites(
                            book, searchtype)
                        if not nprov:
                            # don't nag. Show warning message no more than every 20 mins
                            timenow = int(time.time())
                            if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow:
                                logger.warn(
                                    'No direct providers are available. Check config and blocklist'
                                )
                                lazylibrarian.NO_DIRECT_MSG = timenow
                            modelist.remove('direct')
                    elif mode == 'rss' and 'rss' in modelist:
                        resultlist = rss_resultlist

                    if resultlist:
                        match = findBestResult(resultlist, book, searchtype, mode)
                    else:
                        match = None

                # if you can't find the book under "books", you might find under general search
                # general search is the same as booksearch for torrents and rss, no need to check again
                if not goodEnough(match):
                    searchtype = 'general'
                    if mode == 'nzb' and 'nzb' in modelist:
                        resultlist, nprov = IterateOverNewzNabSites(
                            book, searchtype)
                        if not nprov:
                            # don't nag. Show warning message no more than every 20 mins
                            timenow = int(time.time())
                            if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                                logger.warn(
                                    'No nzb providers are available. Check config and blocklist'
                                )
                                lazylibrarian.NO_NZB_MSG = timenow
                            modelist.remove('nzb')
                    if resultlist:
                        match = findBestResult(resultlist, book, searchtype, mode)
                    else:
                        match = None

                # if still not found, try general search again without any "(extended details, series etc)"
                if not goodEnough(match) and '(' in book['searchterm']:
                    searchtype = 'shortgeneral'
                    if mode == 'nzb' and 'nzb' in modelist:
                        # BUGFIX: the provider count was previously discarded
                        # (assigned to _) so the following "if not nprov" test
                        # used a stale value from an earlier call
                        resultlist, nprov = IterateOverNewzNabSites(
                            book, searchtype)
                        if not nprov:
                            # don't nag. Show warning message no more than every 20 mins
                            timenow = int(time.time())
                            if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                                logger.warn(
                                    'No nzb providers are available. Check config and blocklist'
                                )
                                lazylibrarian.NO_NZB_MSG = timenow
                            modelist.remove('nzb')
                    if resultlist:
                        match = findBestResult(resultlist, book, searchtype, mode)
                    else:
                        match = None

                if not goodEnough(match):
                    logger.info(
                        "%s Searches for %s %s returned no results." %
                        (mode.upper(), book['library'], book['searchterm']))
                else:
                    logger.info("Found %s result: %s %s%%, %s priority %s" %
                                (mode.upper(), searchtype, match[0],
                                 match[2]['NZBprov'], match[4]))
                    matches.append(match)

            if matches:
                # sort on percentage and priority
                highest = max(matches, key=lambda s: (s[0], s[4]))
                logger.info("Requesting %s download: %s%% %s: %s" %
                            (book['library'], highest[0],
                             highest[2]['NZBprov'], highest[1]))
                # NOTE(review): "> True" is only satisfied when downloadResult
                # returns a value greater than 1; if it returns a plain bool
                # this can never increment book_count - confirm intended
                # downloadResult return type before changing
                if downloadResult(highest, book) > True:
                    book_count += 1  # we found it

        logger.info("Search for Wanted items complete, found %s book%s" %
                    (book_count, plural(book_count)))

    except Exception:
        logger.error('Unhandled exception in search_book: %s' %
                     traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
    def get_request(self, my_url):
        """Fetch *my_url* through a simple file-based XML cache.

        Returns a tuple (root, in_cache): root is the parsed ElementTree
        root of the response XML (or None on any failure), in_cache is
        True when the XML was served from a still-valid cache file.
        Cache entries expire after lazylibrarian.CACHE_AGE days.
        """
        request = urllib2.Request(my_url)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
        request.add_header('User-Agent', USER_AGENT)
        # Original simplecache
        # opener = urllib.request.build_opener(SimpleCache.CacheHandler(".AuthorCache"),
        #                                      SimpleCache.ThrottlingProcessor(5))
        # resp = opener.open(request)

        # Simplified simplecache, no throttling, no headers as we dont use them, added cache expiry
        # we can simply cache the xml with...
        # hashfilename = hash url
        # if hashfilename exists, return its contents
        # if not, urllib2.urlopen()
        # store the xml
        # return the xml, and whether it was found in the cache
        # Need to expire the cache entries, or we won't search for anything new
        # default to 30 days for now. Authors dont write that quickly.
        #
        cacheLocation = "XMLCache"
        expireafter = lazylibrarian.CACHE_AGE
        cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation)
        if not os.path.exists(cacheLocation):
            os.mkdir(cacheLocation)
        # cache filename is the md5 of the full request url
        myhash = md5.new(request.get_full_url()).hexdigest()
        valid_cache = False
        hashname = cacheLocation + os.sep + myhash + ".xml"
        if os.path.isfile(hashname):
            cache_modified_time = os.stat(hashname).st_mtime
            time_now = time.time()
            if cache_modified_time < time_now - (
                    expireafter * 24 * 60 * 60):  # expire after this many seconds
                # Cache is old, delete entry
                os.remove(hashname)
            else:
                valid_cache = True
        if valid_cache:
            lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
            logger.debug(u"CacheHandler: Returning CACHED response for %s" %
                         request.get_full_url())
            with open(hashname, "r") as cachefile:
                source_xml = cachefile.read()
        else:
            lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
            try:
                resp = urllib2.urlopen(request, timeout=30)  # don't get stuck
            except socket.timeout as e:
                # one retry on timeout before giving up
                logger.warn(u"Retrying - got timeout on %s" % my_url)
                try:
                    resp = urllib2.urlopen(request, timeout=30)  # don't get stuck
                except (urllib2.URLError, socket.timeout) as e:
                    logger.error(u"Error getting response for %s: %s" % (my_url, e))
                    return None, False
            except urllib2.URLError as e:
                logger.error(u"URLError getting response for %s: %s" % (my_url, e))
                return None, False
            # only cache 2xx responses; anything else is treated as failure
            if str(resp.getcode()).startswith("2"):  # (200 OK etc)
                logger.debug(u"CacheHandler: Caching response for %s" % my_url)
                try:
                    source_xml = resp.read()  # .decode('utf-8')
                except socket.error as e:
                    logger.error(u"Error reading xml: %s" % e)
                    return None, False
                with open(hashname, "w") as cachefile:
                    cachefile.write(source_xml)
            else:
                logger.warn(u"Got error response for %s: %s" % (my_url, resp.getcode()))
                return None, False
        # NOTE(review): a malformed cached/downloaded document will raise
        # ParseError here rather than returning None - confirm callers expect that
        root = ElementTree.fromstring(source_xml)
        return root, valid_cache
def id3read(filename):
    """Extract author and book title from an audio file's metadata tags.

    Uses TinyTag to read artist/composer/album/albumartist, preferring
    composer as the author, then artist, then albumartist.

    :param filename: path to the audio file
    :return: (author, book) as unicode strings, or (None, None) when
             tags are missing/unreadable or TinyTag is unavailable
    """
    if not TinyTag:
        return None, None
    try:
        id3r = TinyTag.get(filename)
        performer = id3r.artist.strip() if id3r.artist else ''
        composer = id3r.composer.strip() if id3r.composer else ''
        book = id3r.album.strip() if id3r.album else ''
        albumartist = id3r.albumartist.strip() if id3r.albumartist else ''

        if lazylibrarian.LOGLEVEL & lazylibrarian.log_libsync:
            logger.debug("id3r.filename [%s]" % filename)
            logger.debug("id3r.performer [%s]" % performer)
            logger.debug("id3r.composer [%s]" % composer)
            logger.debug("id3r.album [%s]" % book)
            logger.debug("id3r.albumartist [%s]" % albumartist)

        if composer:  # if present, should be author
            author = composer
        elif performer:  # author, or narrator if composer == author
            author = performer
        elif albumartist:
            author = albumartist
        else:
            author = None

        # IDIOM FIX: was "type(author) is list"; isinstance is the
        # canonical type check and behaves the same here
        if author and isinstance(author, list):
            lst = ', '.join(author)
            logger.debug("id3reader author list [%s]" % lst)
            author = author[0]  # if multiple authors, just use the first one

        if author and book:
            return makeUnicode(author), makeUnicode(book)
    except Exception as e:
        logger.error("tinytag error %s %s [%s]" %
                     (type(e).__name__, str(e), filename))
    return None, None
def find_results(self, authorname=None, queue=None): threading.currentThread().name = "GR-SEARCH" resultlist = [] api_hits = 0 # Goodreads doesn't like initials followed by spaces, # eg "M L Hamilton", needs "M. L. Hamilton" or "M.L.Hamilton" # but DOES need spaces if not initials eg "Tom.Holt" fails, but "Tom Holt" works if authorname[1] == ' ': authorname = authorname.replace(' ', '.') authorname = authorname.replace('..', '.') url = urllib.quote_plus(authorname.encode('utf-8')) set_url = 'http://www.goodreads.com/search.xml?q=' + url + '&' + urllib.urlencode( self.params) logger.debug('Now searching GoodReads API with keyword: ' + authorname) logger.debug('Searching for %s at: %s' % (authorname, set_url)) try: try: rootxml, in_cache = self.get_request(set_url) except Exception as e: logger.error("Error finding results: %s" % e) return if not len(rootxml): logger.debug("Error requesting results") return resultxml = rootxml.getiterator('work') resultcount = 0 for author in resultxml: bookdate = "0001-01-01" if (author.find('original_publication_year').text is None): bookdate = "0000" else: bookdate = author.find('original_publication_year').text authorNameResult = author.find('./best_book/author/name').text booksub = "" bookpub = "" booklang = "Unknown" try: bookimg = author.find('./best_book/image_url').text if (bookimg == 'http://www.goodreads.com/assets/nocover/111x148.png' ): bookimg = 'images/nocover.png' except KeyError: bookimg = 'images/nocover.png' except AttributeError: bookimg = 'images/nocover.png' try: bookrate = author.find('average_rating').text except KeyError: bookrate = 0 bookpages = '0' bookgenre = '' bookdesc = '' bookisbn = '' booklink = 'http://www.goodreads.com/book/show/' + author.find( './best_book/id').text if (author.find('./best_book/title').text is None): bookTitle = "" else: bookTitle = author.find('./best_book/title').text author_fuzz = fuzz.token_set_ratio(authorNameResult, authorname) book_fuzz = fuzz.token_set_ratio(bookTitle, 
authorname) try: isbn_check = int(authorname[:-1]) if (len(str(isbn_check)) == 9) or (len(str(isbn_check)) == 12): isbn_fuzz = int(100) else: isbn_fuzz = int(0) except: isbn_fuzz = int(0) highest_fuzz = max(author_fuzz, book_fuzz, isbn_fuzz) bookid = author.find('./best_book/id').text resultlist.append({ 'authorname': author.find('./best_book/author/name').text, 'bookid': bookid, 'authorid': author.find('./best_book/author/id').text, 'bookname': bookTitle.encode("ascii", "ignore"), 'booksub': None, 'bookisbn': bookisbn, 'bookpub': bookpub, 'bookdate': bookdate, 'booklang': booklang, 'booklink': booklink, 'bookrate': float(bookrate), 'bookimg': bookimg, 'bookpages': bookpages, 'bookgenre': bookgenre, 'bookdesc': bookdesc, 'author_fuzz': author_fuzz, 'book_fuzz': book_fuzz, 'isbn_fuzz': isbn_fuzz, 'highest_fuzz': highest_fuzz, 'num_reviews': float(bookrate) }) resultcount = resultcount + 1 except urllib2.HTTPError as err: if err.code == 404: logger.error('Received a 404 error when searching for author') if err.code == 403: logger.warn('Access to api is denied: usage exceeded') else: logger.error( 'An unexpected error has occurred when searching for an author' ) logger.debug('Found %s results with keyword: %s' % (resultcount, authorname)) logger.debug('The GoodReads API was hit %s times for keyword %s' % (str(api_hits), authorname)) queue.put(resultlist)
    def _sendBoxcar(self, msg, title, token, subscribe=False):
        """
        Sends a boxcar notification to the address provided

        msg: The message to send (unicode)
        title: The title of the message
        token: The access token (or email address) to send the message to
            (or to subscribe with)
        subscribe: If true then instead of sending a message this function will send
            a subscription notification (optional, default is False)

        returns: True if the message succeeded, False otherwise
        """
        logger.debug('Boxcar notification: %s' % msg)
        logger.debug('Title: %s' % title)
        logger.debug('Token: %s' % token)
        logger.debug('Subscribe: %s' % subscribe)

        # build up the URL and parameters
        msg = msg.strip()
        curUrl = API_URL

        # if this is a subscription notification then act accordingly
        if subscribe:
            data = urllib.urlencode({'email': token})
            curUrl += "/subscribe"

        # for normal requests we need all these parameters
        else:
            # data = urllib.urlencode({
            #     'email': email,
            #     'notification[from_screen_name]': title,
            #     'notification[message]': msg.encode('utf-8'),
            #     'notification[from_remote_service_id]': int(time.time())
            # })
            data = urllib.urlencode({
                'user_credentials': token,
                'notification[title]': title.encode('utf-8'),
                'notification[long_message]': msg.encode('utf-8'),
                'notification[sound]': "done"
            })

        # send the request to boxcar
        try:
            # TODO: Use our getURL from helper?
            req = urllib2.Request(curUrl)
            handle = urllib2.urlopen(req, data)
            handle.close()

        except (urllib2.URLError, urllib2.HTTPError) as e:
            # if we get an error back that doesn't have an error code then who knows what's really happening
            # URLError doesn't return a code, just a reason. HTTPError gives a code
            if not hasattr(e, 'code'):
                logger.error(u"BOXCAR: Boxcar notification failed." + str(e))
                return False
            else:
                logger.error(
                    u"BOXCAR: Boxcar notification failed. Error code: " +
                    str(e.code))

                # HTTP status 404 if the provided email address isn't a Boxcar user.
                if e.code == 404:
                    logger.warn(
                        u"BOXCAR: Username is wrong/not a boxcar email. Boxcar will send an email to it"
                    )
                    return False

                # For HTTP status code 401's, it is because you are passing in either an
                # invalid token, or the user has not added your service.
                elif e.code == 401:
                    # If the user has already added your service, we'll return an HTTP status code of 401.
                    if subscribe:
                        logger.error(u"BOXCAR: Already subscribed to service")
                        # i dont know if this is true or false ... its neither but i also dont
                        # know how we got here in the first place
                        return False

                    # HTTP status 401 if the user doesn't have the service added
                    else:
                        # try to subscribe first, then report the outcome
                        subscribeNote = self._sendBoxcar(msg, title, token, True)
                        if subscribeNote:
                            logger.debug(u"BOXCAR: Subscription sent.")
                            return True
                        else:
                            logger.error(
                                u"BOXCAR: Subscription could not be sent.")
                            return False

                # If you receive an HTTP status code of 400, it is because you failed to send the proper parameters
                elif e.code == 400:
                    logger.error(u"BOXCAR: Wrong data send to boxcar.")
                    return False

        # NOTE(review): an HTTPError with an unhandled code (eg 500) falls
        # through to here and is reported as success - confirm intended
        logger.debug(u"BOXCAR: Boxcar notification successful.")
        return True
def audioProcess(bookid, rename=False, playlist=False):
    """
    Scan the audiobook folder for *bookid*, verify/number its parts from
    tags or filenames, and optionally rename the files and/or write a
    playlist file ("playlist.ll") in the folder.

    :param bookid: book to process
    :param rename: rename to match audiobook filename pattern
    :param playlist: generate a playlist for popup
    :return: filename of part 01 of the audiobook
    """
    # renaming needs both tokens present in the configured pattern
    for item in ['$Part', '$Title']:
        if rename and item not in lazylibrarian.CONFIG['AUDIOBOOK_DEST_FILE']:
            logger.error("Unable to audioProcess, check AUDIOBOOK_DEST_FILE")
            return ''
    myDB = database.DBConnection()
    cmd = 'select AuthorName,BookName,AudioFile from books,authors where books.AuthorID = authors.AuthorID and bookid=?'
    exists = myDB.match(cmd, (bookid,))
    if exists:
        book_filename = exists['AudioFile']
        if book_filename:
            # r is the folder holding all the audio parts
            r = os.path.dirname(book_filename)
        else:
            logger.debug("No filename for %s in audioProcess" % bookid)
            return ''
    else:
        logger.debug("Invalid bookid in audioProcess %s" % bookid)
        return ''
    if not TinyTag:
        logger.warn("TinyTag library not available")
        return ''
    cnt = 0
    parts = []  # each entry is [track, book, author, filename]
    total = 0
    author = ''
    book = ''
    audio_file = ''
    abridged = ''
    for f in os.listdir(makeBytestr(r)):
        f = makeUnicode(f)
        if is_valid_booktype(f, booktype='audiobook'):
            cnt += 1
            audio_file = f
            try:
                audio_path = os.path.join(r, f)
                performer = ''
                composer = ''
                albumartist = ''
                book = ''
                title = ''
                track = 0
                total = 0
                if TinyTag.is_supported(audio_path):
                    id3r = TinyTag.get(audio_path)
                    performer = id3r.artist
                    composer = id3r.composer
                    albumartist = id3r.albumartist
                    book = id3r.album
                    title = id3r.title
                    track = id3r.track
                    total = id3r.track_total
                    track = check_int(track, 0)
                    total = check_int(total, 0)
                if performer:
                    performer = performer.strip()
                if composer:
                    composer = composer.strip()
                if book:
                    book = book.strip()
                if albumartist:
                    albumartist = albumartist.strip()
                if composer:  # if present, should be author
                    author = composer
                elif performer:  # author, or narrator if composer == author
                    author = performer
                elif albumartist:
                    author = albumartist
                if author and book:
                    parts.append([track, book, author, f])
                # look for an (un)abridged marker in any of the tags
                if not abridged:
                    for tag in [book, title, albumartist, performer, composer]:
                        if tag and 'unabridged' in tag.lower():
                            abridged = 'Unabridged'
                            break
                if not abridged:
                    for tag in [book, title, albumartist, performer, composer]:
                        if tag and 'abridged' in tag.lower():
                            abridged = 'Abridged'
                            break
            except Exception as e:
                logger.error("tinytag %s %s" % (type(e).__name__, str(e)))
                pass
            finally:
                # fall back to the filename for the (un)abridged marker
                # NOTE(review): these "break" statements exit the whole file
                # loop on the first filename match, skipping remaining files -
                # confirm this early exit is intended
                if not abridged:
                    if audio_file and 'unabridged' in audio_file.lower():
                        abridged = 'Unabridged'
                        break
                if not abridged:
                    if audio_file and 'abridged' in audio_file.lower():
                        abridged = 'Abridged'
                        break

    logger.debug("%s found %s audiofile%s" % (exists['BookName'], cnt, plural(cnt)))

    if cnt == 1 and not parts:  # single file audiobook with no tags
        parts = [[1, exists['BookName'], exists['AuthorName'], audio_file]]

    if cnt != len(parts):
        logger.warn("%s: Incorrect number of parts (found %i from %i)" %
                    (exists['BookName'], len(parts), cnt))
        return book_filename

    if total and total != cnt:
        logger.warn("%s: Reported %i parts, got %i" %
                    (exists['BookName'], total, cnt))
        return book_filename

    # check all parts have the same author and title
    # (book/author still hold the values from the last file scanned)
    if len(parts) > 1:
        for part in parts:
            if part[1] != book:
                logger.warn("%s: Inconsistent title: [%s][%s]" %
                            (exists['BookName'], part[1], book))
                return book_filename
            if part[2] != author:
                logger.warn("%s: Inconsistent author: [%s][%s]" %
                            (exists['BookName'], part[2], author))
                return book_filename

    # do we have any track info (value is 0 if not)
    # NOTE(review): parts can be empty here if cnt was 0 - parts[0] would
    # raise IndexError; confirm callers guarantee at least one audio file
    if parts[0][0] == 0:
        tokmatch = ''
        # try to extract part information from filename. Search for token style of part 1 in this order...
        for token in [' 001.', ' 01.', ' 1.', ' 001 ', ' 01 ', ' 1 ', '01']:
            if tokmatch:
                break
            for part in parts:
                if token in part[3]:
                    tokmatch = token
                    break
        if tokmatch:  # we know the numbering style, get numbers for the other parts
            cnt = 0
            while cnt < len(parts):
                cnt += 1
                if tokmatch == ' 001.':
                    pattern = ' %s.' % str(cnt).zfill(3)
                elif tokmatch == ' 01.':
                    pattern = ' %s.' % str(cnt).zfill(2)
                elif tokmatch == ' 1.':
                    pattern = ' %s.' % str(cnt)
                elif tokmatch == ' 001 ':
                    pattern = ' %s ' % str(cnt).zfill(3)
                elif tokmatch == ' 01 ':
                    pattern = ' %s ' % str(cnt).zfill(2)
                elif tokmatch == ' 1 ':
                    pattern = ' %s ' % str(cnt)
                else:
                    pattern = '%s' % str(cnt).zfill(2)
                # standardise numbering of the parts
                for part in parts:
                    if pattern in part[3]:
                        part[0] = cnt
                        break

    parts.sort(key=lambda x: x[0])
    # check all parts are present
    cnt = 0
    while cnt < len(parts):
        if parts[cnt][0] != cnt + 1:
            logger.warn("%s: No part %i found" % (exists['BookName'], cnt + 1))
            return book_filename
        cnt += 1

    if abridged:
        abridged = ' (%s)' % abridged
    # if we get here, looks like we have all the parts needed to rename properly
    seriesinfo = nameVars(bookid, abridged)
    dest_path = seriesinfo['FolderName']
    dest_dir = lazylibrarian.DIRECTORY('Audio')
    dest_path = os.path.join(dest_dir, dest_path)
    if rename and r != dest_path:
        try:
            dest_path = safe_move(r, dest_path)
            r = dest_path
        except Exception as why:
            if not os.path.isdir(dest_path):
                logger.error('Unable to create directory %s: %s' %
                             (dest_path, why))

    # NOTE(review): the boolean parameter is rebound to a file object (or
    # None) here and used as both flag and handle below
    if playlist:
        try:
            playlist = open(os.path.join(r, 'playlist.ll'), 'w')
        except Exception as why:
            logger.error('Unable to create playlist in %s: %s' % (r, why))
            playlist = None

    for part in parts:
        # build the new filename from the configured pattern
        pattern = seriesinfo['AudioFile']
        pattern = pattern.replace(
            '$Part', str(part[0]).zfill(len(str(len(parts))))).replace(
                '$Total', str(len(parts)))
        pattern = ' '.join(pattern.split()).strip()
        pattern = pattern + os.path.splitext(part[3])[1]
        if playlist:
            if rename:
                playlist.write(pattern + '\n')
            else:
                playlist.write(part[3] + '\n')
        if rename:
            n = os.path.join(r, pattern)
            o = os.path.join(r, part[3])
            if o != n:
                try:
                    n = safe_move(o, n)
                    if part[0] == 1:
                        book_filename = n  # return part 1 of set
                    logger.debug('%s: audioProcess [%s] to [%s]' %
                                 (exists['BookName'], o, n))
                except Exception as e:
                    logger.error('Unable to rename [%s] to [%s] %s %s' %
                                 (o, n, type(e).__name__, str(e)))
    if playlist:
        playlist.close()
    return book_filename
def find_book(self, bookid=None, queue=None):
    """
    Fetch a single volume from the Google Books API by volume id and
    upsert it into the books table with Status "Wanted".

    bookid: Google Books volume id
    queue: unused here, kept for interface compatibility with callers
    """
    threading.currentThread().name = "GB-ADD-BOOK"
    myDB = database.DBConnection()
    URL = 'https://www.googleapis.com/books/v1/volumes/' + str(bookid) + "?key=" + lazylibrarian.GB_API
    jsonresults = json.JSONDecoder().decode(
        urllib2.urlopen(URL, timeout=30).read())

    # Replace German umlauts and strip ":" from the title (Darkie67)
    booknamealt = jsonresults['volumeInfo']['title']
    bookname = booknamealt.replace(u'\xf6', u'oe') \
        .replace(u'\xe4', u'ae') \
        .replace(u'\xdf', u'ss') \
        .replace(u'\xc4', u'Ae') \
        .replace(u'\xdc', u'Ue') \
        .replace(u'\xd6', u'Oe') \
        .replace(':', '') \
        .replace(u'\xfc', u'ue')

    # bug fix: authorname was left unbound when the result had no authors,
    # causing a NameError at the upsert below. Default it first.
    authorname = None
    try:
        authorname = jsonresults['volumeInfo']['authors'][0]
    except KeyError:
        logger.debug('Book %s does not contain author field' % bookname)

    # bug fix: booklang was left unbound when the result had no language
    # field but was still written to the database below.
    booklang = 'Unknown'
    try:
        # skip if language is in ignore list
        booklang = jsonresults['volumeInfo']['language']
        valid_langs = [valid_lang.strip()
                       for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
        if booklang not in valid_langs:
            logger.debug('Book %s language does not match preference' % bookname)
    except KeyError:
        logger.debug('Book does not have language field')

    # Optional fields: default sensibly when Google omits them
    try:
        bookpub = jsonresults['volumeInfo']['publisher']
    except KeyError:
        bookpub = None
    try:
        booksub = jsonresults['volumeInfo']['subtitle']
    except KeyError:
        booksub = None
    try:
        bookdate = jsonresults['volumeInfo']['publishedDate']
    except KeyError:
        bookdate = '0000-00-00'
    try:
        bookimg = jsonresults['volumeInfo']['imageLinks']['thumbnail']
    except KeyError:
        bookimg = 'images/nocover.png'
    try:
        bookrate = jsonresults['volumeInfo']['averageRating']
    except KeyError:
        bookrate = 0
    try:
        bookpages = jsonresults['volumeInfo']['pageCount']
    except KeyError:
        bookpages = 0
    try:
        bookgenre = jsonresults['volumeInfo']['categories'][0]
    except KeyError:
        bookgenre = None
    try:
        bookdesc = jsonresults['volumeInfo']['description']
    except KeyError:
        bookdesc = None
    try:
        # only store ISBN-10 identifiers; anything else is ignored
        if jsonresults['volumeInfo']['industryIdentifiers'][0]['type'] == 'ISBN_10':
            bookisbn = jsonresults['volumeInfo']['industryIdentifiers'][0]['identifier']
        else:
            bookisbn = None
    except KeyError:
        bookisbn = None

    booklink = jsonresults['volumeInfo']['canonicalVolumeLink']
    bookrate = float(bookrate)

    # Look up the author on GoodReads to get an AuthorID for the book row
    name = jsonresults['volumeInfo']['authors'][0]
    GR = GoodReads(name)
    author = GR.find_author_id()
    # bug fix: AuthorID was unbound when no GoodReads match was found
    AuthorID = ''
    if author:
        AuthorID = author['authorid']

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorName": authorname,
        "AuthorID": AuthorID,
        "AuthorLink": "",
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": bookgenre,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": booklang,
        "Status": "Wanted",
        "BookAdded": formatter.today()
    }
    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s added to the books database" % bookname)
def bookRename(bookid):
    """
    Rename/move an ebook folder and its files to the user's naming pattern.

    Moves the book's directory to the configured destination, then renames
    bookname.type, bookname.jpg and bookname.opf (but never cover.jpg or
    metadata.opf) to the pattern from nameVars().

    Returns the (possibly new) path of the preferred book file,
    or '' if the bookid is unknown or has no file.
    """
    myDB = database.DBConnection()
    cmd = 'select AuthorName,BookName,BookFile from books,authors where books.AuthorID = authors.AuthorID and bookid=?'
    exists = myDB.match(cmd, (bookid,))
    if not exists:
        logger.debug("Invalid bookid in bookRename %s" % bookid)
        return ''

    f = exists['BookFile']
    if not f:
        # bug fix: format string had two %s placeholders but only one value,
        # raising TypeError instead of logging
        logger.debug("No filename for %s in BookRename %s" % (exists['BookName'], bookid))
        return ''

    r = os.path.dirname(f)
    if not lazylibrarian.CONFIG['CALIBRE_RENAME']:
        # calibre library folders look like "Title (1234)"; leave them alone
        try:
            # noinspection PyTypeChecker
            calibreid = r.rsplit('(', 1)[1].split(')')[0]
            if not calibreid.isdigit():
                calibreid = ''
        except IndexError:
            calibreid = ''
        if calibreid:
            msg = '[%s] looks like a calibre directory: not renaming book' % os.path.basename(r)
            logger.debug(msg)
            return f

    reject = multibook(r)
    if reject:
        logger.debug("Not renaming %s, found multiple %s" % (f, reject))
        return f

    seriesinfo = nameVars(bookid)
    dest_path = seriesinfo['FolderName']
    dest_dir = lazylibrarian.DIRECTORY('eBook')
    dest_path = os.path.join(dest_dir, dest_path)
    dest_path = stripspaces(dest_path)
    oldpath = r

    if oldpath != dest_path:
        try:
            dest_path = safe_move(oldpath, dest_path)
        except Exception as why:
            if not os.path.isdir(dest_path):
                logger.error('Unable to create directory %s: %s' % (dest_path, why))

    book_basename = os.path.splitext(os.path.basename(f))[0]
    new_basename = seriesinfo['BookFile']

    if ' / ' in new_basename:  # used as a separator in goodreads omnibus
        logger.warn("bookRename [%s] looks like an omnibus? Not renaming %s" % (new_basename, book_basename))
        new_basename = book_basename

    if book_basename != new_basename:
        # only rename bookname.type, bookname.jpg, bookname.opf, not cover.jpg or metadata.opf
        for fname in os.listdir(makeBytestr(dest_path)):
            fname = makeUnicode(fname)
            extn = ''
            if is_valid_booktype(fname, booktype='ebook'):
                extn = os.path.splitext(fname)[1]
            elif fname.endswith('.opf') and not fname == 'metadata.opf':
                extn = '.opf'
            elif fname.endswith('.jpg') and not fname == 'cover.jpg':
                extn = '.jpg'
            if extn:
                ofname = os.path.join(dest_path, fname)
                nfname = os.path.join(dest_path, new_basename + extn)
                if ofname != nfname:
                    try:
                        nfname = safe_move(ofname, nfname)
                        logger.debug("bookRename %s to %s" % (ofname, nfname))
                        oldname = os.path.join(oldpath, fname)
                        if oldname == exists['BookFile']:
                            # if we renamed/moved the preferred file, return new name
                            f = nfname
                    except Exception as e:
                        logger.error('Unable to rename [%s] to [%s] %s %s' %
                                     (ofname, nfname, type(e).__name__, str(e)))
    return f
def find_results(self, authorname=None, queue=None):
    """
    Search the Google Books API for the search term and put a list of
    matching book dicts on the supplied queue.

    Uses isbn: search when the term looks like an ISBN (9 or 12 digits
    before a check digit), otherwise inauthor: and intitle:. Each result
    carries fuzzy-match scores so the caller can rank them.
    """
    threading.currentThread().name = "GB-SEARCH"
    resultlist = []
    # See if we should check ISBN field, otherwise ignore it
    try:
        isbn_check = int(authorname[:-1])
        if (len(str(isbn_check)) == 9) or (len(str(isbn_check)) == 12):
            api_strings = ['isbn:']
        else:
            api_strings = ['inauthor:', 'intitle:']
    except (TypeError, ValueError):  # was a bare except: narrowed to what int() raises
        api_strings = ['inauthor:', 'intitle:']

    api_hits = 0
    logger.info('Now searching Google Books API with keyword: ' + self.name)

    for api_value in api_strings:
        startindex = 0
        if api_value == "isbn:":
            set_url = self.url + urllib.quote(api_value + self.name)
        else:
            set_url = self.url + urllib.quote(api_value + '"' + self.name + '"')
        try:
            resultcount = 0
            ignored = 0
            total_count = 0
            no_author_count = 0
            while True:
                self.params['startIndex'] = startindex
                URL = set_url + '&' + urllib.urlencode(self.params)
                try:
                    jsonresults = json.JSONDecoder().decode(
                        urllib2.urlopen(URL, timeout=30).read())
                    api_hits = api_hits + 1
                    number_results = jsonresults['totalItems']
                    logger.debug('Searching url: ' + URL)
                    if number_results == 0:
                        logger.info('Found no results for %s with value: %s' % (api_value, self.name))
                        break
                except HTTPError as err:  # py3-compatible except syntax
                    logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % err.msg)
                    break

                # Google Books pages are 40 items; advance before processing
                startindex = startindex + 40

                for item in jsonresults['items']:
                    total_count = total_count + 1
                    # skip if no author, no author is no book.
                    try:
                        Author = item['volumeInfo']['authors'][0]
                    except KeyError:
                        logger.debug('Skipped a result without authorfield.')
                        no_author_count = no_author_count + 1
                        continue
                    try:
                        # skip if language is in ignore list
                        booklang = item['volumeInfo']['language']
                        valid_langs = [valid_lang.strip()
                                       for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
                        if booklang not in valid_langs:
                            logger.debug('Skipped a book with language %s' % booklang)
                            ignored = ignored + 1
                            continue
                    except KeyError:
                        ignored = ignored + 1
                        logger.debug('Skipped a result where no language is found')
                        continue

                    # Optional fields: default sensibly when missing
                    try:
                        bookpub = item['volumeInfo']['publisher']
                    except KeyError:
                        bookpub = None
                    try:
                        booksub = item['volumeInfo']['subtitle']
                    except KeyError:
                        booksub = None
                    try:
                        bookdate = item['volumeInfo']['publishedDate']
                    except KeyError:
                        bookdate = '0000-00-00'
                    bookdate = bookdate[:4]  # year only
                    try:
                        bookimg = item['volumeInfo']['imageLinks']['thumbnail']
                    except KeyError:
                        bookimg = 'images/nocover.png'
                    try:
                        bookrate = item['volumeInfo']['averageRating']
                    except KeyError:
                        bookrate = 0
                    try:
                        bookpages = item['volumeInfo']['pageCount']
                    except KeyError:
                        bookpages = '0'
                    try:
                        bookgenre = item['volumeInfo']['categories'][0]
                    except KeyError:
                        bookgenre = None
                    try:
                        bookdesc = item['volumeInfo']['description']
                    except KeyError:
                        bookdesc = 'Not available'
                    try:
                        num_reviews = item['volumeInfo']['ratingsCount']
                    except KeyError:
                        num_reviews = 0
                    try:
                        if item['volumeInfo']['industryIdentifiers'][0]['type'] == 'ISBN_10':
                            bookisbn = item['volumeInfo']['industryIdentifiers'][0]['identifier']
                        else:
                            bookisbn = 0
                    except KeyError:
                        bookisbn = 0

                    # Fuzzy-match the result against the search term
                    author_fuzz = fuzz.ratio(Author.lower(), authorname.lower())
                    book_fuzz = fuzz.ratio(item['volumeInfo']['title'].lower(), authorname.lower())
                    try:
                        isbn_check = int(authorname[:-1])
                        if (len(str(isbn_check)) == 9) or (len(str(isbn_check)) == 12):
                            isbn_fuzz = int(100)
                        else:
                            isbn_fuzz = int(0)
                    except (TypeError, ValueError):  # was a bare except
                        isbn_fuzz = int(0)
                    highest_fuzz = max(author_fuzz, book_fuzz, isbn_fuzz)

                    # Replace German umlauts and strip ":" from the title (Darkie67)
                    booknamealt = item['volumeInfo']['title']
                    bookname = booknamealt.replace(u'\xf6', u'oe') \
                        .replace(u'\xe4', u'ae') \
                        .replace(u'\xdf', u'ss') \
                        .replace(u'\xc4', u'Ae') \
                        .replace(u'\xdc', u'Ue') \
                        .replace(u'\xd6', u'Oe') \
                        .replace(':', '') \
                        .replace(u'\xfc', u'ue')

                    resultlist.append({
                        'authorname': Author,
                        'bookid': item['id'],
                        'bookname': bookname,
                        'booksub': booksub,
                        'bookisbn': bookisbn,
                        'bookpub': bookpub,
                        'bookdate': bookdate,
                        'booklang': booklang,
                        'booklink': item['volumeInfo']['canonicalVolumeLink'],
                        'bookrate': float(bookrate),
                        'bookimg': bookimg,
                        'bookpages': bookpages,
                        'bookgenre': bookgenre,
                        'bookdesc': bookdesc,
                        'author_fuzz': author_fuzz,
                        'book_fuzz': book_fuzz,
                        'isbn_fuzz': isbn_fuzz,
                        'highest_fuzz': highest_fuzz,
                        'num_reviews': num_reviews
                    })
                    resultcount = resultcount + 1

                if startindex >= number_results:
                    logger.debug("Found %s total results" % total_count)
                    logger.debug("Removed %s bad language results" % ignored)
                    logger.debug("Removed %s books with no author" % no_author_count)
                    logger.info("Showing %s results for (%s) with keyword: %s" %
                                (resultcount, api_value, authorname))
                    break
                else:
                    continue
        except KeyError:
            break

    logger.info('The Google Books API was hit %s times for keyword %s' % (str(api_hits), self.name))
    queue.put(resultlist)
def _sendBoxcar(self, msg, title, token, subscribe=False):
    """
    Deliver a notification (or a subscription request) to Boxcar.

    msg: the message to send (unicode)
    title: the title of the message
    token: the credentials / email address to send to (or subscribe with)
    subscribe: when True, send a subscription request instead of a message

    returns: True if the request succeeded, False otherwise
    """
    logger.debug('Boxcar notification: %s' % msg)
    logger.debug('Title: %s' % title)
    logger.debug('Token: %s' % token)
    logger.debug('Subscribe: %s' % subscribe)

    # prepare the payload and the target URL
    body = msg.strip().encode(lazylibrarian.SYS_ENCODING)
    heading = title.encode(lazylibrarian.SYS_ENCODING)
    target = API_URL

    if subscribe:
        # subscription requests only carry the email address
        payload = {'email': token}
        target += "/subscribe"
    else:
        # normal notifications need the full parameter set
        payload = {
            'user_credentials': token,
            'notification[title]': heading,
            'notification[long_message]': body,
            'notification[sound]': "done"
        }

    proxies = proxyList()

    # send the request to boxcar
    try:
        timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
        response = requests.get(target, params=payload, timeout=timeout, proxies=proxies)
        status = str(response.status_code)

        if status.startswith('2'):
            logger.debug("BOXCAR: Notification successful.")
            return True

        if status == '404':
            # Boxcar answers 404 when the address isn't a Boxcar user
            logger.warn(
                "BOXCAR: Username is wrong/not a boxcar email. Boxcar will send an email to it"
            )
        elif status == '401':
            # 401 means either a bad token or the user hasn't added the service
            if subscribe:
                # already subscribed
                logger.error("BOXCAR: Already subscribed to service")
            else:
                # try subscribing first, then the caller can retry the message
                if self._sendBoxcar(body, heading, token, True):
                    logger.debug("BOXCAR: Subscription sent.")
                    return True
                logger.error("BOXCAR: Subscription could not be sent.")
        elif status == '400':
            # malformed parameters
            logger.error("BOXCAR: Wrong data send to boxcar.")
        else:
            logger.error("BOXCAR: Got error code %s" % status)
        return False

    except Exception as e:
        # URLError doesn't return a code, just a reason. HTTPError gives a code
        if not hasattr(e, 'code'):
            logger.error("BOXCAR: Boxcar notification failed: %s" % str(e))
        else:
            logger.error(
                "BOXCAR: Boxcar notification failed. Error code: %s" % str(e.code))
        return False
def ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode):
    """
    Build the query-parameter dict for a NewzNab/TorzNab api request.

    Prefers the provider's dedicated book/audio/mag search when it is
    configured together with a matching category, otherwise falls back to
    the provider's general search. Returns None when the provider supports
    neither for this searchType.
    """
    query = None

    if searchType in ["book", "shortbook"]:
        authorname, bookname = get_searchterm(book, searchType)
        if provider['BOOKSEARCH'] and provider['BOOKCAT']:
            # dedicated book search is available: use it
            query = {
                "t": provider['BOOKSEARCH'],
                "apikey": api_key,
                "title": bookname,
                "author": authorname,
                "cat": provider['BOOKCAT'],
            }
        elif provider['GENERALSEARCH'] and provider['BOOKCAT']:
            # no dedicated search: general query restricted to book categories
            query = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "q": authorname + ' ' + bookname,
                "cat": provider['BOOKCAT'],
            }
    elif searchType in ["audio", "shortaudio"]:
        authorname, bookname = get_searchterm(book, searchType)
        if provider['AUDIOSEARCH'] and provider['AUDIOCAT']:
            # dedicated audiobook search is available: use it
            query = {
                "t": provider['AUDIOSEARCH'],
                "apikey": api_key,
                "title": bookname,
                "author": authorname,
                "cat": provider['AUDIOCAT'],
            }
        elif provider['GENERALSEARCH'] and provider['AUDIOCAT']:
            # no dedicated search: general query restricted to audio categories
            query = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "q": authorname + ' ' + bookname,
                "cat": provider['AUDIOCAT'],
            }
    elif searchType == "mag":
        if provider['MAGSEARCH'] and provider['MAGCAT']:
            # dedicated magazine search is available: use it
            query = {
                "t": provider['MAGSEARCH'],
                "apikey": api_key,
                "cat": provider['MAGCAT'],
                "q": unaccented(book['searchterm'].replace(':', '')),
                "extended": provider['EXTENDED'],
            }
        elif provider['GENERALSEARCH'] and provider['MAGCAT']:
            query = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "cat": provider['MAGCAT'],
                "q": unaccented(book['searchterm'].replace(':', '')),
                "extended": provider['EXTENDED'],
            }
    else:
        if provider['GENERALSEARCH']:
            if searchType == "shortgeneral":
                # trim anything after an opening parenthesis for short search
                term = unaccented(book['searchterm'].split('(')[0].replace(':', ''))
            else:
                term = unaccented(book['searchterm'].replace(':', ''))
            query = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "q": term,
                "extended": provider['EXTENDED'],
            }

    if query:
        logger.debug('[NewzNabPlus] - %s Search parameters set to %s' % (searchMode, str(query)))
    else:
        logger.debug('[NewzNabPlus] - %s No matching search parameters for %s' % (searchMode, searchType))

    return query
def get_author_books(self, authorid=None, authorname=None, refresh=False):
    """
    Fetch all books for an author from the Google Books API and upsert
    them into the books table, then refresh the author's summary row
    (totals, last book) and set the author Status back to "Active".

    authorid: database AuthorID
    authorname: author display name used for the inauthor: query
    refresh: True when re-scanning an existing author (changes log wording)

    returns: books_dict (always empty; kept for interface compatibility)
    """
    books_dict = []
    set_url = self.url + urllib.quote('inauthor:' + '"' + authorname + '"')
    api_hits = 0
    logger.info('[%s] Now processing books with Google Books API' % authorname)

    # Mark the author as loading while we work
    myDB = database.DBConnection()
    controlValueDict = {"AuthorID": authorid}
    newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    try:
        startindex = 0
        resultcount = 0
        removedResults = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0
        while True:
            self.params['startIndex'] = startindex
            URL = set_url + '&' + urllib.urlencode(self.params)
            try:
                jsonresults = json.JSONDecoder().decode(
                    urllib2.urlopen(URL, timeout=30).read())
                api_hits = api_hits + 1
                number_results = jsonresults['totalItems']
                logger.debug('[%s] Searching url: %s' % (authorname, URL))
                if number_results == 0:
                    # bug fix: message referenced undefined api_value/self.name
                    logger.info('Found no results for author %s' % authorname)
                    break
            except HTTPError as err:
                # bug fix: was logger.Error (AttributeError) and there was no
                # break, so the loop continued with stale/undefined results
                logger.error(
                    'Google API returned HTTP Error - probably time/rate limiting - [%s]' % err.msg)
                break

            # Google Books pages are 40 items; advance before processing
            startindex = startindex + 40

            for item in jsonresults['items']:
                total_count = total_count + 1
                # skip if no author, no author is no book.
                try:
                    Author = item['volumeInfo']['authors'][0]
                except KeyError:
                    logger.debug('Skipped a result without authorfield.')
                    continue
                try:
                    # skip if language is in ignore list
                    booklang = item['volumeInfo']['language']
                    valid_langs = [valid_lang.strip()
                                   for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
                    if booklang not in valid_langs:
                        logger.debug('Skipped a book with language %s' % booklang)
                        ignored = ignored + 1
                        continue
                except KeyError:
                    ignored = ignored + 1
                    logger.debug('Skipped a result where no language is found')
                    continue

                # Optional fields: default sensibly when Google omits them
                try:
                    bookpub = item['volumeInfo']['publisher']
                except KeyError:
                    bookpub = None
                try:
                    booksub = item['volumeInfo']['subtitle']
                except KeyError:
                    booksub = None
                try:
                    bookdate = item['volumeInfo']['publishedDate']
                except KeyError:
                    bookdate = '0000-00-00'
                try:
                    bookimg = item['volumeInfo']['imageLinks']['thumbnail']
                except KeyError:
                    bookimg = 'images/nocover.png'
                try:
                    bookrate = item['volumeInfo']['averageRating']
                except KeyError:
                    bookrate = 0
                try:
                    bookpages = item['volumeInfo']['pageCount']
                except KeyError:
                    bookpages = 0
                try:
                    bookgenre = item['volumeInfo']['categories'][0]
                except KeyError:
                    bookgenre = None
                try:
                    bookdesc = item['volumeInfo']['description']
                except KeyError:
                    bookdesc = None
                try:
                    if item['volumeInfo']['industryIdentifiers'][0]['type'] == 'ISBN_10':
                        bookisbn = item['volumeInfo']['industryIdentifiers'][0]['identifier']
                    else:
                        bookisbn = None
                except KeyError:
                    bookisbn = None

                bookid = item['id']

                # Replace German umlauts and strip ":" from the title (Darkie67)
                booknamealt = item['volumeInfo']['title']
                bookname = booknamealt.replace(u'\xf6', u'oe') \
                    .replace(u'\xe4', u'ae') \
                    .replace(u'\xdf', u'ss') \
                    .replace(u'\xc4', u'Ae') \
                    .replace(u'\xdc', u'Ue') \
                    .replace(u'\xd6', u'Oe') \
                    .replace(':', '') \
                    .replace(u'\xfc', u'ue')

                booklink = item['volumeInfo']['canonicalVolumeLink']
                bookrate = float(bookrate)

                # parameterized query instead of string interpolation
                find_book_status = myDB.select(
                    "SELECT * FROM books WHERE BookID=?", (bookid,))
                if find_book_status:
                    for resulted in find_book_status:
                        book_status = resulted['Status']
                else:
                    book_status = "Skipped"

                # drop books with bad characters in the title
                if not re.match(r'[^\w-]', bookname):
                    if book_status != "Ignored":
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {
                            "AuthorName": authorname,
                            "AuthorID": authorid,
                            "AuthorLink": "",
                            "BookName": bookname,
                            "BookSub": booksub,
                            "BookDesc": bookdesc,
                            "BookIsbn": bookisbn,
                            "BookPub": bookpub,
                            "BookGenre": bookgenre,
                            "BookImg": bookimg,
                            "BookLink": booklink,
                            "BookRate": bookrate,
                            "BookPages": bookpages,
                            "BookDate": bookdate,
                            "BookLang": booklang,
                            "Status": book_status,
                            "BookAdded": formatter.today()
                        }
                        resultcount = resultcount + 1
                        myDB.upsert("books", newValueDict, controlValueDict)
                        logger.debug(u"book found " + bookname + " " + bookdate)
                        if not find_book_status:
                            logger.info("[%s] Added book: %s" % (authorname, bookname))
                            added_count = added_count + 1
                        else:
                            updated_count = updated_count + 1
                            logger.info("[%s] Updated book: %s" % (authorname, bookname))
                    else:
                        book_ignore_count = book_ignore_count + 1
                else:
                    removedResults = removedResults + 1

            if startindex >= number_results:
                break
            else:
                continue
    except KeyError:
        pass

    logger.info('[%s] The Google Books API was hit %s times to populate book list' %
                (authorname, str(api_hits)))

    # Refresh the author summary row (parameterized queries)
    lastbook = myDB.action(
        "SELECT BookName, BookLink, BookDate from books WHERE AuthorID=? AND Status != 'Ignored' order by BookDate DESC",
        (authorid,)).fetchone()
    unignoredbooks = myDB.select(
        "SELECT COUNT(BookName) as unignored FROM books WHERE AuthorID=? AND Status != 'Ignored'",
        (authorid,))
    bookCount = myDB.select(
        "SELECT COUNT(BookName) as counter FROM books WHERE AuthorID=?", (authorid,))
    controlValueDict = {"AuthorID": authorid}
    newValueDict = {
        "Status": "Active",
        "TotalBooks": bookCount[0]['counter'],
        "UnignoredBooks": unignoredbooks[0]['unignored'],
        # bug fix: an author with no unignored books returns no row here,
        # which previously raised TypeError on subscripting None
        "LastBook": lastbook['BookName'] if lastbook else '',
        "LastLink": lastbook['BookLink'] if lastbook else '',
        "LastDate": lastbook['BookDate'] if lastbook else ''
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    logger.debug("Found %s total books for author" % total_count)
    logger.debug("Removed %s bad language results for author" % ignored)
    logger.debug("Removed %s bad character results for author" % removedResults)
    logger.debug("Ignored %s books by author marked as Ignored" % book_ignore_count)
    logger.debug("Imported/Updated %s books for author" % resultcount)
    if refresh:
        logger.info("[%s] Book processing complete: Added %s books / Updated %s books" %
                    (authorname, str(added_count), str(updated_count)))
    else:
        logger.info("[%s] Book processing complete: Added %s books to the database" %
                    (authorname, str(added_count)))
    return books_dict
def NewzNabPlus(book=None, provider=None, searchType=None, searchMode=None, test=False):
    """
    Generic NewzNabplus query function
    takes in host+key+type and returns the result set regardless of who
    based on site running NewzNab+
    ref http://usenetreviewz.com/nzb-sites/

    book: dict describing the item to search for (must have 'searchterm')
    provider: provider config dict (HOST, API, DLPRIORITY, search caps)
    searchType: e.g. "book", "audio", "mag", "general"
    searchMode: logging/result tag for the transport mode
    test: when True, only probe the provider and return the success boolean
          instead of a result list
    """
    host = provider['HOST']
    api_key = provider['API']
    logger.debug('[NewzNabPlus] searchType [%s] with Host [%s] mode [%s] using api [%s] for item [%s]' % (
        searchType, host, searchMode, api_key, str(book)))

    results = []

    # Build the query params for this provider/searchType combination;
    # None means the provider has no usable search mode for this type.
    params = ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode)

    if params:
        # Normalise the host into a full base URL
        if not str(host)[:4] == "http":
            host = 'http://' + host
        if host[-1:] == '/':
            host = host[:-1]
        URL = host + '/api?' + urlencode(params)

        sterm = makeUnicode(book['searchterm'])

        rootxml = None
        logger.debug("[NewzNabPlus] URL = %s" % URL)
        result, success = fetchURL(URL)

        if test:
            # Probe mode: an "<error code .../>" body counts as failure even
            # though the HTTP fetch itself succeeded
            if result.startswith('<') and result.endswith('/>') and "error code" in result:
                result = result[1:-2]
                success = False
            if not success:
                logger.debug(result)
            return success

        if success:
            try:
                rootxml = ElementTree.fromstring(result)
            except Exception as e:
                logger.error('Error parsing data from %s: %s %s' % (host, type(e).__name__, str(e)))
                rootxml = None
        else:
            if not result or result == "''":
                result = "Got an empty response"
            logger.error('Error reading data from %s: %s' % (host, result))
            # maybe the host doesn't support the search type
            cancelled = cancelSearchType(searchType, result, provider)
            if not cancelled:  # it was some other problem
                BlockProvider(provider['HOST'], result)

        if rootxml is not None:
            # to debug because of api
            logger.debug('Parsing results from <a href="%s">%s</a>' % (URL, host))

            if rootxml.tag == 'error':
                # Provider answered with an API-level error document
                errormsg = rootxml.get('description', default='unknown error')
                logger.error("%s - %s" % (host, errormsg))
                # maybe the host doesn't support the search type
                cancelled = cancelSearchType(searchType, errormsg, provider)
                if not cancelled:  # it was some other problem
                    BlockProvider(provider['HOST'], errormsg)
            else:
                resultxml = rootxml.getiterator('item')
                nzbcount = 0
                # optional retention filter: 0 disables the age check
                maxage = check_int(lazylibrarian.CONFIG['USENET_RETENTION'], 0)
                for nzb in resultxml:
                    try:
                        thisnzb = ReturnResultsFieldsBySearchType(book, nzb, host, searchMode,
                                                                  provider['DLPRIORITY'])
                        if not maxage:
                            nzbcount += 1
                            results.append(thisnzb)
                        else:
                            # example nzbdate format: Mon, 27 May 2013 02:12:09 +0200
                            nzbdate = thisnzb['nzbdate']
                            try:
                                parts = nzbdate.split(' ')
                                nzbdate = ' '.join(parts[:5])  # strip the +0200
                                dt = datetime.datetime.strptime(nzbdate,
                                                                "%a, %d %b %Y %H:%M:%S").timetuple()
                                nzbage = age('%04d-%02d-%02d' % (dt.tm_year, dt.tm_mon, dt.tm_mday))
                            except Exception as e:
                                # unparseable date: treat as fresh rather than dropping it
                                logger.debug('Unable to get age from [%s] %s %s' %
                                             (thisnzb['nzbdate'], type(e).__name__, str(e)))
                                nzbage = 0
                            if nzbage <= maxage:
                                nzbcount += 1
                                results.append(thisnzb)
                            else:
                                logger.debug('%s is too old (%s day%s)' %
                                             (thisnzb['nzbtitle'], nzbage, plural(nzbage)))
                    except IndexError:
                        logger.debug('No results from %s for %s' % (host, sterm))
                logger.debug('Found %s nzb at %s for: %s' % (nzbcount, host, sterm))
        else:
            logger.debug('No data returned from %s for %s' % (host, sterm))
    return results
def ReturnResultsFieldsBySearchType(book=None, nzbdetails=None, host=None, searchMode=None, priority=0):
    """
    Flatten one <item> element from a newznab/torznab result feed into the
    flat result dict used by the search/download pipeline.

    The different t= search types return differently shaped items (books
    have a dedicated search, magazines use the generic one), but all carry
    the child elements read here: <title>, <size>, <pubDate>, <link>, and
    for torznab the attr elements named "magneturl" and "size".
    ref http://newznab.readthedocs.org/en/latest/misc/api/#predefined-categories

    book: dict holding at least 'bookid'
    nzbdetails: the parsed xml <item> element
    host: provider host, recorded as nzbprov
    searchMode: recorded as nzbmode
    priority: provider download priority

    returns: dict with bookid/nzbprov/nzbtitle/nzburl/nzbdate/nzbsize/
             nzbmode/priority
    """
    nzbtitle = ''
    nzbdate = ''
    nzburl = ''
    nzbsize = 0

    for detail in nzbdetails:
        tag = str(detail.tag).lower()
        if tag == 'title':
            nzbtitle = detail.text
        elif tag == 'size':
            nzbsize = detail.text
        elif tag == 'pubdate':
            nzbdate = detail.text
        elif tag == 'link':
            # take the link unless we already have a url and the user
            # prefers magnet links
            if not nzburl or (nzburl and not lazylibrarian.CONFIG['PREFER_MAGNET']):
                nzburl = detail.text
        elif detail.attrib.get('name') == 'magneturl':
            nzburl = detail.attrib.get('value')
        elif detail.attrib.get('name') == 'size':
            nzbsize = detail.attrib.get('value')

    resultFields = {
        'bookid': book['bookid'],
        'nzbprov': host,
        'nzbtitle': nzbtitle,
        'nzburl': nzburl,
        'nzbdate': nzbdate,
        'nzbsize': nzbsize,
        'nzbmode': searchMode,
        'priority': priority
    }

    logger.debug('[NewzNabPlus] - result fields from NZB are ' + str(resultFields))
    return resultFields