def BookToList(book_id, shelf_name, action='add'):
    global client
    if action == 'remove':
        body = urlencode({'name': shelf_name, 'book_id': book_id, 'a': 'remove'})
    else:
        body = urlencode({'name': shelf_name, 'book_id': book_id})
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    gr_api_sleep()
    try:
        response, content = client.request('%s/shelf/add_to_shelf.xml' % 'https://www.goodreads.com',
                                           'POST', body, headers)
    except Exception as e:
        logger.error("Exception in client.request: %s %s" % (type(e).__name__, traceback.format_exc()))
        return False, "Error in client.request: see error log"

    if not response['status'].startswith('2'):
        msg = 'Failure status: %s' % response['status']
        return False, msg
    return True, content

def create_shelf(self, shelf='lazylibrarian'):
    global consumer, client, token, user_id
    if not lazylibrarian.CONFIG['GR_API'] or not lazylibrarian.CONFIG['GR_SECRET'] or not \
            lazylibrarian.CONFIG['GR_OAUTH_TOKEN'] or not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
        logger.warn("Goodreads create shelf error: Please authorise first")
        return False, 'Unauthorised'

    consumer = oauth.Consumer(key=str(lazylibrarian.CONFIG['GR_API']),
                              secret=str(lazylibrarian.CONFIG['GR_SECRET']))
    token = oauth.Token(lazylibrarian.CONFIG['GR_OAUTH_TOKEN'], lazylibrarian.CONFIG['GR_OAUTH_SECRET'])
    client = oauth.Client(consumer, token)
    user_id = self.getUserId()

    # could also pass [featured] [exclusive_flag] [sortable_flag] all default to False
    body = urlencode({'user_shelf[name]': shelf.lower()})
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    gr_api_sleep()

    try:
        response, content = client.request('%s/user_shelves.xml' % 'https://www.goodreads.com',
                                           'POST', body, headers)
    except Exception as e:
        logger.error("Exception in client.request: %s %s" % (type(e).__name__, traceback.format_exc()))
        return False, "Error in client.request: see error log"

    if not response['status'].startswith('2'):
        msg = 'Failure status: %s' % response['status']
        return False, msg
    return True, ''

def getSeriesMembers(seriesID=None):
    """ Ask librarything or goodreads for details on all books in a series
        order, bookname, authorname, workid, authorid
        (workid and authorid are goodreads only)
        Return as a list of lists """
    results = []
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        params = {"format": "xml", "key": lazylibrarian.CONFIG['GR_API']}
        URL = 'https://www.goodreads.com/series/' + seriesID + '?' + urlencode(params)
        try:
            rootxml, in_cache = gr_xml_request(URL)
            if rootxml is None:
                logger.debug("Error requesting series %s" % seriesID)
                return []
        except Exception as e:
            logger.error("%s finding series %s: %s" % (type(e).__name__, seriesID, str(e)))
            return []
        works = rootxml.find('series/series_works')
        books = works.getiterator('series_work')
        if books is None:
            logger.warn('No books found for %s' % seriesID)
            return []
        for book in books:
            mydict = {}
            for mykey, location in [('order', 'user_position'),
                                    ('bookname', 'work/best_book/title'),
                                    ('authorname', 'work/best_book/author/name'),
                                    ('workid', 'work/id'),
                                    ('authorid', 'work/best_book/author/id')
                                    ]:
                if book.find(location) is not None:
                    mydict[mykey] = book.find(location).text
                else:
                    mydict[mykey] = ""
            results.append([mydict['order'], mydict['bookname'], mydict['authorname'],
                            mydict['workid'], mydict['authorid']])
    else:
        data = getBookWork(None, "SeriesPage", seriesID)
        if data:
            try:
                table = data.split('class="worksinseries"')[1].split('</table>')[0]
                rows = table.split('<tr')
                for row in rows:
                    if 'href=' in row:
                        booklink = row.split('href="')[1]
                        bookname = booklink.split('">')[1].split('<')[0]
                        # booklink = booklink.split('"')[0]
                        try:
                            authorlink = row.split('href="')[2]
                            authorname = authorlink.split('">')[1].split('<')[0]
                            # authorlink = authorlink.split('"')[0]
                            order = row.split('class="order">')[1].split('<')[0]
                            results.append([order, bookname, authorname, '', ''])
                        except IndexError:
                            logger.debug('Incomplete data in series table for series %s' % seriesID)
            except IndexError:
                if 'class="worksinseries"' in data:  # error parsing, or just no series data available?
                    logger.debug('Error in series table for series %s' % seriesID)
    return results

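# Hedged usage sketch (added for illustration, not part of the original module):
# getSeriesMembers returns a list of lists in the order
# [order, bookname, authorname, workid, authorid]; workid and authorid come back
# as empty strings on the librarything path. The series id below is invented.
def _example_print_series(seriesID='12345'):
    for order, bookname, authorname, workid, authorid in getSeriesMembers(seriesID):
        print('%s. %s by %s (work %s, author %s)' %
              (order, bookname, authorname, workid or '-', authorid or '-'))
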
def get_shelf_list(self):
    global consumer, client, token, user_id
    if not lazylibrarian.CONFIG['GR_API'] or not lazylibrarian.CONFIG['GR_SECRET'] or not \
            lazylibrarian.CONFIG['GR_OAUTH_TOKEN'] or not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
        logger.warn("Goodreads get shelf error: Please authorise first")
        return []
    else:
        #
        # loop over each page of shelves
        #     loop over each shelf
        #         add shelf to list
        #
        consumer = oauth.Consumer(key=str(lazylibrarian.CONFIG['GR_API']),
                                  secret=str(lazylibrarian.CONFIG['GR_SECRET']))
        token = oauth.Token(lazylibrarian.CONFIG['GR_OAUTH_TOKEN'], lazylibrarian.CONFIG['GR_OAUTH_SECRET'])
        client = oauth.Client(consumer, token)
        user_id = self.getUserId()

        current_page = 0
        shelves = []
        page_shelves = 1
        while page_shelves:
            current_page = current_page + 1
            page_shelves = 0
            shelf_template = Template('${base}/shelf/list.xml?user_id=${user_id}&key=${key}&page=${page}')
            body = urlencode({})
            headers = {'Content-Type': 'application/x-www-form-urlencoded'}
            request_url = shelf_template.substitute(base='https://www.goodreads.com', user_id=user_id,
                                                    page=current_page, key=lazylibrarian.CONFIG['GR_API'])
            gr_api_sleep()
            try:
                response, content = client.request(request_url, 'GET', body, headers)
            except Exception as e:
                logger.error("Exception in client.request: %s %s" % (type(e).__name__, traceback.format_exc()))
                return shelves

            if not response['status'].startswith('2'):
                logger.error('Failure status: %s for page %s' % (response['status'], current_page))
                if lazylibrarian.LOGLEVEL & lazylibrarian.log_grsync:
                    logger.debug(request_url)
            else:
                xmldoc = xml.dom.minidom.parseString(content)
                shelf_list = xmldoc.getElementsByTagName('shelves')[0]
                for item in shelf_list.getElementsByTagName('user_shelf'):
                    shelf_name = item.getElementsByTagName('name')[0].firstChild.nodeValue
                    shelf_count = item.getElementsByTagName('book_count')[0].firstChild.nodeValue
                    shelf_exclusive = item.getElementsByTagName('exclusive_flag')[0].firstChild.nodeValue
                    shelves.append({'name': shelf_name, 'books': shelf_count, 'exclusive': shelf_exclusive})
                    page_shelves += 1

                    if lazylibrarian.LOGLEVEL & lazylibrarian.log_grsync:
                        logger.debug('Shelf %s : %s: Exclusive %s' % (shelf_name, shelf_count, shelf_exclusive))

            if lazylibrarian.LOGLEVEL & lazylibrarian.log_grsync:
                logger.debug('Found %s shelves on page %s' % (page_shelves, current_page))

        logger.debug('Found %s shelves on %s page%s' % (len(shelves), current_page - 1, plural(current_page - 1)))
        # print shelves
        return shelves

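# Illustrative note (not in the original): get_shelf_list() returns a list of
# dicts such as {'name': 'to-read', 'books': '42', 'exclusive': 'true'}. The
# values are the raw XML text nodes, so the book counts and exclusive flags are
# strings, not ints or booleans.
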
def getShelfBooks(page, shelf_name):
    global client, user_id
    data = '${base}/review/list?format=xml&v=2&id=${user_id}&sort=author&order=a'
    data += '&key=${key}&page=${page}&per_page=100&shelf=${shelf_name}'
    owned_template = Template(data)
    body = urlencode({})
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    request_url = owned_template.substitute(base='https://www.goodreads.com', user_id=user_id,
                                            page=page, key=lazylibrarian.CONFIG['GR_API'],
                                            shelf_name=shelf_name)
    gr_api_sleep()
    try:
        response, content = client.request(request_url, 'GET', body, headers)
    except Exception as e:
        logger.error("Exception in client.request: %s %s" % (type(e).__name__, traceback.format_exc()))
        return "Error in client.request: see error log"

    if not response['status'].startswith('2'):
        logger.error('Failure status: %s for page %s' % (response['status'], page))
    return content

def __str__(self):
    data = {'oauth_consumer_key': self.key,
            'oauth_consumer_secret': self.secret}
    return urlencode(data)

def _action(self, params, body=None, content_type=None):
    # noinspection PyTypeChecker
    url = self.base_url + '/gui/' + '?token=' + self.token + '&' + urlencode(params)
    request = Request(url)
    if lazylibrarian.CONFIG['PROXY_HOST']:
        for item in getList(lazylibrarian.CONFIG['PROXY_TYPE']):
            request.set_proxy(lazylibrarian.CONFIG['PROXY_HOST'], item)
    request.add_header('User-Agent', getUserAgent())
    if body:
        if PY2:
            request.add_data(body)
        else:
            # Request.data is an attribute in python3, not a callable
            request.data = body
        request.add_header('Content-length', str(len(body)))
    if content_type:
        request.add_header('Content-type', content_type)
    try:
        response = self.opener.open(request)
        return response.code, json.loads(response.read())
    except HTTPError as err:
        logger.debug('URL: %s' % url)
        logger.debug('uTorrent webUI raised the following error: ' + str(err))

def getBookAuthors(bookid):
    """ Get a list of authors contributing to a book
        from the goodreads bookpage or the librarything bookwork file """
    authorlist = []
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        params = {"key": lazylibrarian.CONFIG['GR_API']}
        URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urlencode(params)
        try:
            rootxml, in_cache = gr_xml_request(URL)
            if rootxml is None:
                logger.debug("Error requesting book %s" % bookid)
                return []
        except Exception as e:
            logger.error("%s finding book %s: %s" % (type(e).__name__, bookid, str(e)))
            return []

        book = rootxml.find('book')
        authors = book.find('authors')
        anames = authors.getiterator('author')
        if anames is None:
            logger.warn('No authors found for %s' % bookid)
            return []
        for aname in anames:
            author = {}
            if aname.find('id') is not None:
                author['id'] = aname.find('id').text
            if aname.find('name') is not None:
                author['name'] = aname.find('name').text
            if aname.find('role') is not None:
                role = aname.find('role').text
                if not role:
                    role = ''
                author['role'] = role
            if author:
                authorlist.append(author)
    else:
        data = getBookWork(bookid, "Authors")
        if data:
            try:
                data = data.split('otherauthors_container')[1].split('</table>')[0].split('<table')[1].split('>', 1)[1]
            except IndexError:
                data = ''

        authorlist = []
        if data and 'Work?' in data:
            try:
                rows = data.split('<tr')
                for row in rows[2:]:
                    author = {}
                    col = row.split('<td>')
                    author['name'] = col[1].split('">')[1].split('<')[0]
                    author['role'] = col[2].split('<')[0]
                    author['type'] = col[3].split('<')[0]
                    author['work'] = col[4].split('<')[0]
                    author['status'] = col[5].split('<')[0]
                    authorlist.append(author)
            except IndexError:
                logger.debug('Error parsing authorlist for %s' % bookid)
    return authorlist

def get_normalized_parameters(self):
    """Return a string that contains the parameters that must be signed."""
    items = [(k, v) for k, v in list(self.items()) if k != 'oauth_signature']
    encoded_str = urlencode(sorted(items), True)
    # Encode signature parameters per Oauth Core 1.0 protocol
    # spec draft 7, section 3.6
    # (http://tools.ietf.org/html/draft-hammer-oauth-07#section-3.6)
    # Spaces must be encoded with "%20" instead of "+"
    return encoded_str.replace('+', '%20')

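# Hedged sketch (added for illustration, not in the original): the OAuth 1.0
# normalization above sorts the parameters, drops 'oauth_signature', and encodes
# spaces as %20 rather than '+'. A minimal standalone reproduction with made-up
# values:
def _example_normalized_parameters():
    params = {'oauth_signature': 'ignored', 'b': 'b c', 'a': '1'}
    items = sorted((k, v) for k, v in params.items() if k != 'oauth_signature')
    return urlencode(items, True).replace('+', '%20')  # -> 'a=1&b=b%20c'
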
def follow_author(self, authorid=None, follow=True):
    global consumer, client, token, user_id
    if not lazylibrarian.CONFIG['GR_API'] or not lazylibrarian.CONFIG['GR_SECRET'] or not \
            lazylibrarian.CONFIG['GR_OAUTH_TOKEN'] or not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
        logger.warn("Goodreads follow author error: Please authorise first")
        return False, 'Unauthorised'

    consumer = oauth.Consumer(key=str(lazylibrarian.CONFIG['GR_API']),
                              secret=str(lazylibrarian.CONFIG['GR_SECRET']))
    token = oauth.Token(lazylibrarian.CONFIG['GR_OAUTH_TOKEN'], lazylibrarian.CONFIG['GR_OAUTH_SECRET'])
    client = oauth.Client(consumer, token)
    user_id = self.getUserId()

    # follow   https://www.goodreads.com/author_followings?id=AUTHOR_ID&format=xml
    # unfollow https://www.goodreads.com/author_followings/AUTHOR_FOLLOWING_ID?format=xml
    gr_api_sleep()

    if follow:
        body = urlencode({'id': authorid, 'format': 'xml'})
        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
        try:
            response, content = client.request('%s/author_followings' % 'https://www.goodreads.com',
                                               'POST', body, headers)
        except Exception as e:
            logger.error("Exception in client.request: %s %s" % (type(e).__name__, traceback.format_exc()))
            return False, "Error in client.request: see error log"
    else:
        body = urlencode({'format': 'xml'})
        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
        try:
            response, content = client.request('%s/author_followings/%s' % ('https://www.goodreads.com', authorid),
                                               'DELETE', body, headers)
        except Exception as e:
            logger.error("Exception in client.request: %s %s" % (type(e).__name__, traceback.format_exc()))
            return False, "Error in client.request: see error log"

    if follow and response['status'] == '422':
        return True, 'Already following'

    if response['status'].startswith('2'):
        if follow:
            return True, content.split('<id>')[1].split('</id>')[0]
        return True, ''
    return False, 'Failure status: %s' % response['status']

def _get_sid(self, base_url, username, password):
    # login so we can capture SID cookie
    login_data = makeBytestr(urlencode({'username': username, 'password': password}))
    try:
        _ = self.opener.open(base_url + '/login', login_data)
    except Exception as err:
        logger.error('Error getting SID. qBittorrent %s: %s' % (type(err).__name__, str(err)))
        logger.warn('Unable to log in to %s/login' % base_url)
        return
    for cookie in self.cookiejar:
        logger.debug('login cookie: ' + cookie.name + ', value: ' + cookie.value)
    return

def _sendProwl(prowl_api=None, prowl_priority=None, event=None, message=None, force=False):
    title = "LazyLibrarian"

    # suppress notifications if the notifier is disabled but the notify options are checked
    if not lazylibrarian.CONFIG['USE_PROWL'] and not force:
        return False

    if prowl_api is None:
        prowl_api = lazylibrarian.CONFIG['PROWL_APIKEY']
    if prowl_priority is None:
        prowl_priority = lazylibrarian.CONFIG['PROWL_PRIORITY']

    if PY2:
        message = message.encode(lazylibrarian.SYS_ENCODING)

    logger.debug(u"Prowl: title: " + title)
    logger.debug(u"Prowl: event: " + event)
    logger.debug(u"Prowl: message: " + message)

    data = {'event': event,
            'description': message,
            'application': title,
            'apikey': prowl_api,
            'priority': prowl_priority}

    try:
        http_handler = HTTPSConnection("api.prowlapp.com")
        http_handler.request("POST", "/publicapi/add",
                             headers={'Content-type': "application/x-www-form-urlencoded"},
                             body=urlencode(data))
        response = http_handler.getresponse()
        request_status = response.status

        if request_status == 200:
            logger.info('Prowl notifications sent.')
            return True
        elif request_status == 401:
            logger.info('Prowl auth failed: %s' % response.reason)
            return False
        else:
            logger.info('Prowl notification failed.')
            return False
    except Exception as e:
        logger.warn('Error sending to Prowl: %s' % e)
        return False

def _getJSON(URL, params):
    # Get JSON response from URL
    # Return json,True or error_msg,False
    URL += "/?%s" % urlencode(params)
    result, success = fetchURL(URL, retry=False)
    if success:
        try:
            result_json = json.loads(result)
            return result_json, True
        except (ValueError, AttributeError):
            return "Could not convert response to json", False
    return "getJSON returned %s" % result, False

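# Hedged usage sketch (the URL and command below are invented for illustration):
# _getJSON always returns a two-tuple, so callers can branch on the flag.
#
#   res, ok = _getJSON('http://localhost:5299/api', {'cmd': 'getIndex'})
#   if ok:
#       print(res)
#   else:
#       print('request failed: %s' % res)
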
def _command(self, command, args=None, content_type=None, files=None):
    logger.debug('QBittorrent WebAPI Command: %s' % command)
    url = self.base_url + '/' + command
    data = None
    headers = dict()

    if files or content_type == 'multipart/form-data':
        data, headers = encode_multipart(args, files, '-------------------------acebdf13572468')
    else:
        if args:
            data = makeBytestr(urlencode(args))
        if content_type:
            headers['Content-Type'] = content_type

    request = Request(url, data, headers)

    if lazylibrarian.CONFIG['PROXY_HOST']:
        for item in getList(lazylibrarian.CONFIG['PROXY_TYPE']):
            request.set_proxy(lazylibrarian.CONFIG['PROXY_HOST'], item)
    request.add_header('User-Agent', getUserAgent())

    try:
        response = self.opener.open(request)
        try:
            contentType = response.headers['content-type']
        except KeyError:
            contentType = ''

        resp = response.read()
        # some commands return json
        if contentType == 'application/json':
            if resp:
                return json.loads(resp)
            return ''
        else:
            # some commands return plain text
            resp = makeUnicode(resp)
            logger.debug("QBitTorrent returned %s" % resp)
            if command == 'version/api':
                return resp
            # some just return Ok. or Fails.
            if resp and resp != 'Ok.':
                return False
        # some commands return nothing but response code (always 200)
        return True
    except URLError as err:
        logger.debug('Failed URL: %s' % url)
        logger.debug('QBitTorrent webUI raised the following error: %s' % err.reason)
        return False

def to_string(self):
    """Returns this token as a plain string, suitable for storage.

    The resulting string includes the token's secret, so you should never
    send or store this string where a third party can read it.
    """
    data = {'oauth_token': self.key,
            'oauth_token_secret': self.secret}
    if self.callback_confirmed is not None:
        data['oauth_callback_confirmed'] = self.callback_confirmed
    return urlencode(data)

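# Hedged sketch (added for illustration): to_string() serialises the token as a
# query string, plus oauth_callback_confirmed when set, so it can be parsed back
# with the standard query-string tools when restoring from storage.
def _example_token_string():
    data = {'oauth_token': 'k', 'oauth_token_secret': 's'}  # made-up values
    return urlencode(data)  # -> 'oauth_token=k&oauth_token_secret=s'
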
def EXTRA(book=None, test=False):
    errmsg = ''
    provider = "Extratorrent"
    host = lazylibrarian.CONFIG['EXTRA_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/rss")

    params = {"type": "search", "s_cat": "2", "search": book['searchterm']}
    searchURL = providerurl + "/?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = int(item['seeders'].replace(',', ''))
                    except ValueError:
                        seeders = 0
                    try:
                        size = int(item['size'])
                    except ValueError:
                        size = 0
                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['href']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['EXTRA_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg

def WWT(book=None, test=False):
    errmsg = ''
    provider = "WorldWideTorrents"
    host = lazylibrarian.CONFIG['WWT_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/torrents-search.php")

    sterm = makeUnicode(book['searchterm'])

    cat = 0  # 0=all, 36=ebooks, 52=mags, 56=audiobooks
    if 'library' in book:
        if book['library'] == 'AudioBook':
            cat = 56
        elif book['library'] == 'eBook':
            cat = 36
        elif book['library'] == 'magazine':
            cat = 52

    page = 0
    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    next_page = True

    while next_page:
        params = {"search": book['searchterm'], "page": page, "cat": cat}
        searchURL = providerurl + "/?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # might return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            elif '503' in result:
                logger.warn("Cloudflare bot detection? %s: %s" % (provider, result))
                logger.warn("Try unblocking %s from a browser" % providerurl)
                success = True
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching data from %s: %s' % (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            soup = BeautifulSoup(result, 'html5lib')

            rows = []
            try:
                tables = soup.find_all('table')  # un-named table
                table = tables[2]
                if table:
                    rows = table.find_all('tr')
            except IndexError:  # no results table in result page
                rows = []

            if len(rows) > 1:
                rows = rows[1:]  # first row is headers

            for row in rows:
                td = row.find_all('td')
                if len(td) > 3:
                    try:
                        title = unaccented(td[0].text)
                        # can return magnet or torrent or both.
                        magnet = ''
                        url = ''
                        mode = 'torrent'
                        try:
                            magnet = 'magnet' + str(td[0]).split('href="magnet')[1].split('"')[0]
                            mode = 'magnet'
                        except IndexError:
                            pass
                        try:
                            url = url_fix(host + '/download.php') + \
                                  str(td[0]).split('href="download.php')[1].split('.torrent"')[0] + '.torrent'
                            mode = 'torrent'
                        except IndexError:
                            pass

                        if not url or (magnet and url and lazylibrarian.CONFIG['PREFER_MAGNET']):
                            url = magnet
                            mode = 'magnet'

                        try:
                            size = str(td[1].text).replace(' ', '').upper()
                            size = size_in_bytes(size)
                        except ValueError:
                            size = 0

                        try:
                            seeders = int(td[2].text.replace(',', ''))
                        except ValueError:
                            seeders = 0

                        if not url or not title:
                            logger.debug('Missing url or title')
                        elif minimumseeders < seeders:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': mode,
                                'priority': lazylibrarian.CONFIG['WWT_DLPRIORITY']
                            })
                            logger.debug('Found %s. Size: %s' % (title, size))
                            next_page = True
                        else:
                            logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                    except Exception as e:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg

def TPB(book=None, test=False):
    errmsg = ''
    provider = "TPB"
    host = lazylibrarian.CONFIG['TPB_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/s/?")

    cat = 0  # 601=ebooks, 102=audiobooks, 0=all, no mag category
    if 'library' in book:
        if book['library'] == 'AudioBook':
            cat = 102
        elif book['library'] == 'eBook':
            cat = 601
        elif book['library'] == 'magazine':
            cat = 0

    sterm = makeUnicode(book['searchterm'])

    page = 0
    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    next_page = True

    while next_page:
        params = {"q": book['searchterm'], "category": cat, "page": page, "orderby": "99"}
        searchURL = providerurl + "?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching data from %s: %s' % (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            soup = BeautifulSoup(result, 'html5lib')
            # tpb uses a named table
            table = soup.find('table', id='searchResult')
            if table:
                rows = table.find_all('tr')
            else:
                rows = []

            if len(rows) > 1:
                rows = rows[1:]  # first row is headers

            for row in rows:
                td = row.find_all('td')
                if len(td) > 2:
                    try:
                        new_soup = BeautifulSoup(str(td[1]), 'html5lib')
                        link = new_soup.find("a")
                        magnet = link.get("href")
                        title = link.text
                        size = td[1].text.split(', Size ')[1].split('iB')[0]
                        size = size.replace(' ', '')
                        size = size_in_bytes(size)
                        try:
                            seeders = int(td[2].text.replace(',', ''))
                        except ValueError:
                            seeders = 0

                        if minimumseeders < seeders:
                            # no point in asking for magnet link if not enough seeders
                            magurl = '%s/%s' % (host, magnet)
                            result, success = fetchURL(magurl)
                            if not success:
                                logger.debug('Error fetching url %s, %s' % (magurl, result))
                            else:
                                magnet = None
                                new_soup = BeautifulSoup(result, 'html5lib')
                                for link in new_soup.find_all('a'):
                                    output = link.get('href')
                                    if output and output.startswith('magnet'):
                                        magnet = output
                                        break
                            if not magnet or not title:
                                logger.debug('Missing magnet or title')
                            else:
                                results.append({
                                    'bookid': book['bookid'],
                                    'tor_prov': provider,
                                    'tor_title': title,
                                    'tor_url': magnet,
                                    'tor_size': str(size),
                                    'tor_type': 'magnet',
                                    'priority': lazylibrarian.CONFIG['TPB_DLPRIORITY']
                                })
                                logger.debug('Found %s. Size: %s' % (title, size))
                                next_page = True
                        else:
                            logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                    except Exception as e:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg

def GEN(book=None, prov=None, test=False):
    errmsg = ''
    provider = "libgen.io"
    if not prov:
        prov = 'GEN'
    host = lazylibrarian.CONFIG[prov + '_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    search = lazylibrarian.CONFIG[prov + '_SEARCH']
    if not search or not search.endswith('.php'):
        search = 'search.php'
    if 'index.php' not in search and 'search.php' not in search:
        search = 'search.php'
    if search[0] == '/':
        search = search[1:]

    sterm = makeUnicode(book['searchterm'])

    page = 1
    results = []
    next_page = True

    while next_page:
        if 'index.php' in search:
            params = {
                "s": book['searchterm'],
                "f_lang": "All",
                "f_columns": 0,
                "f_ext": "All"
            }
        else:
            params = {
                "view": "simple",
                "open": 0,
                "phrase": 0,
                "column": "def",
                "res": 100,
                "req": book['searchterm']
            }

        if page > 1:
            params['page'] = page

        providerurl = url_fix(host + "/%s" % search)
        searchURL = providerurl + "?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            elif '111' in result:
                # looks like libgen has ip based access limits
                logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                errmsg = result
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching page data from %s: %s' % (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            try:
                soup = BeautifulSoup(result, 'html5lib')
                rows = []
                try:
                    table = soup.find_all('table', rules='rows')[-1]  # the last table with rules=rows
                    if table:
                        rows = table.find_all('tr')
                except IndexError:  # no results table in result page
                    rows = []

                if len(rows) > 1:  # skip table headers
                    rows = rows[1:]

                for row in rows:
                    author = ''
                    title = ''
                    size = ''
                    extn = ''
                    link = ''
                    td = row.find_all('td')

                    if 'index.php' in search and len(td) > 3:
                        # Foreign fiction
                        try:
                            author = formatAuthorName(td[0].text)
                            title = td[2].text
                            newsoup = BeautifulSoup(str(td[4]), 'html5lib')
                            data = newsoup.find('a')
                            if data:
                                link = data.get('href')
                                extn = td[4].text.split('(')[0].strip()
                                size = td[4].text.split('(')[1].split(')')[0]
                                size = size.upper()
                        except IndexError as e:
                            logger.debug('Error parsing libgen index.php results: %s' % str(e))

                    elif 'search.php' in search and len(td) > 8:
                        # Non-fiction
                        try:
                            author = formatAuthorName(td[1].text)
                            title = td[2].text
                            size = td[7].text.upper()
                            extn = td[8].text
                            link = ''
                            newsoup = BeautifulSoup(str(td[2]), 'html5lib')
                            for res in newsoup.find_all('a'):
                                output = res.get('href')
                                if 'md5' in output:
                                    link = output
                                    break
                        except IndexError as e:
                            logger.debug('Error parsing libgen search.php results; %s' % str(e))

                    size = size_in_bytes(size)

                    if link and title:
                        if author:
                            title = author.strip() + ' ' + title.strip()
                        if extn:
                            title = title + '.' + extn

                        if link.startswith('http'):
                            url = redirect_url(host, link)
                        else:
                            if "/index.php?" in link:
                                link = 'md5' + link.split('md5')[1]
                            if "/ads.php?" in link:
                                url = url_fix(host + "/" + link)
                            else:
                                url = url_fix(host + "/ads.php?" + link)

                        bookresult, success = fetchURL(url)
                        if not success:
                            logger.debug('Error fetching link data from %s: %s' % (provider, bookresult))
                            logger.debug(url)
                            url = None
                        else:
                            url = None
                            try:
                                new_soup = BeautifulSoup(bookresult, 'html5lib')
                                for link in new_soup.find_all('a'):
                                    output = link.get('href')
                                    if output:
                                        if output.startswith('http') and '/get.php' in output:
                                            url = output
                                            break
                                        elif '/get.php' in output:
                                            url = '/get.php' + output.split('/get.php')[1]
                                            break
                                        elif '/download/book' in output:
                                            url = '/download/book' + output.split('/download/book')[1]
                                            break

                                if url and not url.startswith('http'):
                                    url = url_fix(host + url)
                                else:
                                    url = redirect_url(host, url)
                            except Exception as e:
                                logger.error('%s parsing bookresult for %s: %s' % (type(e).__name__, link, str(e)))
                                url = None

                        if url:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider + '/' + search,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'direct',
                                'priority': lazylibrarian.CONFIG[prov + '_DLPRIORITY']
                            })
                            logger.debug('Found %s, Size %s' % (title, size))
                        next_page = True

            except Exception as e:
                logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg

def KAT(book=None, test=False):
    errmsg = ''
    provider = "KAT"
    host = lazylibrarian.CONFIG['KAT_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/usearch/" + quote(book['searchterm']))

    params = {"category": "books", "field": "seeders", "sorder": "desc"}
    searchURL = providerurl + "/?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    result, success = fetchURL(searchURL)
    if not success:
        # seems KAT returns 404 if no results, not really an error
        if '404' in result:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, result))
            errmsg = result
        result = False

    if test:
        return success

    results = []

    if result:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
        soup = BeautifulSoup(result, 'html5lib')
        rows = []
        try:
            table = soup.find_all('table')[1]  # un-named table
            if table:
                rows = table.find_all('tr')
        except IndexError:  # no results table in result page
            rows = []

        if len(rows) > 1:
            rows = rows[1:]  # first row is headers

        for row in rows:
            td = row.find_all('td')
            if len(td) > 3:
                try:
                    title = unaccented(td[0].text)
                    # kat can return magnet or torrent or both.
                    magnet = ''
                    url = ''
                    mode = 'torrent'
                    try:
                        magnet = 'magnet' + str(td[0]).split('href="magnet')[1].split('"')[0]
                        mode = 'magnet'
                    except IndexError:
                        pass
                    try:
                        url = 'http' + str(td[0]).split('href="http')[1].split('.torrent?')[0] + '.torrent'
                        mode = 'torrent'
                    except IndexError:
                        pass

                    if not url or (magnet and url and lazylibrarian.CONFIG['PREFER_MAGNET']):
                        url = magnet
                        mode = 'magnet'

                    try:
                        size = str(td[1].text).replace(' ', '').upper()
                        size = size_in_bytes(size)
                    except ValueError:
                        size = 0

                    try:
                        seeders = int(td[3].text.replace(',', ''))
                    except ValueError:
                        seeders = 0

                    if not url or not title:
                        logger.debug('Missing url or title')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': mode,
                            'priority': lazylibrarian.CONFIG['KAT_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg

def TDL(book=None, test=False):
    errmsg = ''
    provider = "torrentdownloads"
    host = lazylibrarian.CONFIG['TDL_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host)

    params = {"type": "search", "cid": "2", "search": book['searchterm']}
    searchURL = providerurl + "/rss.xml?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = item['title']
                    seeders = int(item['seeders'].replace(',', ''))
                    link = item['link']
                    size = int(item['size'])
                    url = None
                    try:
                        pubdate = item['published']
                    except KeyError:
                        pubdate = None

                    if link and minimumseeders < seeders:
                        # no point requesting the magnet link if not enough seeders
                        # TDL gives us a relative link
                        result, success = fetchURL(providerurl + link)
                        if success:
                            new_soup = BeautifulSoup(result, 'html5lib')
                            for link in new_soup.find_all('a'):
                                output = link.get('href')
                                if output and output.startswith('magnet'):
                                    url = output
                                    break

                        if not url or not title:
                            logger.debug('Missing url or title')
                        else:
                            res = {
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'magnet',
                                'priority': lazylibrarian.CONFIG['TDL_DLPRIORITY']
                            }
                            if pubdate:
                                res['tor_date'] = pubdate
                            logger.debug('Found %s. Size: %s' % (title, size))
                            results.append(res)
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg

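# Illustrative note (not in the original): each provider above returns the
# two-tuple (results, errmsg). results is a list of dicts with the keys bookid,
# tor_prov, tor_title, tor_url, tor_size, tor_type and priority, where tor_type
# is 'torrent', 'magnet' or 'direct' and tor_size is a byte count stored as a
# string. An empty list with an empty errmsg simply means no matches had enough
# seeders.
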
def getSeriesAuthors(seriesid):
    """ Get a list of authors contributing to a series
        and import those authors (and their books) into the database
        Return how many authors you added """
    myDB = database.DBConnection()
    result = myDB.match("select count(*) as counter from authors")
    start = int(result['counter'])
    result = myDB.match('select SeriesName from series where SeriesID=?', (seriesid,))
    seriesname = result['SeriesName']
    members = getSeriesMembers(seriesid)
    dic = {u'\u2018': "", u'\u2019': "", u'\u201c': '', u'\u201d': '', "'": "", '"': ''}

    if members:
        myDB = database.DBConnection()
        for member in members:
            # order = member[0]
            bookname = member[1]
            authorname = member[2]
            # workid = member[3]
            authorid = member[4]
            bookname = replace_all(bookname, dic)
            if not authorid:
                # goodreads gives us all the info we need, librarything/google doesn't
                base_url = 'https://www.goodreads.com/search.xml?q='
                params = {"key": lazylibrarian.CONFIG['GR_API']}
                searchname = bookname + ' ' + authorname
                searchname = cleanName(unaccented(searchname))
                if PY2:
                    searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                searchterm = quote_plus(searchname)
                set_url = base_url + searchterm + '&' + urlencode(params)
                try:
                    rootxml, in_cache = gr_xml_request(set_url)
                    if rootxml is None:
                        logger.warn('Error getting XML for %s' % searchname)
                    else:
                        resultxml = rootxml.getiterator('work')
                        for item in resultxml:
                            try:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                            except (KeyError, AttributeError):
                                booktitle = ""
                            book_fuzz = fuzz.token_set_ratio(booktitle, bookname)
                            if book_fuzz >= 98:
                                try:
                                    author = item.find('./best_book/author/name').text
                                except (KeyError, AttributeError):
                                    author = ""
                                # try:
                                #     workid = item.find('./work/id').text
                                # except (KeyError, AttributeError):
                                #     workid = ""
                                try:
                                    authorid = item.find('./best_book/author/id').text
                                except (KeyError, AttributeError):
                                    authorid = ""
                                logger.debug("Author Search found %s %s, authorid %s" %
                                             (author, booktitle, authorid))
                                break

                    if not authorid:  # try again with title only
                        searchname = cleanName(unaccented(bookname))
                        if PY2:
                            searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                        searchterm = quote_plus(searchname)
                        set_url = base_url + searchterm + '&' + urlencode(params)
                        rootxml, in_cache = gr_xml_request(set_url)
                        if rootxml is None:
                            logger.warn('Error getting XML for %s' % searchname)
                        else:
                            resultxml = rootxml.getiterator('work')
                            for item in resultxml:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                                book_fuzz = fuzz.token_set_ratio(booktitle, bookname)
                                if book_fuzz >= 98:
                                    try:
                                        author = item.find('./best_book/author/name').text
                                    except (KeyError, AttributeError):
                                        author = ""
                                    # try:
                                    #     workid = item.find('./work/id').text
                                    # except (KeyError, AttributeError):
                                    #     workid = ""
                                    try:
                                        authorid = item.find('./best_book/author/id').text
                                    except (KeyError, AttributeError):
                                        authorid = ""
                                    logger.debug("Title Search found %s %s, authorid %s" %
                                                 (author, booktitle, authorid))
                                    break

                    if not authorid:
                        logger.warn("GoodReads doesn't know about %s %s" % (authorname, bookname))
                except Exception as e:
                    logger.error("Error finding goodreads results: %s %s" % (type(e).__name__, str(e)))

            if authorid:
                lazylibrarian.importer.addAuthorToDB(refresh=False, authorid=authorid)

    result = myDB.match("select count(*) as counter from authors")
    finish = int(result['counter'])
    newauth = finish - start
    logger.info("Added %s new author%s for %s" % (newauth, plural(newauth), seriesname))
    return newauth

def SABnzbd(title=None, nzburl=None, remove_data=False): if nzburl in ['delete', 'delhistory'] and title == 'unknown': logger.debug('%s function unavailable in this version of sabnzbd, no nzo_ids' % nzburl) return False hostname = lazylibrarian.CONFIG['SAB_HOST'] port = check_int(lazylibrarian.CONFIG['SAB_PORT'], 0) if not hostname or not port: logger.error('Invalid sabnzbd host or port, check your config') return False if hostname.endswith('/'): hostname = hostname[:-1] if not hostname.startswith("http://") and not hostname.startswith("https://"): hostname = 'http://' + hostname HOST = "%s:%s" % (hostname, port) if lazylibrarian.CONFIG['SAB_SUBDIR']: HOST = HOST + "/" + lazylibrarian.CONFIG['SAB_SUBDIR'] params = {} if nzburl == 'auth' or nzburl == 'get_cats': # connection test, check auth mode or get_cats params['mode'] = nzburl params['output'] = 'json' if lazylibrarian.CONFIG['SAB_API']: params['apikey'] = lazylibrarian.CONFIG['SAB_API'] title = 'LL.(%s)' % nzburl elif nzburl == 'queue': params['mode'] = 'queue' params['output'] = 'json' if lazylibrarian.CONFIG['SAB_USER']: params['ma_username'] = lazylibrarian.CONFIG['SAB_USER'] if lazylibrarian.CONFIG['SAB_PASS']: params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS'] if lazylibrarian.CONFIG['SAB_API']: params['apikey'] = lazylibrarian.CONFIG['SAB_API'] title = 'LL.(Queue)' elif nzburl == 'history': params['mode'] = 'history' params['output'] = 'json' if lazylibrarian.CONFIG['SAB_USER']: params['ma_username'] = lazylibrarian.CONFIG['SAB_USER'] if lazylibrarian.CONFIG['SAB_PASS']: params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS'] if lazylibrarian.CONFIG['SAB_API']: params['apikey'] = lazylibrarian.CONFIG['SAB_API'] title = 'LL.(History)' elif nzburl == 'delete': # only deletes tasks if still in the queue, ie NOT completed tasks params['mode'] = 'queue' params['output'] = 'json' params['name'] = nzburl params['value'] = title if lazylibrarian.CONFIG['SAB_USER']: params['ma_username'] = lazylibrarian.CONFIG['SAB_USER'] if lazylibrarian.CONFIG['SAB_PASS']: params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS'] if lazylibrarian.CONFIG['SAB_API']: params['apikey'] = lazylibrarian.CONFIG['SAB_API'] if remove_data: params['del_files'] = 1 title = 'LL.(Delete) ' + title elif nzburl == 'delhistory': params['mode'] = 'history' params['output'] = 'json' params['name'] = 'delete' params['value'] = title if lazylibrarian.CONFIG['SAB_USER']: params['ma_username'] = lazylibrarian.CONFIG['SAB_USER'] if lazylibrarian.CONFIG['SAB_PASS']: params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS'] if lazylibrarian.CONFIG['SAB_API']: params['apikey'] = lazylibrarian.CONFIG['SAB_API'] if remove_data: params['del_files'] = 1 title = 'LL.(DelHistory) ' + title else: params['mode'] = 'addurl' params['output'] = 'json' if nzburl: params['name'] = nzburl if title: params['nzbname'] = title if lazylibrarian.CONFIG['SAB_USER']: params['ma_username'] = lazylibrarian.CONFIG['SAB_USER'] if lazylibrarian.CONFIG['SAB_PASS']: params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS'] if lazylibrarian.CONFIG['SAB_API']: params['apikey'] = lazylibrarian.CONFIG['SAB_API'] if lazylibrarian.CONFIG['SAB_CAT']: params['cat'] = lazylibrarian.CONFIG['SAB_CAT'] if lazylibrarian.CONFIG['USENET_RETENTION']: params["maxage"] = lazylibrarian.CONFIG['USENET_RETENTION'] # FUTURE-CODE # if lazylibrarian.SAB_PRIO: # params["priority"] = lazylibrarian.SAB_PRIO # if lazylibrarian.SAB_PP: # params["script"] = lazylibrarian.SAB_SCRIPT URL = HOST + "/api?" 
URL += urlencode(params) # to debug because of api if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms: logger.debug('Request url for <a href="%s">SABnzbd</a>' % URL) proxies = proxyList() try: timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30) r = requests.get(URL, timeout=timeout, proxies=proxies) result = r.json() except requests.exceptions.Timeout: logger.error("Timeout connecting to SAB with URL: %s" % URL) return False except Exception as e: if hasattr(e, 'reason'): errmsg = e.reason elif hasattr(e, 'strerror'): errmsg = e.strerror else: errmsg = str(e) logger.error("Unable to connect to SAB with URL: %s, %s" % (URL, errmsg)) return False if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms: logger.debug("Result text from SAB: " + str(result)) if title: title = unaccented_str(title) if title.startswith('LL.('): return result if result['status'] is True: logger.info("%s sent to SAB successfully." % title) # sab versions earlier than 0.8.0 don't return nzo_ids if 'nzo_ids' in result: if result['nzo_ids']: # check it's not empty return result['nzo_ids'][0] return 'unknown' elif result['status'] is False: logger.error("SAB returned Error: %s" % result['error']) return False else: logger.error("Unknown error: " + str(result)) return False
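# Hedged usage notes for SABnzbd() above: nzburl values 'auth', 'get_cats',
# 'queue', 'history', 'delete' and 'delhistory' select an api mode and return
# the raw json result; any other nzburl is treated as a link to hand to
# sabnzbd's addurl mode. The indexer link below is a made-up example.
#
#   res = SABnzbd(nzburl='auth')  # connection test, returns json result
#   nzo_id = SABnzbd('Some Book', 'http://indexer.example/12345.nzb')
#   if nzo_id:  # 'unknown' on sabnzbd versions before 0.8.0 (no nzo_ids)
#       SABnzbd(nzo_id, 'delete', remove_data=True)  # remove from queue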
def setWorkID(books=None): """ Set the goodreads workid for any books that don't already have one books is a comma separated list of bookids or if empty, select from database Paginate requests to reduce api hits """ myDB = database.DBConnection() pages = [] if books: page = books pages.append(page) else: cmd = "select BookID,BookName from books where WorkID='' or WorkID is null" books = myDB.select(cmd) if books: counter = 0 logger.debug('Setting WorkID for %s book%s' % (len(books), plural(len(books)))) page = '' for book in books: bookid = book['BookID'] if not bookid: logger.debug("No bookid for %s" % book['BookName']) else: if page: page = page + ',' page = page + bookid counter += 1 if counter == 50: counter = 0 pages.append(page) page = '' if page: pages.append(page) counter = 0 params = {"key": lazylibrarian.CONFIG['GR_API']} for page in pages: URL = 'https://www.goodreads.com/book/id_to_work_id/' + page + '?' + urlencode( params) try: rootxml, in_cache = gr_xml_request(URL, useCache=False) if rootxml is None: logger.debug("Error requesting id_to_work_id page") else: resultxml = rootxml.find('work-ids') if len(resultxml): ids = resultxml.getiterator('item') books = getList(page) cnt = 0 for item in ids: workid = item.text if not workid: logger.debug("No workid returned for %s" % books[cnt]) else: counter += 1 controlValueDict = {"BookID": books[cnt]} newValueDict = {"WorkID": workid} myDB.upsert("books", newValueDict, controlValueDict) cnt += 1 except Exception as e: logger.error("%s parsing id_to_work_id page: %s" % (type(e).__name__, str(e))) msg = 'Updated %s id%s' % (counter, plural(counter)) logger.debug("setWorkID complete: " + msg) return msg
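# A minimal sketch of the id pagination used above: batch bookids into comma
# separated pages of 50 to keep goodreads api hits down. batch_ids is a
# hypothetical helper, shown for clarity only.
def batch_ids(ids, batch_size=50):
    """Yield comma separated pages of at most batch_size ids."""
    for i in range(0, len(ids), batch_size):
        yield ','.join(ids[i:i + batch_size])

# e.g. list(batch_ids(['1', '2', '3'], batch_size=2)) == ['1,2', '3']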
def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped", entrystatus='Active', refresh=False): # noinspection PyBroadException try: logger.debug('[%s] Now processing books with Google Books API' % authorname) # google doesn't like accents in author names set_url = self.url + quote('inauthor:"%s"' % unaccented_str(authorname)) api_hits = 0 gr_lang_hits = 0 lt_lang_hits = 0 gb_lang_change = 0 cache_hits = 0 not_cached = 0 startindex = 0 resultcount = 0 removedResults = 0 duplicates = 0 ignored = 0 added_count = 0 updated_count = 0 book_ignore_count = 0 total_count = 0 number_results = 1 valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG']) # Author is loading myDB = database.DBConnection() controlValueDict = {"AuthorID": authorid} newValueDict = {"Status": "Loading"} myDB.upsert("authors", newValueDict, controlValueDict) try: while startindex < number_results: self.params['startIndex'] = startindex URL = set_url + '&' + urlencode(self.params) try: jsonresults, in_cache = gb_json_request(URL, useCache=not refresh) if jsonresults is None: number_results = 0 else: if not in_cache: api_hits += 1 number_results = jsonresults['totalItems'] except Exception as err: if hasattr(err, 'reason'): errmsg = err.reason else: errmsg = str(err) logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % errmsg) break if number_results == 0: logger.warn('Found no results for %s' % authorname) break else: logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname)) startindex += 40 for item in jsonresults['items']: total_count += 1 book = bookdict(item) # skip if no author, no author is no book. if not book['author']: logger.debug('Skipped a result without an author field.') continue isbnhead = "" if len(book['isbn']) == 10: isbnhead = book['isbn'][0:3] elif len(book['isbn']) == 13: isbnhead = book['isbn'][3:6] booklang = book['lang'] # do we care about language?
if "All" not in valid_langs: if book['isbn']: # seems google lies to us, sometimes tells us books are in english when they are not if booklang == "Unknown" or booklang == "en": googlelang = booklang match = False lang = myDB.match('SELECT lang FROM languages where isbn=?', (isbnhead,)) if lang: booklang = lang['lang'] cache_hits += 1 logger.debug("Found cached language [%s] for [%s]" % (booklang, isbnhead)) match = True if not match: # no match in cache, try lookup dict if isbnhead: if len(book['isbn']) == 13 and book['isbn'].startswith('979'): for lang in lazylibrarian.isbn_979_dict: if isbnhead.startswith(lang): booklang = lazylibrarian.isbn_979_dict[lang] logger.debug("ISBN979 returned %s for %s" % (booklang, isbnhead)) match = True break elif (len(book['isbn']) == 10) or \ (len(book['isbn']) == 13 and book['isbn'].startswith('978')): for lang in lazylibrarian.isbn_978_dict: if isbnhead.startswith(lang): booklang = lazylibrarian.isbn_978_dict[lang] logger.debug("ISBN979 returned %s for %s" % (booklang, isbnhead)) match = True break if match: myDB.action('insert into languages values (?, ?)', (isbnhead, booklang)) if not match: booklang = thingLang(book['isbn']) lt_lang_hits += 1 if booklang: match = True myDB.action('insert into languages values (?, ?)', (isbnhead, booklang)) if match: # We found a better language match if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]: # these are all english, may need to expand this list logger.debug("%s Google thinks [%s], we think [%s]" % (book['name'], googlelang, booklang)) gb_lang_change += 1 else: # No match anywhere, accept google language booklang = googlelang # skip if language is in ignore list if booklang not in valid_langs: logger.debug('Skipped [%s] with language %s' % (book['name'], booklang)) ignored += 1 continue rejected = 0 check_status = False book_status = bookstatus # new_book status, or new_author status audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS'] added = today() locked = False existing_book = None bookname = book['name'] bookid = item['id'] if not bookname: logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorname)) removedResults += 1 rejected = 1 else: bookname = replace_all(unaccented(bookname), {':': '.', '"': '', '\'': ''}).strip() # GoodReads sometimes has multiple bookids for the same book (same author/title, different # editions) and sometimes uses the same bookid if the book is the same but the title is # slightly different. Not sure if googlebooks does too, but we only want one... cmd = 'SELECT Status,AudioStatus,Manual,BookAdded FROM books WHERE BookID=?' 
existing_book = myDB.match(cmd, (bookid,)) if existing_book: book_status = existing_book['Status'] audio_status = existing_book['AudioStatus'] locked = existing_book['Manual'] added = existing_book['BookAdded'] if locked is None: locked = False elif locked.isdigit(): locked = bool(int(locked)) else: if rejected in [3, 4, 5]: book_status = 'Ignored' audio_status = 'Ignored' else: book_status = bookstatus # new_book status, or new_author status audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS'] added = today() locked = False if not rejected and re.match('[^\w-]', bookname): # remove books with bad characters in title logger.debug("[%s] removed book for bad characters" % bookname) removedResults += 1 rejected = 2 if not rejected and lazylibrarian.CONFIG['NO_FUTURE']: # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd if book['date'] > today()[:len(book['date'])]: logger.debug('Rejecting %s, future publication date %s' % (bookname, book['date'])) removedResults += 1 rejected = 3 if not rejected and lazylibrarian.CONFIG['NO_PUBDATE']: if not book['date']: logger.debug('Rejecting %s, no publication date' % bookname) removedResults += 1 rejected = 4 if not rejected and lazylibrarian.CONFIG['NO_ISBN']: if not isbnhead: logger.debug('Rejecting %s, no isbn' % bookname) removedResults += 1 rejected = 5 if not rejected: cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID' cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE' match = myDB.match(cmd, (bookname.replace('"', '""'), authorname.replace('"', '""'))) if match: if match['BookID'] != bookid: # we have a different book with this author/title already logger.debug('Rejecting bookid %s for [%s][%s] already got %s' % (match['BookID'], authorname, bookname, bookid)) rejected = 6 duplicates += 1 if not rejected: cmd = 'SELECT AuthorName,BookName FROM books,authors' cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?' 
match = myDB.match(cmd, (bookid,)) if match: # we have a book with this bookid already if bookname != match['BookName'] or authorname != match['AuthorName']: logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' % (bookid, authorname, bookname, match['AuthorName'], match['BookName'])) else: logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' % (bookid, authorname, bookname)) check_status = True duplicates += 1 rejected = 7 if check_status or not rejected or ( lazylibrarian.CONFIG['IMP_IGNORE'] and rejected in [3, 4, 5]): # dates, isbn if not locked: controlValueDict = {"BookID": bookid} newValueDict = { "AuthorID": authorid, "BookName": bookname, "BookSub": book['sub'], "BookDesc": book['desc'], "BookIsbn": book['isbn'], "BookPub": book['pub'], "BookGenre": book['genre'], "BookImg": book['img'], "BookLink": book['link'], "BookRate": float(book['rate']), "BookPages": book['pages'], "BookDate": book['date'], "BookLang": booklang, "Status": book_status, "AudioStatus": audio_status, "BookAdded": added } resultcount += 1 myDB.upsert("books", newValueDict, controlValueDict) logger.debug("Book found: " + bookname + " " + book['date']) updated = False if 'nocover' in book['img'] or 'nophoto' in book['img']: # try to get a cover from another source workcover, source = getBookCover(bookid) if workcover: logger.debug('Updated cover for %s using %s' % (bookname, source)) controlValueDict = {"BookID": bookid} newValueDict = {"BookImg": workcover} myDB.upsert("books", newValueDict, controlValueDict) updated = True elif book['img'] and book['img'].startswith('http'): link, success, _ = cache_img("book", bookid, book['img'], refresh=refresh) if success: controlValueDict = {"BookID": bookid} newValueDict = {"BookImg": link} myDB.upsert("books", newValueDict, controlValueDict) updated = True else: logger.debug('Failed to cache image for %s' % book['img']) serieslist = [] if book['series']: serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))] if lazylibrarian.CONFIG['ADD_SERIES']: newserieslist = getWorkSeries(bookid) if newserieslist: serieslist = newserieslist logger.debug('Updated series: %s [%s]' % (bookid, serieslist)) updated = True setSeries(serieslist, bookid) new_status = setStatus(bookid, serieslist, bookstatus) if not new_status == book_status: book_status = new_status updated = True worklink = getWorkPage(bookid) if worklink: controlValueDict = {"BookID": bookid} newValueDict = {"WorkPage": worklink} myDB.upsert("books", newValueDict, controlValueDict) if not existing_book: logger.debug("[%s] Added book: %s [%s] status %s" % (authorname, bookname, booklang, book_status)) added_count += 1 elif updated: logger.debug("[%s] Updated book: %s [%s] status %s" % (authorname, bookname, booklang, book_status)) updated_count += 1 else: book_ignore_count += 1 except KeyError: pass deleteEmptySeries() logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' % (authorname, api_hits, plural(api_hits))) cmd = 'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID=?' 
cmd += ' AND Status != "Ignored" order by BookDate DESC' lastbook = myDB.match(cmd, (authorid,)) if lastbook: # maybe there are no books [remaining] for this author lastbookname = lastbook['BookName'] lastbooklink = lastbook['BookLink'] lastbookdate = lastbook['BookDate'] lastbookimg = lastbook['BookImg'] else: lastbookname = "" lastbooklink = "" lastbookdate = "" lastbookimg = "" controlValueDict = {"AuthorID": authorid} newValueDict = { "Status": entrystatus, "LastBook": lastbookname, "LastLink": lastbooklink, "LastDate": lastbookdate, "LastBookImg": lastbookimg } myDB.upsert("authors", newValueDict, controlValueDict) logger.debug("Found %s total book%s for author" % (total_count, plural(total_count))) logger.debug("Removed %s unwanted language result%s for author" % (ignored, plural(ignored))) logger.debug("Removed %s bad character or no-name result%s for author" % (removedResults, plural(removedResults))) logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates))) logger.debug("Found %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count))) logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount))) myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change, cache_hits, ignored, removedResults, not_cached, duplicates)) if refresh: logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" % (authorname, added_count, plural(added_count), updated_count, plural(updated_count))) else: logger.info("[%s] Book processing complete: Added %s book%s to the database" % (authorname, added_count, plural(added_count))) except Exception: logger.error('Unhandled exception in GB.get_author_books: %s' % traceback.format_exc())
def find_results(self, searchterm=None, queue=None): """ GoogleBooks performs much better if we search for author OR title not both at once, so if searchterm is not isbn, two searches needed. Lazylibrarian searches use <ll> to separate title from author in searchterm If this token isn't present, it's an isbn or searchterm as supplied by user """ try: myDB = database.DBConnection() resultlist = [] # See if we should check ISBN field, otherwise ignore it api_strings = ['inauthor:', 'intitle:'] if is_valid_isbn(searchterm): api_strings = ['isbn:'] api_hits = 0 ignored = 0 total_count = 0 no_author_count = 0 title = '' authorname = '' if ' <ll> ' in searchterm: # special token separates title from author title, authorname = searchterm.split(' <ll> ') fullterm = searchterm.replace(' <ll> ', ' ') logger.debug('Now searching Google Books API with searchterm: %s' % fullterm) for api_value in api_strings: set_url = self.url if api_value == "isbn:": set_url = set_url + quote(api_value + searchterm) elif api_value == 'intitle:': searchterm = fullterm if title: # just search for title # noinspection PyUnresolvedReferences title = title.split(' (')[0] # without any series info searchterm = title searchterm = searchterm.replace("'", "").replace('"', '').strip() # and no quotes if PY2: searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING) set_url = set_url + quote(api_value + '"' + searchterm + '"') elif api_value == 'inauthor:': searchterm = fullterm if authorname: searchterm = authorname # just search for author searchterm = searchterm.strip() if PY2: searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING) set_url = set_url + quote_plus(api_value + '"' + searchterm + '"') startindex = 0 resultcount = 0 ignored = 0 number_results = 1 total_count = 0 no_author_count = 0 try: while startindex < number_results: self.params['startIndex'] = startindex URL = set_url + '&' + urlencode(self.params) try: jsonresults, in_cache = gb_json_request(URL) if jsonresults is None: number_results = 0 else: if not in_cache: api_hits += 1 number_results = jsonresults['totalItems'] logger.debug('Searching url: ' + URL) if number_results == 0: logger.warn('Found no results for %s with value: %s' % (api_value, searchterm)) break except Exception as err: if hasattr(err, 'reason'): errmsg = err.reason else: errmsg = str(err) logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % errmsg) break startindex += 40 for item in jsonresults['items']: total_count += 1 book = bookdict(item) if not book['author']: logger.debug('Skipped a result without an author field.') no_author_count += 1 continue if not book['name']: logger.debug('Skipped a result without title.') continue valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG']) if "All" not in valid_langs: # "All" accepts every language, so only filter when it's absent try: # skip if language is not in valid list - booklang = book['lang'] if booklang not in valid_langs: logger.debug('Skipped %s with language %s' % (book['name'], booklang)) ignored += 1 continue except KeyError: ignored += 1 logger.debug('Skipped %s where no language is found' % book['name']) continue if authorname: author_fuzz = fuzz.ratio(book['author'], authorname) else: author_fuzz = fuzz.ratio(book['author'], fullterm) if title: book_fuzz = fuzz.token_set_ratio(book['name'], title) # lose a point for each extra word in the fuzzy matches so we get the closest match words = len(getList(book['name'])) words -= len(getList(title)) book_fuzz -= abs(words) else: book_fuzz = fuzz.token_set_ratio(book['name'], fullterm) isbn_fuzz = 0 if is_valid_isbn(fullterm): isbn_fuzz = 100 highest_fuzz = max((author_fuzz + book_fuzz) / 2, isbn_fuzz) dic = {':': '.', '"': '', '\'': ''} bookname = replace_all(book['name'], dic) bookname = unaccented(bookname) bookname = bookname.strip() # strip whitespace AuthorID = '' if book['author']: match = myDB.match('SELECT AuthorID FROM authors WHERE AuthorName=?', (book['author'].replace('"', '""'),)) if match: AuthorID = match['AuthorID'] resultlist.append({ 'authorname': book['author'], 'authorid': AuthorID, 'bookid': item['id'], 'bookname': bookname, 'booksub': book['sub'], 'bookisbn': book['isbn'], 'bookpub': book['pub'], 'bookdate': book['date'], 'booklang': book['lang'], 'booklink': book['link'], 'bookrate': float(book['rate']), 'bookrate_count': book['rate_count'], 'bookimg': book['img'], 'bookpages': book['pages'], 'bookgenre': book['genre'], 'bookdesc': book['desc'], 'author_fuzz': author_fuzz, 'book_fuzz': book_fuzz, 'isbn_fuzz': isbn_fuzz, 'highest_fuzz': highest_fuzz, 'num_reviews': book['ratings'] }) resultcount += 1 except KeyError: break logger.debug("Returning %s result%s for (%s) with keyword: %s" % (resultcount, plural(resultcount), api_value, searchterm)) logger.debug("Found %s result%s" % (total_count, plural(total_count))) logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored))) logger.debug("Removed %s book%s with no author" % (no_author_count, plural(no_author_count))) logger.debug('The Google Books API was hit %s time%s for searchterm: %s' % (api_hits, plural(api_hits), fullterm)) queue.put(resultlist) except Exception: logger.error('Unhandled exception in GB.find_results: %s' % traceback.format_exc())
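# Sketch of the fuzzy scoring in find_results above, assuming the pypi
# fuzzywuzzy package provides the same fuzz module this file already uses:
# token_set_ratio ignores word order and repeats, so a per-extra-word penalty
# favours the closest-length title. score_title is a hypothetical helper.
from fuzzywuzzy import fuzz as _fuzz

def score_title(candidate, wanted):
    score = _fuzz.token_set_ratio(candidate, wanted)
    return score - abs(len(candidate.split()) - len(wanted.split()))

# e.g. score_title('The Hobbit', 'Hobbit') beats
#      score_title('The Hobbit Deluxe Illustrated Edition', 'Hobbit')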
def ZOO(book=None, test=False): errmsg = '' provider = "zooqle" host = lazylibrarian.CONFIG['ZOO_HOST'] if not host.startswith('http'): host = 'http://' + host providerurl = url_fix(host + "/search") params = { "q": book['searchterm'], "category": "books", "fmt": "rss" } searchURL = providerurl + "?%s" % urlencode(params) sterm = makeUnicode(book['searchterm']) data, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in data: logger.debug("No results found from %s for %s" % (provider, sterm)) success = True else: logger.debug(searchURL) logger.debug('Error fetching data from %s: %s' % (provider, data)) errmsg = data data = False if test: return success results = [] minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1 if data: logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) d = feedparser.parse(data) if len(d.entries): for item in d.entries: try: title = unaccented(item['title']) seeders = int(item['torrent_seeds'].replace(',', '')) link = item['links'][1]['href'] size = int(item['links'][1]['length']) magnet = item['torrent_magneturi'] url = None mode = 'torrent' if link: url = link mode = 'torrent' if magnet: if not url or (url and lazylibrarian.CONFIG['PREFER_MAGNET']): url = magnet mode = 'magnet' if not url or not title: logger.debug('No url or title found') elif minimumseeders < seeders: results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), 'tor_type': mode, 'priority': lazylibrarian.CONFIG['ZOO_DLPRIORITY'] }) logger.debug('Found %s. Size: %s' % (title, size)) else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: if 'forbidden' in str(e).lower(): # looks like zooqle has ip based access limits logger.error('Access forbidden. Please wait a while before trying %s again.' % provider) else: logger.error("An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug('%s: %s' % (provider, traceback.format_exc())) logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm)) return results, errmsg
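# The magnet-or-torrent choice above is shared by several providers: prefer
# the .torrent link unless only a magnet exists or PREFER_MAGNET is set.
# choose_link is a hypothetical helper taking the config flag explicitly.
def choose_link(url, magnet, prefer_magnet):
    if magnet and (not url or prefer_magnet):
        return magnet, 'magnet'
    return url, 'torrent'

# e.g. choose_link('http://x/a.torrent', 'magnet:?xt=...', False)
#      returns the .torrent link with mode 'torrent'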
def TPB(book=None, test=False): errmsg = '' provider = "TPB" host = lazylibrarian.CONFIG['TPB_HOST'] if not host.startswith('http'): host = 'http://' + host providerurl = url_fix(host + "/s/") cat = 0 # 601=ebooks, 102=audiobooks, 0=all, no mag category if 'library' in book: if book['library'] == 'AudioBook': cat = 102 elif book['library'] == 'eBook': cat = 601 elif book['library'] == 'magazine': cat = 0 sterm = makeUnicode(book['searchterm']) page = 0 results = [] minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1 next_page = True while next_page: params = { "q": book['searchterm'], "category": cat, "page": page, "orderby": "99" } searchURL = providerurl + "?%s" % urlencode(params) next_page = False result, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in result: logger.debug("No results found from %s for %s" % (provider, sterm)) success = True else: logger.debug(searchURL) logger.debug('Error fetching data from %s: %s' % (provider, result)) errmsg = result result = False if test: return success if result: logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) soup = BeautifulSoup(result, 'html5lib') # tpb uses a named table table = soup.find('table', id='searchResult') if table: rows = table.find_all('tr') else: rows = [] if len(rows) > 1: rows = rows[1:] # first row is headers for row in rows: td = row.find_all('td') if len(td) > 2: try: new_soup = BeautifulSoup(str(td[1]), 'html5lib') link = new_soup.find("a") magnet = link.get("href") title = link.text size = td[1].text.split(', Size ')[1].split('iB')[0] size = size.replace(' ', '') mult = 1 try: if 'K' in size: size = size.split('K')[0] mult = 1024 elif 'M' in size: size = size.split('M')[0] mult = 1024 * 1024 elif 'G' in size: size = size.split('G')[0] mult = 1024 * 1024 * 1024 size = int(float(size) * mult) except (ValueError, IndexError): size = 0 try: seeders = int(td[2].text) except ValueError: seeders = 0 if minimumseeders < int(seeders): # no point in asking for magnet link if not enough seeders magurl = '%s/%s' % (host, magnet) result, success = fetchURL(magurl) if not success: logger.debug('Error fetching url %s, %s' % (magurl, result)) else: magnet = None new_soup = BeautifulSoup(result, 'html5lib') for link in new_soup.find_all('a'): output = link.get('href') if output and output.startswith('magnet'): magnet = output break if not magnet or not title: logger.debug('Missing magnet or title') else: results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': magnet, 'tor_size': str(size), 'tor_type': 'magnet', 'priority': lazylibrarian.CONFIG['TPB_DLPRIORITY'] }) logger.debug('Found %s. Size: %s' % (title, size)) next_page = True else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: logger.error("An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug('%s: %s' % (provider, traceback.format_exc())) page += 1 if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page: logger.warn('Maximum results page search reached, still more results available') next_page = False logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm)) return results, errmsg
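# Sketch of the paging loop used by TPB (and WWT/GEN below): keep requesting
# pages while a page produced at least one usable result, stopping early at
# the MAX_PAGES cap. fetch_page is a hypothetical callable returning a list
# of results for a page number.
def paged_search(fetch_page, max_pages):
    results = []
    page = 0
    next_page = True
    while next_page:
        page_results = fetch_page(page)
        results.extend(page_results)
        next_page = bool(page_results)  # stop when a page yields nothing
        page += 1
        if 0 < max_pages < page:
            break  # more results may exist beyond the cap
    return results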
def LIME(book=None, test=False): errmsg = '' provider = "Limetorrent" host = lazylibrarian.CONFIG['LIME_HOST'] if not host.startswith('http'): host = 'http://' + host params = { "q": book['searchterm'] } providerurl = url_fix(host + "/searchrss/other") searchURL = providerurl + "?%s" % urlencode(params) sterm = makeUnicode(book['searchterm']) data, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in data: logger.debug("No results found from %s for %s" % (provider, sterm)) success = True else: logger.debug(searchURL) logger.debug('Error fetching data from %s: %s' % (provider, data)) errmsg = data data = False if test: return success results = [] minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1 if data: logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) d = feedparser.parse(data) if len(d.entries): for item in d.entries: try: title = unaccented(item['title']) try: seeders = item['description'] seeders = int(seeders.split('Seeds:')[1].split(' ,')[0].replace(',', '').strip()) except (IndexError, ValueError): seeders = 0 size = item['size'] try: size = int(size) except ValueError: size = 0 try: pubdate = item['published'] except KeyError: pubdate = None url = None for link in item['links']: if 'x-bittorrent' in link['type']: url = link['url'] if not url or not title: logger.debug('No url or title found') elif minimumseeders < seeders: res = { 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), 'tor_type': 'torrent', 'priority': lazylibrarian.CONFIG['LIME_DLPRIORITY'] } if pubdate: res['tor_date'] = pubdate results.append(res) logger.debug('Found %s. Size: %s' % (title, size)) else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: if 'forbidden' in str(e).lower(): # may have ip based access limits logger.error('Access forbidden. Please wait a while before trying %s again.' % provider) else: logger.error("An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug('%s: %s' % (provider, traceback.format_exc())) logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm)) return results, errmsg
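# LIME embeds seed counts in the rss description text. A standalone sketch of
# the extraction above, assuming the 'Seeds: 1,234 , ...' layout shown in the
# parser; parse_seeds is a hypothetical helper.
def parse_seeds(description):
    try:
        return int(description.split('Seeds:')[1].split(' ,')[0].replace(',', '').strip())
    except (IndexError, ValueError):
        return 0

# e.g. parse_seeds('Seeds: 1,234 , Leechers: 56') == 1234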
def getSeriesMembers(seriesID=None, seriesname=None): """ Ask librarything or goodreads for details on all books in a series order, bookname, authorname, workid, authorid (workid and authorid are goodreads only) Return as a list of lists """ results = [] api_hits = 0 if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads': params = {"format": "xml", "key": lazylibrarian.CONFIG['GR_API']} URL = 'https://www.goodreads.com/series/%s?%s' % (seriesID, urlencode(params)) try: rootxml, in_cache = gr_xml_request(URL) if not in_cache: api_hits += 1 if rootxml is None: logger.debug("Series %s:%s not recognised at goodreads" % (seriesID, seriesname)) return [], api_hits except Exception as e: logger.error("%s finding series %s: %s" % (type(e).__name__, seriesID, str(e))) return [], api_hits works = rootxml.find('series/series_works') books = works.getiterator('series_work') if books is None: logger.warn('No books found for %s' % seriesID) return [], api_hits for book in books: mydict = {} for mykey, location in [('order', 'user_position'), ('bookname', 'work/best_book/title'), ('authorname', 'work/best_book/author/name'), ('workid', 'work/id'), ('authorid', 'work/best_book/author/id'), ('pubyear', 'work/original_publication_year')]: if book.find(location) is not None: mydict[mykey] = book.find(location).text else: mydict[mykey] = "" results.append([ mydict['order'], mydict['bookname'], mydict['authorname'], mydict['workid'], mydict['authorid'], mydict['pubyear'] ]) else: api_hits = 0 data = getBookWork(None, "SeriesPage", seriesID) if data: try: table = data.split('class="worksinseries"')[1].split( '</table>')[0] rows = table.split('<tr') for row in rows: if 'href=' in row: booklink = row.split('href="')[1] bookname = booklink.split('">')[1].split('<')[0] # booklink = booklink.split('"')[0] try: authorlink = row.split('href="')[2] authorname = authorlink.split('">')[1].split( '<')[0] # authorlink = authorlink.split('"')[0] order = row.split('class="order">')[1].split( '<')[0] results.append( [order, bookname, authorname, '', '']) except IndexError: logger.debug( 'Incomplete data in series table for series %s' % seriesID) except IndexError: if 'class="worksinseries"' in data: # error parsing, or just no series data available? logger.debug('Error in series table for series %s' % seriesID) return results, api_hits
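# Sketch of the xml field extraction pattern in getSeriesMembers above: each
# (key, xpath) pair is looked up under a node, with '' when the node is
# missing. extract_fields is a hypothetical helper over the ElementTree
# interface that gr_xml_request returns.
import xml.etree.ElementTree as ET

def extract_fields(node, locations):
    """locations: iterable of (key, xpath) pairs; absent nodes map to ''."""
    out = {}
    for key, xpath in locations:
        found = node.find(xpath)
        out[key] = found.text if found is not None else ''
    return out

assert extract_fields(ET.fromstring('<w><id>1</id></w>'),
                      [('workid', 'id'), ('order', 'user_position')]) \
    == {'workid': '1', 'order': ''}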
def TDL(book=None, test=False): errmsg = '' provider = "torrentdownloads" host = lazylibrarian.CONFIG['TDL_HOST'] if not host.startswith('http'): host = 'http://' + host providerurl = url_fix(host) params = {"type": "search", "cid": "2", "search": book['searchterm']} searchURL = providerurl + "/rss.xml?%s" % urlencode(params) sterm = makeUnicode(book['searchterm']) data, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in data: logger.debug("No results found from %s for %s" % (provider, sterm)) success = True else: logger.debug(searchURL) logger.debug('Error fetching data from %s: %s' % (provider, data)) errmsg = data data = False if test: return success results = [] minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1 if data: logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) d = feedparser.parse(data) if len(d.entries): for item in d.entries: try: title = item['title'] seeders = int(item['seeders']) link = item['link'] size = int(item['size']) url = None if link and minimumseeders < int(seeders): # no point requesting the magnet link if not enough seeders # TDL gives us a relative link result, success = fetchURL(providerurl + link) if success: new_soup = BeautifulSoup(result, 'html5lib') for link in new_soup.find_all('a'): output = link.get('href') if output and output.startswith('magnet'): url = output break if not url or not title: logger.debug('Missing url or title') else: results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), 'tor_type': 'magnet', 'priority': lazylibrarian.CONFIG['TDL_DLPRIORITY'] }) logger.debug('Found %s. Size: %s' % (title, size)) else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: logger.error("An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug('%s: %s' % (provider, traceback.format_exc())) logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm)) return results, errmsg
def WWT(book=None, test=False): errmsg = '' provider = "WorldWideTorrents" host = lazylibrarian.CONFIG['WWT_HOST'] if not host.startswith('http'): host = 'http://' + host providerurl = url_fix(host + "/torrents-search.php") sterm = makeUnicode(book['searchterm']) cat = 0 # 0=all, 36=ebooks, 52=mags, 56=audiobooks if 'library' in book: if book['library'] == 'AudioBook': cat = 56 elif book['library'] == 'eBook': cat = 36 elif book['library'] == 'magazine': cat = 52 page = 0 results = [] minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1 next_page = True while next_page: params = {"search": book['searchterm'], "page": page, "cat": cat} searchURL = providerurl + "/?%s" % urlencode(params) next_page = False result, success = fetchURL(searchURL) if not success: # might return 404 if no results, not really an error if '404' in result: logger.debug("No results found from %s for %s" % (provider, sterm)) success = True else: logger.debug(searchURL) logger.debug('Error fetching data from %s: %s' % (provider, result)) errmsg = result result = False if test: return success if result: logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) soup = BeautifulSoup(result, 'html5lib') try: tables = soup.find_all('table') # un-named table table = tables[2] if table: rows = table.find_all('tr') except IndexError: # no results table in result page rows = [] if len(rows) > 1: rows = rows[1:] # first row is headers for row in rows: td = row.find_all('td') if len(td) > 3: try: title = unaccented(td[0].text) # can return magnet or torrent or both. magnet = '' url = '' mode = 'torrent' try: magnet = 'magnet' + str( td[0]).split('href="magnet')[1].split('"')[0] mode = 'magnet' except IndexError: pass try: url = url_fix(host + '/download.php') + \ str(td[0]).split('href="download.php')[1].split('.torrent"')[0] + '.torrent' mode = 'torrent' except IndexError: pass if not url or (magnet and url and lazylibrarian.CONFIG['PREFER_MAGNET']): url = magnet mode = 'magnet' try: size = str(td[1].text).replace(' ', '').upper() mult = 1 if 'K' in size: size = size.split('K')[0] mult = 1024 elif 'M' in size: size = size.split('M')[0] mult = 1024 * 1024 elif 'G' in size: size = size.split('G')[0] mult = 1024 * 1024 * 1024 size = int(float(size) * mult) except (ValueError, IndexError): size = 0 try: seeders = int(td[2].text) except ValueError: seeders = 0 if not url or not title: logger.debug('Missing url or title') elif minimumseeders < int(seeders): results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), 'tor_type': mode, 'priority': lazylibrarian.CONFIG['WWT_DLPRIORITY'] }) logger.debug('Found %s. Size: %s' % (title, size)) next_page = True else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: logger.error("An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug('%s: %s' % (provider, traceback.format_exc())) page += 1 if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page: logger.warn( 'Maximum results page search reached, still more results available' ) next_page = False logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm)) return results, errmsg
def encode_postdata(data): # tell urlencode to deal with sequence values and map them correctly # to resulting querystring. for example self["k"] = ["v1", "v2"] will # result in 'k=v1&k=v2' and not k=%5B%27v1%27%2C+%27v2%27%5D return urlencode(data, True)
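# Demonstration of the doseq flag described above, using the stdlib urlencode
# (urllib.parse on python 3, urllib on python 2), aliased so the module's own
# urlencode import is left alone.
try:
    from urllib.parse import urlencode as _urlencode
except ImportError:
    from urllib import urlencode as _urlencode  # python 2

assert _urlencode({'k': ['v1', 'v2']}, True) == 'k=v1&k=v2'
assert _urlencode({'k': ['v1', 'v2']}) == 'k=%5B%27v1%27%2C+%27v2%27%5D'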
def EXTRA(book=None, test=False): errmsg = '' provider = "Extratorrent" host = lazylibrarian.CONFIG['EXTRA_HOST'] if not host.startswith('http'): host = 'http://' + host providerurl = url_fix(host + "/rss") params = {"type": "search", "s_cat": "2", "search": book['searchterm']} searchURL = providerurl + "/?%s" % urlencode(params) sterm = makeUnicode(book['searchterm']) data, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in data: logger.debug("No results found from %s for %s" % (provider, sterm)) success = True else: logger.debug('Error fetching data from %s: %s' % (provider, data)) errmsg = data data = False if test: return success results = [] minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1 if data: logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) d = feedparser.parse(data) if len(d.entries): for item in d.entries: try: title = unaccented(item['title']) try: seeders = int(item['seeders']) except ValueError: seeders = 0 try: size = int(item['size']) except ValueError: size = 0 url = None for link in item['links']: if 'x-bittorrent' in link['type']: url = link['href'] if not url or not title: logger.debug('No url or title found') elif minimumseeders < int(seeders): results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), 'tor_type': 'torrent', 'priority': lazylibrarian.CONFIG['EXTRA_DLPRIORITY'] }) logger.debug('Found %s. Size: %s' % (title, size)) else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: logger.error("An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug('%s: %s' % (provider, traceback.format_exc())) logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm)) return results, errmsg
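# The rss-based providers above pick the enclosure whose mime type contains
# x-bittorrent. Minimal sketch over a feedparser entry; feedparser exposes
# both 'href' and its 'url' alias on link objects, 'href' is used here.
# torrent_enclosure is a hypothetical helper.
def torrent_enclosure(entry):
    for link in entry.get('links', []):
        if 'x-bittorrent' in link.get('type', ''):
            return link.get('href')
    return None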
def NewzNabPlus(book=None, provider=None, searchType=None, searchMode=None, test=False): """ Generic NewzNabplus query function takes in host+key+type and returns the result set regardless of who based on site running NewzNab+ ref http://usenetreviewz.com/nzb-sites/ """ host = provider['HOST'] api_key = provider['API'] logger.debug('[NewzNabPlus] searchType [%s] with Host [%s] mode [%s] using api [%s] for item [%s]' % ( searchType, host, searchMode, api_key, str(book))) results = [] params = ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode) if params: if not str(host)[:4] == "http": host = 'http://' + host if host[-1:] == '/': host = host[:-1] URL = host + '/api?' + urlencode(params) sterm = makeUnicode(book['searchterm']) rootxml = None logger.debug("[NewzNabPlus] URL = %s" % URL) result, success = fetchURL(URL, raw=True) if test: try: result = result.decode('utf-8') except UnicodeDecodeError: result = result.decode('latin-1') except AttributeError: pass if result.startswith('<') and result.endswith('/>') and "error code" in result: result = result[1:-2] success = False if not success: logger.debug(result) return success, result if success: try: rootxml = ElementTree.fromstring(result) except Exception as e: logger.error('Error parsing data from %s: %s %s' % (host, type(e).__name__, str(e))) rootxml = None else: try: result = result.decode('utf-8') except UnicodeDecodeError: result = result.decode('latin-1') except AttributeError: pass if not result or result == "''": result = "Got an empty response" logger.error('Error reading data from %s: %s' % (host, result)) # maybe the host doesn't support the search type cancelled = cancelSearchType(searchType, result, provider) if not cancelled: # it was some other problem BlockProvider(provider['HOST'], result) if rootxml is not None: # to debug because of api logger.debug('Parsing results from <a href="%s">%s</a>' % (URL, host)) if rootxml.tag == 'error': errormsg = rootxml.get('description', default='unknown error') logger.error("%s - %s" % (host, errormsg)) # maybe the host doesn't support the search type cancelled = cancelSearchType(searchType, errormsg, provider) if not cancelled: # it was some other problem BlockProvider(provider['HOST'], errormsg) else: resultxml = rootxml.getiterator('item') nzbcount = 0 maxage = check_int(lazylibrarian.CONFIG['USENET_RETENTION'], 0) for nzb in resultxml: try: thisnzb = ReturnResultsFieldsBySearchType(book, nzb, host, searchMode, provider['DLPRIORITY']) thisnzb['dispname'] = provider['DISPNAME'] if not maxage: nzbcount += 1 results.append(thisnzb) else: # example nzbdate format: Mon, 27 May 2013 02:12:09 +0200 nzbdate = thisnzb['nzbdate'] try: parts = nzbdate.split(' ') nzbdate = ' '.join(parts[:5]) # strip the +0200 dt = datetime.datetime.strptime(nzbdate, "%a, %d %b %Y %H:%M:%S").timetuple() nzbage = age('%04d-%02d-%02d' % (dt.tm_year, dt.tm_mon, dt.tm_mday)) except Exception as e: logger.warn('Unable to get age from [%s] %s %s' % (thisnzb['nzbdate'], type(e).__name__, str(e))) nzbage = 0 if nzbage <= maxage: nzbcount += 1 results.append(thisnzb) else: logger.debug('%s is too old (%s day%s)' % (thisnzb['nzbtitle'], nzbage, plural(nzbage))) except IndexError: logger.debug('No results from %s for %s' % (host, sterm)) logger.debug('Found %s nzb at %s for: %s' % (nzbcount, host, sterm)) else: logger.debug('No data returned from %s for %s' % (host, sterm)) return results
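# Sketch of the retention check above: strip the timezone from an nzb pubdate
# like 'Mon, 27 May 2013 02:12:09 +0200' and compute its age in days. The
# module's own age() helper works from a yyyy-mm-dd string; this hypothetical
# version returns the day count directly.
import datetime

def nzb_age_days(nzbdate):
    try:
        stripped = ' '.join(nzbdate.split(' ')[:5])  # drop the +0200
        dt = datetime.datetime.strptime(stripped, "%a, %d %b %Y %H:%M:%S")
        return (datetime.datetime.now() - dt).days
    except ValueError:
        return 0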
def GEN(book=None, prov=None, test=False): errmsg = '' provider = "libgen.io" if prov is None: prov = 'GEN' host = lazylibrarian.CONFIG[prov + '_HOST'] if not host.startswith('http'): host = 'http://' + host search = lazylibrarian.CONFIG[prov + '_SEARCH'] if not search or not search.endswith('.php'): search = 'search.php' if 'index.php' not in search and 'search.php' not in search: search = 'search.php' if search[0] == '/': search = search[1:] sterm = makeUnicode(book['searchterm']) page = 1 results = [] next_page = True while next_page: if 'index.php' in search: params = { "s": book['searchterm'], "f_lang": "All", "f_columns": 0, "f_ext": "All" } else: params = { "view": "simple", "open": 0, "phrase": 0, "column": "def", "res": 100, "req": book['searchterm'] } if page > 1: params['page'] = page providerurl = url_fix(host + "/%s" % search) searchURL = providerurl + "?%s" % urlencode(params) next_page = False result, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in result: logger.debug("No results found from %s for %s" % (provider, sterm)) success = True elif '111' in result: # looks like libgen has ip based access limits logger.error('Access forbidden. Please wait a while before trying %s again.' % provider) errmsg = result else: logger.debug(searchURL) logger.debug('Error fetching page data from %s: %s' % (provider, result)) errmsg = result result = False if test: return success if result: logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) try: soup = BeautifulSoup(result, 'html5lib') try: table = soup.find_all('table')[2] # un-named table if table: rows = table.find_all('tr') except IndexError: # no results table in result page rows = [] if 'search.php' in search and len(rows) > 1: rows = rows[1:] for row in rows: author = '' title = '' size = '' extn = '' link = '' td = row.find_all('td') if 'index.php' in search and len(td) > 3: try: author = formatAuthorName(td[0].text) title = td[2].text newsoup = BeautifulSoup(str(td[4]), 'html5lib') data = newsoup.find('a') link = data.get('href') extn = data.text.split('(')[0] size = data.text.split('(')[1].split(')')[0] size = size.upper() except IndexError as e: logger.debug('Error parsing libgen index.php results: %s' % str(e)) elif 'search.php' in search and len(td) > 8: try: author = formatAuthorName(td[1].text) title = td[2].text size = td[7].text.upper() extn = td[8].text newsoup = BeautifulSoup(str(td[2]), 'html5lib') data = newsoup.find('a') link = data.get('href') except IndexError as e: logger.debug('Error parsing libgen search.php results: %s' % str(e)) if not size: size = 0 else: try: mult = 1 if 'K' in size: size = size.split('K')[0] mult = 1024 elif 'M' in size: size = size.split('M')[0] mult = 1024 * 1024 elif 'G' in size: size = size.split('G')[0] mult = 1024 * 1024 * 1024 size = int(float(size) * mult) except (ValueError, IndexError): size = 0 if link and title: if author: title = author.strip() + ' ' + title.strip() if extn: title = title + '.' + extn if not link.startswith('http'): if "/ads.php?" in link: url = url_fix(host + link) else: url = url_fix(host + "/ads.php?" + link) else: url = redirect_url(host, link) bookresult, success = fetchURL(url) if not success: # may return 404 if no results, not really an error if '404' in bookresult: logger.debug("No results found from %s for %s" % (provider, sterm)) else: logger.debug(url) logger.debug('Error fetching link data from %s: %s' % (provider, bookresult)) errmsg = bookresult bookresult = False if bookresult: url = None try: new_soup = BeautifulSoup(bookresult, 'html5lib') for link in new_soup.find_all('a'): output = link.get('href') if output: if output.startswith('http') and '/get.php' in output: url = output break elif '/get.php' in output: url = '/get.php' + output.split('/get.php')[1] break elif '/download/book' in output: url = '/download/book' + output.split('/download/book')[1] break if url and not url.startswith('http'): url = url_fix(host + url) else: url = redirect_url(host, url) except Exception as e: logger.error('%s parsing bookresult for %s: %s' % (type(e).__name__, link, str(e))) url = None if url: results.append({ 'bookid': book['bookid'], 'tor_prov': provider + '/' + search, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), 'tor_type': 'direct', 'priority': lazylibrarian.CONFIG[prov + '_DLPRIORITY'] }) logger.debug('Found %s, Size %s' % (title, size)) next_page = True except Exception as e: logger.error("An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug('%s: %s' % (provider, traceback.format_exc())) page += 1 if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page: logger.warn('Maximum results page search reached, still more results available') next_page = False logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm)) return results, errmsg
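# libgen download links may be relative ('/get.php?...', '/download/book...')
# or absolute; the parser above prefixes relative ones with the configured
# host. A stdlib sketch of the same resolution; resolve_link is hypothetical
# and url_fix/redirect_url remain the module's own helpers.
try:
    from urllib.parse import urljoin
except ImportError:
    from urlparse import urljoin  # python 2

def resolve_link(host, link):
    if link.startswith('http'):
        return link
    return urljoin(host, link)

# e.g. resolve_link('http://libgen.io', '/get.php?md5=abc')
#      == 'http://libgen.io/get.php?md5=abc'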