def NewzNabPlus(book=None, host=None, api_key=None, searchType=None): #logger.info('[NewzNabPlus] Searching term [%s] for author [%s] and title [%s] on host [%s] for a [%s] item' % (book['searchterm'], book['authorName'], book['bookName'], host, searchType)) logger.info('[NewzNabPlus] searchType [%s] with Host [%s] using api [%s] for item [%s]'%(searchType, host, api_key,str(book))) results = [] params = ReturnSearchTypeStructure(api_key, book, searchType) if not str(host)[:4] == "http": host = 'http://' + host URL = host + '/api?' + urllib.urlencode(params) try : request = urllib2.Request(URL) if lazylibrarian.PROXY_HOST: request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE) request.add_header('User-Agent', USER_AGENT) opener = urllib2.build_opener(SimpleCache.CacheHandler(".ProviderCache"), SimpleCache.ThrottlingProcessor(5)) resp = opener.open(request) try: data = ElementTree.parse(resp) except (urllib2.URLError, IOError, EOFError), e: logger.warn('Error fetching data from %s: %s' % (host, e)) data = None except Exception, e: logger.error("Error 403 openning url") data = None
def get_author_books(self, authorid=None, authorname=None, refresh=False):
    """Fetch the Goodreads author book-list XML for *authorid*.

    Marks the author row as "Loading" in the database, then requests
    ``author/list/<authorid>.xml`` and parses it into ``sourcexml``
    (logged on failure).  Processing continues beyond this fragment.
    authorname and refresh are not used in the visible portion.
    """
    api_hits = 0
    URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(
        self.params)

    #Artist is loading
    myDB = database.DBConnection()
    controlValueDict = {"AuthorID": authorid}
    newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    try:
        # Cache our request
        request = urllib2.Request(URL)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
        request.add_header('User-Agent', USER_AGENT)
        opener = urllib2.build_opener(
            SimpleCache.CacheHandler(".AuthorCache"),
            SimpleCache.ThrottlingProcessor(5))
        resp = opener.open(request)
        api_hits = api_hits + 1
        sourcexml = ElementTree.parse(resp)
    except Exception, e:
        # NOTE(review): sourcexml stays unset on failure — presumably code
        # past this fragment guards against that; confirm.
        logger.error("Error fetching author info: " + str(e))
def NewzNab(book=None, newznabNumber=None): if (newznabNumber == "1"): HOST = lazylibrarian.NEWZNAB_HOST logger.info('Searching for %s.' % book['searchterm'] + " at: " + lazylibrarian.NEWZNAB_HOST) if (newznabNumber == "2"): HOST = lazylibrarian.NEWZNAB_HOST2 logger.info('Searching for %s.' % book['searchterm'] + " at: " + lazylibrarian.NEWZNAB_HOST2) results = [] if lazylibrarian.EBOOK_TYPE == None: params = { "t": "book", "apikey": lazylibrarian.NEWZNAB_API, #"cat": 7020, "author": book['searchterm'] } else: params = { "t": "search", "apikey": lazylibrarian.NEWZNAB_API, "cat": 7020, "q": book['searchterm'], "extended": 1, } if not str(HOST)[:4] == "http": HOST = 'http://' + HOST URL = HOST + '/api?' + urllib.urlencode(params) try: request = urllib2.Request(URL) if lazylibrarian.PROXY_HOST: request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE) request.add_header('User-Agent', common.USER_AGENT) opener = urllib2.build_opener( SimpleCache.CacheHandler(".ProviderCache"), SimpleCache.ThrottlingProcessor(5)) resp = opener.open(request) try: data = ElementTree.parse(resp) except (urllib2.URLError, IOError, EOFError), e: logger.warn('Error fetching data from %s: %s' % (lazylibrarian.NEWZNAB_HOST, e)) data = None except Exception, e: logger.error("Error 403 openning url") data = None
def get_author_books(self, authorid=None):
    """Fetch the Goodreads author book-list XML for *authorid*.

    Simpler variant: no DB status update, proxy support, or User-Agent
    header.  Parses the response into ``sourcexml`` (logged on failure);
    processing continues beyond this fragment.
    """
    URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(
        self.params)
    try:
        # Cache our request
        request = urllib2.Request(URL)
        opener = urllib2.build_opener(
            SimpleCache.CacheHandler(".AuthorCache"),
            SimpleCache.ThrottlingProcessor(5))
        resp = opener.open(request)
        sourcexml = ElementTree.parse(resp)
    except Exception, e:
        # NOTE(review): sourcexml stays unset on failure — confirm the code
        # past this fragment handles that.
        logger.error("Error fetching author info: " + str(e))
def find_author_id(self): URL = 'http://www.goodreads.com/api/author_url/?' + urllib.urlencode( self.name) + '&' + urllib.urlencode(self.params) logger.debug("Searching for author with name: %s" % self.name) # Cache our request request = urllib2.Request(URL) opener = urllib2.build_opener(SimpleCache.CacheHandler(".AuthorCache"), SimpleCache.ThrottlingProcessor(5)) resp = opener.open(request) try: sourcexml = ElementTree.parse(resp) except Exception, e: logger.error("Error fetching authorid: " + str(e))
def find_book(self, bookid=None, queue=None):
    """Fetch the Goodreads ``book/show`` XML page for *bookid*.

    Renames the current thread to GR-ADD-BOOK and parses the response into
    ``sourcexml`` (logged on failure).  queue is not used in the visible
    portion; processing continues beyond this fragment.
    """
    threading.currentThread().name = "GR-ADD-BOOK"
    myDB = database.DBConnection()
    URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(self.params)
    try:
        # Cache our request
        request = urllib2.Request(URL)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
        request.add_header('User-Agent', USER_AGENT)
        opener = urllib2.build_opener(SimpleCache.CacheHandler(".AuthorCache"),
                                      SimpleCache.ThrottlingProcessor(5))
        resp = opener.open(request)
        sourcexml = ElementTree.parse(resp)
    except Exception, e:
        logger.error("Error fetching book info: " + str(e))
def find_author_id(self):
    """Look up the Goodreads author id for ``self.name``.

    This variant correctly percent-quotes the name into the URL path and
    supports the proxy / User-Agent settings.  Parses the response into
    ``sourcexml`` (logged on failure); processing continues beyond this
    fragment.
    """
    URL = 'http://www.goodreads.com/api/author_url/' + urllib.quote(self.name) + '?' + urllib.urlencode(self.params)
    logger.debug("Searching for author with name: %s" % self.name)

    # Cache our request
    request = urllib2.Request(URL)
    if lazylibrarian.PROXY_HOST:
        request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
    request.add_header('User-Agent', USER_AGENT)
    opener = urllib2.build_opener(SimpleCache.CacheHandler(".AuthorCache"),
                                  SimpleCache.ThrottlingProcessor(5))
    resp = opener.open(request)
    try:
        sourcexml = ElementTree.parse(resp)
    except Exception, e:
        logger.error("Error fetching authorid: " + str(e) + str(URL))
def get_author_info(self, authorid=None, authorname=None, refresh=False): URL = 'http://www.goodreads.com/author/show/' + authorid + '.xml?' + urllib.urlencode( self.params) # Cache our request request = urllib2.Request(URL) opener = urllib2.build_opener(SimpleCache.CacheHandler(".AuthorCache"), SimpleCache.ThrottlingProcessor(5)) resp = opener.open(request) try: sourcexml = ElementTree.parse(resp) rootxml = sourcexml.getroot() resultxml = rootxml.find('author') author_dict = {} except Exception, e: logger.error("Error fetching author ID: " + str(e))
def find_results(self, authorname=None, queue=None):
    """Search the Goodreads API for *authorname* (an author name, title, or
    ISBN string) and build ``resultlist``: one dict per matching work, with
    fuzzy-match scores against the query.  queue is not used in the visible
    portion; processing continues beyond this fragment.
    """
    threading.currentThread().name = "GR-SEARCH"
    resultlist = []
    api_hits = 0
    url = urllib.quote_plus(authorname.encode('utf-8'))
    set_url = 'http://www.goodreads.com/search.xml?q=' + url + '&' + urllib.urlencode(self.params)
    logger.info('Now searching GoodReads API with keyword: ' + authorname)
    logger.debug('Searching for %s at: %s' % (authorname, set_url))
    try:
        try:
            # Cache our request
            request = urllib2.Request(set_url)
            if lazylibrarian.PROXY_HOST:
                request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
            request.add_header('User-Agent', USER_AGENT)
            opener = urllib2.build_opener(SimpleCache.CacheHandler(".AuthorCache"),
                                          SimpleCache.ThrottlingProcessor(5))
            resp = opener.open(request)
            api_hits = api_hits + 1
            sourcexml = ElementTree.parse(resp)
        except Exception, e:
            # NOTE(review): on failure sourcexml is unset, so getroot() below
            # raises NameError (caught by the outer try whose handler lies
            # beyond this fragment) — confirm intended.
            logger.error("Error finding results: " + str(e))
        rootxml = sourcexml.getroot()
        resultxml = rootxml.getiterator('work')
        author_dict = []
        resultcount = 0
        for author in resultxml:
            # Publication year; "0000" when Goodreads has none
            bookdate = "0001-01-01"
            if (author.find('original_publication_year').text == None):
                bookdate = "0000"
            else:
                bookdate = author.find('original_publication_year').text
            authorNameResult = author.find('./best_book/author/name').text
            booksub = ""
            bookpub = ""
            booklang = "en"
            # Substitute local placeholder for the Goodreads stock "no cover"
            try:
                bookimg = author.find('./best_book/image_url').text
                if (bookimg == 'http://www.goodreads.com/assets/nocover/111x148.png'):
                    bookimg = 'images/nocover.png'
            except KeyError:
                bookimg = 'images/nocover.png'
            except AttributeError:
                bookimg = 'images/nocover.png'
            try:
                bookrate = author.find('average_rating').text
            except KeyError:
                bookrate = 0
            bookpages = '0'
            bookgenre = ''
            bookdesc = ''
            bookisbn = ''
            booklink = 'http://www.goodreads.com/book/show/' + author.find('./best_book/id').text
            if (author.find('./best_book/title').text == None):
                bookTitle = ""
            else:
                bookTitle = author.find('./best_book/title').text
            # Fuzzy-match both author name and title against the query
            author_fuzz = fuzz.ratio(authorNameResult.lower(), authorname.lower())
            book_fuzz = fuzz.ratio(bookTitle.lower(), authorname.lower())
            # A 9- or 12-digit prefix (ISBN-10/13 minus check digit) means the
            # query itself was an ISBN, so force a perfect isbn score
            try:
                isbn_check = int(authorname[:-1])
                if (len(str(isbn_check)) == 9) or (len(str(isbn_check)) == 12):
                    isbn_fuzz = int(100)
                else:
                    isbn_fuzz = int(0)
            except:
                isbn_fuzz = int(0)
            highest_fuzz = max(author_fuzz, book_fuzz, isbn_fuzz)
            resultlist.append({
                'authorname': author.find('./best_book/author/name').text,
                'bookid': author.find('./best_book/id').text,
                'authorid': author.find('./best_book/author/id').text,
                'bookname': bookTitle.encode("ascii", "ignore"),
                'booksub': booksub,
                'bookisbn': bookisbn,
                'bookpub': bookpub,
                'bookdate': bookdate,
                'booklang': booklang,
                'booklink': booklink,
                'bookrate': float(bookrate),
                'bookimg': bookimg,
                'bookpages': bookpages,
                'bookgenre': bookgenre,
                'bookdesc': bookdesc,
                'author_fuzz': author_fuzz,
                'book_fuzz': book_fuzz,
                'isbn_fuzz': isbn_fuzz,
                'highest_fuzz': highest_fuzz,
                # NOTE(review): num_reviews is filled with the average rating,
                # not a review count — looks like a copy/paste slip; confirm.
                'num_reviews': float(bookrate)
            })
            resultcount = resultcount + 1
# Fragment: per-book loop body resolving each book's language code via the
# Goodreads "book/isbn" endpoint (proxy-aware variant).
try:
    time.sleep(1)  #sleep 1 second to respect goodreads api terms
    if (book.find('isbn13').text is not None):
        BOOK_URL = 'http://www.goodreads.com/book/isbn?isbn=' + book.find('isbn13').text + '&' + urllib.urlencode(self.params)
        logger.debug(u"Book URL: " + str(BOOK_URL))
        try:
            # Cache our request
            request = urllib2.Request(BOOK_URL)
            if lazylibrarian.PROXY_HOST:
                request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
            request.add_header('User-Agent', USER_AGENT)
            opener = urllib2.build_opener(SimpleCache.CacheHandler(".AuthorCache"),
                                          SimpleCache.ThrottlingProcessor(5))
            resp = opener.open(request)
        except Exception, e:
            # NOTE(review): logger.error is given two positional args here
            # (other call sites concatenate into one string), and resp stays
            # unset on failure so the parse below raises NameError — caught
            # only by the outer handler; confirm intended.
            logger.error("Error finding results: ", e)
        BOOK_sourcexml = ElementTree.parse(resp)
        BOOK_rootxml = BOOK_sourcexml.getroot()
        bookLanguage = BOOK_rootxml.find('./book/language_code').text
        logger.debug(u"language: " + str(bookLanguage))
    else:
        logger.debug("No ISBN provided, skipping")
        continue
except Exception, e:
    # handler body continues beyond this fragment
# Fragment: per-book loop body resolving each book's language code via the
# Goodreads "book/isbn" endpoint (variant without proxy/User-Agent support).
try:
    time.sleep(1)  #sleep 1 second to respect goodreads api terms
    if (book.find('isbn13').text is not None):
        BOOK_URL = 'http://www.goodreads.com/book/isbn?isbn=' + book.find(
            'isbn13').text + '&' + urllib.urlencode(
            self.params)
        logger.debug(u"Book URL: " + str(BOOK_URL))
        try:
            # Cache our request
            request = urllib2.Request(BOOK_URL)
            opener = urllib2.build_opener(
                SimpleCache.CacheHandler(".AuthorCache"),
                SimpleCache.ThrottlingProcessor(5))
            resp = opener.open(request)
        except Exception, e:
            # NOTE(review): logger.error is given two positional args here,
            # and resp stays unset on failure so the parse below raises
            # NameError — caught only by the enclosing try; confirm intended.
            logger.error("Error finding results: ", e)
        BOOK_sourcexml = ElementTree.parse(resp)
        BOOK_rootxml = BOOK_sourcexml.getroot()
        bookLanguage = BOOK_rootxml.find(
            './book/language_code').text
        logger.debug(u"language: " + str(bookLanguage))
    else:
        logger.debug("No ISBN provided, skipping")
        continue
def find_results(self, authorname=None):
    """Search the Goodreads API for *authorname* and build ``resultlist``:
    one dict per matching work.  Simpler variant: no fuzzy scoring, no
    proxy/User-Agent support.  Processing continues beyond this fragment.
    """
    resultlist = []
    logger.info(authorname)
    url = urllib.quote_plus(authorname.encode('utf-8'))
    set_url = 'http://www.goodreads.com/search.xml?q=' + url + '&' + urllib.urlencode(
        self.params)
    logger.info('Searching for author at: %s' % set_url)
    try:
        try:
            # Cache our request
            request = urllib2.Request(set_url)
            opener = urllib2.build_opener(
                SimpleCache.CacheHandler(".AuthorCache"),
                SimpleCache.ThrottlingProcessor(5))
            resp = opener.open(request)
            sourcexml = ElementTree.parse(resp)
        except Exception, e:
            # NOTE(review): on failure sourcexml is unset, so getroot() below
            # raises NameError (caught by the outer try whose handler lies
            # beyond this fragment) — confirm intended.
            logger.error("Error finding results: " + str(e))
        rootxml = sourcexml.getroot()
        resultxml = rootxml.getiterator('work')
        author_dict = []
        resultcount = 0
        for author in resultxml:
            # Publication year; "0000" when Goodreads has none
            bookdate = "0001-01-01"
            if (author.find('original_publication_year').text == None):
                bookdate = "0000"
            else:
                bookdate = author.find('original_publication_year').text
            authorNameResult = author.find('./best_book/author/name').text
            booksub = ""
            bookpub = ""
            booklang = "en"
            # Substitute local placeholder for the Goodreads stock "no cover"
            try:
                bookimg = author.find('./best_book/image_url').text
                if (bookimg ==
                        'http://www.goodreads.com/assets/nocover/111x148.png'):
                    bookimg = 'images/nocover.png'
            except KeyError:
                bookimg = 'images/nocover.png'
            except AttributeError:
                bookimg = 'images/nocover.png'
            try:
                bookrate = author.find('average_rating').text
            except KeyError:
                bookrate = 0
            bookpages = '0'
            bookgenre = ''
            bookdesc = 'Not available'
            # NOTE(review): bookisbn is filled with the best_book id, not an
            # ISBN — looks like a copy/paste slip; confirm.
            bookisbn = author.find('./best_book/id').text
            if (author.find('./best_book/title').text == None):
                bookTitle = ""
            else:
                bookTitle = author.find('./best_book/title').text
            resultlist.append({
                'authorname': author.find('./best_book/author/name').text,
                'bookid': author.find('./best_book/id').text,
                'authorid': author.find('./best_book/author/id').text,
                'bookname': bookTitle.encode("ascii", "ignore"),
                'booksub': booksub,
                'bookisbn': bookisbn,
                'bookpub': bookpub,
                'bookdate': bookdate,
                'booklang': booklang,
                'booklink': '/',
                'bookrate': float(bookrate),
                'bookimg': bookimg,
                'bookpages': bookpages,
                'bookgenre': bookgenre,
                'bookdesc': bookdesc
            })
            resultcount = resultcount + 1