def do_obtain_new_books(cpus, account, notification=lambda x, y: x):
    """Background-job entry point: log in to beam-ebooks, walk the OPDS
    document tree and return the ebooks not yet known to this shop.

    :param cpus: CPU count handed over by calibre's job system (unused here)
    :param account: account-settings dict, keyed by PrefsFacade constants
    :param notification: callable(fraction, message) for progress reporting
    :return: list of ebook-entry dicts that are new for this shop
    """
    print("do_obtain_new_books in jobs.py")
    print("Account is: %s" % (account))
    # This server is an arbitrary_n job, so there is a notifier available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator
    notification(0.01, "Starting up...")
    from calibre.library import db
    from calibre.utils.config import prefs
    prefs.refresh()
    # NOTE(review): 'db' and 'prefs' deliberately shadow the imports above;
    # kept as-is because later code relies on the rebound names.
    db = db(read_only=False)
    print("DB is: %s" % (db))
    prefs = PrefsFacade(db)
    print("Prefs are: %s" % (prefs))
    print("Library id is (%s)" % (prefs.get_library_uuid()))
    reporter = ConsoleReporter()
    downloader = BeamEbooksDownloader(prefs, caller=reporter)
    print("-- LALA -- Downloader is: %s" % (downloader))
    # BUGFIX: initialize up front so the bookkeeping below cannot raise a
    # NameError when the account is disabled or the login fails.
    downloadable_ebooks = []
    if account[prefs.ENABLED]:
        downloader.login(account)
        if not downloader.successful_login:
            notification(1.00, "Failed to log in...")
        else:
            notification(0.05, "Parsing document tree now...")
            downloadable_ebooks = downloader.recursive_descent(norms(prefs[prefs.URLBASE]))
            notification(0.50, "Loaded OPDS pages")
            reporter.notify(downloadable_ebooks)
    #
    # Now, download the obtained ebooks...
    notification(1.00, "Done...")
    adder = EBookAdder(prefs, "beam-ebooks")
    adder.load_books()
    # Keep only the entries whose beam-ebooks id the shop does not know yet.
    new_ebooks = [entry for entry in downloadable_ebooks
                  if adder.books_of_this_shop.get(entry['id']) is None]
    return new_ebooks
def login(self, account):
    """Log in to beam-ebooks with the credentials from *account*.

    Side effects: resets the per-session state (visited links, harvested
    ebooks, cookies) and sets self.beamid / self.successful_login according
    to whether a 'beamid' cookie was granted after form submission.

    :param account: account-settings dict, keyed by self.prefs constants
    """
    self.beamid = None
    self.successful_login = False
    self.already_visited_links = []
    self.downloadable_ebooks = []
    self.account_id = account[self.prefs.ACCOUNT_ID]
    self.username = account[self.prefs.USERNAME]
    self.password = self.prefs.decrypt_password(
        account[self.prefs.OBFUSCATED_PASSWORD])
    # Remove all cookies to be extra safe
    self.browser.cookiejar.clear()
    self.filenumber = 1000
    if self.caller is not None:
        self.caller.notify("Logging in")
    url = self.urlbase + "/aldiko/cookisetzen.php"
    url = norms(url)
    print("    URL: '%s'" % (url))
    print("Browser: '%s'" % (self.browser))
    # print " UA    : '%s'" % (self.browser.user_agent)
    response = self.browser.open(url)
    self.save_response(response)
    print("Cookies: '%s'" % (self.browser.cookiejar))
    if response.code == 200:
        form = self.browser.select_form(nr=0)
        print("Form: '%s'" % (form))
        print(" Auth: '%s', '%s'" % (self.username, self.password))
        self.browser.form['user'] = self.username
        self.browser.form['pass'] = self.password
        # BUGFIX: capture the post-submit response; the original discarded
        # submit()'s return value and re-saved the stale pre-submit response.
        response = self.browser.submit()
        # After form submission
        self.save_response(response)
        # print "Response Code: '%s'" % (response.code)
        # print "Cookies: '%s'" % (self.browser.cookiejar)
        for cookie in self.browser.cookiejar:
            # print " C: '%s'" % (cookie)
            if hasattr(cookie, 'name') and hasattr(cookie, 'value'):
                if cookie.name == 'beamid':
                    self.beamid = cookie.value
                    # TODO should we verify that the beamid is numeric???
                    self.successful_login = True
                    # BUGFIX: guard like the "Logging in" notification above;
                    # self.caller may legitimately be None.
                    if self.caller is not None:
                        self.caller.notify("Login Successful")
def login(self, account):
    """Log in to beam-ebooks with the credentials from *account*.

    Resets the per-session state (visited links, harvested ebooks, cookies)
    and sets self.beamid / self.successful_login when a 'beamid' cookie is
    present after submitting the login form.

    :param account: account-settings dict, keyed by self.prefs constants
    """
    self.beamid = None
    self.successful_login = False
    self.already_visited_links = []
    self.downloadable_ebooks = []
    self.account_id = account[self.prefs.ACCOUNT_ID]
    self.username = account[self.prefs.USERNAME]
    self.password = self.prefs.decrypt_password(account[self.prefs.OBFUSCATED_PASSWORD])
    # Remove all cookies to be extra safe
    self.browser.cookiejar.clear()
    self.filenumber = 1000
    if self.caller is not None:
        self.caller.notify("Logging in")
    url = self.urlbase + "/aldiko/cookisetzen.php"
    url = norms(url)
    print("    URL: '%s'" % (url))
    print("Browser: '%s'" % (self.browser))
    # print " UA    : '%s'" % (self.browser.user_agent)
    response = self.browser.open(url)
    self.save_response(response)
    print("Cookies: '%s'" % (self.browser.cookiejar))
    if response.code == 200:
        form = self.browser.select_form(nr=0)
        print("Form: '%s'" % (form))
        print(" Auth: '%s', '%s'" % (self.username, self.password))
        self.browser.form['user'] = self.username
        self.browser.form['pass'] = self.password
        # BUGFIX: capture the post-submit response; the original discarded
        # submit()'s return value and re-saved the stale pre-submit response.
        response = self.browser.submit()
        # After form submission
        self.save_response(response)
        # print "Response Code: '%s'" % (response.code)
        # print "Cookies: '%s'" % (self.browser.cookiejar)
        for cookie in self.browser.cookiejar:
            # print " C: '%s'" % (cookie)
            if hasattr(cookie, 'name') and hasattr(cookie, 'value'):
                if cookie.name == 'beamid':
                    self.beamid = cookie.value
                    # TODO should we verify that the beamid is numeric???
                    self.successful_login = True
                    # BUGFIX: guard like the "Logging in" notification above;
                    # self.caller may legitimately be None.
                    if self.caller is not None:
                        self.caller.notify("Login Successful")
def cli_main(self, argv):
    """Command-line entry point: iterate over all configured accounts,
    log in and download the ebooks available to each enabled account.

    :param argv: command-line arguments (currently unused)
    """
    from calibre.utils.config import prefs as calibre_prefs
    from calibre_plugins.beam_ebooks_downloader.prefs import PrefsFacade

    my_db = db(path=None, read_only=False)
    # print 'Database is (%s)' % (prefs._get_db())
    print('Database is (%s)' % (my_db))
    prefs = PrefsFacade(my_db)
    print('My Prefs are (%s)' % (prefs))
    print('     methods are (%s)' % (dir(prefs)))
    print('  library id is (%s)' % (prefs.get_library_uuid()))
    print('Calibre Prefs are (%s)' % (calibre_prefs))
    print('     methods are (%s)' % (dir(calibre_prefs)))
    downloader = BeamEbooksDownloader(prefs, self.version, caller=self)
    # Loop over all accounts until we have support for selection
    for account_id in prefs[prefs.ACCOUNTS]:
        account = prefs[prefs.ACCOUNTS][account_id]
        account[prefs.ACCOUNT_ID] = account_id
        print("Account: '%s'" % account)
        if account[prefs.ENABLED]:
            downloader.login(account)
            if not downloader.successful_login:
                print("Failed to log in...")
            else:
                print("Parsing document tree now...")
                # Temporarily disabled direct page visits (credentials were
                # scrubbed from the original commented-out calls):
                # downloader.recursive_descent(prefs[prefs.URLBASE] + "/aldiko/bibuebersicht.php5?user=...")
                # downloader.recursive_descent(prefs[prefs.URLBASE] + "/aldiko/pakete.php5?user=...")
                # downloader.recursive_descent(prefs[prefs.URLBASE] + "/kunden/abos.php5")
                downloader.download_ebooks()
def recursive_descent(self, absolute_url=None, further_descend=True):
    """Visit *absolute_url* (or the configured URL base) once, keeping the
    harvest bookkeeping in prefs up to date, and accumulate downloadable
    ebooks found along the way.

    :param absolute_url: page to visit; defaults to self.urlbase
    :param further_descend: when True, links found on the page are followed
    :return: the list of downloadable ebooks accumulated so far
    """
    if absolute_url is None:
        url = self.urlbase
    else:
        url = absolute_url
    url = norms(url)
    if url in self.already_visited_links:
        print("Already have been here ('%s')..." % (url))
    else:
        # Create/refresh the persistent harvest record for this URL.
        harvested_urls = self.prefs[self.prefs.HARVESTED_URLS]
        harvest_state = harvested_urls.get(url)
        if harvest_state is None:
            harvest_state = {}
            self.prefs[self.prefs.HARVESTED_URLS][url] = harvest_state
            self.prefs.save()
        status = harvest_state.get(self.prefs.HARVEST_STATE)
        if status is None:
            harvest_state[self.prefs.HARVEST_STATE] = self.prefs.HARVEST_STATE_REVISIT
            self.prefs.save()
        title = harvest_state.get(self.prefs.HARVEST_TITLE)
        if title is None:
            harvest_state[self.prefs.HARVEST_TITLE] = ""
            self.prefs.save()
        if self.caller is not None:
            self.caller.notify("Visiting ('%s', '%s')..." % (url, harvest_state))
        # BUGFIX: visit the normalized, defaulted URL; the original passed
        # absolute_url through, which is None when the default was used.
        self.visit_url(url, further_descend)
    # In any case, return a list of ebooks to download
    return self.downloadable_ebooks
def recursive_descent(self, absolute_url=None, further_descend=True):
    """Visit *absolute_url* (or the configured URL base) once, keeping the
    harvest bookkeeping in prefs up to date, and accumulate downloadable
    ebooks found along the way.

    :param absolute_url: page to visit; defaults to self.urlbase
    :param further_descend: when True, links found on the page are followed
    :return: the list of downloadable ebooks accumulated so far
    """
    if absolute_url is None:
        url = self.urlbase
    else:
        url = absolute_url
    url = norms(url)
    if url in self.already_visited_links:
        print("Already have been here ('%s')..." % (url))
    else:
        # Create/refresh the persistent harvest record for this URL.
        harvested_urls = self.prefs[self.prefs.HARVESTED_URLS]
        harvest_state = harvested_urls.get(url)
        if harvest_state is None:
            harvest_state = {}
            self.prefs[self.prefs.HARVESTED_URLS][url] = harvest_state
            self.prefs.save()
        status = harvest_state.get(self.prefs.HARVEST_STATE)
        if status is None:
            harvest_state[self.prefs.HARVEST_STATE] = self.prefs.HARVEST_STATE_REVISIT
            self.prefs.save()
        title = harvest_state.get(self.prefs.HARVEST_TITLE)
        if title is None:
            harvest_state[self.prefs.HARVEST_TITLE] = ""
            self.prefs.save()
        if self.caller is not None:
            self.caller.notify("Visiting ('%s', '%s')..." % (url, harvest_state))
        # BUGFIX: visit the normalized, defaulted URL; the original passed
        # absolute_url through, which is None when the default was used.
        self.visit_url(url, further_descend)
    # In any case, return a list of ebooks to download
    return self.downloadable_ebooks
def visit_url(self, url=None, further_descend=True):
    """Fetch one OPDS page, record downloadable ebooks found in it and
    collect links to further pages.

    :param url: absolute URL of the page to fetch
    :param further_descend: when True, recursively descend into found links
    :return: the list of follow-up links harvested from this page
    """
    print("    URL: '%s'" % (url))
    # BUGFIX: the page was fetched twice back-to-back and the first
    # response discarded; open it once only.
    response = self.browser.open(url)
    self.save_response(response)
    content = response.get_data()
    links_to_visit = []
    if response.code != 200:
        # NOTE(review): execution deliberately continues here; the parse
        # below then operates on whatever error page was returned.
        print("Something horrible happened (RC %s)" % (response.code))
    root = fromstring(content)
    entrylist = root.xpath("//entry")
    for entry in entrylist:
        # print " Entry: '%s'" % (tostring(entry, pretty_print=True).strip())
        idtag = entry.xpath('id')[0]
        if idtag is not None:
            # print " Id: '%s'\n" % (tostring(idtag, pretty_print=True).strip())
            contents = idtag.text_content()
            # print " Id content: '%s' / '%s'" % (idtag, contents)
            # Private-shelf and "alle" overview entries are follow-up pages.
            # (The urn prefixes are mutually exclusive, so merging the two
            # original branches preserves behavior.)
            match = (re.match('urn:beam-ebooks:private', contents) or
                     re.match('urn:beam-ebooks:alle', contents))
            if match:
                href = self.extract_link(entry)
                if href:
                    href = norms(href)
                    print(" Seems to be a followable link ('%s')" % (href))
                    links_to_visit.append(href)
            match = re.match('urn:beam-ebooks:titelnr:', contents)
            if match:
                (href, mimetype) = self.extract_link(entry)
                if href:
                    href = norms(href)
                    # Raw string: the original '\/' escape is deprecated;
                    # '/' needs no escaping in a regex.
                    match = re.search(r'/download\.php5\?.*$', href)
                    if match:
                        print(" Seems to be an ebook ('%s', '%s')" % (mimetype, href))
                        data = {}
                        data['urn'] = contents
                        data['href'] = href
                        data['mimetype'] = mimetype
                        # urn shape: urn:beam-ebooks:titelnr:<id>
                        foo = re.split(':', contents)
                        data['id'] = foo[3]
                        self.downloadable_ebooks.append(data)
                    else:
                        print(" Seems to be a followable link ('%s')" % (href))
                        links_to_visit.append(href)
    # Finally, visit all pages that we encountered
    if further_descend:
        for link in links_to_visit:
            link = norms(link)
            self.recursive_descent(link)
    # In any case, return the links we had to visit...
    return links_to_visit
def visit_url(self, url=None, further_descend=True):
    """Fetch one OPDS page, record downloadable ebooks found in it and
    collect links to further pages.

    :param url: absolute URL of the page to fetch
    :param further_descend: when True, recursively descend into found links
    :return: the list of follow-up links harvested from this page
    """
    print("    URL: '%s'" % (url))
    # BUGFIX: the page was fetched twice back-to-back and the first
    # response discarded; open it once only.
    response = self.browser.open(url)
    self.save_response(response)
    content = response.get_data()
    links_to_visit = []
    if response.code != 200:
        # NOTE(review): execution deliberately continues here; the parse
        # below then operates on whatever error page was returned.
        print("Something horrible happened (RC %s)" % (response.code))
    root = fromstring(content)
    entrylist = root.xpath("//entry")
    for entry in entrylist:
        # print " Entry: '%s'" % (tostring(entry, pretty_print=True).strip())
        idtag = entry.xpath('id')[0]
        if idtag is not None:
            # print " Id: '%s'\n" % (tostring(idtag, pretty_print=True).strip())
            contents = idtag.text_content()
            # print " Id content: '%s' / '%s'" % (idtag, contents)
            # Private-shelf and "alle" overview entries are follow-up pages.
            # (The urn prefixes are mutually exclusive, so merging the two
            # original branches preserves behavior.)
            match = (re.match('urn:beam-ebooks:private', contents) or
                     re.match('urn:beam-ebooks:alle', contents))
            if match:
                href = self.extract_link(entry)
                if href:
                    href = norms(href)
                    print(" Seems to be a followable link ('%s')" % (href))
                    links_to_visit.append(href)
            match = re.match('urn:beam-ebooks:titelnr:', contents)
            if match:
                (href, mimetype) = self.extract_link(entry)
                if href:
                    href = norms(href)
                    # Raw string: the original '\/' escape is deprecated;
                    # '/' needs no escaping in a regex.
                    match = re.search(r'/download\.php5\?.*$', href)
                    if match:
                        print(" Seems to be an ebook ('%s', '%s')" % (mimetype, href))
                        data = {}
                        data['urn'] = contents
                        data['href'] = href
                        data['mimetype'] = mimetype
                        # urn shape: urn:beam-ebooks:titelnr:<id>
                        foo = re.split(':', contents)
                        data['id'] = foo[3]
                        self.downloadable_ebooks.append(data)
                    else:
                        print(" Seems to be a followable link ('%s')" % (href))
                        links_to_visit.append(href)
    # Finally, visit all pages that we encountered
    if further_descend:
        for link in links_to_visit:
            link = norms(link)
            self.recursive_descent(link)
    # In any case, return the links we had to visit...
    return links_to_visit