def do_obtain_new_books(cpus, account, notification=lambda x, y: x):
    """Background-job entry point: log in to beam-ebooks, walk the OPDS
    document tree and return the ebooks not yet known to this shop.

    :param cpus: CPU count handed over by calibre's job system (unused here)
    :param account: account-settings dict, keyed by PrefsFacade constants
    :param notification: callable(fraction, message) for progress reporting
    :return: list of ebook-entry dicts that are new for this shop
    """
    print("do_obtain_new_books in jobs.py")
    print("Account is: %s" % (account))
    # This server is an arbitrary_n job, so there is a notifier available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator
    notification(0.01, "Starting up...")
    from calibre.library import db
    from calibre.utils.config import prefs
    prefs.refresh()
    # NOTE(review): 'db' and 'prefs' deliberately shadow the imports above;
    # kept as-is because later code relies on the rebound names.
    db = db(read_only=False)
    print("DB is: %s" % (db))
    prefs = PrefsFacade(db)
    print("Prefs are: %s" % (prefs))
    print("Library id is (%s)" % (prefs.get_library_uuid()))
    reporter = ConsoleReporter()
    downloader = BeamEbooksDownloader(prefs, caller=reporter)
    print("-- LALA -- Downloader is: %s" % (downloader))
    # BUGFIX: initialize up front so the bookkeeping below cannot raise a
    # NameError when the account is disabled or the login fails.
    downloadable_ebooks = []
    if account[prefs.ENABLED]:
        downloader.login(account)
        if not downloader.successful_login:
            notification(1.00, "Failed to log in...")
        else:
            notification(0.05, "Parsing document tree now...")
            downloadable_ebooks = downloader.recursive_descent(norms(prefs[prefs.URLBASE]))
            notification(0.50, "Loaded OPDS pages")
            reporter.notify(downloadable_ebooks)
    #
    # Now, download the obtained ebooks...
    notification(1.00, "Done...")
    adder = EBookAdder(prefs, "beam-ebooks")
    adder.load_books()
    # Keep only the entries whose beam-ebooks id the shop does not know yet.
    new_ebooks = [entry for entry in downloadable_ebooks
                  if adder.books_of_this_shop.get(entry['id']) is None]
    return new_ebooks
def login(self, account):
    """Log in to beam-ebooks with the credentials from *account*.

    Side effects: resets the per-session state (visited links, harvested
    ebooks, cookies) and sets self.beamid / self.successful_login according
    to whether a 'beamid' cookie was granted after form submission.

    :param account: account-settings dict, keyed by self.prefs constants
    """
    self.beamid = None
    self.successful_login = False
    self.already_visited_links = []
    self.downloadable_ebooks = []
    self.account_id = account[self.prefs.ACCOUNT_ID]
    self.username = account[self.prefs.USERNAME]
    self.password = self.prefs.decrypt_password(
        account[self.prefs.OBFUSCATED_PASSWORD])
    # Remove all cookies to be extra safe
    self.browser.cookiejar.clear()
    self.filenumber = 1000
    if self.caller is not None:
        self.caller.notify("Logging in")
    url = self.urlbase + "/aldiko/cookisetzen.php"
    url = norms(url)
    print("    URL: '%s'" % (url))
    print("Browser: '%s'" % (self.browser))
    # print " UA    : '%s'" % (self.browser.user_agent)
    response = self.browser.open(url)
    self.save_response(response)
    print("Cookies: '%s'" % (self.browser.cookiejar))
    if response.code == 200:
        form = self.browser.select_form(nr=0)
        print("Form: '%s'" % (form))
        print(" Auth: '%s', '%s'" % (self.username, self.password))
        self.browser.form['user'] = self.username
        self.browser.form['pass'] = self.password
        # BUGFIX: capture the post-submit response; the original discarded
        # submit()'s return value and re-saved the stale pre-submit response.
        response = self.browser.submit()
        # After form submission
        self.save_response(response)
        # print "Response Code: '%s'" % (response.code)
        # print "Cookies: '%s'" % (self.browser.cookiejar)
        for cookie in self.browser.cookiejar:
            # print " C: '%s'" % (cookie)
            if hasattr(cookie, 'name') and hasattr(cookie, 'value'):
                if cookie.name == 'beamid':
                    self.beamid = cookie.value
                    # TODO should we verify that the beamid is numeric???
                    self.successful_login = True
                    # BUGFIX: guard like the "Logging in" notification above;
                    # self.caller may legitimately be None.
                    if self.caller is not None:
                        self.caller.notify("Login Successful")
def login(self, account):
    """Log in to beam-ebooks with the credentials from *account*.

    Resets the per-session state (visited links, harvested ebooks, cookies)
    and sets self.beamid / self.successful_login when a 'beamid' cookie is
    present after submitting the login form.

    :param account: account-settings dict, keyed by self.prefs constants
    """
    self.beamid = None
    self.successful_login = False
    self.already_visited_links = []
    self.downloadable_ebooks = []
    self.account_id = account[self.prefs.ACCOUNT_ID]
    self.username = account[self.prefs.USERNAME]
    self.password = self.prefs.decrypt_password(account[self.prefs.OBFUSCATED_PASSWORD])
    # Remove all cookies to be extra safe
    self.browser.cookiejar.clear()
    self.filenumber = 1000
    if self.caller is not None:
        self.caller.notify("Logging in")
    url = self.urlbase + "/aldiko/cookisetzen.php"
    url = norms(url)
    print("    URL: '%s'" % (url))
    print("Browser: '%s'" % (self.browser))
    # print " UA    : '%s'" % (self.browser.user_agent)
    response = self.browser.open(url)
    self.save_response(response)
    print("Cookies: '%s'" % (self.browser.cookiejar))
    if response.code == 200:
        form = self.browser.select_form(nr=0)
        print("Form: '%s'" % (form))
        print(" Auth: '%s', '%s'" % (self.username, self.password))
        self.browser.form['user'] = self.username
        self.browser.form['pass'] = self.password
        # BUGFIX: capture the post-submit response; the original discarded
        # submit()'s return value and re-saved the stale pre-submit response.
        response = self.browser.submit()
        # After form submission
        self.save_response(response)
        # print "Response Code: '%s'" % (response.code)
        # print "Cookies: '%s'" % (self.browser.cookiejar)
        for cookie in self.browser.cookiejar:
            # print " C: '%s'" % (cookie)
            if hasattr(cookie, 'name') and hasattr(cookie, 'value'):
                if cookie.name == 'beamid':
                    self.beamid = cookie.value
                    # TODO should we verify that the beamid is numeric???
                    self.successful_login = True
                    # BUGFIX: guard like the "Logging in" notification above;
                    # self.caller may legitimately be None.
                    if self.caller is not None:
                        self.caller.notify("Login Successful")
def cli_main(self, argv):
    """Command-line entry point: iterate over all configured accounts,
    log in and download the ebooks available to each enabled account.

    :param argv: command-line arguments (currently unused)
    """
    from calibre.utils.config import prefs as calibre_prefs
    from calibre_plugins.beam_ebooks_downloader.prefs import PrefsFacade

    my_db = db(path=None, read_only=False)
    # print 'Database is (%s)' % (prefs._get_db())
    print('Database is (%s)' % (my_db))
    prefs = PrefsFacade(my_db)
    print('My Prefs are (%s)' % (prefs))
    print('     methods are (%s)' % (dir(prefs)))
    print('  library id is (%s)' % (prefs.get_library_uuid()))
    print('Calibre Prefs are (%s)' % (calibre_prefs))
    print('     methods are (%s)' % (dir(calibre_prefs)))
    downloader = BeamEbooksDownloader(prefs, self.version, caller=self)
    # Loop over all accounts until we have support for selection
    for account_id in prefs[prefs.ACCOUNTS]:
        account = prefs[prefs.ACCOUNTS][account_id]
        account[prefs.ACCOUNT_ID] = account_id
        print("Account: '%s'" % account)
        if account[prefs.ENABLED]:
            downloader.login(account)
            if not downloader.successful_login:
                print("Failed to log in...")
            else:
                print("Parsing document tree now...")
                # Temporarily disabled direct page visits (credentials were
                # scrubbed from the original commented-out calls):
                # downloader.recursive_descent(prefs[prefs.URLBASE] + "/aldiko/bibuebersicht.php5?user=...")
                # downloader.recursive_descent(prefs[prefs.URLBASE] + "/aldiko/pakete.php5?user=...")
                # downloader.recursive_descent(prefs[prefs.URLBASE] + "/kunden/abos.php5")
                downloader.download_ebooks()
def recursive_descent(self, absolute_url=None, further_descend=True):
    """Visit *absolute_url* (or the configured URL base) once, keeping the
    harvest bookkeeping in prefs up to date, and accumulate downloadable
    ebooks found along the way.

    :param absolute_url: page to visit; defaults to self.urlbase
    :param further_descend: when True, links found on the page are followed
    :return: the list of downloadable ebooks accumulated so far
    """
    if absolute_url is None:
        url = self.urlbase
    else:
        url = absolute_url
    url = norms(url)
    if url in self.already_visited_links:
        print("Already have been here ('%s')..." % (url))
    else:
        # Create/refresh the persistent harvest record for this URL.
        harvested_urls = self.prefs[self.prefs.HARVESTED_URLS]
        harvest_state = harvested_urls.get(url)
        if harvest_state is None:
            harvest_state = {}
            self.prefs[self.prefs.HARVESTED_URLS][url] = harvest_state
            self.prefs.save()
        status = harvest_state.get(self.prefs.HARVEST_STATE)
        if status is None:
            harvest_state[self.prefs.HARVEST_STATE] = self.prefs.HARVEST_STATE_REVISIT
            self.prefs.save()
        title = harvest_state.get(self.prefs.HARVEST_TITLE)
        if title is None:
            harvest_state[self.prefs.HARVEST_TITLE] = ""
            self.prefs.save()
        if self.caller is not None:
            self.caller.notify("Visiting ('%s', '%s')..." % (url, harvest_state))
        # BUGFIX: visit the normalized, defaulted URL; the original passed
        # absolute_url through, which is None when the default was used.
        self.visit_url(url, further_descend)
    # In any case, return a list of ebooks to download
    return self.downloadable_ebooks
def recursive_descent(self, absolute_url=None, further_descend=True):
    """Visit *absolute_url* (or the configured URL base) once, keeping the
    harvest bookkeeping in prefs up to date, and accumulate downloadable
    ebooks found along the way.

    :param absolute_url: page to visit; defaults to self.urlbase
    :param further_descend: when True, links found on the page are followed
    :return: the list of downloadable ebooks accumulated so far
    """
    if absolute_url is None:
        url = self.urlbase
    else:
        url = absolute_url
    url = norms(url)
    if url in self.already_visited_links:
        print("Already have been here ('%s')..." % (url))
    else:
        # Create/refresh the persistent harvest record for this URL.
        harvested_urls = self.prefs[self.prefs.HARVESTED_URLS]
        harvest_state = harvested_urls.get(url)
        if harvest_state is None:
            harvest_state = {}
            self.prefs[self.prefs.HARVESTED_URLS][url] = harvest_state
            self.prefs.save()
        status = harvest_state.get(self.prefs.HARVEST_STATE)
        if status is None:
            harvest_state[self.prefs.HARVEST_STATE] = self.prefs.HARVEST_STATE_REVISIT
            self.prefs.save()
        title = harvest_state.get(self.prefs.HARVEST_TITLE)
        if title is None:
            harvest_state[self.prefs.HARVEST_TITLE] = ""
            self.prefs.save()
        if self.caller is not None:
            self.caller.notify("Visiting ('%s', '%s')..." % (url, harvest_state))
        # BUGFIX: visit the normalized, defaulted URL; the original passed
        # absolute_url through, which is None when the default was used.
        self.visit_url(url, further_descend)
    # In any case, return a list of ebooks to download
    return self.downloadable_ebooks
def visit_url(self, url=None, further_descend=True):
    """Fetch one OPDS page, record downloadable ebooks found in it and
    collect links to further pages.

    :param url: absolute URL of the page to fetch
    :param further_descend: when True, recursively descend into found links
    :return: the list of follow-up links harvested from this page
    """
    print("    URL: '%s'" % (url))
    # BUGFIX: the page was fetched twice back-to-back and the first
    # response discarded; open it once only.
    response = self.browser.open(url)
    self.save_response(response)
    content = response.get_data()
    links_to_visit = []
    if response.code != 200:
        # NOTE(review): execution deliberately continues here; the parse
        # below then operates on whatever error page was returned.
        print("Something horrible happened (RC %s)" % (response.code))
    root = fromstring(content)
    entrylist = root.xpath("//entry")
    for entry in entrylist:
        # print " Entry: '%s'" % (tostring(entry, pretty_print=True).strip())
        idtag = entry.xpath('id')[0]
        if idtag is not None:
            # print " Id: '%s'\n" % (tostring(idtag, pretty_print=True).strip())
            contents = idtag.text_content()
            # print " Id content: '%s' / '%s'" % (idtag, contents)
            # Private-shelf and "alle" overview entries are follow-up pages.
            # (The urn prefixes are mutually exclusive, so merging the two
            # original branches preserves behavior.)
            match = (re.match('urn:beam-ebooks:private', contents) or
                     re.match('urn:beam-ebooks:alle', contents))
            if match:
                href = self.extract_link(entry)
                if href:
                    href = norms(href)
                    print(" Seems to be a followable link ('%s')" % (href))
                    links_to_visit.append(href)
            match = re.match('urn:beam-ebooks:titelnr:', contents)
            if match:
                (href, mimetype) = self.extract_link(entry)
                if href:
                    href = norms(href)
                    # Raw string: the original '\/' escape is deprecated;
                    # '/' needs no escaping in a regex.
                    match = re.search(r'/download\.php5\?.*$', href)
                    if match:
                        print(" Seems to be an ebook ('%s', '%s')" % (mimetype, href))
                        data = {}
                        data['urn'] = contents
                        data['href'] = href
                        data['mimetype'] = mimetype
                        # urn shape: urn:beam-ebooks:titelnr:<id>
                        foo = re.split(':', contents)
                        data['id'] = foo[3]
                        self.downloadable_ebooks.append(data)
                    else:
                        print(" Seems to be a followable link ('%s')" % (href))
                        links_to_visit.append(href)
    # Finally, visit all pages that we encountered
    if further_descend:
        for link in links_to_visit:
            link = norms(link)
            self.recursive_descent(link)
    # In any case, return the links we had to visit...
    return links_to_visit
def visit_url(self, url=None, further_descend=True):
    """Fetch one OPDS page, record downloadable ebooks found in it and
    collect links to further pages.

    :param url: absolute URL of the page to fetch
    :param further_descend: when True, recursively descend into found links
    :return: the list of follow-up links harvested from this page
    """
    print("    URL: '%s'" % (url))
    # BUGFIX: the page was fetched twice back-to-back and the first
    # response discarded; open it once only.
    response = self.browser.open(url)
    self.save_response(response)
    content = response.get_data()
    links_to_visit = []
    if response.code != 200:
        # NOTE(review): execution deliberately continues here; the parse
        # below then operates on whatever error page was returned.
        print("Something horrible happened (RC %s)" % (response.code))
    root = fromstring(content)
    entrylist = root.xpath("//entry")
    for entry in entrylist:
        # print " Entry: '%s'" % (tostring(entry, pretty_print=True).strip())
        idtag = entry.xpath('id')[0]
        if idtag is not None:
            # print " Id: '%s'\n" % (tostring(idtag, pretty_print=True).strip())
            contents = idtag.text_content()
            # print " Id content: '%s' / '%s'" % (idtag, contents)
            # Private-shelf and "alle" overview entries are follow-up pages.
            # (The urn prefixes are mutually exclusive, so merging the two
            # original branches preserves behavior.)
            match = (re.match('urn:beam-ebooks:private', contents) or
                     re.match('urn:beam-ebooks:alle', contents))
            if match:
                href = self.extract_link(entry)
                if href:
                    href = norms(href)
                    print(" Seems to be a followable link ('%s')" % (href))
                    links_to_visit.append(href)
            match = re.match('urn:beam-ebooks:titelnr:', contents)
            if match:
                (href, mimetype) = self.extract_link(entry)
                if href:
                    href = norms(href)
                    # Raw string: the original '\/' escape is deprecated;
                    # '/' needs no escaping in a regex.
                    match = re.search(r'/download\.php5\?.*$', href)
                    if match:
                        print(" Seems to be an ebook ('%s', '%s')" % (mimetype, href))
                        data = {}
                        data['urn'] = contents
                        data['href'] = href
                        data['mimetype'] = mimetype
                        # urn shape: urn:beam-ebooks:titelnr:<id>
                        foo = re.split(':', contents)
                        data['id'] = foo[3]
                        self.downloadable_ebooks.append(data)
                    else:
                        print(" Seems to be a followable link ('%s')" % (href))
                        links_to_visit.append(href)
    # Finally, visit all pages that we encountered
    if further_descend:
        for link in links_to_visit:
            link = norms(link)
            self.recursive_descent(link)
    # In any case, return the links we had to visit...
    return links_to_visit