Beispiel #1
0
 def page_error(self, input_url, pages):
     """Record a failed download of *input_url* as an error page in *pages*.

     The error page is stored under the local URL derived from
     ``self.output_url`` so later lookups by local URL still resolve.

     :param input_url: the remote URL that could not be downloaded
     :param pages: dict mapping local URL -> DocumentPage, mutated in place
     """
     local_url = get_local_url(self.output_url,
             get_url_without_hash(input_url))
     self.logger.info(
         'This page could not be downloaded: {0} in {1}'.format(
             input_url, local_url))
     # Bug fix: the original passed the *builtin* ``input`` (a function
     # object) instead of the failed URL string.
     error_page = DocumentPage(input_url, None, [])
     pages[local_url] = error_page
Beispiel #2
0
 def page_error(self, input_url, pages):
     """Record a failed download of *input_url* as an error page in *pages*.

     The error page is stored under the local URL derived from
     ``self.output_url`` so later lookups by local URL still resolve.

     :param input_url: the remote URL that could not be downloaded
     :param pages: dict mapping local URL -> DocumentPage, mutated in place
     """
     local_url = get_local_url(self.output_url,
                               get_url_without_hash(input_url))
     self.logger.info(
         'This page could not be downloaded: {0} in {1}'.format(
             input_url, local_url))
     # Bug fix: the original passed the *builtin* ``input`` (a function
     # object) instead of the failed URL string.
     error_page = DocumentPage(input_url, None, [])
     pages[local_url] = error_page
Beispiel #3
0
    def process_page(self, url):
        """Copy *url* locally, parse its HTML, and build a DocumentPage.

        :param url: remote page URL to process
        :returns: DocumentPage holding the remote URL, the local copy's
            URL, and the links extracted from the page
        """
        self.logger.info("Processing page: " + url)
        local_url = self.make_copy(get_url_without_hash(url))

        # Fix: close the handle even if read() raises — the original
        # leaked the open connection on a read error.
        local_page = urllib2.urlopen(local_url)
        try:
            content = local_page.read()
        finally:
            local_page.close()

        # Detect the encoding from the raw bytes before parsing.
        parser = etree.HTMLParser(encoding=get_encoding(content))
        tree = etree.fromstring(content, parser)

        links = self.process_page_links(tree, local_url, url)
        # Image processing mutates the tree / state only; no return value used.
        self.process_page_imgs(tree, url)

        page = DocumentPage(url, local_url, links)

        return page
Beispiel #4
0
 def process_page_links(self, tree, local_url, url):
     """Collect a DocumentLink for every anchor with an href in *tree*.

     Each href is resolved against *url*, stripped of its fragment, and
     mapped to a sanitized local URL under ``self.output_url``.

     :param tree: parsed HTML tree to scan for link tags
     :param local_url: local copy's URL (kept for interface compatibility)
     :param url: base URL used to resolve relative hrefs
     :returns: list of DocumentLink objects
     """
     links = []
     for link_tag in self.links(tree):
         # Anchors without an href attribute are skipped.
         # (Removed the dead ``href = ''`` default and the redundant
         # ``else: continue`` from the original.)
         href = link_tag.attrib.get('href')
         if href is None:
             continue
         link_url = get_url_without_hash(urlparse.urljoin(url, href))
         local_url_to = get_local_url(self.output_url, link_url)
         local_url_to = get_sanitized_url(local_url_to)
         links.append(DocumentLink(link_url, local_url_to))
     return links
Beispiel #5
0
    def process_page(self, url):
        """Copy *url* locally, parse its HTML, and build a DocumentPage.

        :param url: remote page URL to process
        :returns: DocumentPage holding the remote URL, the local copy's
            URL, and the links extracted from the page
        """
        self.logger.info("Processing page: " + url)
        local_url = self.make_copy(get_url_without_hash(url))

        # Fix: close the handle even if read() raises — the original
        # leaked the open connection on a read error.
        local_page = urllib2.urlopen(local_url)
        try:
            content = local_page.read()
        finally:
            local_page.close()

        # Detect the encoding from the raw bytes before parsing.
        parser = etree.HTMLParser(encoding=get_encoding(content))
        tree = etree.fromstring(content, parser)

        links = self.process_page_links(tree, local_url, url)
        # Image processing mutates the tree / state only; no return value used.
        self.process_page_imgs(tree, url)

        page = DocumentPage(url, local_url, links)

        return page
Beispiel #6
0
 def process_page_links(self, tree, local_url, url):
     """Collect a DocumentLink for every anchor with an href in *tree*.

     Each href is resolved against *url*, stripped of its fragment, and
     mapped to a sanitized local URL under ``self.output_url``.

     :param tree: parsed HTML tree to scan for link tags
     :param local_url: local copy's URL (kept for interface compatibility)
     :param url: base URL used to resolve relative hrefs
     :returns: list of DocumentLink objects
     """
     links = []
     for link_tag in self.links(tree):
         # Anchors without an href attribute are skipped.
         # (Removed the dead ``href = ''`` default and the redundant
         # ``else: continue`` from the original.)
         href = link_tag.attrib.get('href')
         if href is None:
             continue
         link_url = get_url_without_hash(urlparse.urljoin(url, href))
         local_url_to = get_local_url(self.output_url, link_url)
         local_url_to = get_sanitized_url(local_url_to)
         links.append(DocumentLink(link_url, local_url_to))
     return links
Beispiel #7
0
 def get_url_without_hash(self):
     """get_url_without_hash strips the fragment and leaves the rest intact."""
     # Bug fix: the expected host must match the input host ('yo.com');
     # the original expected 'you.com', so these assertions could never pass.
     self.assertEqual(uu.get_url_without_hash("http://www.yo.com/foo#bar"), "http://www.yo.com/foo")
     self.assertEqual(uu.get_url_without_hash("http://www.yo.com/foo"), "http://www.yo.com/foo")
Beispiel #8
0
 def get_url_without_hash(self):
     """get_url_without_hash strips the fragment and leaves the rest intact."""
     # Bug fix: the expected host must match the input host ('yo.com');
     # the original expected 'you.com', so these assertions could never pass.
     self.assertEqual(uu.get_url_without_hash('http://www.yo.com/foo#bar'),
                      'http://www.yo.com/foo')
     self.assertEqual(uu.get_url_without_hash('http://www.yo.com/foo'),
                      'http://www.yo.com/foo')