Example #1
0
 def test_get_local_url(self):
     # Each case is (output root, remote URL, expected local path); the
     # URLs end in a file name, in a slash, and at the bare host.
     cases = (
         ('/home/bart/doc', 'http://doc.com/foo/bar.txt',
          '/home/bart/doc/foo/bar.txt'),
         ('/home/bart/doc/', 'http://doc.com/foo/bar/',
          '/home/bart/doc/foo/bar/bar__root.html'),
         ('/home/bart/doc', 'http://doc.com/',
          '/home/bart/doc/root__root.html'),
     )
     for root, remote, expected in cases:
         self.assertEqual(uu.get_local_url(root, remote), expected)
Example #2
0
 def page_error(self, input_url, pages):
     """Register a placeholder page for a URL that could not be downloaded.

     The failure is logged and a ``DocumentPage`` built with ``None`` and an
     empty list is stored in ``pages`` under the local URL computed from
     ``input_url`` (after passing it through ``get_url_without_hash``).

     :param input_url: URL of the page that failed to download.
     :param pages: mapping keyed by local URL; modified in place.
     """
     local_url = get_local_url(self.output_url,
                               get_url_without_hash(input_url))
     self.logger.info(
         'This page could not be downloaded: {0} in {1}'.format(
             input_url, local_url))
     # Build the page from the failing URL, not from the ``input`` builtin.
     error_page = DocumentPage(input_url, None, [])
     pages[local_url] = error_page
Example #3
0
 def page_error(self, input_url, pages):
     """Log a failed download and store a placeholder page for it.

     The local URL is derived from ``self.output_url`` and ``input_url``
     (run through ``get_url_without_hash`` first); a ``DocumentPage`` built
     with ``None`` and an empty list is stored in ``pages`` under that key.

     :param input_url: URL of the page that could not be downloaded.
     :param pages: mapping keyed by local URL; updated in place.
     """
     local_url = get_local_url(self.output_url,
                               get_url_without_hash(input_url))
     self.logger.info(
         'This page could not be downloaded: {0} in {1}'.format(
             input_url, local_url))
     # The page must carry the failing URL; ``input`` is the Python builtin
     # function and was never the intended argument.
     error_page = DocumentPage(input_url, None, [])
     pages[local_url] = error_page
Example #4
0
    def make_copy(self, url_to_copy, binary=False):
        """Copy ``url_to_copy`` to its local destination and return it.

        The destination is computed by ``get_local_url`` from
        ``self.output_url``; the resource is then fetched with
        ``download_file`` (``force=False``).

        :param url_to_copy: URL of the resource to copy.
        :param binary: forwarded unchanged to ``download_file``.
        :returns: the local destination URL.
        :raises Exception: if ``ensure_path_exists`` fails for the
            destination.
        """
        destination_url = get_local_url(self.output_url, url_to_copy)
        try:
            ensure_path_exists(destination_url)
        except Exception:
            # Only ordinary errors are wrapped; a bare ``except`` would also
            # turn KeyboardInterrupt/SystemExit into a generic Exception.
            raise Exception('Could not make copy of {0} in {1}'.format(
                url_to_copy, destination_url))

        download_file(url_to_copy, get_path_from_url(destination_url),
                      force=False, binary=binary)

        return destination_url
Example #5
0
    def make_copy(self, url_to_copy, binary=False):
        """Fetch ``url_to_copy`` into the output tree and return its local URL.

        :param url_to_copy: URL of the resource to copy.
        :param binary: passed through to ``download_file``.
        :returns: the local URL computed by ``get_local_url`` from
            ``self.output_url`` and ``url_to_copy``.
        :raises Exception: if ``ensure_path_exists`` fails for that local
            URL.
        """
        destination_url = get_local_url(self.output_url, url_to_copy)
        try:
            ensure_path_exists(destination_url)
        except Exception:
            # Deliberately not a bare ``except``: interpreter-exit signals
            # such as KeyboardInterrupt and SystemExit must propagate
            # untouched instead of being rewrapped as a generic Exception.
            raise Exception('Could not make copy of {0} in {1}'.format(
                url_to_copy, destination_url))

        download_file(url_to_copy,
                      get_path_from_url(destination_url),
                      force=False,
                      binary=binary)

        return destination_url
Example #6
0
 def process_page_links(self, tree, local_url, url):
     """Build a ``DocumentLink`` for every link tag in ``tree`` with an href.

     Each href is joined with ``url`` and passed through
     ``get_url_without_hash``; the result is paired with the sanitized local
     URL computed for it from ``self.output_url``.  ``local_url`` is
     accepted but not used here.

     :returns: list of ``DocumentLink`` objects, in tag order.
     """
     collected = []
     for tag in self.links(tree):
         attrs = tag.attrib
         if 'href' not in attrs:
             # Tags without an href contribute nothing.
             continue
         target = get_url_without_hash(urlparse.urljoin(url, attrs['href']))
         local_target = get_sanitized_url(
             get_local_url(self.output_url, target))
         collected.append(DocumentLink(target, local_target))
     return collected
Example #7
0
 def process_page_links(self, tree, local_url, url):
     """Return ``DocumentLink`` objects for the href-bearing link tags.

     Tags come from ``self.links(tree)``; those without an ``href``
     attribute are skipped.  ``local_url`` is not used by this method.
     """
     def build_link(href):
         # Resolve the href against the page URL, then pair it with the
         # sanitized local URL under self.output_url.
         absolute = get_url_without_hash(urlparse.urljoin(url, href))
         local_copy = get_sanitized_url(
             get_local_url(self.output_url, absolute))
         return DocumentLink(absolute, local_copy)

     attribute_maps = (tag.attrib for tag in self.links(tree))
     return [build_link(attrs['href'])
             for attrs in attribute_maps
             if 'href' in attrs]
Example #8
0
 def test_get_local_url(self):
     # Covers a URL ending in a file name, one ending in a slash, and the
     # bare host URL, with and without a trailing slash on the output root.
     to_local = uu.get_local_url

     file_result = to_local("/home/bart/doc", "http://doc.com/foo/bar.txt")
     self.assertEqual(file_result, "/home/bart/doc/foo/bar.txt")

     slash_result = to_local("/home/bart/doc/", "http://doc.com/foo/bar/")
     self.assertEqual(slash_result, "/home/bart/doc/foo/bar/bar__root.html")

     host_result = to_local("/home/bart/doc", "http://doc.com/")
     self.assertEqual(host_result, "/home/bart/doc/root__root.html")