예제 #1
0
 def test_get_relative_url(self):
     """Check uu.get_relative_url for several (url, base) combinations.

     Covers a base that prefixes the URL (with and without a trailing
     slash), the root base '/', and a base that does not prefix the URL,
     for which the URL is expected back unchanged.
     """
     # (url, base, expected) triples, checked in this order.
     cases = (
         ('/home/bart/foo/bar.txt', '/home/bart', 'foo/bar.txt'),
         ('/home/bart/foo/bar.txt', '/home/bart/', 'foo/bar.txt'),
         ('/foo/bar.txt', '/', 'foo/bar.txt'),
         ('/foo/bar.txt', '/biz', '/foo/bar.txt'),
     )
     for url, base, expected in cases:
         self.assertEqual(uu.get_relative_url(url, base), expected)
예제 #2
0
 def download_entry(self, entry, path):
     """Download the entry's URL under `path` and record it on the entry.

     The file is written to `path` joined with the id returned by
     get_safe_local_id(entry.url). Afterwards entry.local_paths is a
     one-element list holding get_relative_url() of that location, and
     entry.downloaded is set to True.
     """
     destination = os.path.join(path, get_safe_local_id(entry.url))
     download_file(entry.url, destination)
     entry.local_paths = [get_relative_url(destination)]
     entry.downloaded = True
예제 #3
0
 def download_entry(self, entry, path):
     """Fetch a single-page entry into `path` and mark it as downloaded.

     :param entry: object providing `url`; its `local_paths` and
         `downloaded` attributes are assigned here.
     :param path: directory under which the downloaded file is placed.
     """
     source_url = entry.url
     local_id = get_safe_local_id(source_url)
     target = os.path.join(path, local_id)
     download_file(entry.url, target)

     # Only the path returned by get_relative_url is stored on the entry.
     stored_paths = [get_relative_url(target)]
     entry.local_paths = stored_paths
     entry.downloaded = True
예제 #4
0
    def parse_page(self, page_local_path, page_url, parse_refs=True):
        """Create and save a Page for a local file, then process it.

        Any Exception raised along the way is reported with print_exc()
        and not re-raised.

        :param page_local_path: local location of the page; it is passed
            through get_path() and get_relative_url() to obtain the
            Page's file_path.
        :param page_url: stored as the Page's url.
        :param parse_refs: copied onto the ParserLoad handed to the
            processing steps.
        """
        try:
            local_path = get_path(page_local_path)
            file_path = get_relative_url(local_path)
            new_page = Page(url=page_url, file_path=file_path,
                    document=self.document)
            new_page.save()

            parser_load = ParserLoad()
            parser_load.parse_refs = parse_refs

            self._build_code_words(parser_load)
            self._process_page(new_page, parser_load)
        except Exception:
            # Best-effort: print the traceback and return normally.
            print_exc()
예제 #5
0
    def parse_page(self, page_local_path, page_url, parse_refs=True):
        """Register one downloaded page and run the parsing steps on it.

        A Page is built from `page_url`, the relative form of
        `page_local_path` and self.document, then saved. A fresh
        ParserLoad carrying `parse_refs` is passed to
        self._build_code_words and, with the page, to self._process_page.

        Exceptions are not propagated: they are printed via print_exc().
        """
        try:
            resolved = get_path(page_local_path)
            relative = get_relative_url(resolved)

            page_kwargs = {
                'url': page_url,
                'file_path': relative,
                'document': self.document,
            }
            saved_page = Page(**page_kwargs)
            saved_page.save()

            state = ParserLoad()
            state.parse_refs = parse_refs

            self._build_code_words(state)
            self._process_page(saved_page, state)
        except Exception:
            # Failures on one page are reported but never raised to the caller.
            print_exc()
예제 #6
0
    def download_entry(self, entry, path):
        """Download every page of a multi-page entry into `path`.

        Starts at entry.url. Each page is saved under an id built by
        get_safe_local_id from its URL and a '_page<N>' suffix (N starts
        at 0). After each page, self._get_next_entry_url() is asked for
        the next URL; the loop stops when it returns None.

        On completion entry.downloaded is True and entry.local_paths
        lists get_relative_url() of each saved file, in download order.
        """
        saved_paths = []
        page_index = 0
        current_url = entry.url

        while current_url is not None:
            suffix = '_page{0}'.format(page_index)
            target = os.path.join(path, get_safe_local_id(current_url, suffix))
            download_file(current_url, target)
            saved_paths.append(get_relative_url(target))

            # The tree of the file just saved is what the next-URL lookup
            # receives, together with the already incremented page index.
            page_tree = download_html_tree(target)
            page_index += 1
            current_url = self._get_next_entry_url(
                current_url, page_index, page_tree)

        entry.downloaded = True
        entry.local_paths = saved_paths
예제 #7
0
    def download_entry(self, entry, path):
        """Follow an entry's chain of pages, saving each one under `path`.

        :param entry: object providing `url`; its `downloaded` and
            `local_paths` attributes are assigned once the chain ends.
        :param path: directory under which each page file is placed.

        The chain begins at entry.url and continues with whatever
        self._get_next_entry_url() returns, until that is None.
        """
        collected = []
        url = entry.url
        counter = 0

        while url is not None:
            # One file per page; the id carries the page number as a suffix.
            file_id = get_safe_local_id(url, "_page{0}".format(counter))
            saved_at = os.path.join(path, file_id)
            download_file(url, saved_at)
            collected.append(get_relative_url(saved_at))

            html_tree = download_html_tree(saved_at)
            counter += 1
            url = self._get_next_entry_url(url, counter, html_tree)

        entry.downloaded = True
        entry.local_paths = collected
예제 #8
0
파일: tests.py 프로젝트: bartdag/recodoc2
 def test_get_relative_url(self):
     """Exercise uu.get_relative_url with prefix and non-prefix bases."""
     nested = "/home/bart/foo/bar.txt"
     shallow = "/foo/bar.txt"

     # The base prefixes the URL, with and without a trailing slash.
     for base in ("/home/bart", "/home/bart/"):
         self.assertEqual(uu.get_relative_url(nested, base), "foo/bar.txt")

     # Root base.
     self.assertEqual(uu.get_relative_url(shallow, "/"), "foo/bar.txt")

     # "/biz" does not prefix the URL: the URL is expected back unchanged.
     self.assertEqual(uu.get_relative_url(shallow, "/biz"), shallow)