def test_get_relative_url(self):
    """Check uu.get_relative_url against several (url, base) pairs."""
    # Each row is (url, base, expected). The first two rows differ only by
    # a trailing slash on the base; the last row uses a base that is not a
    # prefix of the url and expects the url back unchanged.
    cases = (
        ('/home/bart/foo/bar.txt', '/home/bart', 'foo/bar.txt'),
        ('/home/bart/foo/bar.txt', '/home/bart/', 'foo/bar.txt'),
        ('/foo/bar.txt', '/', 'foo/bar.txt'),
        ('/foo/bar.txt', '/biz', '/foo/bar.txt'),
    )
    for url, base, expected in cases:
        self.assertEqual(uu.get_relative_url(url, base), expected)
def download_entry(self, entry, path):
    """Download the single page of ``entry`` into the directory ``path``.

    The local file name comes from ``get_safe_local_id(entry.url)``. After
    the download, ``entry.local_paths`` is set to a one-element list holding
    the result of ``get_relative_url`` for the new file, and
    ``entry.downloaded`` is set to ``True``.
    """
    source_url = entry.url
    file_name = get_safe_local_id(source_url)
    destination = os.path.join(path, file_name)

    download_file(source_url, destination)

    entry.local_paths = [get_relative_url(destination)]
    entry.downloaded = True
def parse_page(self, page_local_path, page_url, parse_refs=True):
    """Create and save a Page for ``page_local_path``, then process it.

    The page's ``file_path`` is ``get_relative_url(get_path(page_local_path))``
    and it is attached to ``self.document``. A ``ParserLoad`` carrying the
    ``parse_refs`` flag is passed to ``self._build_code_words`` and then,
    with the page, to ``self._process_page``.

    Any ``Exception`` raised by these steps is caught and reported through
    ``print_exc()``; it is not re-raised.
    """
    try:
        local_path = get_path(page_local_path)
        file_path = get_relative_url(local_path)

        new_page = Page(
            url=page_url,
            file_path=file_path,
            document=self.document,
        )
        new_page.save()

        parser_load = ParserLoad()
        parser_load.parse_refs = parse_refs

        self._build_code_words(parser_load)
        self._process_page(new_page, parser_load)
    except Exception:
        # Best-effort: report the failure and carry on.
        print_exc()
def download_entry(self, entry, path):
    """Download every page of ``entry`` into the directory ``path``.

    Starting from ``entry.url``, each page is saved under a name built by
    ``get_safe_local_id`` with a ``_page<N>`` suffix (N counts from 0). The
    saved file is then loaded with ``download_html_tree`` and handed to
    ``self._get_next_entry_url`` to find the following page; the loop ends
    when that method returns ``None``.

    On completion ``entry.downloaded`` is set to ``True`` and
    ``entry.local_paths`` to the list of ``get_relative_url`` results, in
    download order.
    """
    saved_paths = []
    current_url = entry.url
    page_index = 0

    while current_url is not None:
        suffix = '_page{0}'.format(page_index)
        file_name = get_safe_local_id(current_url, suffix)
        destination = os.path.join(path, file_name)

        download_file(current_url, destination)
        saved_paths.append(get_relative_url(destination))

        # The freshly saved file is parsed to look for the next page.
        html_tree = download_html_tree(destination)
        page_index += 1
        current_url = self._get_next_entry_url(
            current_url, page_index, html_tree)

    entry.downloaded = True
    entry.local_paths = saved_paths
def download_entry(self, entry, path):
    """Fetch all pages of ``entry`` into ``path`` and record their paths.

    Pages are followed from ``entry.url`` until
    ``self._get_next_entry_url`` returns ``None``. Each page is stored under
    ``get_safe_local_id(url, "_page<N>")`` inside ``path``, with N starting
    at 0. Afterwards ``entry.downloaded`` is ``True`` and
    ``entry.local_paths`` lists the ``get_relative_url`` value of every
    stored page, in order.
    """
    collected = []
    url, page_number = entry.url, 0

    while url is not None:
        local_id = get_safe_local_id(url, "_page{0}".format(page_number))
        target = os.path.join(path, local_id)
        download_file(url, target)

        relative = get_relative_url(target)
        collected.append(relative)

        # Load the saved page so the next URL can be looked up in it.
        page_tree = download_html_tree(target)
        page_number += 1
        url = self._get_next_entry_url(url, page_number, page_tree)

    entry.downloaded = True
    entry.local_paths = collected
def test_get_relative_url(self):
    """Verify uu.get_relative_url for matching and non-matching bases."""
    # Each pair is ((url, base), expected result).
    expectations = [
        # Base with and without a trailing slash gives the same result.
        (("/home/bart/foo/bar.txt", "/home/bart"), "foo/bar.txt"),
        (("/home/bart/foo/bar.txt", "/home/bart/"), "foo/bar.txt"),
        # Root as the base.
        (("/foo/bar.txt", "/"), "foo/bar.txt"),
        # Base that is not a prefix of the url: url comes back unchanged.
        (("/foo/bar.txt", "/biz"), "/foo/bar.txt"),
    ]
    for arguments, expected in expectations:
        actual = uu.get_relative_url(*arguments)
        self.assertEqual(actual, expected)