def test_duplicate_download(self):
    # Requesting the same URL twice is expected to leave two files in the
    # temporary directory, 'foo.pdf' and 'foo_1.pdf', with equal content.
    HTTPretty.register_uri(HTTPretty.GET, "http://foo.org/foo.pdf",
                           body=self._read_file('text1.txt'))

    main(self.options, 'http://foo.org/foo.pdf', 'http://foo.org/foo.pdf')

    # os.listdir() returns entries in arbitrary order, so sort them before
    # comparing against the expected (sorted) list of names.
    self.assertEqual(sorted(listdir(self.temp_dir)), ['foo.pdf', 'foo_1.pdf'])

    first_path = os.path.join(self.temp_dir, 'foo.pdf')
    second_path = os.path.join(self.temp_dir, 'foo_1.pdf')
    with open(first_path) as f1, open(second_path) as f2:
        self.assertEqual(f1.read(), f2.read())
def simple_multiple_resource_download_test(self):
    # Two URLs are passed to main(): one whose file name contains a space and
    # one carrying a query string. The test expects them to be stored as
    # 'foo 1.pdf' and 'foo2.pdf' with the content of the matching fixture.
    served = (
        ("http://foo.org/foo%201.pdf", 'text1.txt'),
        ("http://foo.org/foo2.pdf?aaa=b", 'text2.txt'),
    )
    for url, fixture in served:
        HTTPretty.register_uri(HTTPretty.GET, url,
                               body=self._read_file(fixture))

    main(self.options,
         'http://foo.org/foo 1.pdf',
         'http://foo.org/foo2.pdf?aaa=b')

    downloaded = self._get_downloaded_files()
    self.assertEqual(downloaded, ['foo 1.pdf', 'foo2.pdf'])

    expected_content = (
        ('foo 1.pdf', 'text1.txt'),
        ('foo2.pdf', 'text2.txt'),
    )
    for saved_name, fixture in expected_content:
        self.assertTrue(self._same_content(saved_name, fixture))
def dynamic_url_downloads_test(self):
    # A single URL template with two ranges ({1:2} and {6:7}) is passed to
    # main() with the "%INDEX-%FULLNAME" file name model. The test expects
    # four files, named with a running index prefix.
    self.options.filename_model = "%INDEX-%FULLNAME"

    served = (
        ("http://foo.org/foo1.pdf?aaa=6", 'text1.txt'),
        ("http://foo.org/foo1.pdf?aaa=7", 'text1.txt'),
        ("http://foo.org/foo2.pdf?aaa=6", 'text2.txt'),
        ("http://foo.org/foo2.pdf?aaa=7", 'text2.txt'),
    )
    for url, fixture in served:
        HTTPretty.register_uri(HTTPretty.GET, url,
                               body=self._read_file(fixture))

    main(self.options, 'http://foo.org/foo{1:2}.pdf?aaa={6:7}')

    expected_content = (
        ('1-foo1.pdf', 'text1.txt'),
        ('2-foo1.pdf', 'text1.txt'),
        ('3-foo2.pdf', 'text2.txt'),
        ('4-foo2.pdf', 'text2.txt'),
    )
    downloaded = self._get_downloaded_files()
    self.assertEqual(downloaded,
                     ['1-foo1.pdf', '2-foo1.pdf', '3-foo2.pdf', '4-foo2.pdf'])
    for saved_name, fixture in expected_content:
        self.assertTrue(self._same_content(saved_name, fixture))
def dynamic_directory_generation_test(self):
    # The destination directory is given as the template "%HOST/series-%1".
    # The test expects one directory per host, each holding 'series-1' and
    # 'series-2' sub-directories that contain the downloaded 'file.pdf'.
    self.options.destination_directory = os.path.join(self.temp_dir,
                                                      "%HOST/series-%1")

    served_urls = (
        "http://foo.org/bar-1/file.pdf",
        "http://foo.org/bar-2/file.pdf",
        "http://baz.net/bar-1/file.pdf",
        "http://baz.net/bar-2/file.pdf",
    )
    for url in served_urls:
        HTTPretty.register_uri(HTTPretty.GET, url,
                               body=self._read_file('text1.txt'))

    main(self.options,
         'http://foo.org/bar-{1:2}/file.pdf',
         'http://baz.net/bar-{1:2}/file.pdf')

    self.assertEqual(self._get_downloaded_files(), ['baz.net', 'foo.org'])

    # os.listdir() returns entries in arbitrary order, so sort them before
    # comparing against the expected (sorted) list of names.
    self.assertEqual(sorted(listdir(os.path.join(self.temp_dir, 'baz.net'))),
                     ['series-1', 'series-2'])
    self.assertEqual(sorted(listdir(os.path.join(self.temp_dir, 'foo.org'))),
                     ['series-1', 'series-2'])

    saved_paths = (
        'foo.org/series-1/file.pdf',
        'foo.org/series-2/file.pdf',
        'baz.net/series-1/file.pdf',
        'baz.net/series-2/file.pdf',
    )
    for saved_path in saved_paths:
        self.assertTrue(self._same_content(saved_path, 'text1.txt'))
def inner_resources_download_test_with_offset(self):
    # Runs main() over two section pages with the search query
    # 'div.recursiveTest a', the "%INDEX-%FULLNAME" file name model and
    # options.offset set to 2, then checks the list of downloaded files.

    # main command line URLs set
    main_pages = (
        ("http://foo.org/section-1/download.html", 'page.html'),
        ("http://foo.org/section-2/download.html", 'page1.html'),
    )
    for url, fixture in main_pages:
        HTTPretty.register_uri(HTTPretty.GET, url,
                               body=self._read_file(fixture))

    options = self.options
    options.filename_model = "%INDEX-%FULLNAME"
    options.search_queries = ['div.recursiveTest a']
    options.offset = 2

    # resources found inside main pages; page3 is served with a 404 status
    inner_resources = (
        ("http://recursive.org/page1.html", 'text1.txt', {}),
        ("http://recursive.org/page2.html", 'text2.txt', {}),
        ("http://recursive.org/page3.html", 'notfound.html', {'status': 404}),
        ("http://recursive.org/text1.txt", 'text1.txt', {}),
        ("http://recursive.org/text2.txt", 'text2.txt', {}),
    )
    for url, fixture, extra in inner_resources:
        HTTPretty.register_uri(HTTPretty.GET, url,
                               body=self._read_file(fixture), **extra)

    main(options, 'http://foo.org/section-{1:2}/download.html')

    downloaded = self._get_downloaded_files()
    self.assertEqual(downloaded, ['4-text1.txt', '5-text2.txt'])
def simple_one_download_test(self):
    # A single URL is passed to main(); the test expects exactly one
    # downloaded file, 'main.html', matching the 'page.html' fixture.
    page_body = self._read_file('page.html')
    HTTPretty.register_uri(HTTPretty.GET, "http://foo.org/main.html",
                           body=page_body)

    main(self.options, 'http://foo.org/main.html')

    downloaded = self._get_downloaded_files()
    self.assertEqual(downloaded, ['main.html'])
    content_matches = self._same_content('main.html', 'page.html')
    self.assertTrue(content_matches)
def test_duplicate_check_before_download(self):
    # With options.duplicate_check enabled, passing the same URL twice is
    # expected to leave a single 'foo.pdf' in the temporary directory.
    self.options.duplicate_check = True

    url = 'http://foo.org/foo.pdf'
    fixture_body = self._read_file('text1.txt')
    HTTPretty.register_uri(HTTPretty.GET, url, body=fixture_body)

    main(self.options, url, url)

    stored_files = listdir(self.temp_dir)
    self.assertEqual(stored_files, ['foo.pdf'])