def test_warc_creation(self):
    """Check that running a DownloadThread grows the WARC archive on disk.

    The archive size is sampled before the thread runs (0 if the file is
    absent) and must be strictly larger afterwards. Every record of the
    resulting archive is then printed for manual inspection.
    """
    worker = down.DownloadThread(
        101,
        proxy_util.ip_check_url,
        proxy=fr_proxy,
        prox_loc=france,
        basepath=base_path,
    )
    host = urlparse(worker.url).netloc
    path_to_warc = "{}warcs/{}.warc.gz".format(worker.storage_path, host)

    # Snapshot the archive state before the download so growth can be measured.
    existed_before = os.path.exists(path_to_warc)
    size_before = os.path.getsize(path_to_warc) if existed_before else 0

    worker.start()
    worker.join()

    print("Path exists already: {}".format(existed_before))
    self.assertGreater(os.path.getsize(path_to_warc), size_before)
    print("Path exists already: {}".format(os.path.exists(path_to_warc)))

    # Dump header and payload of each record; offsets are not needed here.
    with warc.open(path_to_warc) as archive:
        for record, _offset, _leftover in archive.browse():
            print(str(record.header))
            print(str(record.payload.read()))
def test_class_with_china_proxy(self):
    """Run a DownloadThread through ``china_proxy`` and verify its outputs.

    Checks, in order, that the thread stored HTML, that it is no longer
    alive after ``join()``, that it produced a non-empty ``ipfs_hash``, and
    that ``downloader.ipfs_get`` returns a path that exists on disk for
    that hash.
    """
    print("\nTesting the functionality of the DownloadThread class:")
    thread = down.DownloadThread(
        1,
        proxy_util.ip_check_url,
        china_proxy,
        prox_loc=china,
        basepath=base_path,
    )
    thread.start()
    print(" Waiting for thread to join.")
    thread.join()
    print(" After join:\n" + str(thread.html))
    text = thread.html
    # The original format string had a single placeholder for two arguments,
    # so the error was silently dropped from the output; it now has its own
    # placeholder.
    print(
        " The originstamp_result of this thread: \n{}\n And the errors if any:\n{}"
        .format(thread.originstamp_result, str(thread.error)))
    self.assertIsNotNone(text, "None HTML was stored and processed.")

    print(" Testing whether thread is alive")
    # join() already returned above, so no second join is required here.
    self.assertFalse(thread.is_alive(), "Thread is still alive after join")

    ipfs_hash = thread.ipfs_hash
    self.assertIsNotNone(
        ipfs_hash, "The DownloadThread did not produce an ipfs_hash")
    # A falsy but non-None hash (e.g. an empty string) is a failure as well;
    # assert it with a message instead of raising a bare failureException.
    self.assertTrue(
        ipfs_hash, "The DownloadThread produced an empty ipfs_hash")

    file_path = downloader.ipfs_get(ipfs_hash)
    self.assertTrue(
        os.path.exists(file_path),
        "File not transmitted to ipfs, it cannot be fetched")
def test_get_one_proxy_if_not_set(self):
    """Run a DownloadThread given only ``prox_loc="DE"`` and no proxy.

    The run must finish with no error recorded and with HTML stored.
    """
    worker = down.DownloadThread(
        101,
        url,
        prox_loc="DE",
        basepath=base_path,
    )
    worker.start()
    worker.join()

    self.assertIsNone(worker.error)
    self.assertIsNotNone(worker.html)
def test_download_blocked_site(self):
    """Fetch ``blocked_url`` through ``china_proxy`` and expect an error.

    The thread's HTML is printed for inspection; the test passes only if the
    thread recorded an error.
    """
    # NOTE(review): unlike the sibling tests, no basepath is passed here, so
    # the thread uses whatever default DownloadThread defines - confirm this
    # is intended.
    worker = down.DownloadThread(
        101,
        blocked_url,
        proxy=china_proxy,
        prox_loc=china,
    )
    worker.start()
    worker.join()

    print(str(worker.html))
    self.assertIsNotNone(worker.error)
def test_thread_initialization(self):
    """Verify the state of a freshly constructed (not started) DownloadThread.

    Checks the stored url and thread id, that the per-thread download folder
    exists, and that the first PhantomJS service argument carries the proxy.
    """
    print("Test thread initialization:")
    worker = down.DownloadThread(1, url, proxy, basepath=base_path)

    self.assertEqual(worker.url, url)
    self.assertEqual(worker.threadID, 1)

    # NOTE(review): absolute, machine-specific path; presumably derived from
    # base_path - confirm and consider building it from base_path instead.
    expected_path = "/home/sebastian/testing-stw/temporary/1/"
    self.assertEqual(worker.path, expected_path)
    self.assertTrue(os.path.exists(expected_path))
    print(" Download folder was created.")

    service_args = worker.phantom.service.service_args
    print(service_args)
    expected_proxy_arg = "--proxy={}".format(proxy)
    self.assertEqual(service_args[0], expected_proxy_arg)
    print(" Proxy is set correctly to " + proxy + ".")

    # Overwrite the browser name capability and print the resulting dict.
    capabilities = worker.phantom.capabilities
    capabilities["browserName"] = "Mozilla/5.0"
    print(str(worker.phantom.capabilities))
def test_phantom_proxy(self):
    """Check that a page fetched through a random proxy mentions the proxy host.

    A proxy is obtained from ``proxy_util.get_one_random_proxy()``, its
    country is looked up from the host part, and
    ``proxy_util.ip_check_url`` is downloaded through it. The test passes
    when the host part of the proxy address occurs in the downloaded HTML.
    """
    # The location returned alongside the proxy is not used; the country is
    # looked up from the proxy host instead.
    _, this_proxy = proxy_util.get_one_random_proxy()
    # Host part of "host:port"; computed once and reused below.
    proxy_host = this_proxy.split(":")[0]
    country = proxy_util.ip_lookup_country(proxy_host)
    print(proxy_host)

    thread = down.DownloadThread(
        101,
        proxy_util.ip_check_url,
        proxy=this_proxy,
        prox_loc=country,
        basepath=base_path,
    )
    thread.start()
    thread.join()

    print(thread.html)
    print(str(thread.error) + " | Was the error")
    print(thread.phantom.service.service_args)

    # Fail with a clear message instead of an AttributeError on None when
    # the download produced no HTML.
    self.assertIsNotNone(
        thread.html,
        "No HTML was downloaded; error was: {}".format(thread.error))

    position = thread.html.find(proxy_host)
    print(position)
    self.assertNotEqual(
        -1, position,
        "Proxy host {} not found in the downloaded page".format(proxy_host))
def test_load_images(self):
    """Check that ``_load_images`` returns two entries for the fixture HTML.

    The thread is built with the ``html`` fixture and never started; the
    same fixture is parsed with the "lxml" parser and passed to
    ``_load_images``.
    """
    print("\nTesting the load_images method")
    worker = down.DownloadThread(2, url, html=html, basepath=base_path)

    parsed = Bs(html, "lxml")
    found = worker._load_images(parsed)

    self.assertEqual(len(found), 2)