def test_warc_creation(self):
        """Check that running a DownloadThread creates or grows its WARC file.

        The expected archive path is built from the thread's ``storage_path``
        and the host part of its URL. The archive size is sampled before the
        thread runs (0 when the file does not exist yet) and must be strictly
        larger afterwards. Each record's header and payload are then printed
        for manual inspection.
        """
        thread = down.DownloadThread(101,
                                     proxy_util.ip_check_url,
                                     proxy=fr_proxy,
                                     prox_loc=france,
                                     basepath=base_path)

        path_to_warc = "{}warcs/{}.warc.gz".format(thread.storage_path,
                                                   urlparse(thread.url).netloc)
        existed_before = os.path.exists(path_to_warc)
        size_before = os.path.getsize(path_to_warc) if existed_before else 0

        thread.start()
        thread.join()

        print("Path exists already: {}".format(existed_before))
        # Fail with a readable message instead of letting getsize() raise an
        # OSError when the thread never produced the archive.
        self.assertTrue(os.path.exists(path_to_warc),
                        "No WARC file was created at {}".format(path_to_warc))
        self.assertGreater(os.path.getsize(path_to_warc), size_before)
        print("Path exists already: {}".format(os.path.exists(path_to_warc)))
        with warc.open(path_to_warc) as warc_file:
            for record, offset, leftover in warc_file.browse():
                print(str(record.header))
                print(str(record.payload.read()))
    def test_class_with_china_proxy(self):
        """Run a full DownloadThread through the China proxy and check its results.

        After the thread has finished, the test requires that HTML was stored,
        that the thread is no longer alive, that an ``ipfs_hash`` was produced,
        and that the file fetched via ``downloader.ipfs_get`` exists on disk.
        """
        print("\nTesting the functionality of the DownloadThread class:")
        thread = down.DownloadThread(1,
                                     proxy_util.ip_check_url,
                                     china_proxy,
                                     prox_loc=china,
                                     basepath=base_path)
        thread.start()
        print("    Waiting for thread to join.")
        thread.join()
        print("    After join:\n" + str(thread.html))
        text = thread.html

        # The second placeholder was missing, so the error was silently
        # dropped from the output even though it was passed to format().
        print(
            "    The originstamp_result of this thread: \n{}\n And the errors if any:\n{}"
            .format(thread.originstamp_result, str(thread.error)))
        self.assertIsNotNone(text, "None HTML was stored and processed.")
        print("    Testing whether thread is alive")
        thread.join()
        self.assertFalse(thread.is_alive(), "Thread is still alive after join")
        ipfs_hash = thread.ipfs_hash
        self.assertIsNotNone(
            ipfs_hash, "The DownloadThread did not produce an ipfs_hash")
        # A falsy but non-None hash (e.g. an empty string) is still a failure;
        # report it with a message rather than a bare failureException.
        self.assertTrue(
            ipfs_hash, "The DownloadThread produced an empty ipfs_hash")
        file_path = downloader.ipfs_get(ipfs_hash)
        self.assertTrue(
            os.path.exists(file_path),
            "File not transmitted to ipfs, it cannot be fetched")
 def test_get_one_proxy_if_not_set(self):
     """Run a download with only a proxy location given and no explicit proxy.

     The thread must finish without an error and with HTML stored.
     """
     options = {"prox_loc": "DE", "basepath": base_path}
     worker = down.DownloadThread(101, url, **options)
     worker.start()
     worker.join()
     self.assertIsNone(worker.error)
     self.assertIsNotNone(worker.html)
 def test_download_blocked_site(self):
     """Download ``blocked_url`` through the China proxy and expect an error.

     The HTML the thread ended up with is printed for manual inspection.
     """
     worker = down.DownloadThread(
         101, blocked_url, proxy=china_proxy, prox_loc=china)
     worker.start()
     worker.join()
     print(str(worker.html))
     self.assertIsNotNone(worker.error)
    def test_thread_initialization(self):
        """Verify the state of a freshly constructed, not yet started thread.

        Checks the stored URL and thread ID, the download folder, and that the
        proxy is passed as the first service argument of ``thread.phantom``.
        """
        print("Test thread initialization:")
        worker = down.DownloadThread(1, url, proxy, basepath=base_path)
        self.assertEqual(worker.url, url)
        self.assertEqual(worker.threadID, 1)

        # NOTE(review): machine-specific absolute path; presumably it should
        # be derived from base_path -- confirm before running elsewhere.
        expected_folder = "/home/sebastian/testing-stw/temporary/1/"
        self.assertEqual(worker.path, expected_folder)
        self.assertTrue(os.path.exists(expected_folder))
        print("    Download folder was created.")

        service_args = worker.phantom.service.service_args
        print(service_args)
        self.assertEqual(service_args[0], "--proxy={}".format(proxy))
        print("    Proxy is set correctly to " + proxy + ".")

        # The capability is overwritten and printed only; nothing is asserted.
        worker.phantom.capabilities["browserName"] = "Mozilla/5.0"
        print(str(worker.phantom.capabilities))
    def test_phantom_proxy(self):
        """Check that a download through a random proxy reports the proxy's IP.

        A random proxy is fetched, its country is looked up from its IP, and
        ``proxy_util.ip_check_url`` is downloaded through it. The resulting
        HTML must contain the proxy's IP address.
        """
        # The location returned alongside the proxy is not used; the country
        # is looked up from the IP instead.
        _, this_proxy = proxy_util.get_one_random_proxy()
        proxy_ip = this_proxy.split(":")[0]
        country = proxy_util.ip_lookup_country(proxy_ip)

        print(proxy_ip)
        thread = down.DownloadThread(101,
                                     proxy_util.ip_check_url,
                                     proxy=this_proxy,
                                     prox_loc=country,
                                     basepath=base_path)
        thread.start()
        thread.join()

        print(thread.html)
        print(str(thread.error) + " | Was the error")
        print(thread.phantom.service.service_args)
        # Guard first: without HTML the find() below would raise an
        # AttributeError and hide the real reason for the failure.
        self.assertIsNotNone(thread.html,
                             "No HTML was downloaded through the proxy")
        print(thread.html.find(proxy_ip))
        self.assertIn(proxy_ip, thread.html)
 def test_load_images(self):
     """Check that ``_load_images`` yields two entries for the fixture HTML."""
     print("\nTesting the load_images method")
     worker = down.DownloadThread(2, url, html=html, basepath=base_path)
     parsed = Bs(html, "lxml")
     self.assertEqual(len(worker._load_images(parsed)), 2)