def test_set_retries_less_than_zero(self) -> None:
    """
    Tests that the retries setter rejects a negative value.
    """

    helper = DownloadHelper()

    with self.assertRaises(ValueError):
        helper.set_retries(-1)
def test_set_url_return(self) -> None:
    """
    Tests that the URL setter returns the helper itself (fluent interface).
    """

    helper = DownloadHelper()

    result = helper.set_url("https://example.org")

    self.assertIsInstance(result, DownloadHelper)
def test_set_url_not_str(self) -> None:
    """
    Tests that the URL setter rejects a non-string value.
    """

    helper = DownloadHelper()

    with self.assertRaises(TypeError):
        helper.set_url(["Hello", "World"])
def test_set_certificate_validation_return(self) -> None:
    """
    Tests that the certificate-validation setter returns the helper itself
    (fluent interface).
    """

    helper = DownloadHelper()

    result = helper.set_certificate_validation(True)

    self.assertIsInstance(result, DownloadHelper)
def test_set_retries_return(self) -> None:
    """
    Tests that the retries setter returns the helper itself (fluent
    interface).
    """

    helper = DownloadHelper()

    result = helper.set_retries(3)

    self.assertIsInstance(result, DownloadHelper)
def test_set_retries_not_int(self) -> None:
    """
    Tests the method which let us set the number of retry to perform
    for the case that the given value is not an integer.
    """

    # The docstring previously claimed "not a string", but the setter
    # expects an integer and this test feeds it a non-integer.
    given = ["Hello", "World"]

    download_helper = DownloadHelper()

    self.assertRaises(TypeError, lambda: download_helper.set_retries(given))
def test_set_certificate_validation_not_bool(self) -> None:
    """
    Tests that the certificate-validation setter rejects a non-boolean
    value.
    """

    helper = DownloadHelper()

    with self.assertRaises(TypeError):
        helper.set_certificate_validation(["Hello", "World"])
def start(self) -> "UpdaterBase":
    """
    Starts the update process.

    Downloads every configured infrastructure and PyFunceble file into the
    current directory, refreshes the main workflow file, and — on average
    one run out of three — rewrites the scheduler workflow's cron entry
    with a random minute/hour.

    :return: The current instance (fluent interface).
    """

    for file in infrastructure.LINKS.values():
        destination = os.path.join(outputs.CURRENT_DIRECTORY, file["destination"])

        DownloadHelper(file["link"]).download_text(destination=destination)

        logging.info("Updated: %r", destination)

    for file in pyfunceble.LINKS.values():
        destination = os.path.join(outputs.CURRENT_DIRECTORY, file["destination"])

        DownloadHelper(file["link"]).download_text(destination=destination)

        logging.info("Updated: %r", destination)

    destination = os.path.join(
        outputs.CURRENT_DIRECTORY,
        infrastructure.WORKFLOW_LINKS["main"]["destination"],
    )

    DownloadHelper(infrastructure.WORKFLOW_LINKS["main"]["link"]).download_text(
        destination=destination
    )

    logging.info("Updated: %r", destination)

    scheduled_file = os.path.join(
        outputs.CURRENT_DIRECTORY,
        infrastructure.WORKFLOW_LINKS["scheduler"]["destination"],
    )

    # Roughly a 1-in-3 chance of regenerating the scheduler workflow so
    # that scheduled runs get spread over different minutes/hours.
    if int(secrets.token_hex(8), 16) % 3 == 0:
        data = DownloadHelper(
            infrastructure.WORKFLOW_LINKS["scheduler"]["link"]
        ).download_text(destination=None)

        # NOTE(review): randbelow(59) yields 0-58, so minute 59 is never
        # chosen — confirm whether this is intentional.
        random_minute = secrets.randbelow(59)
        random_hour = secrets.randbelow(12)

        new_data = re.sub(
            r'cron: "\d+\s\d+\s(\*\s\*\s\*)"',
            r'cron: "{0} {1} \1"'.format(random_minute, random_hour),
            data,
        )

        with open(scheduled_file, "w", encoding="utf-8") as file_stream:
            file_stream.write(new_data)

        logging.info("Updated: %r", scheduled_file)

    # BUG FIX: the declared return type is "UpdaterBase" but the original
    # returned None; return self like the sibling updaters do.
    return self
def start(self, max_workers: Optional[int] = None):
    """
    Starts the generation of the dataset file.

    Downloads the upstream data, parses every line concurrently, merges
    the resulting suffixes per extension, cleans them up, and writes the
    database to the destination file.
    """

    upstream_lines = DownloadHelper(self.UPSTREAM_LINK).download_text().split("\n")

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        for parsed in executor.map(self.parse_line, upstream_lines):
            for extension, suffixes in parsed.items():
                if extension in self.database:
                    self.database[extension].extend(suffixes)
                else:
                    self.database[extension] = suffixes

                PyFunceble.facility.Logger.debug(
                    "Got: extension: %r ; suffixes: %r.", extension, suffixes
                )

    for extension, suffixes in self.database.items():
        cleaned = ListHelper(suffixes).remove_duplicates().remove_empty().sort()
        self.database[extension] = cleaned.subject

    DictHelper(self.database).to_json_file(self.destination)

    return self
def start(self, max_workers: Optional[int] = None) -> "IanaDBGenerator":
    """
    Starts the generation of the dataset file.

    :param max_workers:
        The maximal number of workers we are allowed to use.
    """

    blocks = (
        DownloadHelper(self.UPSTREAM_LINK)
        .download_text()
        .split('<span class="domain tld">')
    )

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = executor.map(self.get_extension_and_referrer_from_block, blocks)

        for extension, whois_server in results:
            if not extension:
                continue

            self.database[extension] = whois_server

            PyFunceble.facility.Logger.debug(
                "Got: extension: %r ; whois server: %r", extension, whois_server
            )

    DictHelper(self.database).to_json_file(self.destination)

    return self
def start(self) -> "UpdaterBase":
    """
    Starts the update process.

    Downloads the raw upstream list, diffs it against the current final
    file, writes the merged (kept + new) entries back, and records the
    removed entries into the temporary whitelist before cleaning them up.

    :return: The current instance (fluent interface).
    """

    if self.administration.raw_link:
        logging.info("Started to download: %r", self.administration.raw_link)
        DownloadHelper(self.administration.raw_link).download_text(
            destination=self.download_temp_file.name
        )
        logging.info("Finished to download: %r", self.administration.raw_link)

        self.download_temp_file.seek(0)

        logging.info("Started comparison of: %r", self.final_destination)
        kept, removed, new = self.produce_diff()
        logging.info("Finished comparison of: %r", self.final_destination)

        to_write = kept.copy()
        to_write.update(new)

        # Safety: discard() never raises, replacing the original
        # try/remove/except-KeyError pairs with the idiomatic form.
        to_write.discard(None)
        to_write.discard("")

        logging.info("Started to update: %r", self.final_destination)
        FileHelper(self.final_destination).write(
            "\n".join(sorted(to_write)) + "\n", overwrite=True
        )
        logging.info("Finished to update: %r", self.final_destination)

        if removed:
            logging.info(
                "Started to write our temporary whitelist list into: %r",
                self.whitelist_list.name,
            )
            FileHelper(self.whitelist_list.name).write(
                "\n".join(removed) + "\n", overwrite=True
            )
            self.whitelist_list.seek(0)
            logging.info(
                "Finished to write our temporary whitelist list into: %r",
                self.whitelist_list.name,
            )

            self.remove_removed()

    # BUG FIX: honor the declared "UpdaterBase" return type; the original
    # implicitly returned None.
    return self
def start(self) -> "GHAWorkflowsUpdater":
    """
    Downloads the main and scheduler GitHub Actions workflows into the
    workflows directory.
    """

    links = dead_hosts.launcher.defaults.links

    # Same order as before: main workflow first, then the scheduler one.
    for workflow in (links.GHA_MAIN_WORKFLOW, links.GHA_SCHEDULER_WORKFLOW):
        target = os.path.join(
            self.info_manager.GHA_WORKFLOWS_DIR, workflow["destination"]
        )

        DownloadHelper(workflow["link"]).download_text(destination=target)

    return self
def test_download_text(self, session_patch: unittest.mock.MagicMock) -> None:
    """
    Tests that the downloader returns the response body of the given URL.
    """

    session_patch.return_value.text = "Hello, World!"
    session_patch.return_value.status_code = 200

    helper = DownloadHelper("https://exmaple.org")

    self.assertEqual("Hello, World!", helper.download_text())
def fetch_file_to_test(self) -> "Orchestration":
    """
    Provides the latest version of the file to test.

    When a refresh is authorized, either downloads the raw link into the
    origin file, falls back to the existing origin file, or generates a
    placeholder — updating the download-time bookkeeping accordingly.

    :return: The current instance (fluent interface).
    """

    def _record_download_time(moment: datetime) -> None:
        # Keep both stored representations in sync: the datetime object
        # and its POSIX timestamp.
        self.info_manager["last_download_datetime"] = moment
        self.info_manager["last_download_timestamp"] = moment.timestamp()

    if self.authorization_handler.is_refresh_authorized():
        logging.info("We are authorized to refresh the lists! Let's do that.")
        logging.info("Raw Link: %r", self.info_manager.raw_link)

        if self.info_manager.raw_link:
            DownloadHelper(self.info_manager.raw_link).download_text(
                destination=self.origin_file.path
            )

            logging.info(
                "Could get the new version of the list. Updating the download time."
            )

            # NOTE(review): utcnow() is naive — confirm downstream code
            # expects a naive UTC datetime before switching to aware.
            _record_download_time(datetime.utcnow())
        elif self.origin_file.exists():
            logging.info(
                "Raw link not given or is empty. Let's work with %r.",
                self.origin_file.path,
            )

            self.origin_file.read()

            logging.info("Emptying the download time.")
            _record_download_time(datetime.fromtimestamp(0))
        else:
            # Idiom fix: lazy %-style arguments instead of an f-string,
            # matching every other logging call in this function.
            logging.info(
                "Could not find %s. Generating empty content to test.",
                self.origin_file.path,
            )

            self.origin_file.write("# No content yet.", overwrite=True)

            logging.info("Emptying the download time.")
            _record_download_time(datetime.fromtimestamp(0))

        logging.info("Updated %r.", self.origin_file.path)

    return self
def test_download_text_response_not_ok(
    self, session_patch: unittest.mock.MagicMock
) -> None:
    """
    Tests that the downloader raises when the upstream response is not OK.
    """

    destination = tempfile.NamedTemporaryFile(delete=False)

    session_patch.return_value.status_code = 500

    helper = DownloadHelper("https://exmaple.org")

    with self.assertRaises(PyFunceble.helpers.exceptions.UnableToDownload):
        helper.download_text(destination=destination.name)
def get_upstream_version() -> Box:
    """
    Provides the state of the upstream version.
    """

    dump_link = InternalUrlConverter(
        PyFunceble.cli.storage.VERSION_DUMP_LINK
    ).get_converted()

    raw_dump = DownloadHelper(dump_link).download_text()

    return Box(DictHelper().from_yaml(raw_dump), frozen_box=True)
def test_download_text_to_file(
    self, session_patch: unittest.mock.MagicMock
) -> None:
    """
    Tests that the downloader can write the response body into a file.
    """

    destination = tempfile.NamedTemporaryFile(delete=False)

    session_patch.return_value.text = "Hello, World!"
    session_patch.return_value.status_code = 200

    helper = DownloadHelper("https://exmaple.org")
    helper.download_text(destination=destination.name)

    destination.seek(0)

    self.assertEqual(b"Hello, World!", destination.read())
def download_file(file: str, destination: str) -> bool:
    """
    Downloads the given file (if it's an URL).

    :param file: The file to download.
    :param destination: The file to write.

    :return: A boolean which represent the action state.
    """

    # Only URLs are downloaded; anything else is reported as not handled.
    if URLSyntaxChecker(file).is_valid():
        DownloadHelper(file).download_text(destination=destination)

        return True
    return False
def start(self) -> None:
    """
    Starts the download process.

    Only acts when downloading is authorized and the previous download has
    expired; validates that both a destination and a link are configured
    before downloading and recording the new downtime.
    """

    if not self.authorized or not self.is_last_download_expired():
        return

    if not hasattr(self, "destination") or not self.destination:
        raise PyFunceble.downloader.exceptions.NoDownloadDestinationGiven()

    if not hasattr(self, "download_link") or not self.download_link:
        raise PyFunceble.downloader.exceptions.NoDownloadLinkGiven()

    downloaded = DownloadHelper(self.download_link).download_text(
        destination=self.destination
    )

    if downloaded:
        self.set_current_downtime()
        self.save_all_downtimes()
def test_set_certificate_validation(self) -> None:
    """
    Tests that certificate validation can be enabled through both the
    setter and the constructor.
    """

    expected = True

    via_setter = DownloadHelper()
    via_setter.set_certificate_validation(True)
    self.assertEqual(expected, via_setter.certificate_validation)

    via_constructor = DownloadHelper(certificate_validation=True)
    self.assertEqual(expected, via_constructor.certificate_validation)
def test_set_url(self) -> None:
    """
    Tests that the URL can be set through both the setter and the
    constructor.
    """

    expected = "https://example.org"

    via_setter = DownloadHelper()
    via_setter.set_url("https://example.org")
    self.assertEqual(expected, via_setter.url)

    via_constructor = DownloadHelper("https://example.org")
    self.assertEqual(expected, via_constructor.url)
def test_set_retries(self) -> None:
    """
    Tests that the number of retries can be set through both the setter
    and the constructor.
    """

    expected = 3

    via_setter = DownloadHelper()
    via_setter.set_retries(3)
    self.assertEqual(expected, via_setter.retries)

    via_constructor = DownloadHelper(retries=3)
    self.assertEqual(expected, via_constructor.retries)
def fetch_data(repo_name: str, info_dir: str) -> Tuple[str, str]:
    """
    Fetches the data of the given input source.

    Downloads the repository's info/domains/clean/ip/whitelisted files,
    picks the best available domain source (whitelisted > clean > domains),
    whitelists and filters its content into separate domain and IP output
    files, and cleans up the temporary files that are not delivered.

    :param repo_name: The name of the repository to fetch.
    :param info_dir: The directory the ``info.json`` file is written into.

    :return:
        The path of the (domain) output file and the path of the (ip)
        output file.
    """

    logging.info("Let's fetch the data behind %r", repo_name)

    url_base = hubgit.PARTIAL_RAW_URL % repo_name

    info_url = url_base + "info.json"
    domain_url = url_base + "domains.list"
    clean_url = url_base + "clean.list"
    ip_url = url_base + "ip.list"
    whitelisted_url = url_base + "whitelisted.list"

    download_info_file = os.path.join(info_dir, secrets.token_hex(8))

    downloaded_ip_file = tempfile.NamedTemporaryFile("r", delete=False)
    downloaded_domain_file = tempfile.NamedTemporaryFile("r", delete=False)
    downloaded_clean_file = tempfile.NamedTemporaryFile("r", delete=False)
    downloaded_whitelisted_file = tempfile.NamedTemporaryFile("r", delete=False)

    output_ip_file = tempfile.NamedTemporaryFile("w", delete=False)
    output_domain_file = tempfile.NamedTemporaryFile("w", delete=False)

    def _download(url, destination):
        # One download attempt with uniform logging; the original inlined
        # this identical pattern five times.
        try:
            logging.info(
                "[%r] Started to download %r into %r", repo_name, url, destination
            )
            DownloadHelper(url).download_text(destination=destination)
            logging.info(
                "[%r] Finished to download %r into %r", repo_name, url, destination
            )
            return True
        except UnableToDownload:
            logging.critical(
                "[%r] Could not download %r into %r. Reason: Not found.",
                repo_name,
                url,
                destination,
            )
            return False

    _download(info_url, download_info_file)
    domain_found = _download(domain_url, downloaded_domain_file.name)
    clean_found = _download(clean_url, downloaded_clean_file.name)
    ip_found = _download(ip_url, downloaded_ip_file.name)
    whitelisted_found = _download(whitelisted_url, downloaded_whitelisted_file.name)

    downloaded_domain_file.seek(0)
    downloaded_clean_file.seek(0)
    downloaded_ip_file.seek(0)
    downloaded_whitelisted_file.seek(0)

    # Best available domain source wins: whitelisted > clean > domains.
    if whitelisted_found:
        domain_file_to_read = domain_file_to_deliver = downloaded_whitelisted_file.name
    elif clean_found:
        domain_file_to_read = domain_file_to_deliver = downloaded_clean_file.name
    elif domain_found:
        domain_file_to_read = domain_file_to_deliver = downloaded_domain_file.name
    else:
        domain_file_to_read = domain_file_to_deliver = None

    if ip_found:
        ip_file_to_read = ip_file_to_deliver = downloaded_ip_file.name
    else:
        ip_file_to_read = ip_file_to_deliver = None

    logging.info(
        "[%r] Using %r as (domain) file to read and deliver.",
        repo_name,
        domain_file_to_read,
    )
    # BUG FIX: this log line previously reported domain_file_to_read.
    logging.info(
        "[%r] Using %r as (ip) file to read and deliver.",
        repo_name,
        ip_file_to_read,
    )

    def _whitelist_and_split(source):
        # Whitelist `source` in place, then dispatch each remaining line
        # to the domain or ip output file depending on its syntax. The
        # original duplicated this whole sequence for both sources.
        if not source:
            return

        logging.info(
            "[%r] Starting to whitelist content of %r", repo_name, source
        )
        WhitelistCore(
            output_file=source,
            use_official=True,
        ).filter(file=source, already_formatted=True)
        logging.info(
            "[%r] Finished to whitelist content of %r", repo_name, source
        )

        logging.info(
            "[%r] Starting to filter content of %r", repo_name, source
        )
        with open(source, "r", encoding="utf-8") as file_stream:
            for line in file_stream:
                if not line.strip():
                    continue

                if DomainSyntaxChecker(line.strip()).is_valid():
                    output_domain_file.write(line)
                elif IPSyntaxChecker(line.strip()).is_valid():
                    output_ip_file.write(line)
        logging.info(
            "[%r] Finished to filter content of %r", repo_name, source
        )

    _whitelist_and_split(domain_file_to_read)
    _whitelist_and_split(ip_file_to_read)

    downloaded_ip_file.close()
    downloaded_domain_file.close()
    downloaded_clean_file.close()
    downloaded_whitelisted_file.close()

    if downloaded_ip_file.name != ip_file_to_deliver:
        FileHelper(downloaded_ip_file.name).delete()

    if downloaded_domain_file.name != domain_file_to_deliver:
        FileHelper(downloaded_domain_file.name).delete()

    if downloaded_whitelisted_file.name != domain_file_to_deliver:
        FileHelper(downloaded_whitelisted_file.name).delete()

    if downloaded_clean_file.name != domain_file_to_deliver:
        FileHelper(downloaded_clean_file.name).delete()

    # BUG FIX: close (and thereby flush) the output files before handing
    # their paths to the caller; the original only seek(0)'d the still-open
    # write handles, so buffered content could be missing from disk.
    output_domain_file.close()
    output_ip_file.close()

    return output_domain_file.name, output_ip_file.name
def start(self) -> "OfficialPyFuncebleLicenseUpdater":
    """
    Downloads the official PyFunceble license into the configured
    destination.
    """

    upstream = dead_hosts.launcher.defaults.links.OFFICIAL_PYFUNCEBLE_LICENSE

    DownloadHelper(upstream["link"]).download_text(destination=self.destination)

    return self
SOFTWARE. """ import os from datetime import datetime from typing import List from PyFunceble.helpers.download import DownloadHelper from .hubgit import IGNORE_REPO_RAW_URL CURRENT_DATETIME: datetime = datetime.utcnow() REPOSITORIES_TO_IGNORE: List[str] = [ x.strip() for x in DownloadHelper(IGNORE_REPO_RAW_URL).download_text().splitlines() if x and not x.strip().startswith("#") ] for index, line in enumerate(REPOSITORIES_TO_IGNORE): if "#" in line: line = line[:line.find("#")].strip() REPOSITORIES_TO_IGNORE[index] = line if "GITHUB_RUN_NUMBER" in os.environ: VERSION: str = (f"V2.{os.environ['GITHUB_RUN_NUMBER']}." f"{CURRENT_DATETIME.strftime('%Y')}." f"{CURRENT_DATETIME.strftime('%m')}." f"{CURRENT_DATETIME.strftime('%d')}") else:
def start(self) -> "OurRequirementsUpdater":
    """
    Downloads our requirements file into the configured destination.
    """

    upstream = dead_hosts.launcher.defaults.links.OUR_REQUIREMENTS

    DownloadHelper(upstream["link"]).download_text(destination=self.destination)

    return self
def start(self) -> "OurLicenseUpdater":
    """
    Downloads our license file into the configured destination.
    """

    upstream = dead_hosts.launcher.defaults.links.OUR_LICENSE

    DownloadHelper(upstream["link"]).download_text(destination=self.destination)

    return self