def save_to_env_file(cls, envs, env_file_location):
    """
    Save the given dict of environment variables into our environment file.

    A variable which is already declared in the file is updated in place,
    any other variable is appended at the end of the file.

    :param dict envs: A dict of environment variables to save.
    :param str env_file_location: The location of the file we have to update.
    """

    # Local import: the declaration pattern has to be escaped and anchored,
    # which needs the standard library directly.
    import re

    file_instance = File(env_file_location)

    try:
        content = file_instance.read()
    except FileNotFoundError:
        # The file does not exist yet, we start from an empty content.
        content = ""

    for environment_variable, value in envs.items():
        to_write = "{0}={1}".format(environment_variable, value)

        # The name is escaped and anchored at the beginning of a line so that
        # saving `PATH` never rewrites the end of a `PYTHONPATH=...` line.
        declaration = re.compile(
            r"^{0}=.*".format(re.escape(str(environment_variable))),
            re.MULTILINE,
        )

        if declaration.search(content):
            # A callable replacement keeps backslashes of the value literal
            # instead of having them read as regex escape sequences.
            content = declaration.sub(lambda _, line=to_write: line, content)
        else:
            if content and not content.endswith("\n"):
                # We make sure that the new declaration starts on its own line.
                content += "\n"

            content += "{0}\n".format(to_write)

    file_instance.write(content, overwrite=True)
def _special_blogspot(self):
    """
    Handle the blogspot SPECIAL case.

    When the tested element is a blogspot subdomain, its home page is
    downloaded and the element is flagged as SPECIAL/down as soon as one of
    the known markers is found in the page.

    :raise Exception: When the test type is neither `domain` nor `url`.
    """

    test_type = PyFunceble.INTERN["to_test_type"]

    # The URL is built (and the test type validated) before anything else.
    if test_type == "domain":
        url_to_get = "http://%s" % self.tested
    elif test_type == "url":
        url_to_get = self.tested
    else:
        raise Exception("Unknow test type.")

    is_blogspot = Regex(
        self.tested, ".blogspot.", return_data=False, escape=True
    ).match()

    if not is_blogspot:
        # Not a blogspot subdomain, there is nothing to do.
        return

    # We get the HTML of the home page.
    page = requests.get(url_to_get, headers=self.headers).text

    # Elements of the HTML which tell us more about the status of the domain.
    markers = ("create-blog.g?", "87065", "doesn’t exist")

    # Each marker is tried as a plain substring first, then as a regex.
    if any(
        marker in page
        or Regex(page, marker, return_data=False, escape=False).match()
        for marker in markers
    ):
        self.source = "SPECIAL"
        self.domain_status = PyFunceble.STATUS["official"]["down"]
        self.output = (
            self.output_parent_dir
            + PyFunceble.OUTPUTS["splited"]["directory"]
            + self.domain_status
        )
def __extract_from_record(self):  # pragma: no cover
    """
    Extract the expiration date from the whois record.

    Each pattern of :code:`self.expiration_patterns` is tried against the
    record. An extracted date which contains a digit is formatted, logged
    when it does not follow the unified format, and saved into the whois
    database.
    """

    if not self.whois_record:
        # The whois record is empty, there is nothing to extract.
        return

    for pattern in self.expiration_patterns:
        extracted = Regex(
            self.whois_record, pattern, return_data=True, rematch=True, group=0
        ).match()

        if not extracted:
            # Nothing extracted with this pattern, we try the next one.
            continue

        self.expiration_date = extracted[0].strip()

        has_number = Regex(
            self.expiration_date, r"[0-9]", return_data=False
        ).match()

        if not has_number:
            # Without any number it cannot be a date.
            continue

        # We format the extracted expiration date.
        self.expiration_date = self._format()

        if self.expiration_date:
            is_unified = Regex(
                self.expiration_date,
                r"[0-9]{2}\-[a-z]{3}\-2[0-9]{3}",
                return_data=False,
            ).match()

            if not is_unified:
                # The formatted date does not match our unified format,
                # we log the problem.
                Logs().expiration_date(self.subject, self.expiration_date)

        # We save the whois record into the database.
        self.whois_db.add(self.subject, self.expiration_date, self.whois_record)
def file(self):
    """
    Manage the case that need to test each domain of a given file path.

    The extracted lines are decoded/formatted, completed with the entries
    the inactive database asks to retest, cleaned from local hosts entries,
    optionally filtered, and finally given one by one to
    :code:`self.domain` along with the last element of the list.

    Note: 1 domain per line.
    """

    list_to_test = self._extract_domain_from_file()

    AutoContinue().restore()

    if PyFunceble.CONFIGURATION["adblock"]:
        list_to_test = self.adblock_decode(list_to_test)
    else:
        list_to_test = [self._format_domain(line) for line in list_to_test]

    PyFunceble.Clean(list_to_test)

    if PyFunceble.CONFIGURATION["inactive_database"]:
        Database().to_test()

        # Read after `to_test()` has run, exactly as the lookups were before.
        file_to_test = PyFunceble.CONFIGURATION["file_to_test"]
        inactive_db = PyFunceble.CONFIGURATION["inactive_db"]

        if (
            file_to_test in inactive_db
            and "to_test" in inactive_db[file_to_test]
            and inactive_db[file_to_test]["to_test"]
        ):
            list_to_test.extend(inactive_db[file_to_test]["to_test"])

    regex_delete = r"localhost$|localdomain$|local$|broadcasthost$|0\.0\.0\.0$|allhosts$|allnodes$|allrouters$|localnet$|loopback$|mcastprefix$"  # pylint: disable=line-too-long

    list_to_test = List(Regex(list_to_test, regex_delete).not_matching_list()).format()

    if PyFunceble.CONFIGURATION["filter"]:
        list_to_test = List(
            Regex(
                list_to_test, PyFunceble.CONFIGURATION["filter"], escape=True
            ).matching_list()
        ).format()

    if not list_to_test:
        # Nothing is left to test (empty file or nothing passed the filters):
        # reading the last element would raise an IndexError.
        return

    already_tested = PyFunceble.CONFIGURATION["counter"]["number"]["tested"]
    last_domain = list_to_test[-1]

    # We resume after the already tested entries; every call also receives
    # the last element of the list.
    for domain in list_to_test[already_tested:]:
        self.domain(domain, last_domain)
def _get_extension_and_referer_from_block(cls, block): """ Extract the extention from the given HTML block. Plus get its referer. :param str block: An HTML block. """ # We extract the different extension from the currently readed line. regex_valid_extension = r"(/domains/root/db/)(.*)(\.html)" if "/domains/root/db/" in block: # The link is in the line. # We try to extract the extension. matched = Regex(block, regex_valid_extension, return_data=True, rematch=True).match()[1] if matched: # The extraction is not empty or None. # We get the referer. referer = cls._get_referer(matched) if not referer: referer = "whois.nic.{0}".format(matched) if cls._check_referer(matched, referer): return matched, referer, True return matched, referer, False return None, None, None
def is_url_valid(cls, url=None):
    """
    Check if the domain of the given URL is valid.

    Argument:
        - url: str
            The url to test. When empty or not given,
            PyFunceble.CONFIGURATION["URL"] is tested instead.

    Returns: bool
        - True: is valid.
        - False: is invalid (including anything not starting with `http`).
    """

    to_test = url if url else PyFunceble.CONFIGURATION["URL"]

    if not to_test.startswith("http"):
        return False

    # Groups: whole URL base, protocol, then what follows the protocol up to
    # the first slash (or the end of the string).
    regex = r"((http:\/\/|https:\/\/)(.+?(?=\/)|.+?$))"
    domain = Regex(to_test, regex, return_data=True, rematch=True).match()[2]

    domain_status = ExpirationDate().is_domain_valid(domain)

    # NOTE(review): this repeats the domain check above although the variable
    # name suggests an IP validity check was intended — confirm which
    # ExpirationDate method should be called here.
    ip_status = ExpirationDate().is_domain_valid(domain)

    return bool(domain_status or ip_status)
def _extensions(self):
    """
    Yield each extension found in the upstream page, plus its referer.

    The page downloaded from :code:`self.iana_url` is split on its
    `<span class="domain tld">` markers and each resulting block is parsed.

    :return: A generator of :code:`(extension, referer)`.
    """

    page = Download(self.iana_url, return_data=True).text()

    # The extension is the part between `/domains/root/db/` and `.html`.
    regex_valid_extension = r"(/domains/root/db/)(.*)(\.html)"

    for block in page.split('<span class="domain tld">'):
        if "/domains/root/db/" not in block:
            # The link is not in the block, we skip it.
            continue

        extension = Regex(
            block, regex_valid_extension, return_data=True, rematch=True
        ).match()[1]

        if extension:
            # The extraction is not empty, we yield it with its referer.
            yield (extension, self._referer(extension))
def _is_to_ignore(cls, line):
    """
    Check if we have to ignore the given line.

    :param str line: The line from the file.

    :return: :code:`True` if one of the ignore patterns matches the line.
    :rtype: bool
    """

    # A line starting with one of those markers has to be ignored.
    # The `(\$|,)(image)` pattern is currently disabled.
    ignore_patterns = [r"(^!|^@@|^\/|^\[|^\.|^-|^_|^\?|^&)"]

    return any(
        Regex(line, pattern, return_data=False).match()
        for pattern in ignore_patterns
    )
def _handle_options(self, options):
    """
    Handle the data from the options.

    :param options: The list of options from the rule.
    :type options: list

    :return:
        The list of domains to return globally. Outside of the aggressive
        mode, :code:`True` is returned as soon as an option lists domains.
    :rtype: list|bool
    """

    collected = []

    # Extracts what is listed under the `domain=` option.
    regex_domain_option = r"domain=(.*)"

    for option in options:
        try:
            listed = Regex(
                option,
                regex_domain_option,
                return_data=True,
                rematch=True,
                group=0,
            ).match()[-1]

            if listed:
                if not self.aggressive:
                    # We do not collect the domains, we only report that
                    # the option lists some.
                    return True

                # Empty entries and excluded (`~`) domains are dropped.
                collected.extend(  # pragma: no cover
                    [
                        entry
                        for entry in listed.split("|")
                        if entry and not entry.startswith("~")
                    ]
                )
        except TypeError:
            # Nothing usable for this option, we continue with the next one.
            pass

    return collected
def __blogspot(self):
    """
    Handle the blogspot SPECIAL case.

    The home page of the subject is downloaded and searched for the known
    markers. Request related errors are ignored on purpose and handled as
    "no change".

    :return:
        The output of :code:`self.__special_down()` or :code:`None`
        if there is no change to apply.
    :rtype: tuple|None

    :raise ValueError: When the subject type is not registered.
    """

    # Elements of the HTML which tell us more about the status of the domain.
    markers = ("create-blog.g?", "87065", "doesn’t exist")

    if self.subject_type in ("domain", "file_domain"):
        # We are testing a domain, we construct the URL to get.
        url_to_get = "http://%s" % self.subject
    elif self.subject_type in ("url", "file_url"):
        # We are testing a URL, we use it as is.
        url_to_get = self.subject
    else:
        raise ValueError("Given subject type not registered.")

    try:
        # We get the HTML of the home page.
        response = PyFunceble.requests.get(url_to_get, headers=self.headers)

        for marker in markers:
            # Plain substring first, then the marker read as a regex.
            found = marker in response.text or Regex(
                response.text, marker, return_data=False, escape=False
            ).match()

            if found:
                # We update the status and source.
                return self.__special_down()
    except (
        PyFunceble.requests.exceptions.InvalidURL,
        PyFunceble.socket.timeout,
        PyFunceble.requests.exceptions.Timeout,
        PyFunceble.requests.ConnectionError,
        urllib3_exceptions.InvalidHeader,
        UnicodeDecodeError,  # The probability that this happend in production is minimal.
    ):
        # Best effort: a failing request means that nothing changes.
        pass

    # There is no change.
    return None
def test_replace(self):
    """
    Test Regex.replace() with and without a replacement.
    """

    pattern = "th"

    # Every "th" has to be swapped into "ht".
    swapped = "Hello, htis is Fun Ilrys. I just wanted to know how htings goes around hte tests."  # pylint: disable=line-too-long
    self.assertEqual(swapped, Regex(self.data, pattern, replace_with="ht").replace())

    # Without `replace_with`, the data has to be returned untouched.
    self.assertEqual(self.data, Regex(self.data, pattern).replace())
def test_match_get_group(self):
    """
    Test the matching method for the case that a specific group is wanted.
    """

    # Group 0 of a single character pattern.
    self.assertEqual("e", Regex("e").match(self.data, group=0))

    # Group 1 of a pattern made of two capturing groups.
    two_words = r"([a-z]{1,})\s([a-z]{1,})\s"
    self.assertEqual("this", Regex(two_words).match(self.data, group=1))
def test_replace(self):
    """
    Test Regex.replace() for the case that a replacement is given.
    """

    swapped = "Hello, htis is Fun Ilrys. I just wanted to know how htings goes around hte tests."  # pylint: disable=line-too-long

    result = Regex(self.data, "th", replace_with="ht").replace()

    self.assertEqual(swapped, result)
def test_not_matching_list(self):
    """
    Test Regex.not_matching_list(): only the entries which do not match
    the pattern have to be returned.
    """

    remaining = Regex(self.data_list, "fun").not_matching_list()

    self.assertEqual(["hello", "world", "PyFunceble"], remaining)
def test_replace(self):
    """
    Test the replacement method for the case that a replacement is given.
    """

    swapped = "Hello, htis is Fun Ilrys. I just wanted to know how htings goes around hte tests."  # pylint: disable=line-too-long

    result = Regex("th").replace_match(self.data, "ht")

    self.assertEqual(swapped, result)
def test_matching_list(self):
    """
    Test Regex.matching_list(): only the entries which match the pattern
    have to be returned.
    """

    kept = Regex(self.data_list, "fun").matching_list()

    self.assertEqual(["funilrys", "funceble", "pyfunceble"], kept)
def test_match(self):
    """
    Test Regex.match() for the case that a specific group is wanted.
    """

    # Rematch case: entry 1 of the rematched groups is wanted.
    two_words = r"([a-z]{1,})\s([a-z]{1,})\s"
    rematched = Regex(self.data, two_words, rematch=True, group=1).match()
    self.assertEqual("is", rematched)

    # Group case: group 0 of a single character pattern.
    grouped = Regex(self.data, "e", group=0).match()
    self.assertEqual("e", grouped)
def __blogspot(self):
    """
    Handle the blogspot SPECIAL case.

    The home page of the element under test is downloaded and searched for
    the known markers.

    :return:
        The output of :code:`self.__special_down()` or :code:`None`
        if there is no change to apply.
    :rtype: tuple|None

    :raise NotImplementedError: When the test type is not handled.
    """

    # Elements of the HTML which tell us more about the status of the domain.
    markers = ("create-blog.g?", "87065", "doesn’t exist")

    test_type = PyFunceble.INTERN["to_test_type"]

    if test_type == "domain":
        # We are testing a domain, we construct the URL to get.
        url_to_get = "http://%s" % PyFunceble.INTERN["to_test"]
    elif test_type == "url":
        # We are testing a URL, we use it as is.
        url_to_get = PyFunceble.INTERN["to_test"]
    else:
        raise NotImplementedError(
            "to_test_type not implemented: `{}`".format(
                PyFunceble.INTERN["to_test_type"]
            )
        )

    # We get the HTML of the home page.
    response = requests.get(url_to_get, headers=self.headers)

    for marker in markers:
        # Plain substring first, then the marker read as a regex.
        found = marker in response.text or Regex(
            response.text, marker, return_data=False, escape=False
        ).match()

        if found:
            # We update the status and source.
            return self.__special_down()

    # There is no change.
    return None
def __handle_potentially_inactive(self, previous_state):
    """
    Handle the potentially inactive case.

    :param previous_state: The previously catched status.
    :type previous_state: str

    :return:
        The output of the first matching SPECIAL handler which is not
        :code:`None`, or :code:`None` if there is no change to apply.
    :rtype: tuple|None
    """

    potentially_down = (
        PyFunceble.HTTP_CODE["active"]
        and PyFunceble.INTERN["http_code"]
        in PyFunceble.HTTP_CODE["list"]["potentially_down"]
    )

    if not potentially_down:
        # The http status check is disabled or the extracted code is not
        # in the potentially down list: there is no change.
        return None

    # We generate the analytics files.
    Generate(domain_status=previous_state).analytic_file("potentially_down")

    if PyFunceble.CONFIGURATION["no_special"]:
        # We are not authorized to play with the SPECIAL rules.
        return None

    handlers = self.regexes_active_to_inactive_potentially_down

    for pattern in handlers:
        matches = Regex(
            data=PyFunceble.INTERN["to_test"],
            regex=pattern,
            return_data=False,
            escape=False,
        ).match()

        if not matches:
            continue

        # We call the function associated with the matching regex.
        outcome = handlers[pattern]()

        if outcome is not None:
            # We return the new source and state.
            return outcome

    # There is no change.
    return None
def test_replace_no_replace_with(self):
    """
    Test Regex.replace() for the case that no `replace_with` is given:
    the data has to be returned untouched.
    """

    result = Regex(self.data, "th").replace()

    self.assertEqual(self.data, result)
def test_match_group(self):
    """
    Test Regex.match() for the case that a specific group is wanted.
    """

    matched = Regex(self.data, "e", group=0).match()

    self.assertEqual("e", matched)
def test_match_rematch(self):
    """
    Test Regex.match() for the case that the different groups are rematched.
    """

    two_words = r"([a-z]{1,})\s([a-z]{1,})\s"

    rematched = Regex(self.data, two_words, rematch=True, group=1).match()

    self.assertEqual("is", rematched)
def test_replace_no_replacement(self):
    """
    Test the replacement method for the case that the replacement is not
    given: the data has to be returned untouched.
    """

    result = Regex("th").replace_match(self.data, None)

    self.assertEqual(self.data, result)
def test_matching_list(self):
    """
    Test the method which gives the list of matching strings from a given
    list of strings.
    """

    kept = Regex("fun").get_matching_list(self.data_list)

    self.assertEqual(["funilrys", "funceble", "pyfunceble"], kept)
def test_not_matching_list(self):
    """
    Test the method which gives the list of non matching strings from a
    given list of strings.
    """

    remaining = Regex("fun").get_not_matching_list(self.data_list)

    self.assertEqual(["hello", "world", "PyFunceble"], remaining)
def bypass(cls):
    """
    Trigger the bypass autosave if `[PyFunceble skip]` is matched into the
    latest commit message.

    Nothing is done (and git is not called) outside of the travis mode.
    """

    if not PyFunceble.CONFIGURATION["travis"]:
        return

    latest_commit = Command("git log -1").execute()

    if Regex(latest_commit, r"\[PyFunceble\sskip\]", return_data=False).match():
        AutoSave(True, is_bypass=True)
def restore(self):
    """
    Restore the 'output/' directory structure based on the `dir_structure.json` file.

    Only the first top-level key of the structure returned by
    :code:`self._get_structure()` is processed: each directory under it is
    created and each of its files is either renamed in place (when its
    sha512 matches the stored one) or rewritten from the stored content.
    """

    structure = self._get_structure()

    # The first key is the parent directory, its value the directories to restore.
    list_of_key = list(structure.keys())
    structure = structure[list_of_key[0]]
    parent_path = list_of_key[0] + directory_separator

    for directory in structure:
        base = self.base + parent_path + directory + directory_separator

        self._create_directory(base)

        for file in structure[directory]:
            file_path = base + file

            content_to_write = structure[directory][file]["content"]
            online_sha = structure[directory][file]["sha512"]

            # Presumably turns the `@@@` placeholders back into line breaks —
            # confirm against Regex.replace().
            content_to_write = Regex(content_to_write, "@@@", escape=True, replace_with="\\n").replace()

            # The two possible names of the file: `...keep` and `...gitignore`.
            git_to_keep = file_path.replace("gitignore", "keep")
            keep_to_git = file_path.replace("keep", "gitignore")

            if self._restore_replace():
                # The `keep` variant of the file is wanted.
                if path.isfile(file_path) and Hash(
                        file_path, "sha512", True).get() == online_sha:
                    # The file exists with the expected content, a rename is enough.
                    rename(file_path, git_to_keep)
                    write = False
                else:
                    # Missing or different: we delete it and rewrite the `keep` variant.
                    File(file_path).delete()
                    file_path = git_to_keep
                    write = True
            else:
                # The `gitignore` variant of the file is wanted.
                # NOTE(review): the existence check targets `keep_to_git` while
                # the hash and the rename use `file_path` — confirm that this
                # asymmetry with the branch above is intended.
                if path.isfile(keep_to_git) and Hash(
                        file_path, "sha512", True).get() == online_sha:
                    rename(file_path, keep_to_git)
                    write = False
                else:
                    File(keep_to_git).delete()
                    file_path = keep_to_git
                    write = True

            if write:
                # We (re)write the stored content, followed by a final new line.
                File(file_path).write(content_to_write + "\n", True)
def up_status_file(self):
    """
    Logic behind the up status when generating the status file.

    A missing expiration date becomes "Unknown". Depending on the HTTP
    status code, the SPECIAL rules are applied; the element is set as up
    only if no SPECIAL rule switched its source.
    """

    if not self.expiration_date:
        self.expiration_date = "Unknown"

    if (
        PyFunceble.HTTP_CODE["active"]
        and PyFunceble.CONFIGURATION["http_code"]
        in PyFunceble.HTTP_CODE["list"]["potentially_down"]
    ):
        self._analytic_file("potentially_down", self.domain_status)

        # Domains which are considered as down with a potentially down code.
        for special_domain in (
            ".canalblog.com",
            ".doubleclick.net",
            ".liveadvert.com",
            ".skyrock.com",
            ".tumblr.com",
        ):
            if not Regex(
                self.tested, special_domain, return_data=False, escape=True
            ).match():
                continue

            self.source = "SPECIAL"
            self.domain_status = PyFunceble.STATUS["official"]["down"]
            self.output = (
                self.output_parent_dir
                + PyFunceble.OUTPUTS["splited"]["directory"]
                + self.domain_status
            )

        self.special_blogspot()
    elif (
        PyFunceble.HTTP_CODE["active"]
        and PyFunceble.CONFIGURATION["http_code"]
        in PyFunceble.HTTP_CODE["list"]["potentially_up"]
    ):
        self.special_blogspot()
        self.special_wordpress_com()

    if self.source != "SPECIAL":
        # No SPECIAL rule was applied, the element is up.
        self.domain_status = PyFunceble.STATUS["official"]["up"]
        self.output = (
            self.output_parent_dir
            + PyFunceble.OUTPUTS["splited"]["directory"]
            + self.domain_status
        )
def adblock_decode(self, list_to_test):
    """
    Convert the adblock format into a readable format which is understood
    by the system.

    Argument:
        - list_to_test: list
            The read content of the given file.

    Returns: list
        The list of domain to test.
    """

    # `||domain^`-like rules.
    regex = r"^(?:.*\|\|)([^\/\$\^]{1,}).*$"
    # `domain##...`-like rules.
    regex_v2 = r"(.*\..*)(?:#{1,}.*)"

    decoded = []

    for line in list_to_test:
        from_rule = Regex(
            line, regex, return_data=True, rematch=True, group=0
        ).match()
        from_rule_v2 = Regex(
            line, regex_v2, return_data=True, rematch=True, group=0
        ).match()

        if from_rule:
            decoded += from_rule

        if from_rule_v2:
            # The second format needs its own decoding before being added.
            decoded += List(self._format_adblock_decoded(from_rule_v2)).format()

    return decoded
def special_blogspot(self):
    """
    Handle the blogspot SPECIAL case.

    When the tested element is a blogspot subdomain, its home page is
    downloaded (port 80) and the element is flagged as SPECIAL/down as soon
    as one of the known markers is found in the page.
    """

    if not Regex(
        self.tested, ".blogspot.", return_data=False, escape=True
    ).match():
        # Not a blogspot subdomain, there is nothing to do.
        return

    page = requests.get("http://%s:80" % self.tested).text

    for marker in ("create-blog.g?", "87065", "doesn’t exist"):
        # Plain substring first, then the marker read as a regex.
        found = marker in page or Regex(
            page, marker, return_data=False, escape=False
        ).match()

        if not found:
            continue

        self.source = "SPECIAL"
        self.domain_status = PyFunceble.STATUS["official"]["down"]
        self.output = (
            self.output_parent_dir
            + PyFunceble.OUTPUTS["splited"]["directory"]
            + self.domain_status
        )

        # We matched something, no need to look further.
        break