def _get_structure(self):
    """
    Return the structure we are going to work with.

    The structure is looked up in the following order:

    1. the file located at :code:`self.structure`;
    2. :code:`dir_structure_production.json` under :code:`self.base`;
    3. the upstream copy referenced by
       :code:`PyFunceble.LINKS["dir_structure"]` (the :code:`dev` branch
       is used when :code:`PyFunceble.VERSION` contains :code:`dev`).

    A :code:`*_production.json` file and the downloaded copy are passed
    through :code:`self._update_structure_from_config` before being
    returned. Any other local file is returned as parsed.

    :return: The parsed (and possibly updated) structure.
    """

    if path.isfile(self.structure):
        # The user-given structure file exists, we use it.
        structure_file = self.structure
    else:
        production_file = self.base + "dir_structure_production.json"

        # We fall back to the production file, if there is one.
        structure_file = production_file if path.isfile(production_file) else None

    if structure_file is None:
        # There is no local file: we download the upstream structure.
        link = PyFunceble.LINKS["dir_structure"]

        if "dev" in PyFunceble.VERSION:
            # We are running a development version, so we need the
            # development version of the structure.
            link = link.replace("master", "dev")

        req = requests.get(link)
        return self._update_structure_from_config(Dict().from_json(req.text))

    # A local file was found. We always parse it, whatever its extension
    # is. Previously a file not ending with `.json` fell through to
    # `req.text` with `req` being an empty string, which raised an
    # AttributeError.
    structure = Dict().from_json(File(structure_file).read())

    if structure_file.endswith("_production.json"):
        # Production files have to be updated from the configuration.
        return self._update_structure_from_config(structure)

    return structure
def __wordpress_dot_com(self):
    """
    Handle the wordpress.com SPECIAL case.

    The home page of the currently tested element
    (:code:`PyFunceble.INTERN["to_test"]`) is fetched over port 80 and
    searched for the "does not exist" marker.

    :return:
        The result of :code:`self.__special_down()` (documented as
        :code:`(new status, new source)`) when the marker is found,
        :code:`None` when there is no change to apply.
    :rtype: tuple|None
    """

    # The text which has to be in the HTML for the element to be
    # considered as inactive.
    inactive_marker = "doesn’t exist"

    # We fetch the page.
    response = requests.get(
        "http://%s:80" % PyFunceble.INTERN["to_test"], headers=self.headers
    )

    if inactive_marker not in response.text:
        # The marker is not in the page content: nothing changes.
        return None

    # The marker is in the page content: we return the new status and source.
    return self.__special_down()
def _special_wordpress_com(self):
    """
    Handle the wordpress.com special case.

    When the tested element ends with :code:`.wordpress.com` and its home
    page contains the "does not exist" marker, the source, the domain
    status and the output location are updated in place. Otherwise
    nothing is touched.
    """

    if not self.tested.endswith(".wordpress.com"):
        # The currently tested element is not a wordpress.com subdomain.
        return

    # We get the content of the page.
    response = requests.get("http://%s:80" % self.tested, headers=self.headers)

    if "doesn’t exist" not in response.text:
        # The inactive marker is not in the page content.
        return

    # The marker is in the page content: the element is flagged as down.
    self.source = "SPECIAL"
    self.domain_status = PyFunceble.STATUS["official"]["down"]

    # The output location follows the new status.
    splited_directory = PyFunceble.OUTPUTS["splited"]["directory"]
    self.output = self.output_parent_dir + splited_directory + self.domain_status
def _special_blogspot(self):
    """
    Handle the blogspot SPECIAL case.

    The URL to fetch is built from the test type first, so an unknown
    test type raises even for elements which are not blogspot ones.
    When the tested element matches :code:`.blogspot.` and its page
    contains one of the known markers, the source, the domain status and
    the output location are updated in place.

    :raises: :code:`Exception` If the test type is neither
        :code:`domain` nor :code:`url`.
    """

    test_type = PyFunceble.INTERN["to_test_type"]

    if test_type == "domain":
        # We are testing a domain: we have to prepend the scheme.
        url_to_get = "http://%s" % self.tested
    elif test_type == "url":
        # We are testing a URL: it is used as it is.
        url_to_get = self.tested
    else:
        raise Exception("Unknow test type.")

    if not Regex(self.tested, ".blogspot.", return_data=False, escape=True).match():
        # The element we are testing is not a blogspot subdomain.
        return

    # We get the HTML of the home page.
    page_content = requests.get(url_to_get, headers=self.headers).text

    # Elements of the HTML which tell us that the blog does not exist.
    # Each one is tried as a plain substring first, then as a regex.
    markers = ["create-blog.g?", "87065", "doesn’t exist"]

    matched = any(
        marker in page_content
        or Regex(page_content, marker, return_data=False, escape=False).match()
        for marker in markers
    )

    if matched:
        # Something matched: we update the source, status and output.
        self.source = "SPECIAL"
        self.domain_status = PyFunceble.STATUS["official"]["down"]
        self.output = (
            self.output_parent_dir
            + PyFunceble.OUTPUTS["splited"]["directory"]
            + self.domain_status
        )
def __blogspot(self):
    """
    Handle the blogspot SPECIAL case.

    The page of the currently tested element
    (:code:`PyFunceble.INTERN["to_test"]`) is fetched and searched for
    the known "blog does not exist" markers.

    :return:
        The result of :code:`self.__special_down()` (documented as
        :code:`(new status, new source)`) when a marker is found,
        :code:`None` when there is no change to apply.
    :rtype: tuple|None

    :raises: :code:`NotImplementedError` If the test type is neither
        :code:`domain` nor :code:`url`.
    """

    test_type = PyFunceble.INTERN["to_test_type"]

    if test_type == "domain":
        # We are testing a domain: we have to prepend the scheme.
        target = "http://%s" % PyFunceble.INTERN["to_test"]
    elif test_type == "url":
        # We are testing a URL: it is used as it is.
        target = PyFunceble.INTERN["to_test"]
    else:
        raise NotImplementedError(
            "to_test_type not implemented: `{}`".format(
                PyFunceble.INTERN["to_test_type"]
            )
        )

    # We get the HTML of the home page.
    html = requests.get(target, headers=self.headers).text

    # Elements of the HTML which tell us more about the status.
    for marker in ("create-blog.g?", "87065", "doesn’t exist"):
        if marker in html:
            # The marker is literally present in the document.
            return self.__special_down()

        if Regex(html, marker, return_data=False, escape=False).match():
            # Something in the document matches the marker as a regex.
            return self.__special_down()

    # Nothing matched, there is no change to apply.
    return None
def text(self):
    """
    Download :code:`self.link` and return or save its content.

    :return:
        The response text if :code:`self.return_data` is truthy,
        :code:`True` once the text has been written to
        :code:`self.destination` otherwise.

    :raises: :code:`Exception` If the status code is not :code:`200`.
    """

    response = requests.get(self.link)

    if response.status_code != 200:
        # Anything other than a 200 is considered as a failed download.
        raise Exception("Unable to download %s." % repr(self.link))

    if self.return_data:
        # The caller wants the data, not a file.
        return response.text

    # We save the content to the destination, replacing any previous content.
    File(self.destination).write(response.text, overwrite=True)
    return True
def special_wordpress_com(self):
    """
    Handle the wordpress.com special case.

    When the tested element ends with :code:`.wordpress.com` and its home
    page contains the "does not exist" marker, the source, the domain
    status and the output location are updated in place.
    """

    if not self.tested.endswith(".wordpress.com"):
        # Not a wordpress.com subdomain, nothing to do.
        return

    page = requests.get("http://%s:80" % self.tested)

    if "doesn’t exist" not in page.text:
        # The inactive marker is not in the page content.
        return

    # The marker is in the page content: the element is flagged as down.
    self.source = "SPECIAL"
    self.domain_status = PyFunceble.STATUS["official"]["down"]

    # The output location follows the new status.
    splited_directory = PyFunceble.OUTPUTS["splited"]["directory"]
    self.output = self.output_parent_dir + splited_directory + self.domain_status
def text(self):
    """
    Download the given link and return or save its
    :code:`requests.text` at the given destination.

    :return:
        The response text if :code:`self.return_data` is truthy,
        :code:`True` once the text has been written to
        :code:`self.destination` otherwise.
    :rtype: mixed

    :raises:
        :code:`Exception` If the status code is not :code:`200`.
    :raises:
        :code:`SystemExit` With the exit code :code:`1` if the request
        fails with a connection error.
    """

    try:
        # We request the link. Only this call is guarded, as it is the
        # one which can raise a connection error.
        req = requests.get(self.link, verify=self.verification)
    except requests.exceptions.ConnectionError:
        print(Fore.RED + "No Internet connection available." + Style.RESET_ALL)

        # We raise SystemExit directly instead of calling `exit()`:
        # `exit` is injected by the `site` module for the interactive
        # shell and is not guaranteed to exist (e.g. `python -S`).
        raise SystemExit(1)

    if req.status_code != 200:
        # The request http status code is not equal to 200.
        # We raise an exception saying that we were unable to download.
        raise Exception("Unable to download %s." % repr(self.link))

    if self.return_data:
        # We have to return the data.
        return req.text

    # We save the link content to the parsed destination.
    File(self.destination).write(req.text, overwrite=True)

    return True
def special_blogspot(self):
    """
    Handle the blogspot SPECIAL case.

    When the tested element matches :code:`.blogspot.` and its home page
    contains one of the known markers, the source, the domain status and
    the output location are updated in place.
    """

    if not Regex(self.tested, ".blogspot.", return_data=False, escape=True).match():
        # The element we are testing is not a blogspot subdomain.
        return

    # We get the HTML of the home page.
    page_content = requests.get("http://%s:80" % self.tested).text

    # Elements of the HTML which tell us that the blog does not exist.
    # Each one is tried as a plain substring first, then as a regex.
    markers = ["create-blog.g?", "87065", "doesn’t exist"]

    matched = any(
        marker in page_content
        or Regex(page_content, marker, return_data=False, escape=False).match()
        for marker in markers
    )

    if matched:
        # Something matched: we update the source, status and output.
        self.source = "SPECIAL"
        self.domain_status = PyFunceble.STATUS["official"]["down"]
        self.output = (
            self.output_parent_dir
            + PyFunceble.OUTPUTS["splited"]["directory"]
            + self.domain_status
        )