Exemple #1
0
    def save_to_env_file(cls, envs, env_file_location):
        """
        Save the given dict of environment variables into our environment file.

        A variable which is already declared on its own line (optionally
        prefixed by ``export``) is updated in place. Any other variable is
        appended at the end of the file, one declaration per line.

        :param dict envs: A dict of environment variables to save.
        :param str env_file_location: The location of the file we have to update.
        """

        # Imported locally so that this method does not rely on a module-level
        # import of the standard library `re` module.
        import re

        file_instance = File(env_file_location)

        try:
            content = file_instance.read()
        except FileNotFoundError:
            # A missing file is treated as an empty one.
            content = ""

        for environment_variable, value in envs.items():
            to_write = "{0}={1}".format(environment_variable, value)

            # The name is escaped and anchored to the beginning of a line so
            # that, for example, `PATH` never rewrites `MY_PATH=...`.
            declaration = re.compile(
                r"^([ \t]*(?:export[ \t]+)?){0}=.*$".format(
                    re.escape(environment_variable)
                ),
                re.MULTILINE,
            )

            if declaration.search(content):
                # A callable replacement keeps backslashes of the value
                # literal instead of interpreting them as escape sequences.
                content = declaration.sub(
                    lambda found, line=to_write: found.group(1) + line, content
                )
            else:
                if content and not content.endswith("\n"):
                    # We ensure that the new declaration starts on its own line.
                    content += "\n"

                content += "{0}\n".format(to_write)

        file_instance.write(content, overwrite=True)
Exemple #2
0
    def _special_blogspot(self):
        """
        Apply the SPECIAL rule dedicated to blogspot subdomains.

        When the tested element contains :code:`.blogspot.`, its home page is
        fetched. If one of the known markers is found in the returned HTML,
        the source, the status and the output of the element are updated.

        :raise Exception: When the type of the tested element is unknown.
        """

        test_type = PyFunceble.INTERN["to_test_type"]

        # The URL to fetch depends on what we are testing.
        if test_type == "domain":
            url_to_get = "http://%s" % self.tested
        elif test_type == "url":
            url_to_get = self.tested
        else:
            raise Exception("Unknow test type.")

        is_blogspot = Regex(
            self.tested, ".blogspot.", return_data=False, escape=True
        ).match()

        if not is_blogspot:
            # Not a blogspot subdomain: there is nothing to do.
            return

        # We get the HTML of the home page.
        response = requests.get(url_to_get, headers=self.headers)

        # Markers which, once found in the HTML, tell us that the element is down.
        for marker in ("create-blog.g?", "87065", "doesn’t exist"):
            html = response.text

            found = marker in html or Regex(
                html, marker, return_data=False, escape=False
            ).match()

            if not found:
                continue

            # We update the source, the status and the output location.
            self.source = "SPECIAL"
            self.domain_status = PyFunceble.STATUS["official"]["down"]
            self.output = (
                self.output_parent_dir
                + PyFunceble.OUTPUTS["splited"]["directory"]
                + self.domain_status
            )

            # One marker is enough.
            break
    def __extract_from_record(self):  # pragma: no cover
        """
        Search the WHOIS record for an expiration date.

        Each pattern of :code:`self.expiration_patterns` is tried against the
        record. When an extracted value contains a digit, it is formatted,
        logged if it does not follow the unified format, and saved into the
        WHOIS database along with the record.
        """

        if not self.whois_record:
            # The whois record is empty: nothing can be extracted.
            return

        for pattern in self.expiration_patterns:
            extracted = Regex(
                self.whois_record, pattern, return_data=True, rematch=True, group=0
            ).match()

            if not extracted:
                # The current pattern gave nothing, we try the next one.
                continue

            self.expiration_date = extracted[0].strip()

            contains_digit = Regex(
                self.expiration_date, r"[0-9]", return_data=False
            ).match()

            if not contains_digit:
                # Without any digit, it can not be a date.
                continue

            # We format the extracted expiration date.
            self.expiration_date = self._format()

            if self.expiration_date:
                follows_unified_format = Regex(
                    self.expiration_date,
                    r"[0-9]{2}\-[a-z]{3}\-2[0-9]{3}",
                    return_data=False,
                ).match()

                if not follows_unified_format:
                    # We log the date we could not unify.
                    Logs().expiration_date(self.subject, self.expiration_date)

            # We save the whois record into the database.
            self.whois_db.add(self.subject, self.expiration_date, self.whois_record)
Exemple #4
0
    def file(self):
        """
        Manage the case that need to test each domain of a given file path.
        Note: 1 domain per line.

        The extracted list is decoded (adblock) or formatted, completed with
        the entries of the inactive database which have to be retested,
        cleaned from the well known local entries and optionally filtered.
        Each remaining element, starting from the index of the already tested
        ones, is then given to :code:`self.domain` along with the last element
        of the list.

        Nothing is tested when the final list is empty.
        """

        list_to_test = self._extract_domain_from_file()

        AutoContinue().restore()

        if PyFunceble.CONFIGURATION["adblock"]:
            list_to_test = self.adblock_decode(list_to_test)
        else:
            list_to_test = list(map(self._format_domain, list_to_test))

        PyFunceble.Clean(list_to_test)

        if PyFunceble.CONFIGURATION["inactive_database"]:
            Database().to_test()

            # Read after Database().to_test(), as in the original expression.
            file_to_test = PyFunceble.CONFIGURATION["file_to_test"]
            inactive_db = PyFunceble.CONFIGURATION["inactive_db"]

            if (
                file_to_test in inactive_db
                and "to_test" in inactive_db[file_to_test]
                and inactive_db[file_to_test]["to_test"]
            ):
                # There is something to retest for the file we are testing.
                list_to_test.extend(inactive_db[file_to_test]["to_test"])

        regex_delete = r"localhost$|localdomain$|local$|broadcasthost$|0\.0\.0\.0$|allhosts$|allnodes$|allrouters$|localnet$|loopback$|mcastprefix$"  # pylint: disable=line-too-long

        list_to_test = List(
            Regex(list_to_test, regex_delete).not_matching_list()
        ).format()

        if PyFunceble.CONFIGURATION["filter"]:
            list_to_test = List(
                Regex(
                    list_to_test, PyFunceble.CONFIGURATION["filter"], escape=True
                ).matching_list()
            ).format()

        if not list_to_test:
            # Nothing left to test. Without this guard, the lookup of the
            # last element below would raise an IndexError.
            return

        last_element = list_to_test[-1]
        already_tested = PyFunceble.CONFIGURATION["counter"]["number"]["tested"]

        for element in list_to_test[already_tested:]:
            self.domain(element, last_element)
Exemple #5
0
    def _get_extension_and_referer_from_block(cls, block):
        """
        Extract the extension from the given HTML block.
        Plus get its referer.

        :param str block: An HTML block.

        :return:
            :code:`(extension, referer, referer check result)` or
            :code:`(None, None, None)` if no extension could be extracted.
        :rtype: tuple
        """

        # The regex which extracts the extension from the link of the block.
        regex_valid_extension = r"(/domains/root/db/)(.*)(\.html)"

        if "/domains/root/db/" not in block:
            # The link is not in the block.
            return None, None, None

        groups = Regex(
            block, regex_valid_extension, return_data=True, rematch=True
        ).match()

        # The link prefix can be present without the rest of the pattern.
        # We only index the result once we know that something matched.
        matched = groups[1] if groups else None

        if not matched:
            # The extraction is empty or None.
            return None, None, None

        # We get the referer, or fall back to the conventional one.
        referer = cls._get_referer(matched)

        if not referer:
            referer = "whois.nic.{0}".format(matched)

        return matched, referer, bool(cls._check_referer(matched, referer))
Exemple #6
0
    def is_url_valid(cls, url=None):
        """
        Check if the domain of the given URL is valid.

        Argument:
            - url: str
                The url to test. If not given, the :code:`URL` index of the
                configuration is used.

        Returns: bool
            - True: is valid.
            - False: is invalid.
        """

        if url:
            to_test = url
        else:
            to_test = PyFunceble.CONFIGURATION["URL"]

        if not to_test or not to_test.startswith("http"):
            # Nothing to test or not an http(s) URL.
            return False

        regex = r"((http:\/\/|https:\/\/)(.+?(?=\/)|.+?$))"
        groups = Regex(to_test, regex, return_data=True, rematch=True).match()

        if not groups:
            # The input starts with "http" but has no valid scheme
            # (e.g. "httpfoo"), so there is no domain to extract.
            return False

        # Index 2 is the third capture group: what follows the scheme.
        domain = groups[2]

        domain_status = ExpirationDate().is_domain_valid(domain)
        # NOTE(review): this repeats the domain check although the variable
        # name suggests an IP check was intended. Kept as is because no IP
        # validation method is visible from here; to be confirmed.
        ip_status = ExpirationDate().is_domain_valid(domain)

        return bool(domain_status or ip_status)
Exemple #7
0
    def _extensions(self):
        """
        Yield each extension found in the page downloaded from
        :code:`self.iana_url`, along with its referer.

        :return: A generator of :code:`(extension, referer)`.
        """

        upstream_lines = (
            Download(self.iana_url, return_data=True)
            .text()
            .split('<span class="domain tld">')
        )

        # The regex which extracts the extension from the link of a block.
        regex_valid_extension = r"(/domains/root/db/)(.*)(\.html)"

        for block in upstream_lines:
            if "/domains/root/db/" not in block:
                # The link is not in the block.
                continue

            groups = Regex(
                block, regex_valid_extension, return_data=True, rematch=True
            ).match()

            if not groups:
                # The link prefix is present but the whole pattern did not
                # match. We skip the block instead of indexing the result.
                continue

            matched = groups[1]

            if matched:
                # The extraction is not empty or None.

                # We yield the matched extension and its referer.
                yield (matched, self._referer(matched))
Exemple #8
0
    def _is_to_ignore(cls, line):
        """
        Tell whether the given line has to be ignored.

        A line is ignored as soon as it matches one of the ignore patterns,
        i.e. when it starts with one of:
        ``!``, ``@@``, ``/``, ``[``, ``.``, ``-``, ``_``, ``?`` or ``&``.

        :param str line: The line from the file.

        :return: The result of the check.
        :rtype: bool
        """

        # The list of regex which flag a line as ignored.
        ignore_patterns = [r"(^!|^@@|^\/|^\[|^\.|^-|^_|^\?|^&)"]  # , r"(\$|,)(image)"]

        return any(
            Regex(line, pattern, return_data=False).match()
            for pattern in ignore_patterns
        )
Exemple #9
0
    def _handle_options(self, options):
        """
        Process the options of a rule.

        :param options: The list of options from the rule.
        :type options: list

        :return:
            In aggressive mode, the list of domains declared under the
            :code:`domain=` options, without the empty entries and those
            starting with :code:`~`.
            Otherwise, :code:`True` as soon as a :code:`domain=` option with
            a value is found, or an empty list if there is none.
        :rtype: list|bool
        """

        collected = []

        # The regex which extracts what is listed under the option domain=
        domain_option_pattern = r"domain=(.*)"

        for option in options:
            try:
                domains = Regex(
                    option,
                    domain_option_pattern,
                    return_data=True,
                    rematch=True,
                    group=0,
                ).match()[-1]

                if not domains:
                    # Nothing could be extracted from this option.
                    continue

                if not self.aggressive:
                    # Outside the aggressive mode, we only report that a
                    # domain option exists.
                    return True

                collected.extend(  # pragma: no cover
                    [
                        domain
                        for domain in domains.split("|")
                        if domain and not domain.startswith("~")
                    ]
                )
            except TypeError:
                # The result of the match could not be indexed.
                pass

        return collected
Exemple #10
0
    def __blogspot(self):
        """
        Apply the blogspot SPECIAL rule.

        The home page of the subject is fetched and searched for markers.
        The listed network and decoding errors are ignored.

        :return:
            The output of :code:`self.__special_down()` when a marker is
            found, :code:`None` otherwise.
        :rtype: tuple|None

        :raise ValueError: When the subject type is not registered.
        """

        # The URL to fetch depends on the type of the subject.
        if self.subject_type in ["domain", "file_domain"]:
            url_to_get = "http://%s" % self.subject
        elif self.subject_type in ["url", "file_url"]:
            url_to_get = self.subject
        else:
            raise ValueError("Given subject type not registered.")

        # The elements which, once found in the HTML, tell us more about the
        # status of the subject.
        markers = ("create-blog.g?", "87065", "doesn&#8217;t&nbsp;exist")

        try:
            # We get the HTML of the home page.
            response = PyFunceble.requests.get(url_to_get, headers=self.headers)

            for marker in markers:
                html = response.text

                # Either the marker is literally in the document, or
                # something in the document matches it as a regex.
                if marker in html or Regex(
                    html, marker, return_data=False, escape=False
                ).match():
                    return self.__special_down()
        except (
            PyFunceble.requests.exceptions.InvalidURL,
            PyFunceble.socket.timeout,
            PyFunceble.requests.exceptions.Timeout,
            PyFunceble.requests.ConnectionError,
            urllib3_exceptions.InvalidHeader,
            UnicodeDecodeError,  # The probability that this happens in production is minimal.
        ):
            pass

        # There is no change to apply.
        return None
Exemple #11
0
    def test_replace(self):
        """
        Test Regex.replace() with and without a replacement.
        """

        pattern = "th"

        # Case: a replacement is given.
        with_replacement = "Hello, htis is Fun Ilrys. I just wanted to know how htings goes around hte tests."  # pylint: disable=line-too-long
        replaced = Regex(self.data, pattern, replace_with="ht").replace()

        self.assertEqual(with_replacement, replaced)

        # Case: no replace_with is given, the data is expected back unchanged.
        untouched = Regex(self.data, pattern).replace()

        self.assertEqual(self.data, untouched)
Exemple #12
0
    def test_match_get_group(self):
        """
        Tests Regex.match() when a specific group is requested.
        """

        # Group 0 of a pattern without capture group.
        self.assertEqual("e", Regex("e").match(self.data, group=0))

        # Group 1 of a pattern with two capture groups.
        two_words = r"([a-z]{1,})\s([a-z]{1,})\s"

        self.assertEqual("this", Regex(two_words).match(self.data, group=1))
Exemple #13
0
    def test_replace(self):
        """
        Test Regex.replace() when a replacement is given.
        """

        wanted = "Hello, htis is Fun Ilrys. I just wanted to know how htings goes around hte tests."  # pylint: disable=line-too-long
        replaced = Regex(self.data, "th", replace_with="ht").replace()

        self.assertEqual(wanted, replaced)
Exemple #14
0
    def test_not_matching_list(self):
        """
        Test Regex.not_matching_list() against the shared list of strings.
        """

        wanted = ["hello", "world", "PyFunceble"]
        not_matching = Regex(self.data_list, "fun").not_matching_list()

        self.assertEqual(wanted, not_matching)
Exemple #15
0
    def test_replace(self):
        """
        Tests Regex.replace_match() when a replacement is given.
        """

        wanted = "Hello, htis is Fun Ilrys. I just wanted to know how htings goes around hte tests."  # pylint: disable=line-too-long
        replaced = Regex("th").replace_match(self.data, "ht")

        self.assertEqual(wanted, replaced)
Exemple #16
0
    def test_matching_list(self):
        """
        Test Regex.matching_list() against the shared list of strings.
        """

        wanted = ["funilrys", "funceble", "pyfunceble"]
        matching = Regex(self.data_list, "fun").matching_list()

        self.assertEqual(wanted, matching)
Exemple #17
0
    def test_match(self):
        """
        Test Regex.match() for the rematch case and for the case that a
        specific group is requested.
        """

        # The rematch case.
        two_words = r"([a-z]{1,})\s([a-z]{1,})\s"
        rematched = Regex(self.data, two_words, rematch=True, group=1).match()

        self.assertEqual("is", rematched)

        # The group case.
        grouped = Regex(self.data, "e", group=0).match()

        self.assertEqual("e", grouped)
Exemple #18
0
        def __blogspot(self):
            """
            Apply the blogspot SPECIAL rule.

            The home page of the element we are testing is fetched and
            searched for markers.

            :return:
                The output of :code:`self.__special_down()` when a marker is
                found, :code:`None` otherwise.
            :rtype: tuple|None

            :raise NotImplementedError: When the test type is not handled.
            """

            test_type = PyFunceble.INTERN["to_test_type"]

            # The URL to fetch depends on what we are testing.
            if test_type == "domain":
                url_to_get = "http://%s" % PyFunceble.INTERN["to_test"]
            elif test_type == "url":
                url_to_get = PyFunceble.INTERN["to_test"]
            else:
                raise NotImplementedError(
                    "to_test_type not implemented: `{}`".format(test_type)
                )

            # We get the HTML of the home page.
            response = requests.get(url_to_get, headers=self.headers)

            # The elements which, once found in the HTML, tell us more about
            # the status of the domain.
            for marker in ("create-blog.g?", "87065", "doesn&#8217;t&nbsp;exist"):
                html = response.text

                # Either the marker is literally in the document, or
                # something in the document matches it as a regex.
                if marker in html or Regex(
                    html, marker, return_data=False, escape=False
                ).match():
                    return self.__special_down()

            # There is no change to apply.
            return None
Exemple #19
0
        def __handle_potentially_inactive(self, previous_state):
            """
            Process an element whose HTTP status code is listed as
            potentially down.

            :param previous_state: The previously catched status.
            :type previous_state: str

            :return:
                The first non :code:`None` output of the functions associated
                with the regexes matching the tested element, or :code:`None`
                if there is no change to apply.
            :rtype: tuple|None
            """

            if not PyFunceble.HTTP_CODE["active"]:
                # The http status request is not activated.
                return None

            if (
                PyFunceble.INTERN["http_code"]
                not in PyFunceble.HTTP_CODE["list"]["potentially_down"]
            ):
                # The extracted status code is not a potentially down one.
                return None

            # We generate the analytics files.
            Generate(domain_status=previous_state).analytic_file("potentially_down")

            if PyFunceble.CONFIGURATION["no_special"]:
                # We are not authorized to play with the SPECIAL rules.
                return None

            handlers = self.regexes_active_to_inactive_potentially_down

            for pattern in handlers:
                matches = Regex(
                    data=PyFunceble.INTERN["to_test"],
                    regex=pattern,
                    return_data=False,
                    escape=False,
                ).match()

                if not matches:
                    continue

                # We call the function associated with the matched regex.
                outcome = handlers[pattern]()

                if outcome is not None:
                    # We return the new source and state.
                    return outcome

            # There is no change to apply.
            return None
Exemple #20
0
    def test_replace_no_replace_with(self):
        """
        Test Regex.replace() when no replace_with is given: the data is
        expected back unchanged.
        """

        untouched = Regex(self.data, "th").replace()

        self.assertEqual(self.data, untouched)
Exemple #21
0
    def test_match_group(self):
        """
        Test Regex.match() when a specific group is requested.
        """

        grouped = Regex(self.data, "e", group=0).match()

        self.assertEqual("e", grouped)
Exemple #22
0
    def test_match_rematch(self):
        """
        Test Regex.match() when the different groups have to be rematched.
        """

        two_words = r"([a-z]{1,})\s([a-z]{1,})\s"
        rematched = Regex(self.data, two_words, rematch=True, group=1).match()

        self.assertEqual("is", rematched)
Exemple #23
0
    def test_replace_no_replacement(self):
        """
        Tests Regex.replace_match() when the replacement is not given: the
        data is expected back unchanged.
        """

        untouched = Regex("th").replace_match(self.data, None)

        self.assertEqual(self.data, untouched)
Exemple #24
0
    def test_matching_list(self):
        """
        Tests the method which gives the list of matching strings from a
        given list of strings.
        """

        wanted = ["funilrys", "funceble", "pyfunceble"]
        matching = Regex("fun").get_matching_list(self.data_list)

        self.assertEqual(wanted, matching)
Exemple #25
0
    def test_not_matching_list(self):
        """
        Tests the method which gives the list of non matching strings from
        a given list of strings.
        """

        wanted = ["hello", "world", "PyFunceble"]
        not_matching = Regex("fun").get_not_matching_list(self.data_list)

        self.assertEqual(wanted, not_matching)
Exemple #26
0
    def bypass(cls):
        """
        Trigger the bypass auto-saving when `[PyFunceble skip]` is matched
        into the latest commit message.

        Nothing is done when the :code:`travis` configuration index is not
        activated.
        """

        if not PyFunceble.CONFIGURATION["travis"]:
            return

        # We get the latest commit.
        latest_commit = Command("git log -1").execute()

        if Regex(latest_commit, r"\[PyFunceble\sskip\]", return_data=False).match():
            AutoSave(True, is_bypass=True)
    def restore(self):
        """
        Restore the 'output/' directory structure based on the `dir_structure.json` file.

        For every directory of the structure returned by
        :code:`self._get_structure()`, the directory is created and each of
        its files is either renamed (when the current file already has the
        expected SHA-512) or deleted and written again with the saved content.
        Whether the :code:`gitignore` or the :code:`keep` flavour of the file
        name is kept depends on :code:`self._restore_replace()`.
        """

        structure = self._get_structure()

        # Only the first top-level key is used: it acts as the parent directory.
        list_of_key = list(structure.keys())
        structure = structure[list_of_key[0]]
        parent_path = list_of_key[0] + directory_separator

        for directory in structure:
            base = self.base + parent_path + directory + directory_separator

            self._create_directory(base)

            for file in structure[directory]:
                file_path = base + file

                content_to_write = structure[directory][file]["content"]
                online_sha = structure[directory][file]["sha512"]

                # The "@@@" placeholder is replaced with the "\n" escape sequence
                # (presumably expanded into a real new line by Regex.replace()
                # -- to confirm).
                content_to_write = Regex(content_to_write,
                                         "@@@",
                                         escape=True,
                                         replace_with="\\n").replace()

                # The two flavours of the same file name.
                git_to_keep = file_path.replace("gitignore", "keep")
                keep_to_git = file_path.replace("keep", "gitignore")

                if self._restore_replace():
                    # The "keep" flavour is the one we end up with.
                    if path.isfile(file_path) and Hash(
                            file_path, "sha512", True).get() == online_sha:
                        # The content is already the expected one: renaming is enough.
                        rename(file_path, git_to_keep)
                        write = False
                    else:
                        File(file_path).delete()
                        file_path = git_to_keep
                        write = True
                else:
                    # The "gitignore" flavour is the one we end up with.
                    # NOTE(review): the existence check is made on keep_to_git
                    # while the hash and the rename use file_path; this differs
                    # from the branch above -- confirm it is intended.
                    if path.isfile(keep_to_git) and Hash(
                            file_path, "sha512", True).get() == online_sha:
                        rename(file_path, keep_to_git)
                        write = False
                    else:
                        File(keep_to_git).delete()
                        file_path = keep_to_git
                        write = True

                if write:
                    # We (re)create the file with the saved content.
                    File(file_path).write(content_to_write + "\n", True)
Exemple #28
0
    def up_status_file(self):
        """
        Logic behind the up status when generating the status file.

        When the HTTP status code check is active, the SPECIAL rules get a
        chance to flag the tested element as down. Unless one of them did,
        the element is flagged as up.
        """

        if not self.expiration_date:
            self.expiration_date = "Unknown"

        if PyFunceble.HTTP_CODE["active"]:
            http_code = PyFunceble.CONFIGURATION["http_code"]
            code_lists = PyFunceble.HTTP_CODE["list"]

            if http_code in code_lists["potentially_down"]:
                self._analytic_file("potentially_down", self.domain_status)

                # Elements under those domains are considered as down.
                special_suffixes = (
                    ".canalblog.com",
                    ".doubleclick.net",
                    ".liveadvert.com",
                    ".skyrock.com",
                    ".tumblr.com",
                )

                for suffix in special_suffixes:
                    is_special = Regex(
                        self.tested, suffix, return_data=False, escape=True
                    ).match()

                    if is_special:
                        self.source = "SPECIAL"
                        self.domain_status = PyFunceble.STATUS["official"]["down"]
                        self.output = (
                            self.output_parent_dir
                            + PyFunceble.OUTPUTS["splited"]["directory"]
                            + self.domain_status
                        )

                self.special_blogspot()
            elif http_code in code_lists["potentially_up"]:
                self.special_blogspot()
                self.special_wordpress_com()

        if self.source != "SPECIAL":
            # No SPECIAL rule was applied: the element is up.
            self.domain_status = PyFunceble.STATUS["official"]["up"]
            self.output = (
                self.output_parent_dir
                + PyFunceble.OUTPUTS["splited"]["directory"]
                + self.domain_status
            )
Exemple #29
0
    def adblock_decode(self, list_to_test):
        """
        Convert the adblock format into a readable format which is understood
        by the system.

        Argument:
            - list_to_test: list
                The read content of the given file.

        Returns: list
            The list of domain to test.
        """

        decoded = []

        # Captures what follows `||` up to the first `/`, `$` or `^`.
        double_pipe_pattern = r"^(?:.*\|\|)([^\/\$\^]{1,}).*$"
        # Captures what contains a dot and precedes one or more `#`.
        before_hash_pattern = r"(.*\..*)(?:#{1,}.*)"

        for line in list_to_test:
            double_pipe_match = Regex(
                line, double_pipe_pattern, return_data=True, rematch=True, group=0
            ).match()

            before_hash_match = Regex(
                line, before_hash_pattern, return_data=True, rematch=True, group=0
            ).match()

            if double_pipe_match:
                decoded.extend(double_pipe_match)

            if before_hash_match:
                # Those results need an extra decoding and formatting step.
                formatted = List(
                    self._format_adblock_decoded(before_hash_match)
                ).format()
                decoded.extend(formatted)

        return decoded
Exemple #30
0
    def special_blogspot(self):
        """
        Apply the SPECIAL rule dedicated to blogspot subdomains.

        When the tested element contains :code:`.blogspot.`, its home page is
        fetched on port 80. If one of the known markers is found in the
        returned HTML, the source, the status and the output are updated.
        """

        is_blogspot = Regex(
            self.tested, ".blogspot.", return_data=False, escape=True
        ).match()

        if not is_blogspot:
            # Not a blogspot subdomain: there is nothing to do.
            return

        response = requests.get("http://%s:80" % self.tested)

        # Markers which, once found in the HTML, tell us that the element is down.
        for marker in ("create-blog.g?", "87065", "doesn&#8217;t&nbsp;exist"):
            html = response.text

            found = marker in html or Regex(
                html, marker, return_data=False, escape=False
            ).match()

            if not found:
                continue

            self.source = "SPECIAL"
            self.domain_status = PyFunceble.STATUS["official"]["down"]
            self.output = (
                self.output_parent_dir
                + PyFunceble.OUTPUTS["splited"]["directory"]
                + self.domain_status
            )

            # One marker is enough.
            break