Esempio n. 1
0
    def test_standard_numeric_sorting(self):
        """
        Check that the standard sort places numeric labels in numeric order.

        The scenario is exercised twice: once with bare domains and once
        with the same domains prefixed with ``http://``.
        """

        # Deliberately shuffled input: 11 and 12 are listed before 10.
        unsorted_domains = [
            "1.example.com",
            "2.example.com",
            "3.example.com",
            "11.example.com",
            "12.example.com",
            "10.example.com",
        ]
        sorted_domains = [
            "1.example.com",
            "2.example.com",
            "3.example.com",
            "10.example.com",
            "11.example.com",
            "12.example.com",
        ]

        # The URL variants are derived before anything gets sorted.
        unsorted_urls = [f"http://{domain}" for domain in unsorted_domains]
        sorted_urls = [f"http://{domain}" for domain in sorted_domains]

        self.assertEqual(
            sorted_domains, List(unsorted_domains).custom_format(Sort.standard)
        )

        self.assertEqual(
            sorted_urls, List(unsorted_urls).custom_format(Sort.standard)
        )
Esempio n. 2
0
    def _json_print(self):  # pragma: no cover
        """
        Save :code:`self.data_to_print` as JSON into :code:`self.output`.

        When the output file already exists, the content read through
        :code:`self.file_output_instance` is extended with the data to
        print, sorted, and saved. Otherwise the data to print is saved as is.

        :raise Exception:
            When :code:`self.output` is empty or when the existing content
            is not a list.
        """

        if not self.output:
            # Without a destination there is nothing we can write to.
            raise Exception("Empty output given.")

        if not PyFunceble.path.isfile(self.output):
            # First write into this output.
            #
            # Note: self.data_to_print is saved untouched; this method is
            # not supposed to be called when it is not a formatted list.
            Dict(self.data_to_print).to_json(self.output)
            return

        # The output already exists: we load what was saved previously.
        existing = Dict().from_json(self.file_output_instance.read())

        if not isinstance(existing, list):
            # We can only extend a list.
            raise Exception("Output not correctly formatted.")

        existing.extend(self.data_to_print)

        # The standard sort is always applied first ...
        merged = List(existing).custom_format(Sort.standard)

        if PyFunceble.CONFIGURATION["hierarchical_sorting"]:
            # ... and the hierarchical one on top of it when activated.
            merged = List(merged).custom_format(Sort.hierarchical)

        Dict(merged).to_json(self.output)
Esempio n. 3
0
    def test_merge(self):
        """
        Check List().merge() with its default behavior and with its second
        positional argument set to False.
        """

        # Each scenario is: (positional arguments of merge(), expected result).
        # The scenarios are run in order against a List built from
        # self.main_list.
        scenarios = [
            (
                (["hello", "world", 5, {"world": "hello"}],),
                ["hello", "world", 5, {"hello": "world", "world": "hello"}],
            ),
            (
                (["hello", "world", 5, {"world": "hello"}], False),
                [
                    "hello",
                    "world",
                    5,
                    {"hello": "world"},
                    [1, 2, 3],
                    {"world": "hello"},
                ],
            ),
            (
                (["hello", "world", 5, {"hello": "you!"}, [1, 2, 4, 5]],),
                ["hello", "world", 5, {"hello": "you!"}, [1, 2, 4, 5]],
            ),
            (
                (["hello", "world", 5, {"hello": "you!"}, [1, 2, 4, 5]], False),
                [
                    "hello",
                    "world",
                    5,
                    {"hello": "world"},
                    [1, 2, 3],
                    {"hello": "you!"},
                    [1, 2, 4, 5],
                ],
            ),
        ]

        for arguments, expected in scenarios:
            actual = List(self.main_list).merge(*arguments)
            self.assertEqual(expected, actual)
Esempio n. 4
0
    def __sort_generated_files(cls):
        """
        Sort the content of the files found under the output directory.

        Files ending with :code:`.json` as well as :code:`.keep` and
        :code:`.gitignore` are skipped. For every other file, the first
        three lines stay in place and the remaining lines are sorted
        (hierarchically when the hierarchical sorting is activated,
        standardly otherwise) before the file is overwritten.
        """

        output_root = (
            PyFunceble.OUTPUT_DIRECTORY + PyFunceble.OUTPUTS["parent_directory"]
        )

        for root, _, files in PyFunceble.walk(output_root):
            for file in files:
                if file.endswith(".json") or file in [".keep", ".gitignore"]:
                    # Not a file we have to sort.
                    continue

                target = File(
                    "{0}{1}{2}".format(root, PyFunceble.directory_separator, file)
                )
                lines = target.read().splitlines()

                # The first three lines are never reordered.
                header, body = lines[:3], lines[3:]

                if PyFunceble.CONFIGURATION["hierarchical_sorting"]:
                    body = List(body).custom_format(Sort.hierarchical)
                else:
                    body = List(body).custom_format(Sort.standard)

                target.write("\n".join(header + body), overwrite=True)
Esempio n. 5
0
    def file(self):
        """
        Manage the case that need to test each domain of a given file path.
        Note: 1 domain per line.

        The list extracted from the file is decoded (adblock mode) or
        formatted, handed to :code:`PyFunceble.Clean`, extended with the
        :code:`to_test` entries of the inactive database (when activated),
        stripped from well-known local names and optionally filtered.
        Every element located after the "tested" counter is then given to
        :code:`self.domain` along with the last element of the list.

        When nothing is left to test, a message is printed and nothing is
        tested (previously this case crashed with an :code:`IndexError`).
        """

        list_to_test = self._extract_domain_from_file()

        AutoContinue().restore()

        if PyFunceble.CONFIGURATION["adblock"]:
            list_to_test = self.adblock_decode(list_to_test)
        else:
            list_to_test = list(map(self._format_domain, list_to_test))

        PyFunceble.Clean(list_to_test)

        if PyFunceble.CONFIGURATION["inactive_database"]:
            Database().to_test()

            # Read only after Database().to_test() so that we look at the
            # state of the configuration it leaves behind.
            inactive_db = PyFunceble.CONFIGURATION["inactive_db"]
            file_to_test = PyFunceble.CONFIGURATION["file_to_test"]

            if (
                file_to_test in inactive_db
                and "to_test" in inactive_db[file_to_test]
                and inactive_db[file_to_test]["to_test"]
            ):
                list_to_test.extend(inactive_db[file_to_test]["to_test"])

        regex_delete = r"localhost$|localdomain$|local$|broadcasthost$|0\.0\.0\.0$|allhosts$|allnodes$|allrouters$|localnet$|loopback$|mcastprefix$"  # pylint: disable=line-too-long

        list_to_test = List(
            Regex(list_to_test, regex_delete).not_matching_list()
        ).format()

        if PyFunceble.CONFIGURATION["filter"]:
            list_to_test = List(
                Regex(
                    list_to_test, PyFunceble.CONFIGURATION["filter"], escape=True
                ).matching_list()
            ).format()

        if not list_to_test:
            # Indexing the last element of an empty list raises IndexError,
            # so we stop here instead of crashing.
            print("Nothing to test.")
            return

        last_domain = list_to_test[-1]
        already_tested = PyFunceble.CONFIGURATION["counter"]["number"]["tested"]

        for domain in list_to_test[already_tested:]:
            self.domain(domain, last_domain)
Esempio n. 6
0
    def list_of_mined(cls):
        """
        Provide the list of mined so they can be added to the list
        queue.

        An empty list is given when the mining is deactivated or when the
        file under test has no entry in the mined database.

        :return: The list of mined domains or URL.
        :rtype: list
        """

        if not PyFunceble.CONFIGURATION["mining"]:
            # The mining is deactivated.
            return []

        tested_file = PyFunceble.INTERN["file_to_test"]
        mined_database = PyFunceble.INTERN["mined"]

        if tested_file not in mined_database:
            # Nothing was mined for the file we are testing.
            return []

        mined_for_file = mined_database[tested_file]
        collected = []

        for index in mined_for_file:
            # We gather what is stored under each index.
            collected.extend(mined_for_file[index])

        # We format what we gathered before giving it back.
        return List(collected).format()
Esempio n. 7
0
    def file_url(self):
        """
        Manage the case that we have to test a file

        .. note::
            1 URL per line.

        The URLs found in the flattened inactive database are removed
        from the list to test; if that leaves nothing, the unfiltered list
        (from the "tested" counter onwards) is used instead.

        :return:
            The results of :code:`self.url` for each non-empty element, or
            an empty list when there is nothing to test.
        :rtype: list
        """

        # We get, format, clean the list of URL to test.
        list_to_test = self._file_list_to_test_filtering()

        # We keep a reference to the state of the list before the filtering.
        not_filtered = list_to_test

        already_tested = PyFunceble.INTERN["counter"]["number"]["tested"]

        try:
            # We remove the element which are in the database from the
            # current list to test.
            list_to_test = List(
                list(
                    set(list_to_test[already_tested:])
                    - set(PyFunceble.INTERN["flatten_inactive_db"])
                )
            ).format()

            # Indexing raises IndexError when the filtering left nothing.
            _ = list_to_test[-1]
        except IndexError:
            # Our list to test is the one with the element from the database.
            list_to_test = not_filtered[already_tested:]

        if PyFunceble.CONFIGURATION["hierarchical_sorting"]:
            # The hierarchical sorting is desired by the user.
            list_to_test = List(list(list_to_test)).custom_format(Sort.hierarchical)

        if not list_to_test:
            # A comprehension over an empty list never evaluates
            # list_to_test[-1], so the former `except IndexError` could not
            # report this case; we check it explicitly instead.
            print(PyFunceble.Fore.CYAN + PyFunceble.Style.BRIGHT + "Nothing to test.")
            return []

        last_url = list_to_test[-1]

        # Errors raised by self.url are intentionally not caught here: an
        # IndexError coming from a test does not mean "nothing to test".
        return [self.url(url, last_url) for url in list_to_test if url]
Esempio n. 8
0
    def _get_list_to_of_subjects_to_test_from_file(
            self, file_object):  # pragma: no cover
        """
        Give a file object, we construct/get the list of subject to test.

        The formatted (or adblock decoded) subjects are stripped from what
        the autocontinue and inactive database subsystems report as already
        tested and from what the inactive database wants to retest. If
        nothing is left, all formatted subjects are used. Outside of the
        multiprocess mode, the subjects are sorted. The subjects to retest
        are chained after the subjects to test.
        """

        to_retest_inactive_db = self.inactive_db.get_to_retest()
        multiprocess = PyFunceble.CONFIGURATION["multiprocess"]

        if multiprocess:
            # The pool is opened in both cases, even though only the non
            # adblock case makes use of it.
            with Pool(PyFunceble.CONFIGURATION["maximal_processes"]) as pool:
                if PyFunceble.CONFIGURATION["adblock"]:
                    formatted_subjects = set(AdBlock(file_object).decode())
                else:
                    formatted_subjects = set(
                        pool.map(self._format_line, file_object))
        elif PyFunceble.CONFIGURATION["adblock"]:
            formatted_subjects = set(AdBlock(file_object).decode())
        else:
            formatted_subjects = set(map(self._format_line, file_object))

        # We remove, step by step, everything we do not have to test again.
        remaining = formatted_subjects - self.autocontinue.get_already_tested()
        remaining = remaining - self.inactive_db.get_already_tested()
        remaining = remaining - to_retest_inactive_db

        # Nothing left means that we start over with everything.
        subjects_to_test = list(remaining if remaining else formatted_subjects)

        if not multiprocess:
            if PyFunceble.CONFIGURATION["hierarchical_sorting"]:
                subjects_to_test = List(subjects_to_test).custom_format(
                    Sort.hierarchical)
            else:
                subjects_to_test = List(subjects_to_test).custom_format(
                    Sort.standard)

        return chain(subjects_to_test, to_retest_inactive_db)
Esempio n. 9
0
    def test_hierarchical_sorting(self):
        """
        Check the hierarchical sort of self.data_list and of its
        ``https://`` counterpart, self.data_url_list.
        """

        ordered_domains = [
            "google.com",
            "adservice.google.com",
            "hello_world.google.com",
            "s0-2mdn-net.l.google.com",
            "ssl-google-analytics.l.google.com",
            "www-google-analytics.l.google.com",
            "googleadservices.com",
            "pagead2.googleadservices.com",
            "partner.googleadservices.com",
            "www.googleadservices.com",
            "google-analytics.com",
            "ssl.google-analytics.com",
            "www.google-analytics.com",
            "chart.googleapis.com",
            "ad-creatives-public.commondatastorage.googleapis.com",
            "imasdk.googleapis.com",
            "ade.googlesyndication.com",
            "pagead2.googlesyndication.com",
            "tpc.googlesyndication.com",
            "www.googletagmanager.com",
            "www.googletagservices.com",
            "redirector.googlevideo.com",
            "0.gravatar.com",
            "1.gravatar.com",
            "hello",
        ]

        self.assertEqual(
            ordered_domains, List(self.data_list).custom_format(Sort.hierarchical)
        )

        # The URL expectation is the domain expectation behind a scheme.
        ordered_urls = [f"https://{domain}" for domain in ordered_domains]

        self.assertEqual(
            ordered_urls, List(self.data_url_list).custom_format(Sort.hierarchical)
        )
Esempio n. 10
0
    def file(self):
        """
        Manage the case that need to test each domain of a given file path.

        .. note::
            1 domain per line.

        When the IDNA conversion is activated, the list is converted and
        then sorted again (hierarchically or standardly, depending on the
        configuration). Every element located after the "tested" counter
        is given to :code:`self.domain` along with the last element of
        the list.

        When the list to test is empty, "Nothing to test." is printed.
        Exceptions raised by :code:`self.domain` are not caught.
        """

        # We get, format, filter, clean the list to test.
        list_to_test = self._file_list_to_test_filtering()

        if PyFunceble.CONFIGURATION["idna_conversion"]:
            # We have to convert domains to idna.
            list_to_test = domain2idna(list_to_test)

            if PyFunceble.CONFIGURATION["hierarchical_sorting"]:
                # The hierarchical sorting is desired by the user.
                list_to_test = List(list_to_test).custom_format(Sort.hierarchical)
            else:
                # The hierarchical sorting is not desired by the user.
                list_to_test = List(list_to_test).custom_format(Sort.standard)

        try:
            # Only this lookup is guarded: it is the one operation which
            # tells us that the list is empty. Guarding the tests as well
            # would report any IndexError they raise as "Nothing to test.".
            last_subject = list_to_test[-1]
        except IndexError:
            # We print a message on screen.
            print(PyFunceble.Fore.CYAN + PyFunceble.Style.BRIGHT + "Nothing to test.")
            return

        already_tested = PyFunceble.CONFIGURATION["counter"]["number"]["tested"]

        # We test each element of the list to test.
        for subject in list_to_test[already_tested:]:
            self.domain(subject, last_subject)
Esempio n. 11
0
    def _add_to_test(self, to_add):
        """
        Add an element or a list of element into
        :code:`PyFunceble.INTERN['inactive_db'][PyFunceble.INTERN["file_to_test"]]['to_test']`.

        Nothing happens when the inactive database is deactivated.
        Otherwise the :code:`to_test` index is created or extended,
        formatted, and the database is backed up.

        :param to_add: The domain, IP or URL to add.
        :type to_add: str|list
        """

        if not PyFunceble.CONFIGURATION["inactive_database"]:
            # The database subsystem is deactivated.
            return

        if not isinstance(to_add, list):
            # We always work with a list.
            to_add = [to_add]

        database = PyFunceble.INTERN["inactive_db"]
        tested_file = PyFunceble.INTERN["file_to_test"]

        if tested_file not in database:
            # The file we are testing is not into the database:
            # we add it along with its to_test information.
            database.update({tested_file: {"to_test": to_add}})
        elif "to_test" in database[tested_file]:
            # The `to_test` index already exists: we extend it.
            database[tested_file]["to_test"].extend(to_add)
        else:
            # The `to_test` index does not exist yet: we initiate it.
            database[tested_file]["to_test"] = to_add

        # We format the list to test in order to avoid duplicate.
        database[tested_file]["to_test"] = List(
            database[tested_file]["to_test"]).format()

        # And we finally backup the database.
        self._backup()
Esempio n. 12
0
    def _add(self, to_add):
        """
        Add the currently mined information to the
        mined "database".

        Nothing happens when the mining is deactivated. Otherwise every
        index of the given element is created or extended in the database
        of the file under test, formatted, and everything is backed up.

        :param to_add: The element to add.
        :type to_add: dict
        """

        if not PyFunceble.CONFIGURATION["mining"]:
            # The mining is deactivated.
            return

        tested_file = PyFunceble.INTERN["file_to_test"]
        mined_database = PyFunceble.INTERN["mined"]

        if tested_file not in mined_database:
            # Our tested file path is not into our mined database yet.
            mined_database[tested_file] = {}

        mined_for_file = mined_database[tested_file]

        for index in to_add:
            if index in mined_for_file:
                # The index is already known: we extend it.
                mined_for_file[index].extend(to_add[index])
            else:
                # The index is not known yet: we initiate it.
                mined_for_file[index] = to_add[index]

            # We format the added information in order to avoid duplicate.
            mined_for_file[index] = List(mined_for_file[index]).format()

        # We backup everything.
        self._backup()
Esempio n. 13
0
    def _extensions(self, line):
        """
        Extract the extension from the given line and save the line
        under that extension into :code:`self.public_suffix_db`.

        Lines starting with :code:`//` and lines without any point are
        ignored.

        :param line: The line from the official public suffix repository.
        :type line: str
        """

        entry = line.strip()

        if entry.startswith("//") or "." not in entry:
            # Commented line or line without any point: nothing to save.
            return

        entry = entry.encode("idna").decode("utf-8")

        if entry.startswith("*."):
            # We drop the leading wildcard.
            entry = entry[2:]

        # The extension is whatever comes after the last point.
        extension = entry.split(".")[-1]

        if extension not in self.public_suffix_db:
            # First suffix we meet for this extension.
            self.public_suffix_db.update({extension: [entry]})
            return

        # The extension is already known: we add the suffix and format
        # the result in order to avoid duplicate.
        self.public_suffix_db[extension] = List(
            self.public_suffix_db[extension] + [entry]).format()
Esempio n. 14
0
    def __generate_complements(self):  # pragma: no cover
        """
        Generate the complements from the given list of tested.

        For each selected domain, the variant without :code:`www.` (if it
        starts with it) and the variant with :code:`www.` (if it does not)
        are generated. What :code:`self.get_already_tested()` gives is
        then removed from the formatted result.

        NOTE(review): each item yielded by get_already_tested() is unpacked
        into two values here while the same call is used as the right-hand
        side of a set difference below - confirm its return type.
        """

        candidates = []

        for _, subjects in self.get_already_tested():
            for subject in subjects:
                if PyFunceble.Check(subject).is_subdomain():
                    # We only work with domains which are not subdomains.
                    continue

                if PyFunceble.Check(subject).is_domain():
                    candidates.append(subject)

        # We generate the one without "www." if "www." is given.
        without_www = [
            subject[4:] for subject in candidates if subject.startswith("www.")
        ]
        candidates += without_www

        # We generate the one with "www." if "www." is not given.
        with_www = [
            "www.{0}".format(subject)
            for subject in candidates
            if not subject.startswith("www.")
        ]
        candidates += with_www

        # We remove the already tested subjects.
        return set(List(candidates).format()) - self.get_already_tested()
Esempio n. 15
0
    def _format_adblock_decoded(cls, to_format, result=None):
        """
        Format the extracted adblock line before passing it to the system.

        Every element is split recursively on the first separator it
        contains among ``#``, ``,``, ``~``, ``!`` and ``|`` (checked in that
        order). Elements without any separator are kept when they are a
        valid domain or a valid IP.

        All elements are processed: an element which contains a separator
        no longer causes the elements which follow it to be dropped.

        Arguments:
            - to_format: list
                The extracted elements to format (the recursive calls
                give the result of :code:`str.split`).
            - result: None or list
                The list of extracted domain.

        Returns: list
            The list of extracted domains. It may contain duplicates.
        """

        if not result:
            result = []

        for data in List(to_format).format():
            if not data:
                continue

            for separator in ("#", ",", "~", "!", "|"):
                if separator in data:
                    # We keep what the recursion extracted and carry on
                    # with the next element instead of returning right
                    # away. The returned list has to be used because an
                    # empty `result` is replaced by a new list in the
                    # recursive call.
                    result = cls._format_adblock_decoded(
                        data.split(separator), result)
                    break
            else:
                # No separator left: we only keep valid domains or IPs.
                if ExpirationDate.is_domain_valid(
                        data) or ExpirationDate.is_ip_valid(data):
                    result.append(data)

        return result
Esempio n. 16
0
    def url_file(self):
        """
        Manage the case that we have to test a file
        Note: 1 URL per line.

        The list extracted from the file is handed to
        :code:`PyFunceble.Clean`, extended with the :code:`to_test` entries
        of the inactive database (when activated) and optionally filtered.
        Every element located after the "tested" counter is then given to
        :code:`self.url` along with the last element of the list.

        When nothing is left to test, a message is printed and nothing is
        tested (previously this case crashed with an :code:`IndexError`).
        """

        list_to_test = self._extract_domain_from_file()

        AutoContinue().restore()

        PyFunceble.Clean(list_to_test)

        if PyFunceble.CONFIGURATION["inactive_database"]:
            Database().to_test()

            # Read only after Database().to_test() so that we look at the
            # state of the configuration it leaves behind.
            inactive_db = PyFunceble.CONFIGURATION["inactive_db"]
            file_to_test = PyFunceble.CONFIGURATION["file_to_test"]

            if (
                file_to_test in inactive_db
                and "to_test" in inactive_db[file_to_test]
                and inactive_db[file_to_test]["to_test"]
            ):
                list_to_test.extend(inactive_db[file_to_test]["to_test"])

        if PyFunceble.CONFIGURATION["filter"]:
            list_to_test = List(
                Regex(
                    list_to_test, PyFunceble.CONFIGURATION["filter"], escape=True
                ).matching_list()
            ).format()

        if not list_to_test:
            # Indexing the last element of an empty list raises IndexError,
            # so we stop here instead of crashing.
            print("Nothing to test.")
            return

        last_url = list_to_test[-1]
        already_tested = PyFunceble.CONFIGURATION["counter"]["number"]["tested"]

        for url in list_to_test[already_tested:]:
            self.url(url, last_url)
Esempio n. 17
0
    def test_hierarchical_numeric_sorting(self):
        """
        Check that the hierarchical sort orders numeric labels by value
        and places each subdomain right after its parent.
        """

        # Deliberately shuffled input: 11 and 12 are listed before 10.
        unsorted_domains = [
            "1.example.com",
            "2.example.com",
            "3.example.com",
            "11.example.com",
            "12.example.com",
            "10.example.com",
            "hello.1.example.com",
            "hello.2.example.com",
            "hello.3.example.com",
            "hello.11.example.com",
            "hello.12.example.com",
            "hello.10.example.com",
        ]
        sorted_domains = [
            "1.example.com",
            "hello.1.example.com",
            "2.example.com",
            "hello.2.example.com",
            "3.example.com",
            "hello.3.example.com",
            "10.example.com",
            "hello.10.example.com",
            "11.example.com",
            "hello.11.example.com",
            "12.example.com",
            "hello.12.example.com",
        ]

        self.assertEqual(
            sorted_domains, List(unsorted_domains).custom_format(Sort.hierarchical)
        )
Esempio n. 18
0
    def adblock_decode(self, list_to_test):
        """
        Convert the adblock format into a readable format which is understood
        by the system.

        Each line is matched against two expressions. What the first one
        extracts is kept as is; what the second one extracts goes through
        :code:`self._format_adblock_decoded` and is formatted before it
        is kept.

        Argument:
            - list_to_test: list
                The read content of the given file.

        Returns: list
            The list of domain to test.
        """

        regex = r"^(?:.*\|\|)([^\/\$\^]{1,}).*$"
        regex_v2 = r"(.*\..*)(?:#{1,}.*)"

        # Both expressions are applied with the very same options.
        match_options = {"return_data": True, "rematch": True, "group": 0}

        extracted = []

        for line in list_to_test:
            first_hits = Regex(line, regex, **match_options).match()
            second_hits = Regex(line, regex_v2, **match_options).match()

            if first_hits:
                extracted.extend(first_hits)

            if second_hits:
                extracted.extend(
                    List(self._format_adblock_decoded(second_hits)).format())

        return extracted
Esempio n. 19
0
    def __get_or_generate_complements_json(self):  # pragma: no cover
        """
        Get or generate the complements while working with
        as JSON formatted database.

        When the :code:`complements` index already exists for the current
        file, its content is given back. Otherwise the complements are
        generated, saved (as a list) into the database and given back
        (as a set).
        """

        if "complements" in self.database[self.filename].keys():
            # We get the complements we still have to test.
            return self.database[self.filename]["complements"]

        # The complements are not saved: we have to generate them.
        candidates = []

        for _, subjects in self.get_already_tested():
            for subject in subjects:
                if PyFunceble.Check(subject).is_subdomain():
                    # We only work with domains which are not subdomains.
                    continue

                if PyFunceble.Check(subject).is_domain():
                    candidates.append(subject)

        # We generate the one without "www." if "www." is given.
        without_www = [
            subject[4:] for subject in candidates if subject.startswith("www.")
        ]
        candidates += without_www

        # We generate the one with "www." if "www." is not given.
        with_www = [
            "www.{0}".format(subject)
            for subject in candidates
            if not subject.startswith("www.")
        ]
        candidates += with_www

        # We remove the already tested subjects.
        complements = set(List(candidates).format()) - self.get_already_tested()

        # We save the constructed list of complements
        self.database[self.filename]["complements"] = list(complements)
        self.save()

        return complements
Esempio n. 20
0
    def _format_decoded(self, to_format, result=None):  # pragma: no cover
        """
        Format the extracted adblock line before passing it to the system.

        Every element is split recursively on the first separator it
        contains among ``^``, ``#``, ``,``, ``!`` and ``|`` (checked in that
        order). For elements without any separator, the base given by
        :code:`self._extract_base` is kept when it is a valid domain or
        IPv4; otherwise its URL base is kept when there is one.

        All elements are processed: an element which contains a separator
        no longer causes the elements which follow it to be dropped.

        :param to_format:
            The extracted elements to format (the recursive calls give the
            result of :code:`str.split`).

        :param list result: A list of the result of this method.

        :return: The list of domains or IP to test. It may contain duplicates.
        :rtype: list
        """

        if not result:
            # The result is not given (or still empty).

            # We set the result as an empty list.
            result = []

        for data in List(to_format).format():
            # We loop through the different elements to format.

            if not data:
                # The currently read element is empty.
                continue

            for separator in ("^", "#", ",", "!", "|"):
                # Caret, hash, comma, exclamation mark, vertical bar.

                if separator in data:
                    # We recall this method with the current result state
                    # and the splited data, then carry on with the next
                    # element instead of returning right away.
                    #
                    # The returned list has to be used because an empty
                    # `result` is replaced by a new list in the recursive
                    # call.
                    result = self._format_decoded(data.split(separator), result)
                    break
            else:
                # There is no separator left in the currently read element.

                data = self._extract_base(data)

                # We create an instance of the checker.
                checker = Check(data)

                if data and (checker.is_domain() or checker.is_ipv4()):
                    # The extracted base is not empty and is a valid
                    # domain or a valid IP.

                    # We append it to the result.
                    result.append(data)
                elif data:
                    # The extracted base is neither a valid domain nor a
                    # valid IP.

                    # We try to get the url base.
                    url_base = checker.is_url(return_base=True)

                    if url_base:
                        # The url_base is not empty or equal to False or None.

                        # We append the url base to the result.
                        result.append(url_base)

        # We return the result element.
        return result
Esempio n. 21
0
    def decode(self):
        """
        Extract the domains to test from the AdBlock formatted lines
        stored into :code:`self.to_format`.

        Each line is checked against three patterns. Whatever is extracted
        from a line is appended to the result in the following order:
        first pattern, fourth pattern, then third pattern.

        :return: The formatted list of extracted elements.
        :rtype: list
        """

        # First pattern: anything followed by `||`, then we capture
        # everything up to the first `/`, `$` or `^`.
        double_bar_regex = r"^(?:.*\|\|)([^\/\$\^]{1,}).*$"

        # Third pattern: one or more `#`, optional lowercase letters, then
        # an `[attribute^="..."]` or `[attribute*="..."]` like selector whose
        # quoted value contains a dot.
        selector_regex = (
            r"(?:#+(?:[a-z]+?)?\[[a-z]+(?:\^|\*)\=(?:\'|\"))(.*\..*)(?:(?:\'|\")\])"
        )

        # Fourth pattern: a whole line enclosed between two `|` which
        # contains a dot.
        single_bar_regex = r"^\|(.*\..*)\|$"

        # Options for which the extracted base is kept right away.
        accepted_options = ("third-party", "script", "popup", "xmlhttprequest")

        def search(subject, pattern):
            """
            Provide the data matched by the given pattern
            into the given subject.
            """

            return Regex(
                subject, pattern, return_data=True, rematch=True, group=0
            ).match()

        extracted = []

        for line in self.to_format:
            # We run the three extractions against the currently read line.
            #
            # Note: The fourth pattern is executed in second position
            # because it is more specific than the others.
            first_match = search(line, double_bar_regex)
            fourth_match = search(line, single_bar_regex)
            third_match = search(line, selector_regex)

            if first_match:
                # The first extraction was successful.

                if self.options_separator not in line:
                    # There is no option attached to the rule:
                    # we keep the extracted elements as they are.
                    extracted.extend(self._extract_base(first_match))
                else:
                    # We get the options, which are located after the last
                    # options separator of the line.
                    options = line.split(self.options_separator)[-1].split(
                        self.option_separator
                    )

                    if not options[-1] or any(
                        option in options for option in accepted_options
                    ):
                        # * The last option is empty.
                        # or
                        # * At least one of the accepted options is given.
                        extracted.extend(self._extract_base(first_match))

                    # We let the options handler tell us if there is
                    # something more to extract.
                    extra = self._handle_options(options)

                    if extra:
                        if isinstance(extra, list):  # pragma: no cover
                            # The handler gave us a list: we add our match
                            # to it and keep the base of all its elements.
                            extra.extend(self._extract_base(first_match))
                            extracted.extend(self._extract_base(extra))
                        else:
                            # The handler gave us something which is not a
                            # list: we only keep the base of our match.
                            extracted.extend(self._extract_base(first_match))

            if fourth_match:
                # The fourth extraction was successful.
                extracted.extend(List(self._format_decoded(fourth_match)).format())

            if third_match:
                # The third extraction was successful.
                extracted.extend(List(self._format_decoded(third_match)).format())

        # We return the formatted version of everything we extracted.
        return List(extracted).format()
Esempio n. 22
0
    def _merge(self):
        """
        Merge the content of the database file into the database
        which is already loaded into :code:`PyFunceble.INTERN["inactive_db"]`.

        Nothing is done when the :code:`inactive_database` configuration
        index is not activated.
        """

        if not PyFunceble.CONFIGURATION["inactive_database"]:
            # The database subsystem is not activated.
            return

        # We read the content of the database file.
        stored = Dict().from_json(File(self.inactive_db_path).read())

        for top_key, stored_entries in stored.items():
            if top_key not in PyFunceble.INTERN["inactive_db"]:
                # The top key is only known by the file:
                # we take its whole content as it is.
                PyFunceble.INTERN["inactive_db"][top_key] = stored_entries
                continue

            # The top key is known on both sides:
            # we have to go through its lower keys.
            loaded_entries = PyFunceble.INTERN["inactive_db"][top_key]

            for low_key, stored_values in stored_entries.items():
                if low_key not in loaded_entries:  # pragma: no cover
                    # The lower key is only known by the file:
                    # we take its content as it is.
                    loaded_entries[low_key] = stored_values
                    continue

                # The lower key is already into the loaded database:
                # we extend it with the content of the file ...
                loaded_entries[low_key].extend(stored_values)

                # ... and we format the result to ensure that there is
                # no duplicate into the database content.
                loaded_entries[low_key] = List(loaded_entries[low_key]).format()
Esempio n. 23
0
    def _file_list_to_test_filtering(self):
        """
        Build, filter and sort the list of elements to test
        from the file we have to test.

        :return: The sorted list of elements to test.
        :rtype: list
        """

        # Everything starts with what we can extract from the file to test.
        list_to_test = self._extract_domain_from_file()

        # The extracted list is shared globally.
        PyFunceble.INTERN["extracted_list_to_test"] = list_to_test

        # The mined elements, if any, are added to the list.
        #
        # Note: this extends the very same list object which was just
        # saved globally.
        mined = Mining().list_of_mined()

        if mined:
            list_to_test.extend(mined)

        # The directory structure is generated.
        DirectoryStructure()

        # The data of the last session are restored, if they exist.
        AutoContinue().restore()

        if not PyFunceble.CONFIGURATION["adblock"]:
            # No adblock decoding: each element is formatted individually.
            list_to_test = [self._format_domain(element) for element in list_to_test]
        else:
            # Adblock decoding: the decoder gives us the list to test.
            list_to_test = AdBlock(list_to_test).decode()

        # The output directory is cleaned if needed.
        PyFunceble.Clean(list_to_test)

        # The start time is set.
        ExecutionTime("start")

        # The database prepares what has to be retested in this session.
        Inactive().to_test()

        if PyFunceble.CONFIGURATION["inactive_database"]:
            # The database subsystem is activated.

            tested_file = PyFunceble.INTERN["file_to_test"]
            inactive_db = PyFunceble.INTERN["inactive_db"]

            if (
                tested_file in inactive_db
                and "to_test" in inactive_db[tested_file]
                and inactive_db[tested_file]["to_test"]
            ):
                # The file we are testing is into the database and its
                # `to_test` index exists and is not empty: its content is
                # added to the list to test.
                list_to_test.extend(inactive_db[tested_file]["to_test"])

        # Elements matching this regex are never tested.
        regex_delete = r"localhost$|localdomain$|local$|broadcasthost$|0\.0\.0\.0$|allhosts$|allnodes$|allrouters$|localnet$|loopback$|mcastprefix$|ip6-mcastprefix$|ip6-localhost$|ip6-loopback$|ip6-allnodes$|ip6-allrouters$|ip6-localnet$"  # pylint: disable=line-too-long

        # The flatten version of the database is loaded.
        PyFunceble.INTERN.update({"flatten_inactive_db": Inactive().content()})

        # We keep a reference to the list as it is before the filtering.
        unfiltered = list_to_test

        try:
            # We drop the undesired elements along with the elements
            # which are already into the database.
            kept = Regex(list_to_test, regex_delete).not_matching_list()
            already_known = PyFunceble.INTERN["flatten_inactive_db"]

            list_to_test = List(list(set(kept) - set(already_known))).format()

            # An empty result raises an IndexError here.
            _ = list_to_test[-1]
        except IndexError:
            # Nothing is left: we only drop the undesired elements and
            # ignore the database.
            list_to_test = List(
                Regex(unfiltered, regex_delete).not_matching_list()
            ).format()

            # The reference is not needed anymore.
            del unfiltered

        if PyFunceble.CONFIGURATION["filter"]:
            # A filter is given: only the elements matching it are kept.
            matching = Regex(
                list_to_test, PyFunceble.CONFIGURATION["filter"], escape=False
            ).matching_list()

            list_to_test = List(matching).format()

        # The standard sorting is always applied.
        list_to_test = List(list(list_to_test)).custom_format(Sort.standard)

        if PyFunceble.CONFIGURATION["hierarchical_sorting"]:
            # The hierarchical sorting is desired by the user.
            list_to_test = List(list(list_to_test)).custom_format(Sort.hierarchical)

        # The final list to test is given back.
        return list_to_test
Esempio n. 24
0
    def test_standard_sorting(self):
        """
        Tests the standard sorting against a list of domains and
        against the same list expressed as URLs.
        """

        expected_domains = [
            "0.gravatar.com",
            "1.gravatar.com",
            "ad-creatives-public.commondatastorage.googleapis.com",
            "ade.googlesyndication.com",
            "adservice.google.com",
            "chart.googleapis.com",
            "googleadservices.com",
            "google-analytics.com",
            "google.com",
            "hello",
            "hello_world.google.com",
            "imasdk.googleapis.com",
            "pagead2.googleadservices.com",
            "pagead2.googlesyndication.com",
            "partner.googleadservices.com",
            "redirector.googlevideo.com",
            "s0-2mdn-net.l.google.com",
            "ssl.google-analytics.com",
            "ssl-google-analytics.l.google.com",
            "tpc.googlesyndication.com",
            "www.googleadservices.com",
            "www.google-analytics.com",
            "www-google-analytics.l.google.com",
            "www.googletagmanager.com",
            "www.googletagservices.com",
        ]

        # The URLs are expected in the exact same order as the domains.
        expected_urls = [f"https://{domain}" for domain in expected_domains]

        self.assertEqual(
            expected_domains, List(self.data_list).custom_format(Sort.standard)
        )

        self.assertEqual(
            expected_urls, List(self.data_url_list).custom_format(Sort.standard)
        )
Esempio n. 25
0
    def file(self):
        """
        Manage the case that need to test each domain of a given file path.

        .. note::
            1 domain per line.

        :return:
            The list of what :code:`self.domain` returned for each tested
            element. An empty list is returned (and a message is printed)
            when there is nothing to test.
        :rtype: list
        """

        # We get, format, filter, clean the list to test.
        list_to_test = self._file_list_to_test_filtering()

        if PyFunceble.CONFIGURATION["idna_conversion"]:
            # We have to convert domains to idna.

            # We convert if we need to convert.
            list_to_test = domain2idna(list_to_test)

            if PyFunceble.CONFIGURATION["hierarchical_sorting"]:
                # The hierarchical sorting is desired by the user.

                # We format the list.
                list_to_test = List(list_to_test).custom_format(Sort.hierarchical)
            else:
                # The hierarchical sorting is not desired by the user.

                # We format the list.
                list_to_test = List(list_to_test).custom_format(Sort.standard)

        # We initiate a local variable which will save the current state of the list.
        not_filtered = list_to_test

        try:
            # We skip the elements which were already tested (according to
            # the counter) and we remove the elements which are in the
            # database from the current list to test.
            list_to_test = List(
                list(
                    set(
                        list_to_test[PyFunceble.INTERN["counter"]["number"]["tested"] :]
                    )
                    - set(PyFunceble.INTERN["flatten_inactive_db"])
                )
            ).format()

            # An empty result raises an IndexError here.
            _ = list_to_test[-1]
        except IndexError:
            # Our list to test is the one with the element from the database.
            list_to_test = not_filtered[
                PyFunceble.INTERN["counter"]["number"]["tested"] :
            ]

            # We delete the undesired variable.
            del not_filtered

        if PyFunceble.CONFIGURATION["hierarchical_sorting"]:
            # The hierarchical sorting is desired by the user.

            # We format the list.
            list_to_test = List(list(list_to_test)).custom_format(Sort.hierarchical)

        if not list_to_test:
            # There is nothing to test.
            #
            # Note: we check the emptiness explicitly. Catching an IndexError
            # around the comprehension below can not detect this case: with
            # an empty list, `list_to_test[-1]` is never evaluated. It would
            # also hide any IndexError raised while testing an element.

            # We print a message on screen.
            print(PyFunceble.Fore.CYAN + PyFunceble.Style.BRIGHT + "Nothing to test.")

            return []

        # We test each (non empty) element of the list to test. The last
        # element of the list is given along with each of them.
        return [self.domain(x, list_to_test[-1]) for x in list_to_test if x]