Beispiel #1
0
def standard(element: Any) -> List[Union[int, Any]]:
    """
    Provides the key to use for the standard sorting.

    The given element is stripped, passed through :code:`Url2Netloc`,
    freed from the configured hosts IP prefix (only when the configuration
    is loaded), reduced to its alphanumeric characters and dots, and finally
    split around its digit runs so that numbers get compared as integers.

    :param element:
        The element to format.

    :return:
        The chunks of the cleaned element. Chunks made only of digits are
        converted to :py:class:`int`. An empty list is returned when the
        stripped element is empty.
    """

    # Imported locally so that this function does not depend on the
    # module-level imports for the escaping of the hosts IP.
    import re  # pylint: disable=import-outside-toplevel

    element = element.strip()

    if not element:
        return []

    regex_helper = RegexHelper()

    element = Url2Netloc(element).get_converted().strip()

    if PyFunceble.facility.ConfigLoader.is_already_loaded():
        # The hosts IP is a literal value (e.g. 0.0.0.0), not a pattern: it
        # has to be escaped, otherwise each dot would match any character.
        hosts_ip = re.escape(
            str(PyFunceble.storage.CONFIGURATION.cli_testing.hosts_ip))

        element = regex_helper.set_regex(
            r"^%s\s+" % hosts_ip).replace_match(element, "")

    cleaned = regex_helper.set_regex(r"[^a-zA-Z0-9\.]").replace_match(
        element, "")

    # Splitting around a captured group keeps the digit runs in the result.
    return [
        int(x) if x.isdigit() else x
        for x in regex_helper.set_regex(r"(\d+)").split(cleaned)
    ]
Beispiel #2
0
    def test_set_regex_escape(self) -> None:
        """
        Tests the method which let us set the regex to work with for the case
        that the escaping of the given regex is activated.
        """

        helper = RegexHelper()

        # The escaping has to be activated before the regex is given.
        helper.escape_regex = True
        helper.set_regex("[a-z]")

        self.assertEqual(r"\[a\-z\]", helper.regex)
class AdblockInputLine2Subject(ConverterBase):
    """
    Provides an interface for the conversion or extraction of valuable subjects
    from an inputted AdBlock line.

    The conversion itself is done by :py:meth:`get_converted`, which merges
    the outcome of the different decoding modes (:code:`_decode_v1` to
    :code:`_decode_v6`). Some of the modes only run when the aggressive
    decoding is activated.
    """

    # State of the aggressive decoding; managed through the `aggressive`
    # property.
    _aggressive: bool = False

    # Regex helper shared by the decoding methods; set in the constructor.
    __regex_helper: Optional[RegexHelper] = None

    def __init__(self,
                 data_to_convert: Optional[Any] = None,
                 aggressive: bool = False) -> None:
        """
        :param data_to_convert:
            The AdBlock line to convert. Forwarded to the parent constructor.
        :param aggressive:
            Activates or deactivates the aggressive decoding.
            :py:class:`None` keeps the class default.

        :raise TypeError:
            When :code:`aggressive` is neither :py:class:`None` nor a
            :py:class:`bool`.
        """

        if aggressive is not None:
            self.aggressive = aggressive

        self.__regex_helper = RegexHelper()

        super().__init__(data_to_convert=data_to_convert)

    @ConverterBase.data_to_convert.setter
    def data_to_convert(self, value: Any) -> None:
        """
        Overwrites the default behavior.

        :raise TypeError:
            When the given data to convert is not :py:class:`str`
        """

        if not isinstance(value, str):
            raise TypeError(f"<value> should be {str}, {type(value)} given.")

        # Delegates the actual storage to the setter of the parent property.
        # pylint: disable=no-member
        super(AdblockInputLine2Subject,
              self.__class__).data_to_convert.fset(self, value)

    @property
    def aggressive(self) -> bool:
        """
        Provides the state of the :code:`_aggressive` attribute.
        """

        return self._aggressive

    @aggressive.setter
    def aggressive(self, value: bool) -> None:
        """
        Provides a way to activate/deactivate the aggressive decoding.

        :raise TypeError:
            When the given value is not a :py:class:`bool`.
        """

        if not isinstance(value, bool):
            raise TypeError(f"<value> should be {bool}, {type(value)} given.")

        self._aggressive = value

    def set_aggressive(self, value: bool) -> "AdblockInputLine2Subject":
        """
        Provides a way to activate/deactivate the aggressive decoding.

        :param value:
            The state to set.

        :return:
            The current instance, in order to allow the chaining of calls.

        :raise TypeError:
            When the given value is not a :py:class:`bool`.
        """

        self.aggressive = value

        return self

    @staticmethod
    def should_be_ignored(line: str) -> bool:
        """
        Checks if we should ignore the given line.

        A line is ignored when it starts with one of the known starting
        characters (comments, exceptions, paths, ...).

        :param line:
            The line to check.
        """

        starting_chars = ("!", "@@", "/", "[", ".", "-", "_", "?", "&")

        # str.startswith accepts a tuple of prefixes.
        return line.startswith(starting_chars)

    @staticmethod
    def extract_base(subject: Union[str, List[str]]) -> Union[str, List[str]]:
        """
        Extracts the base of the given subject (supposely URL).

        :param subject:
            The subject to work with. When a list is given, each of its
            elements is processed and a list is returned.

        :return:
            The converted subject, or the subject freed from its :code:`*`
            and :code:`~` characters when the conversion raises a
            :py:class:`ValueError`.

        Example:

            Giving :code:`"hello.world/?is=beautiful"` returns :code:`"hello.world"`
        """

        if isinstance(subject, list):
            # Honors the documented signature: a list has no `replace`.
            return [
                AdblockInputLine2Subject.extract_base(x) for x in subject
            ]

        subject = subject.replace("*", "").replace("~", "")

        try:
            return Url2Netloc(subject).get_converted()
        except ValueError:
            return subject

    def _decode_multiple_subject(self, decoded: str) -> Set[str]:
        """
        Implementation of the decoding of the case that multiple
        subjects are possible in the given :py:class:`str`.

        The subjects are the chunks located between the :code:`~`, :code:`*`
        and :code:`,` characters.

        :param decoded:
            The decoded part to split.

        :return:
            The base of each of the found subjects.
        """

        result = set()

        rematch = self.__regex_helper.set_regex(r"((?:[^~\*,]+))").match(
            decoded, rematch=True, return_match=True)

        if rematch:
            result.update({self.extract_base(x) for x in rematch})

        return result

    def _decode_options(self, decoded_options: List[str]) -> Set[str]:
        """
        Handle the decoding of the options.

        What it does:

            - It extracts all :code:`domain=` component - when found.
            - It extracts all :code:`href` URL base - when found.


        :param decoded_options:
            The splitted list of options.

        :return:
            The subjects found in the given options.
        """

        result = set()

        for rule in decoded_options:
            if "domain=" in rule:
                # The domains are separated by a pipe: normalizing it to a
                # comma lets us reuse the multiple subject decoding.
                rule = rule.replace("domain=", "").replace("|", ",")

                result.update(self._decode_multiple_subject(rule))
            elif "href" in rule:
                # We are interested in the quoted value.
                matched = self.__regex_helper.set_regex(
                    r"((?:\"|\')(.*)(?:\"|\'))").match(rule,
                                                       return_match=True,
                                                       rematch=True,
                                                       group=1)

                if matched:
                    result.add(self.extract_base(matched))

        return result

    def _decode_v1(self, line: str) -> Set[str]:
        """
        Implementation of our first decoding mode.

        In this mode we try to decode the simple:

            ||ads.example.com^

        rule.

        :param line:
            The line to decode.

        :return:
            The decoded subjects which contain at least one dot.
        """

        result = set()

        local_line = line.strip()

        if local_line.startswith("||") and local_line.endswith(("^", "$")):
            # Removes the leading "||" and the trailing "^" or "$".
            local_line = local_line[2:-1]

            result.update(self._decode_multiple_subject(local_line))

        return {x for x in result if "." in x}

    def _decode_v2(self, line: str) -> Set[str]:
        """
        Implementation of our second decoding mode.

        In this mode, we try to decode the simple:

            |https://ads.example.com|

        rule.

        :param line:
            The line to decode.

        :return:
            The decoded subjects which contain at least one dot.
        """

        result = set()

        local_line = line.strip()

        if local_line.startswith("|") and local_line.endswith("|"):
            # Removes the leading and the trailing pipe.
            local_line = local_line[1:-1]

            result.add(self.extract_base(local_line))

        return {x for x in result if "." in x}

    def _decode_v3(self, line: str) -> Set[str]:
        """
        Implementation of our third decoding mode.

        In this mode, we try to decode the simple:

            ||ads.example.com^$script,image,domain=example.com|~foo.example.info
            ||ads.example.com$script,image,domain=example.com|~foo.example.info

        rule.

        The options (everything after the first :code:`$`) are only decoded
        when the aggressive decoding is activated.

        :param line:
            The line to decode.

        :return:
            The decoded subjects which contain at least one dot.
        """

        result = set()

        local_line = line.strip()

        if not local_line.startswith("||"):
            return result

        if "$" in local_line:
            v1_mode, options = local_line.split("$", 1)

            # The first decoding mode expects a trailing anchor.
            if not v1_mode.endswith("^"):
                v1_mode += "^"

            result.update(self._decode_v1(v1_mode))

            if self.aggressive:
                result.update(self._decode_options(options.split(",")))
        elif "^" not in local_line:
            result.update(self._decode_v1(f"{local_line}^"))
        else:
            # Everything after the first "^" is dropped.
            result.update(
                self._decode_v1(local_line[:local_line.find("^") + 1]))

        return {x for x in result if "." in x}

    def _decode_v4(self, line: str) -> Set[str]:
        """
        Implementation of our fourth decoding mode.

        In this mode, we try to decode the simple:

            @@||ads.example.com/notbanner^$~script

        rule.

        This mode only runs when the aggressive decoding is activated.

        :param line:
            The line to decode.

        :return:
            The decoded subjects which contain at least one dot.
        """

        result = set()
        local_line = line.strip()

        if (not self.aggressive or not local_line.startswith("@@||")
                or "^$" not in local_line):
            return result

        v1_mode, options = local_line.split("$", 1)

        result.update({
            self.extract_base(x)
            for x in self._decode_v1(v1_mode.replace("@@", ""))
        })

        result.update(self._decode_options(options.split(",")))

        return {x for x in result if "." in x}

    def _decode_v5(self, line: str) -> Set[str]:
        """
        Implementation of our fifth decoding mode.

        In this mode, we try to decode the simple:

            example.com,example.net##.advert
            exception.example.com#@#.advert
            example.com,example.net#?#div:-abp-has(> div > img.advert)
            exception.example.com#@#div:-abp-has(> div > img.advert)

        rule.

        This mode only runs when the aggressive decoding is activated.

        :param line:
            The line to decode.

        :return:
            The decoded subjects which contain at least one dot.
        """

        local_line = line.strip()
        result = set()

        if not self.aggressive:
            return result

        # The separators are tried in this order; the first one found wins.
        for separator in ("##", "#@#", "#?#"):
            if separator in local_line:
                obj_of_interest, options = local_line.split(separator, 1)
                break
        else:
            # No separator: nothing to decode in this mode.
            return result

        result.update(self._decode_multiple_subject(obj_of_interest))
        result.update(self._decode_options(options.split(",")))

        return {x for x in result if "." in x}

    def _decode_v6(self, line: str) -> Set[str]:
        """
        Implementation of our sixth decoding mode.

        In this mode we try to decode the simple:

            $domain=exam.pl|elpmaxe.pl|example.pl
            ^hello^$domain=example.com

        rule.

        This mode only runs when the aggressive decoding is activated.

        :param line:
            The line to decode.

        :return:
            The decoded subjects which contain at least one dot.
        """

        local_line = line.strip()
        result = set()

        if not self.aggressive:
            return result

        separator = "$"

        if separator in local_line:
            # Everything after the first separator is the list of options.
            options = local_line[local_line.find(separator) + 1:]

            result.update(self._decode_options(options.split(",")))

        return {x for x in result if "." in x}

    def get_converted(self) -> List[str]:
        """
        Provides the converted data.

        :return:
            The subjects extracted by the different decoding modes, sorted
            through :code:`ListHelper`.
        """

        result = set()

        if not self.should_be_ignored(self.data_to_convert.strip()):
            result.update(self._decode_v1(self.data_to_convert))
            result.update(self._decode_v2(self.data_to_convert))
            result.update(self._decode_v3(self.data_to_convert))
            result.update(self._decode_v5(self.data_to_convert))
            result.update(self._decode_v6(self.data_to_convert))

        # The fourth mode handles the lines starting with "@@", which are
        # part of the ignored ones: it has to run outside of the check above.
        result.update(self._decode_v4(self.data_to_convert))

        return ListHelper(list(result)).sort().subject
Beispiel #4
0
class TestRegexHelper(unittest.TestCase):
    """
    Tests of the regex helper.
    """
    def setUp(self) -> None:
        """
        Prepares the helper and the subjects shared by the tests.
        """

        self.helper = RegexHelper()

        self.test_regex = "[a-z]"
        self.testing_list_subject = [
            "hello",
            "world",
            "funilrys",
            "funceble",
            "PyFunceble",
            "pyfunceble",
        ]
        self.testing_subject = (
            "Hello, this is Fun Ilrys. I just wanted to know how "
            "things goes around the tests.")

    def tearDown(self) -> None:
        """
        Removes the subjects initialized for the tests.
        """

        del self.testing_list_subject
        del self.testing_subject

    def test_set_regex_return(self) -> None:
        """
        Tests that the method which sets the regex to work with returns
        a regex helper.
        """

        self.assertIsInstance(self.helper.set_regex(self.test_regex),
                              RegexHelper)

    def test_set_regex_method(self) -> None:
        """
        Tests that the regex given through the setter method is the one
        exposed by the :code:`regex` attribute.
        """

        self.helper.set_regex(self.test_regex)

        self.assertEqual(self.test_regex, self.helper.regex)

    def test_set_regex_attribute(self) -> None:
        """
        Tests the overwriting of the :code:`regex` attribute.
        """

        self.helper.regex = self.test_regex

        self.assertEqual(self.test_regex, self.helper.regex)

    def test_set_regex_through_init(self) -> None:
        """
        Tests the overwriting of the regex to work with through the class
        constructor.
        """

        self.assertEqual(self.test_regex, RegexHelper(self.test_regex).regex)

    def test_set_regex_not_str(self) -> None:
        """
        Tests the method which let us set the regex to work with for the
        case that the given regex is not a string.
        """

        with self.assertRaises(TypeError):
            self.helper.set_regex(["Hello", "World"])

    def test_set_regex_escape(self) -> None:
        """
        Tests the method which let us set the regex to work with for the
        case that the escaping of the given regex is activated.
        """

        helper = RegexHelper()

        # The escaping has to be activated before the regex is given.
        helper.escape_regex = True
        helper.set_regex("[a-z]")

        self.assertEqual(r"\[a\-z\]", helper.regex)

    def test_not_matching_list(self) -> None:
        """
        Tests the method which let us get the list of the strings which do
        not match, from a given list of strings.
        """

        self.helper.set_regex("fun")
        not_matching = self.helper.get_not_matching_list(
            self.testing_list_subject)

        self.assertEqual(["hello", "world", "PyFunceble"], not_matching)

    def test_matching_list(self) -> None:
        """
        Tests the method which let us get the list of the strings which
        match, from a given list of strings.
        """

        self.helper.set_regex("fun")
        matching = self.helper.get_matching_list(self.testing_list_subject)

        self.assertEqual(["funilrys", "funceble", "pyfunceble"], matching)

    def test_match_check(self) -> None:
        """
        Tests the matching method for the case that we only want to check
        and that the regex matches.
        """

        self.helper.set_regex(r"([a-z]{1,})\s([a-z]{1,})\s")
        outcome = self.helper.match(self.testing_subject, return_match=False)

        self.assertEqual(True, outcome)

    def test_match_not_check(self) -> None:
        """
        Tests the matching method for the case that we only want to check
        and that the regex does not match.
        """

        self.helper.set_regex(r"@funilrys")
        outcome = self.helper.match(self.testing_subject, return_match=False)

        self.assertEqual(False, outcome)

    def test_match_rematch(self) -> None:
        """
        Tests the matching method for the case that we want to rematch
        the different groups.
        """

        self.helper.set_regex(r"([a-z]{1,})\s([a-z]{1,})\s")
        outcome = self.helper.match(self.testing_subject,
                                    rematch=True,
                                    group=1)

        self.assertEqual("is", outcome)

    def test_match_get_group(self) -> None:
        """
        Tests the matching method for the case that we want
        a specific group.
        """

        # The whole match.
        self.helper.set_regex("e")
        outcome = self.helper.match(self.testing_subject, group=0)

        self.assertEqual("e", outcome)

        # The first captured group.
        self.helper.set_regex(r"([a-z]{1,})\s([a-z]{1,})\s")
        outcome = self.helper.match(self.testing_subject, group=1)

        self.assertEqual("this", outcome)

    def test_replace_no_replacement(self) -> None:
        """
        Tests the replacement method for the case that no replacement
        is given.
        """

        self.helper.set_regex("th")
        outcome = self.helper.replace_match(self.testing_subject, None)

        self.assertEqual(self.testing_subject, outcome)

    def test_replace(self) -> None:
        """
        Tests the replacement method.
        """

        self.helper.set_regex("th")
        outcome = self.helper.replace_match(self.testing_subject, "ht")

        self.assertEqual(
            "Hello, htis is Fun Ilrys. I just wanted to know how "
            "htings goes around hte tests.", outcome)

    def test_split(self) -> None:
        """
        Tests the method which let us split a string around the
        occurrences of a given regex.
        """

        self.helper.set_regex("th")
        chunks = self.helper.split(self.testing_subject)

        self.assertEqual(
            [
                "Hello, ",
                "is is Fun Ilrys. I just wanted to know how ",
                "ings goes around ",
                "e tests.",
            ],
            chunks,
        )