Python Mining.Mining Exemples, PyFunceble.mining.Mining.Mining Python Exemples

Exemple #1

0

Afficher le fichier

    def test_backup(self):
        """
        Test the backup system.

        The mining file is deleted first, then ``Mining()._backup()`` is
        called with ``PyFunceble.INTERN["mined"]`` set to the expected
        content. The file must then exist and hold that content as JSON.
        """

        # We start from a clean state: the file must not exist.
        File(self.file).delete()

        expected = False
        actual = PyFunceble.path.isfile(self.file)

        self.assertEqual(expected, actual)

        PyFunceble.INTERN["mined"] = self.excepted_content
        Mining()._backup()

        # The backup must have created the file ...
        expected = True
        actual = PyFunceble.path.isfile(self.file)

        self.assertEqual(expected, actual)

        # ... and its (JSON) content must match what was mined.
        self.assertEqual(self.excepted_content,
                         Dict().from_json(File(self.file).read()))

        # We clean everything we created for this test.
        del PyFunceble.INTERN["mined"]

        File(self.file).delete()

        actual = PyFunceble.path.isfile(self.file)
        expected = False

        self.assertEqual(expected, actual)

Exemple #2

0

Afficher le fichier

    def test_remove(self):
        """
        Test the deletion subsystem.

        With ``www.google.com`` set as the currently tested subject,
        ``Mining().remove()`` is expected to drop it from the mined data.
        """

        # We start from a clean state: the file must not exist.
        File(self.file).delete()

        expected = False
        actual = PyFunceble.path.isfile(self.file)

        self.assertEqual(expected, actual)

        PyFunceble.INTERN["mined"] = self.excepted_content
        PyFunceble.INTERN["to_test_type"] = "domain"
        PyFunceble.INTERN["to_test"] = "www.google.com"

        # Only the subject which is not under test must remain.
        expected = {
            PyFunceble.INTERN["file_to_test"]: {
                "myètherwället.com": ["www.facebook.com"]
            }
        }

        Mining().remove()

        self.assertEqual(expected, PyFunceble.INTERN["mined"])

        # We clean everything we set for this test.
        del PyFunceble.INTERN["mined"]
        del PyFunceble.INTERN["to_test"]
        del PyFunceble.INTERN["to_test_type"]

        File(self.file).delete()

        expected = False
        actual = PyFunceble.path.isfile(self.file)

        # We ensure that the cleanup really removed the file.
        self.assertEqual(expected, actual)

Exemple #3

0

Afficher le fichier

    def test_retrieve_file_exist(self):
        """
        Check that the mined data are loaded when the mining file exists.
        """

        # Clean state: the mining file must be absent before we begin.
        File(self.file).delete()
        self.assertEqual(False, PyFunceble.path.isfile(self.file))

        PyFunceble.INTERN["to_test_type"] = "domain"

        # We write the expected content to disk and ask for its retrieval.
        Dict(self.excepted_content).to_json(self.file)
        Mining()._retrieve()

        self.assertEqual(self.excepted_content, PyFunceble.INTERN["mined"])

        # We drop every index this test introduced.
        for index in ("mined", "to_test_type"):
            del PyFunceble.INTERN[index]

        # The file has to be gone once we are done.
        File(self.file).delete()
        self.assertEqual(False, PyFunceble.path.isfile(self.file))

Exemple #4

0

Afficher le fichier

    def test_retrieve_file_not_exist(self):
        """
        Check that retrieving without a mining file gives an empty database.
        """

        # Clean state: the mining file must be absent before we begin.
        File(self.file).delete()
        self.assertEqual(False, PyFunceble.path.isfile(self.file))

        Mining()._retrieve()

        # Without any file, the mined data must be an empty dict.
        self.assertEqual({}, PyFunceble.INTERN["mined"])

        # We reset the mined data to an empty state.
        PyFunceble.INTERN["mined"] = {}

        # The file still has to be absent once we are done.
        File(self.file).delete()
        self.assertEqual(False, PyFunceble.path.isfile(self.file))

Exemple #5

0

Afficher le fichier

    def test_add(self):
        """
        Check that mined elements are added and merged into the database.
        """

        # Clean state: the mining file must be absent before we begin.
        File(self.file).delete()
        self.assertEqual(False, PyFunceble.path.isfile(self.file))

        to_add = {"www.google.com": ["facebook.com", "www.facebook.com"]}

        # First addition: the database must hold exactly what we gave.
        first_expectation = {PyFunceble.INTERN["file_to_test"]: to_add}

        Mining()._add(to_add)

        self.assertEqual(first_expectation, PyFunceble.INTERN["mined"])

        # Second addition: we give one more mined element for the same
        # subject and compare against the listing below.
        to_add["www.google.com"].append("github.com")

        second_expectation = {
            PyFunceble.INTERN["file_to_test"]: {
                "www.google.com": [
                    "facebook.com",
                    "github.com",
                    "www.facebook.com",
                ]
            }
        }

        Mining()._add(to_add)

        self.assertEqual(second_expectation, PyFunceble.INTERN["mined"])

        # We drop what this test introduced.
        del PyFunceble.INTERN["mined"]

        File(self.file).delete()
        self.assertEqual(False, PyFunceble.path.isfile(self.file))

Exemple #6

0

Afficher le fichier

    def __init__(self, file, file_type="domain"):
        """
        Initiate the subsystems needed to test the given file.

        :param file: The file we are working with.
        :param str file_type: The file/test type.
        """

        # We share the file we are working with.
        self.file = file
        # We share the file/test type.
        self.file_type = file_type

        # We construct the list of UP statuses.
        #
        # We work on a copy: extending PyFunceble.STATUS["list"]["up"]
        # in place would make the shared list grow (with the valid
        # statuses) each time this class is instantiated.
        self.list_of_up_statuses = list(PyFunceble.STATUS["list"]["up"])
        self.list_of_up_statuses.extend(PyFunceble.STATUS["list"]["valid"])

        # We get/initiate the db.
        self.sqlite_db = SQLite()
        self.mysql_db = MySQL()

        # We get/initiate the preset class.
        self.preset = PyFunceble.Preset()
        # We get/initiate the autosave database/subsystem.
        self.autosave = AutoSave(start_time=PyFunceble.INTERN["start"])
        # We get/initiate the inactive database.
        self.inactive_db = InactiveDB(self.file,
                                      sqlite_db=self.sqlite_db,
                                      mysql_db=self.mysql_db)
        # We get/initiate the whois database.
        self.whois_db = WhoisDB(sqlite_db=self.sqlite_db,
                                mysql_db=self.mysql_db)
        # We get/initiate the mining subsystem.
        self.mining = Mining(self.file,
                             sqlite_db=self.sqlite_db,
                             mysql_db=self.mysql_db)
        # We get/initiate the autocontinue subsystem.
        self.autocontinue = AutoContinue(
            self.file,
            parent_process=True,
            sqlite_db=self.sqlite_db,
            mysql_db=self.mysql_db,
        )

        # We initiate a variable which will tell us when
        # we start testing for complements.
        self.complements_test_started = False

        # We run the download logic.
        # NOTE(review): presumably downloads the file when a link was
        # given - confirm against download_link().
        self.download_link()

Exemple #7

0

Afficher le fichier

Fichier : core.py Projet : mrijinm/PyFunceble

    def __init__(self, **args):
        # The optional arguments we understand, mapped to the value we
        # fall back to when the caller does not give them.
        defaults = {
            "domain_or_ip_to_test": None,
            "file_path": None,
            "url_to_test": None,
            "url_file": None,
            "modulo_test": False,
            "link_to_test": None,
        }

        # Each optional argument becomes an attribute of the instance so
        # that it can be read from anywhere in the class.
        for name in defaults:
            setattr(self, name, args.get(name, defaults[name]))

        # The helpers below are instantiated once and shared through the
        # instance instead of being declared again at each usage.

        # The status helper.
        self.status = Status()
        # The checker helper.
        self.checker = Check()
        # The percentage helper.
        self.percentage = Percentage()
        # The URL status helper.
        self.url_status = URL()
        # The mining helper.
        self.mining = Mining()
        # The auto continue helper is not instantiated here; we only
        # declare its placeholder.
        self.auto_continue = None
        # The syntax status helper.
        self.syntax_status = Syntax()
        # The inactive database helper.
        self.inactive_database = Inactive()

        # Finally, we manage the entries.
        self._entry_management()

Exemple #8

0

Afficher le fichier

    def test_list_of_mined(self):
        """
        Test Mining.list_of_mined

        With the expected content set as mined data, the method must
        return the mined elements listed below.
        """

        # We start from a clean state: the file must not exist.
        File(self.file).delete()

        expected = False
        actual = PyFunceble.path.isfile(self.file)

        self.assertEqual(expected, actual)

        PyFunceble.INTERN["mined"] = self.excepted_content

        expected = ["www.facebook.com", "www.google.com"]

        self.assertEqual(expected, Mining().list_of_mined())

        # We clean everything we set for this test.
        del PyFunceble.INTERN["mined"]

        File(self.file).delete()

        expected = False
        actual = PyFunceble.path.isfile(self.file)

        # We ensure that the cleanup really removed the file.
        self.assertEqual(expected, actual)

Exemple #9

0

Afficher le fichier

    def setUp(self):
        """
        Prepare the configuration, the paths and the fixtures of the tests.
        """

        # We load the configuration with the JSON database type and
        # without generating the directory structure.
        PyFunceble.load_config(
            custom={"db_type": "json"}, generate_directory_structure=False
        )

        # The mining subsystem has to be activated for those tests.
        PyFunceble.CONFIGURATION["mining"] = True

        # The (non existent) file we pretend to test.
        self.file_to_test = "this_file_is_a_ghost"

        # The location of the mining file: configuration directory
        # followed by the default mining filename.
        directory = PyFunceble.CONFIG_DIRECTORY
        self.file = directory + PyFunceble.OUTPUTS["default_files"]["mining"]

        # The content the tests work with, indexed by the file we test.
        mined = {"myètherwället.com": ["www.google.com", "www.facebook.com"]}
        self.excepted_content = {self.file_to_test: mined}

        self.mining = Mining(self.file_to_test)

Exemple #10

0

Afficher le fichier

    def _test_line(self, line, manager_data=None):  # pylint: disable=too-many-branches  # pragma: no cover
        """
        Test the given line and update the databases with its status.

        :param str line: A line to work with.
        :param multiprocessing.Manager.list manager_data:
            A Server process. When given (with the JSON database type),
            the databases of the locally created instances are appended
            to it.
        """

        if (PyFunceble.CONFIGURATION["db_type"] == "json"
                and manager_data is not None):
            # JSON database along with a manager: we work with our own,
            # freshly created, instances.
            autocontinue = AutoContinue(self.file, parent_process=False)
            inactive_db = InactiveDB(self.file)
            mining = Mining(self.file)
        else:
            # Otherwise we reuse the instances initiated by the
            # constructor.
            autocontinue, inactive_db, mining = (
                self.autocontinue,
                self.inactive_db,
                self.mining,
            )

        # We strip the surrounding whitespace of the line.
        line = line.strip()

        if not line or line[0] == "#":
            # Empty line or comment line: nothing to test.
            return None

        ignore_matcher = Regex(
            line, self.regex_ignore, escape=False, return_data=False
        )

        if ignore_matcher.match():
            # The line matches our list of elements to ignore.
            return None

        # Last formatting step before the filtering and the test.
        subject = self._format_line(line)

        if (not PyFunceble.CONFIGURATION["local"]
                and PyFunceble.Check(subject).is_reserved_ipv4()):
            # We are not testing local components and the subject is a
            # reserved IPv4: nothing to test.
            return None

        if (PyFunceble.CONFIGURATION["filter"]
                and not Regex(subject,
                              PyFunceble.CONFIGURATION["filter"],
                              return_data=False).match()):
            # A filter is given and the subject does not match it.
            return None

        # Either there is no filter or the subject matches it:
        # we get the status of the subject.
        status = self.__process_test(subject)

        # We add the subject into the auto continue database.
        autocontinue.add(subject, status)

        if status.lower() not in self.list_of_up_statuses:
            # Not an UP status: the subject goes into the inactive
            # database.
            inactive_db.add(subject, status)
        else:
            # UP status: we mine if necessary.
            mining.mine(subject, self.file_type)

            if subject in inactive_db:
                # The subject was known as inactive: we generate the
                # suspicious file ...
                Generate(
                    subject,
                    "file_domain",
                    PyFunceble.STATUS["official"]["up"],
                ).analytic_file("suspicious")

                # ... and we remove it from the inactive database.
                inactive_db.remove(subject)

        if (self.complements_test_started
                and PyFunceble.CONFIGURATION["db_type"] == "json"
                and "complements" in autocontinue.database):
            # The test of the complements started and the complements
            # index is present.

            while subject in autocontinue.database["complements"]:
                # We remove the subject until it is not present anymore,
                # saving the state after each removal.
                autocontinue.database["complements"].remove(subject)
                autocontinue.save()

        if manager_data is None:
            # No manager given: we update the counters and process the
            # autosaving if it is necessary.
            autocontinue.update_counters()
            self.autosave.process(test_completed=False)
        elif PyFunceble.CONFIGURATION["db_type"] == "json":
            # A manager is given: we hand our databases over to it.
            manager_data.append({
                "autocontinue": autocontinue.database,
                "inactive_db": inactive_db.database,
                "mining": mining.database,
            })

        # There is nothing to return.
        return None

Exemple #11

0

Afficher le fichier

Fichier : core.py Projet : smed79/PyFunceble

    def _file_list_to_test_filtering(self):
        """
        Unify the way we work before testing file contents.

        The list extracted from the file is extended with the mined
        elements and (when the inactive database is activated) with the
        `to_test` index of the inactive database. It is then decoded or
        formatted, cleaned from the elements we never test, filtered and
        sorted.

        :return: The final list to test.
        """

        # We get the list to test from the file we have to test.
        list_to_test = self._extract_domain_from_file()

        # We save the original list to test globally.
        # NOTE(review): this stores a reference, not a copy. The in-place
        # `extend` below is therefore visible through this index too -
        # confirm that this is intended.
        PyFunceble.INTERN["extracted_list_to_test"] = list_to_test

        # We get the list of mined.
        mined_list = Mining().list_of_mined()

        if mined_list:
            # There are mined elements, we test them too.
            list_to_test.extend(mined_list)

        # We generate the directory structure.
        DirectoryStructure()

        # We restore the data from the last session if it does exist.
        AutoContinue().restore()

        if PyFunceble.CONFIGURATION["adblock"]:
            # The adblock decoder is activated.

            # We get the list of domain to test (decoded).
            list_to_test = AdBlock(list_to_test).decode()
        else:
            # The adblock decoder is not activated.

            # We get the formatted list of domain to test.
            list_to_test = list(map(self._format_domain, list_to_test))

        # We clean the output directory if it is needed.
        PyFunceble.Clean(list_to_test)

        # We set the start time.
        ExecutionTime("start")

        # We get the list we have to test in the current session (from the database).
        Inactive().to_test()

        if (
            PyFunceble.CONFIGURATION["inactive_database"]
            and PyFunceble.INTERN["file_to_test"] in PyFunceble.INTERN["inactive_db"]
            and "to_test"
            in PyFunceble.INTERN["inactive_db"][PyFunceble.INTERN["file_to_test"]]
            and PyFunceble.INTERN["inactive_db"][PyFunceble.INTERN["file_to_test"]][
                "to_test"
            ]
        ):
            # * The inactive database is activated.
            # and
            # * The current file to test is in the database.
            # and
            # * The `to_test` index is present into the database
            #   related to the file we are testing.
            # and
            # * The `to_test` index content is not empty.

            # We extend our list to test with the content of the `to_test` index
            # of the current file database.
            list_to_test.extend(
                PyFunceble.INTERN["inactive_db"][PyFunceble.INTERN["file_to_test"]][
                    "to_test"
                ]
            )

        # We set a regex of element to delete.
        # Understand with this variable that we don't want to test those.
        regex_delete = r"localhost$|localdomain$|local$|broadcasthost$|0\.0\.0\.0$|allhosts$|allnodes$|allrouters$|localnet$|loopback$|mcastprefix$|ip6-mcastprefix$|ip6-localhost$|ip6-loopback$|ip6-allnodes$|ip6-allrouters$|ip6-localnet$"  # pylint: disable=line-too-long

        # We load the flatten version of the database.
        PyFunceble.INTERN.update({"flatten_inactive_db": Inactive().content()})

        # We keep a reference to the current state of the list, as
        # `list_to_test` is reassigned inside the `try` block below.
        not_filtered = list_to_test

        try:
            # We remove the element which are in the database from the
            # current list to test.
            list_to_test = List(
                list(
                    set(Regex(list_to_test, regex_delete).not_matching_list())
                    - set(PyFunceble.INTERN["flatten_inactive_db"])
                )
            ).format()

            # We access the last element in order to get an IndexError
            # (and so the fallback below) when the resulting list is empty.
            _ = list_to_test[-1]
        except IndexError:
            # We test without the database removing.
            list_to_test = List(
                Regex(not_filtered, regex_delete).not_matching_list()
            ).format()

            # We delete the not_filtered variable.
            del not_filtered

        if PyFunceble.CONFIGURATION["filter"]:
            # The filter is not empty.

            # We update our list to test. Indeed we only keep the elements which
            # match the given filter.
            list_to_test = List(
                Regex(
                    list_to_test, PyFunceble.CONFIGURATION["filter"], escape=False
                ).matching_list()
            ).format()

        # We apply the standard sorting.
        list_to_test = List(list(list_to_test)).custom_format(Sort.standard)

        if PyFunceble.CONFIGURATION["hierarchical_sorting"]:
            # The hierarchical sorting is desired by the user.

            # We format the list.
            list_to_test = List(list(list_to_test)).custom_format(Sort.hierarchical)

        # We return the final list to test.
        return list_to_test

Exemple #12

0

Afficher le fichier

Fichier : core.py Projet : smed79/PyFunceble

    def _file_decision(self, current, last, status=None):
        """
        Run the mining, inactive database, autocontinue and autosave logic
        for the case that we are reading a file.

        :param current: The currently tested element.
        :type current: str

        :param last: The last element of the list.
        :type last: str

        :param status: The status of the currently tested element.
        :type status: str
        """

        if (
            status
            and not PyFunceble.CONFIGURATION["simple"]
            and PyFunceble.INTERN["file_to_test"]
        ):
            # A status is given, the simple mode is deactivated and a
            # file to test is set.

            # We run the mining logic, then we remove the currently
            # tested element from the mining database: it is already
            # part of our testing process.
            Mining().process()
            Mining().remove()

            lowered_status = status.lower()
            is_up = (
                lowered_status in PyFunceble.STATUS["list"]["up"]
                or lowered_status in PyFunceble.STATUS["list"]["valid"]
            )

            if not is_up:
                # Not an up status: we add the currently tested element
                # to the database.
                Inactive().add()
            elif Inactive().is_present():
                # Up status, but the element is in the database: we
                # generate the suspicious file(s) ...
                Generate("strange").analytic_file(
                    "suspicious", PyFunceble.STATUS["official"]["up"]
                )

                # ... and we remove the element from the database.
                Inactive().remove()

            # We backup the current state of the file reading for the
            # case that we need to continue later.
            AutoContinue().backup()

            if current != last:
                # Not the last element: we run the autosave logic.
                AutoSave()
            else:
                # Last element: we stop and log the execution time.
                ExecutionTime("stop", True)

                # We show/log the percentage.
                Percentage().log()

                # We reset the counters as we end the process.
                self.reset_counters()

                # We backup the state once more, now that the counters
                # are reset.
                AutoContinue().backup()

                # We show the colored logo.
                self.colorify_logo()

                # We run the autosave logic a last time (called with
                # `True`).
                AutoSave(True)

        for index in ("http_code", "referer"):
            # We empty those indexes, if they are set.

            if index in PyFunceble.INTERN:
                PyFunceble.INTERN[index] = ""