Example 1
    def update_ip_list(self) -> "UHBPyFuncebleSystemLauncher":
        """
        Updates the content of the :code:`ip.list` file.
        """

        input_file = FileHelper(outputs.IP_SUBJECTS_DESTINATION)
        ip_file = FileHelper(outputs.IP_DESTINATION)

        if input_file.exists():
            logging.info("Started generation of %r.", ip_file.path)

            with input_file.open(
                    "r", encoding="utf-8") as input_file_stream, ip_file.open(
                        "w", encoding="utf-8") as ip_file_stream:
                for line in input_file_stream:
                    if not line.strip() or line.startswith("#"):
                        continue

                    ip_file_stream.write("\n".join(line.split()[1:]) + "\n")

                ip_file_stream.write("\n")

            whitelist_core_tool(
                output_file=ip_file.path,
                use_official=True,
                processes=os.cpu_count(),
            ).filter(file=ip_file.path,
                     already_formatted=True,
                     standard_sort=False)

            logging.info("Finished generation of %r.", ip_file.path)
Example 2
    def update_clean_list(self) -> "UHBPyFuncebleSystemLauncher":
        """
        Updates the content of the :code:`clean.list` file.
        """

        input_file = FileHelper(outputs.ACTIVE_SUBJECTS_DESTINATION)
        clean_file = FileHelper(outputs.CLEAN_DESTINATION)

        if input_file.exists():
            logging.info("Started generation of %r.", clean_file.path)

            with input_file.open(
                    "r",
                    encoding="utf-8") as input_file_stream, clean_file.open(
                        "w", encoding="utf-8") as clean_file_stream:
                for line in input_file_stream:
                    line = line.strip()

                    if not line or line.startswith("#") or "." not in line:
                        continue

                    if line.endswith("."):
                        line = line[:-1]

                    clean_file_stream.write("\n" + line)

            logging.info("Finished generation of %r.", clean_file.path)

        return self
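Note the "\n" + line write order: the generated file starts with a blank line and ends without a trailing newline. A toy illustration of that pattern, using io.StringIO as a stand-in for the real stream:

    import io

    stream = io.StringIO()
    for line in ("example.com", "example.net"):
        stream.write("\n" + line)

    assert stream.getvalue() == "\nexample.com\nexample.net"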
Example 3
    def update_volatile_list(self) -> "UHBPyFuncebleSystemLauncher":
        """
        Updates the content of the :code:`volatile.list` file.
        """

        input_file = FileHelper(outputs.TEMP_VOLATIVE_DESTINATION)
        volatile_file = FileHelper(outputs.VOLATILE_DESTINATION)
        clean_file = FileHelper(outputs.CLEAN_DESTINATION)

        logging.info("Started generation of %r.", volatile_file.path)

        with volatile_file.open("w", encoding="utf-8") as volatile_file_stream:
            if clean_file.exists():
                with clean_file.open("r",
                                     encoding="utf-8") as clean_file_stream:
                    for line in clean_file_stream:
                        line = line.strip()

                        if not line or line.startswith("#") or "." not in line:
                            continue

                        if line.endswith("."):
                            line = line[:-1]

                        volatile_file_stream.write(line + "\n")

            if input_file.exists():
                with input_file.open("r",
                                     encoding="utf-8") as input_file_stream:
                    for line in input_file_stream:
                        line = line.strip()

                        if not line or line.startswith("#") or "." not in line:
                            continue

                        if line.endswith("."):
                            line = line[:-1]

                        volatile_file_stream.write(line + "\n")

            volatile_file_stream.write("\n")

        whitelist_core_tool(
            output_file=volatile_file.path,
            use_official=True,
            processes=os.cpu_count(),
        ).filter(file=volatile_file.path,
                 already_formatted=True,
                 standard_sort=False)

        logging.info("Finished generation of %r.", volatile_file.path)

        return self
Example 4
    def test_open(self) -> None:
        """
        Tests the method which lets us open the given file as we want.
        """

        file_helper = FileHelper(tempfile.gettempdir())
        file_helper.set_path(file_helper.join_path(secrets.token_hex(8)))

        expected = False
        actual = file_helper.exists()

        self.assertEqual(expected, actual)

        with file_helper.open("w") as file_stream:
            file_stream.write("Hello, World!")

        expected = True
        actual = file_helper.exists()

        self.assertEqual(expected, actual)

        expected = "Hello, World!"
        actual = file_helper.read()

        self.assertEqual(expected, actual)
Example 5
    def run_end(self):
        """
        Run the end logic.
        """

        self.info_manager["currently_under_test"] = False

        self.info_manager["latest_part_finish_datetime"] = datetime.utcnow()
        self.info_manager["latest_part_finish_timestamp"] = self.info_manager[
            "latest_part_finish_datetime"].timestamp()

        self.info_manager["finish_datetime"] = self.info_manager[
            "latest_part_finish_datetime"]
        self.info_manager["finish_timestamp"] = self.info_manager[
            "finish_datetime"].timestamp()

        logging.info(
            "Updated all timestamps and indexes that needed to be updated.")

        pyfunceble_active_list = FileHelper(
            os.path.join(
                self.info_manager.WORKSPACE_DIR,
                "output",
                dead_hosts.launcher.defaults.paths.ORIGIN_FILENAME,
                "domains",
                "ACTIVE",
                "list",
            ))

        clean_list = [
            "# File generated by the Dead-Hosts project with the help of PyFunceble.",
            "# Dead-Hosts: https://github.com/dead-hosts",
            "# PyFunceble: https://pyfunceble.github.io",
            f"# Generation Time: {datetime.utcnow().isoformat()}",
        ]

        logging.info(
            f"PyFunceble ACTIVE list output: {pyfunceble_active_list.path}")

        if pyfunceble_active_list.exists():
            logging.info(
                f"{pyfunceble_active_list.path} exists, getting and formatting its content."
            )

            self.output_file.write("\n".join(clean_list) + "\n\n",
                                   overwrite=True)

            with pyfunceble_active_list.open("r",
                                             encoding="utf-8") as file_stream:
                for line in file_stream:
                    if line.startswith("#"):
                        continue

                    self.output_file.write(line)

            self.output_file.write("\n")

            logging.info("Updated of the content of %r", self.output_file.path)
Example 6
    def migrate(self) -> "MigratorBase":
        """
        Provides the migrator (itself).
        """

        file_helper = FileHelper(self.source_file)

        if file_helper.exists():
            with file_helper.open("r", encoding="utf-8") as file_stream:
                first_line = next(file_stream)

            if any(x in first_line for x in self.TO_DELETE):
                temp_destination = tempfile.NamedTemporaryFile(
                    "a+", newline="", encoding="utf-8", delete=False
                )

                file_handler = file_helper.open(newline="")
                reader = csv.DictReader(file_handler)
                writer = csv.DictWriter(
                    temp_destination,
                    fieldnames=[x for x in self.FIELDS if x not in self.TO_DELETE],
                )
                writer.writeheader()

                keys_found = False
                for row in reader:
                    row = dict(row)
                    for key in self.TO_DELETE:
                        if key in row:
                            del row[key]
                            keys_found = True

                    if not keys_found:
                        break

                    writer.writerow(row)

                    if self.print_action_to_stdout:
                        print_single_line()

                file_handler.close()
                temp_destination.close()

                FileHelper(temp_destination.name).move(self.source_file)

        self.done = True

        return self
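The migration above is an instance of a generic technique: stream a CSV through csv.DictReader, drop unwanted columns, and write the result to a temporary file that replaces the source. A minimal, self-contained sketch of that technique using only the standard library (delete_csv_columns is an illustrative name, not PyFunceble's API):

    import csv
    import os
    import tempfile

    def delete_csv_columns(path: str, to_delete: set) -> None:
        """Rewrite the CSV at ``path`` without the columns in ``to_delete``."""
        with open(path, newline="", encoding="utf-8") as source, \
                tempfile.NamedTemporaryFile(
                    "w", newline="", encoding="utf-8", delete=False,
                    dir=os.path.dirname(path) or ".") as destination:
            reader = csv.DictReader(source)
            kept_fields = [x for x in reader.fieldnames if x not in to_delete]

            writer = csv.DictWriter(destination, fieldnames=kept_fields)
            writer.writeheader()

            for row in reader:
                writer.writerow(
                    {k: v for k, v in row.items() if k in kept_fields})

        # Same directory, hence same filesystem: the replace stays atomic.
        os.replace(destination.name, path)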
Example 7
    def get_csv_writer(self) -> Tuple[csv.DictWriter, TextIO]:
        """
        Provides the standard and initiated CSV Dict writer along with the
        file handler it was opened with.
        """

        file_helper = FileHelper(self.source_file)

        add_header = not file_helper.exists()

        file_handler = file_helper.open("a+", newline="")
        writer = csv.DictWriter(file_handler, fieldnames=self.FIELDS)

        if add_header:
            writer.writeheader()

        return writer, file_handler
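A hedged usage sketch for the writer/handler pair: the caller owns the returned handle and must close it. Both the dataset object and the row keys below are illustrative; the real field names come from self.FIELDS.

    writer, file_handler = dataset.get_csv_writer()

    try:
        # Illustrative row; actual keys must match self.FIELDS.
        writer.writerow({"idna_subject": "example.com", "status": "ACTIVE"})
    finally:
        file_handler.close()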
Example 8
    def produce_diff(self) -> Tuple[set, set, set]:
        """
        Produces the difference from the downloaded file.
        """

        file_helper = FileHelper(self.final_destination)

        new = set()
        kept = set()
        removed = set()

        if file_helper.exists():
            with file_helper.open("r", encoding="utf-8") as file_stream:
                current_content = set(x.strip() for x in file_stream)
        else:
            current_content = set()

        downloaded_empty = True

        for line in self.download_temp_file:
            if downloaded_empty:
                downloaded_empty = False

            line = line.strip()

            if not line:
                continue

            kept_kept, new_new = self.__get_diff_data(
                current_content, get_subjects_from_line(line, "availability"))

            new.update(new_new)
            kept.update(kept_kept)

        if downloaded_empty:
            kept = current_content
        else:
            compare_base = kept.copy()
            compare_base.update(new)

            removed = current_content - compare_base

        self.download_temp_file.seek(0)

        return kept, removed, new
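The kept/new/removed bookkeeping reduces to plain set arithmetic. A toy run of the same invariant:

    current = {"a.example", "b.example"}     # what the destination holds
    downloaded = {"b.example", "c.example"}  # what was just fetched

    kept = current & downloaded
    new = downloaded - current
    removed = current - (kept | new)

    assert (kept, new, removed) == ({"b.example"}, {"c.example"}, {"a.example"})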
Example 9
    def csv_file_delete_source_column_target(
        continuous_integration: ContinuousIntegrationBase,
    ) -> None:
        """
        Provides the target for the deletion of the source column.
        """

        migrator = InactiveDatasetDeleteSourceColumnMigrator(
            print_action_to_stdout=True
        )
        migrator.continuous_integration = continuous_integration

        file_helper = FileHelper(migrator.source_file)

        if file_helper.exists():
            with file_helper.open("r", encoding="utf-8") as file_stream:
                first_line = next(file_stream)

            if any(x in first_line for x in migrator.TO_DELETE):
                print(
                    f"{colorama.Fore.MAGENTA}{colorama.Style.BRIGHT}"
                    "Started deletion of the 'source' column into "
                    f"{migrator.source_file!r}."
                )

                migrator.start()

                if migrator.done:
                    print(
                        f"{colorama.Fore.GREEN}{colorama.Style.BRIGHT}"
                        "Finished deletion of the 'source' column into "
                        f"{migrator.source_file!r}."
                    )
                else:
                    print(
                        f"{colorama.Fore.MAGENTA}{colorama.Style.BRIGHT}"
                        "unfinished deletion of the 'source' column into "
                        f"{migrator.source_file!r}."
                    )
        else:
            PyFunceble.facility.Logger.info(
                "Stopped csv_file_delete_source_column_target. File does not exist."
            )
Example 10
    def get_content(self) -> TextIO:
        """
        Provides a file handler which lets you read the content line by
        line.

        :raise FileNotFoundError:
            When the declared file does not exist.
        """

        file_helper = FileHelper(self.source_file)

        if not file_helper.exists() and bool(
                self.DOWNLOADER):  # pragma: no cover
            # pragma reason: Safety.
            self.DOWNLOADER.start()

            if not file_helper.exists():
                raise FileNotFoundError(file_helper.path)

        return file_helper.open("r", encoding="utf-8")
Example 11
    def get_content(self) -> Generator[Optional[dict], None, None]:
        """
        Provides a generator which provides the next line to read.
        """

        file_helper = FileHelper(self.source_file)

        if file_helper.exists():
            with file_helper.open(newline="") as file_handler:
                reader = csv.DictReader(file_handler)

                for row in reader:
                    if "tested_at" in row:
                        try:
                            row["tested_at"] = datetime.fromisoformat(
                                row["tested_at"])
                        except (TypeError, ValueError):
                            row["tested_at"] = datetime.utcnow() - timedelta(
                                days=365)

                    yield row
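The fromisoformat fallback is worth reading on its own: a bad or missing timestamp is replaced with "roughly one year ago" so the row still counts as stale. A standalone sketch of the same pattern (parse_tested_at is an illustrative name):

    from datetime import datetime, timedelta

    def parse_tested_at(value) -> datetime:
        """Parse an ISO-8601 timestamp, falling back to one year ago."""
        try:
            return datetime.fromisoformat(value)
        except (TypeError, ValueError):
            return datetime.utcnow() - timedelta(days=365)

    assert parse_tested_at("2021-03-01T10:20:30").year == 2021
    assert parse_tested_at(None) < datetime.utcnow()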
Example 12
    def migrate(self) -> "InactiveJSON2CSVMigrator":
        """
        Starts the migration.
        """

        file_helper = FileHelper(self.source_file)

        if file_helper.exists():
            self.dataset.set_authorized(True)
            dataset = {
                "idna_subject": None,
                "status": None,
                "status_source": None,
                "checker_type": "AVAILABILITY",
                "destination": None,
                "source": None,
                "tested_at": None,
                "session_id": None,
            }

            delete_file = True

            with file_helper.open("r", encoding="utf-8") as file_stream:
                for line in file_stream:
                    if (self.continuous_integration and
                            self.continuous_integration.is_time_exceeded()):
                        delete_file = False
                        break

                    line = (
                        line.strip()
                        .replace('"', "")
                        .replace(",", "")
                        .replace("{", "")
                        .replace("}", "")
                    )

                    if ":" not in line:
                        continue

                    index, value = [x.strip() for x in line.rsplit(":", 1)]

                    if not value:
                        if index.isdigit():
                            dataset["tested_at"] = datetime.datetime.fromtimestamp(
                                float(index)).isoformat()
                        else:
                            dataset["source"] = os.path.abspath(index)
                            dataset["destination"] = get_destination_from_origin(
                                dataset["source"])

                        continue

                    dataset["idna_subject"] = domain2idna.domain2idna(index)
                    dataset["status"] = value

                    if not dataset["tested_at"]:
                        dataset["tested_at"] = datetime.datetime.utcnow(
                        ).isoformat()

                    PyFunceble.facility.Logger.debug("Decoded dataset:\n%r.",
                                                     dataset)

                    self.dataset.update(dataset)

                    if self.print_action_to_stdout:
                        print_single_line()

                    PyFunceble.facility.Logger.info("Added %r into %r",
                                                    dataset["idna_subject"],
                                                    self.dataset)

            if delete_file:
                file_helper.delete()
                self.done = True
        return self
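The strip/replace chain flattens one line of the legacy JSON file into a bare index/value pair before splitting on the last colon. A worked example on an illustrative input line:

    line = '    "example.com": "ACTIVE",\n'

    cleaned = (
        line.strip()
        .replace('"', "")
        .replace(",", "")
        .replace("{", "")
        .replace("}", "")
    )

    index, value = [x.strip() for x in cleaned.rsplit(":", 1)]
    assert (index, value) == ("example.com", "ACTIVE")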
Example 13
    def start(self, print_dots: bool = False) -> "FilePreloader":
        """
        Starts the pre-loading of the currently set file path.
        """

        self.__load_description()

        broken = False
        file_helper = FileHelper(self.protocol["subject"])
        self.__description[
            self.__matching_index]["hash"] = HashHelper().hash_file(
                file_helper.path)

        if isinstance(self.continue_dataset, CSVContinueDataset):
            self.continue_dataset.set_base_directory(
                self.protocol["output_dir"])

        if (self.__description[self.__matching_index]["checker_type"] !=
                self.protocol["checker_type"]
                or self.__description[self.__matching_index]["subject_type"] !=
                self.protocol["subject_type"]):
            try:
                self.continue_dataset.cleanup()
            except TypeError:
                self.continue_dataset.cleanup(
                    session_id=self.protocol["session_id"])

        if (self.__description[self.__matching_index]["previous_hash"]
                and self.__description[self.__matching_index]["hash"] !=
                self.__description[self.__matching_index]["previous_hash"]):
            # Forces the reading of each line because there is literally no
            # way to know where something has changed.
            self.__description[self.__matching_index]["line_number"] = 1

        if (self.__description[self.__matching_index]["checker_type"] !=
                self.protocol["checker_type"]
                or self.__description[self.__matching_index]["subject_type"] !=
                self.protocol["subject_type"]
                or self.__description[self.__matching_index]["hash"] !=
                self.__description[self.__matching_index]["previous_hash"]):
            try:
                with file_helper.open("r", encoding="utf-8") as file_stream:
                    line_num = 1

                    for line in file_stream:
                        if (line_num < self.__description[
                                self.__matching_index]["line_number"]):
                            line_num += 1
                            continue

                        if (self.continuous_integration and
                                self.continuous_integration.is_time_exceeded()
                            ):
                            broken = True
                            break

                        line = line.strip()

                        if self.rpz_policy2subject and "SOA" in line:
                            self.rpz_policy2subject.set_soa(line.split()[0])

                        for subject in get_subjects_from_line(
                                line,
                                self.checker_type,
                                adblock_inputline2subject=self.adblock_inputline2subject,
                                wildcard2subject=self.wildcard2subject,
                                rpz_policy2subject=self.rpz_policy2subject,
                                rpz_inputline2subject=self.rpz_inputline2subject,
                                inputline2subject=self.inputline2subject,
                                subject2complements=self.subject2complements,
                                url2netloc=self.url2netloc,
                                cidr2subject=self.cidr2subject,
                        ):

                            to_send = copy.deepcopy(self.protocol)
                            to_send["subject"] = subject
                            to_send["idna_subject"] = domain2idna(subject)
                            to_send["tested_at"] = datetime.utcnow(
                            ) - timedelta(days=365.25 * 20)

                            if self.inactive_dataset.exists(to_send):
                                print_single_line("I")
                                continue

                            if TesterWorker.should_be_ignored(
                                    subject=to_send["idna_subject"]):
                                print_single_line("X")
                                continue

                            self.continue_dataset.update(to_send,
                                                         ignore_if_exist=True)

                            if print_dots:
                                print_single_line()

                        self.__description[
                            self.__matching_index]["line_number"] += 1
                        line_num += 1
            except KeyboardInterrupt as exception:
                self.__save_description()
                raise exception

        if not broken:
            self.__description[
                self.__matching_index]["previous_hash"] = self.__description[
                    self.__matching_index]["hash"]

        self.__save_description()

        return self
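The resume logic hinges on comparing the file's current hash with the one stored from the previous run. A minimal sketch of such a check using only the standard library (HashHelper is PyFunceble's; hash_file below is illustrative):

    import hashlib

    def hash_file(path: str, chunk_size: int = 1 << 16) -> str:
        """Return the SHA-256 hex digest of a file, read in chunks."""
        digest = hashlib.sha256()

        with open(path, "rb") as file_stream:
            for chunk in iter(lambda: file_stream.read(chunk_size), b""):
                digest.update(chunk)

        return digest.hexdigest()

    # If the digest differs from the stored one, restart from line 1;
    # otherwise, continue from the stored line number.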
Example 14
    def migrate(self) -> "WhoisJSON2CSVMigrator":
        """
        Provides the migration logic.
        """

        file_helper = FileHelper(self.source_file)

        if file_helper.exists():
            self.dataset.set_authorized(True)
            dataset = {
                "subject": None,
                "idna_subject": None,
                "expiration_date": None,
                "epoch": None,
            }

            delete_file = True

            with file_helper.open("r", encoding="utf-8") as file_stream:
                for line in file_stream:
                    if (self.continuous_integration and
                            self.continuous_integration.is_time_exceeded()):
                        delete_file = False
                        break

                    line = (
                        line.strip()
                        .replace('"', "")
                        .replace(",", "")
                        .replace("{", "")
                        .replace("}", "")
                    )

                    if ":" not in line:
                        continue

                    index, value = [x.strip() for x in line.split(":")]

                    if not value:
                        dataset["subject"], dataset["idna_subject"] = (
                            index,
                            domain2idna.domain2idna(index),
                        )
                        continue

                    if index == "epoch":
                        dataset["epoch"] = float(value)
                    elif index == "expiration_date":
                        dataset["expiration_date"] = value
                    elif index == "state":
                        PyFunceble.facility.Logger.debug(
                            "Decoded dataset:\n%r.", dataset)

                        self.dataset.update(dataset)

                        if self.print_action_to_stdout:
                            print_single_line()

                        PyFunceble.facility.Logger.info(
                            "Added %r into %r", dataset["idna_subject"],
                            self.dataset)

            if delete_file:
                file_helper.delete()
                self.done = True
        return self
Example 15
    def process_file_sorting(
        cls,
        file: str,
        remove_duplicates: bool = True,
        write_header: bool = True,
        sorting_key: Any = None,
    ) -> None:
        """
        Process the sorting of the given file.

        The idea is to split the file piece by piece and at the end join all
        sorted files. For that job, we create a temporary directory which will
        store the temporary files.

        :param file:
            The file to sort.
        :param remove_duplicates:
            Activates the deletion of duplicates.
        :param write_header:
            Activates the writing of the PyFunceble related header.

            .. warning::
                When this is set to :py:class:`True`, we assume that the
                header itself was already given, meaning that the first 2
                commented lines will be excluded from the sorting and
                regenerated.
        :param sorting_key:
            The sorting key to apply while sorting.

            This is the lambda/function that goes into the :code:`key` argument
            of the :py:class:`sorted` function.
        """

        # pylint: disable=too-many-locals,too-many-statements

        def merge_files(
            files: List[TextIOWrapper],
        ) -> Generator[str, None, None]:
            """
            Merges the given files and yields each line of the merged file.

            :param files:
                The files to merge.
            """

            result = []

            for index, file in enumerate(files):
                try:
                    iterator = iter(file)
                    value = next(iterator)

                    heapq.heappush(
                        result,
                        (sorting_key(value), index, value, iterator, file))
                except StopIteration:
                    file.close()

            previous = None
            comment_count = 0
            max_comment_count = 2

            while result:
                ignore = False

                _, index, value, iterator, file = heapq.heappop(result)

                if remove_duplicates and value == previous:
                    ignore = True

                if (write_header and comment_count < max_comment_count
                        and value[0] == "#"):
                    ignore = True
                    comment_count += 1

                if not ignore:
                    yield value
                    previous = value

                try:
                    value = next(iterator)

                    heapq.heappush(
                        result,
                        (sorting_key(value), index, value, iterator, file))
                except StopIteration:
                    file.close()

        temp_directory = tempfile.TemporaryDirectory()
        temporary_output_file = os.path.join(temp_directory.name,
                                             secrets.token_hex(6))

        if not sorting_key:
            sorting_key = get_best_sorting_key()

        file_helper = FileHelper(file)

        sorted_files = []

        PyFunceble.facility.Logger.info("Started sort of %r.", file)

        with file_helper.open("r",
                              encoding="utf-8",
                              buffering=cls.FILE_BUFFER_SIZE) as file_stream:
            while True:
                to_sort = list(islice(file_stream, cls.MAX_LINES))

                if not to_sort:
                    break

                new_file = open(
                    os.path.join(temp_directory.name, secrets.token_hex(6)),
                    "w+",
                    encoding="utf-8",
                    buffering=cls.FILE_BUFFER_SIZE,
                )
                new_file.writelines(
                    ListHelper(to_sort).remove_duplicates().custom_sort(
                        key_method=sorting_key).subject)
                new_file.flush()
                new_file.seek(0)
                sorted_files.append(new_file)

        with open(temporary_output_file,
                  "w",
                  cls.FILE_BUFFER_SIZE,
                  encoding="utf-8") as file_stream:
            if write_header:
                file_stream.write(FilePrinter.STD_FILE_GENERATION)
                file_stream.write(FilePrinter.get_generation_date_line())
                file_stream.write("\n\n")

            file_stream.writelines(merge_files(sorted_files))

        FileHelper(temporary_output_file).move(file)

        PyFunceble.facility.Logger.info("Finished sort of %r.", file)

        temp_directory.cleanup()
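The chunked sort-then-merge above is a classic external merge sort. For already-sorted iterables, the standard library offers the same k-way merge directly; a sketch, not PyFunceble's code:

    import heapq

    sorted_chunks = [["a\n", "c\n"], ["b\n", "d\n"]]

    # heapq.merge lazily yields "a\n", "b\n", "c\n", "d\n"; a key function
    # (e.g. the sorting_key above) can be passed through ``key=``.
    print("".join(heapq.merge(*sorted_chunks)))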