def __init__(self, conf: Conf):
        """
        Set up the evaluator: validate the environment, load the datasets and
        register the classifiers in use.

        The checks run in this order, each delegated to Validation:
        Python version, readability of the training set file, readability of
        the test set file. Only afterwards are the logger, the configuration
        and the two datasets stored on the instance.

        :param conf: configuration object exposing the dataset_train and
            dataset_test file paths
        """
        super().__init__()

        # interpreter version check against Evaluator.REQUIRED_PYTHON
        required = Evaluator.REQUIRED_PYTHON
        python_message = (
            "Unsupported Python version.\n"
            f"Required Python {required[0]}.{required[1]} or higher."
        )
        Validation.python_version(required, python_message)

        # training set file check
        train_path = conf.dataset_train
        train_message = (
            "Training set file *must* exists and be readable. "
            f"Current file: '{train_path}'.\n"
            "Training set path (fully qualified) can be specified in conf.ini file or using Conf object."
        )
        Validation.can_read(train_path, train_message)

        # test set file check
        test_path = conf.dataset_test
        test_message = (
            "Test set file *must* exists and be readable. "
            f"Current file: '{test_path}'.\n"
            "Test set path (fully qualified) can be specified in conf.ini file or using Conf object."
        )
        Validation.can_read(test_path, test_message)

        log_manager = LogManager.get_instance()
        self.__LOG = log_manager.logger(LogManager.Logger.EVAL)
        self.__conf = conf

        # NOTE(review): self.conf is not defined in this method; presumably a
        #   property of the class exposing self.__conf - confirm.
        # The whole training file is used as training set.
        training_frame = pd.read_csv(self.conf.dataset_train)
        self.__training = Set(training_frame)

        # The test file is read with the same layout as the provided
        # training_set.csv (example: ./res/dataset/test_set_no_index.csv).
        # Alternatives for differently formatted test files:
        #   - header row (F1-20 and CLASS) plus index, i.e. a file saved with
        #     pd.to_csv('/path', index=True)
        #     (example: ./res/dataset/test_set_index.csv):
        #       pd.read_csv(self.conf.dataset_test, index_col=0)
        #   - neither header row nor index
        #     (example: ./res/dataset/test_set_no_index_features.csv):
        #       pd.read_csv(self.conf.dataset_test, header=None,
        #                   names=[f"F{i}" for i in range(1, 21)] + ["CLASS"])
        test_frame = pd.read_csv(self.conf.dataset_test)
        self.__test = Set(test_frame)

        # Classifiers currently in use; every entry starts out as None.
        # Not enabled at the moment: _DECISION_TREE, _KNEAREST_NEIGHBORS,
        #   _STOCHASTIC_GRADIENT_DESCENT, _NAIVE_BAYES, _KMEANS.
        enabled_classifiers = (
            Evaluator._MULTILAYER_PERCEPTRON,
            Evaluator._SUPPORT_VECTOR_MACHINE,
            Evaluator._RANDOM_FOREST,
            Evaluator._ADA_BOOST,
        )
        self.__classifiers = dict.fromkeys(enabled_classifiers)
Ejemplo n.º 2
0
    def __check_permissions(self):
        """
        Verify the configured directories before any operation is performed.

        Every source directory goes through Validation.is_dir,
        Validation.can_read and Validation.can_write. Every destination
        directory goes through Validation.is_dir_writeable; when that check
        raises NotADirectoryError, write permission on the parent directory is
        verified and the destination is created. Each destination is finally
        compared with every source through Validation.are_symlinks.

        :raise NotADirectoryError: presumably when a source directory is
            missing (raised inside Validation - confirm)
        :raise PermissionError: presumably when a required read/write
            permission is missing (raised inside Validation - confirm)
        :raise LinksError: presumably when a source and a destination are the
            same directory or symlinks of each other (confirm; a ValueError
            may be used for the "same directory" case)
        """
        dispatcher_config = self.__dispatcher_config
        sources = dispatcher_config.dispatcher_sources
        destinations = dispatcher_config.dispatcher_destinations

        # input directories: must exist, be readable and be writable
        for source_key in sources:
            source_dir = sources[source_key]
            Validation.is_dir(source_dir, f"Missing input directory '{source_dir}'")
            Validation.can_read(source_dir, f"Missing read permission on '{source_dir}'")
            Validation.can_write(source_dir, f"Missing write permission on '{source_dir}'")

        # output directories: must be writable, created on demand
        for destination_key in destinations:
            destination_dir = destinations[destination_key]
            try:
                Validation.is_dir_writeable(
                    destination_dir,
                    f"Directory '{destination_dir}' *must* exists and be writable"
                )
            except NotADirectoryError:
                destination_path = Path(destination_dir)
                parent_directory = destination_path.parent
                # the parent has to be writable to create the directory in it
                Validation.can_write(
                    parent_directory,
                    f"Missing write permission on '{parent_directory}'"
                )
                FileObserver.__LOG.info(f"Creating missing destination directory '{destination_dir}'")
                destination_path.mkdir(parents=True, exist_ok=True)

            # this destination is checked against each input directory
            for source_key in sources:
                source_dir = sources[source_key]
                Validation.are_symlinks(
                    source_dir,
                    destination_dir,
                    f"Input ('{source_dir}') and output ('{destination_dir}') directory can not be the same (or symlinks)"
                )