Esempio n. 1
0
    def __init__(self, **cli_args):
        """Store the CLI arguments, load the matching settings and run the command.

        Keys read from ``cli_args``: ``cmd``, ``data_path``, ``yaml_path`` and
        ``task``; for every command other than ``train`` also ``model_path``,
        ``prediction_file`` and ``description_file``. Fallback values come
        from ``self.defaults``, which is not set in this method and must
        already be available on the instance.

        The last statement calls the method of this object whose name equals
        ``cmd``.

        Raises:
            Exception: if the command is ``train`` and the configuration
                file does not end in ``yaml``, ``yml`` or ``json`` (this
                includes paths that have no extension at all).
        """
        self.cmd: str = cli_args.get("cmd")
        self.data_path: str = cli_args.get("data_path")
        self.config_path: str = cli_args.get("yaml_path")
        self.task = cli_args.get("task")
        logger.info(f"Executing command: {self.cmd}")
        logger.info(f"Reading data from: {self.data_path}")
        logger.info(f"Reading yaml configs from: {self.config_path}")

        if self.cmd == "train":
            if not self.config_path:
                # No configuration file given: the task passed on the command
                # line is the only source for the model type.
                self.model_type = self.task
            else:
                # Use the text after the LAST dot as the extension. Indexing
                # the plain split with [1] picked the text after the FIRST
                # dot, which rejected paths such as "./config.yaml" or
                # "my.model.yaml" and raised IndexError when there was no dot.
                self.file_ext: str = self.config_path.rsplit(".", 1)[-1]

                if self.file_ext not in ["yaml", "yml", "json"]:
                    raise Exception(
                        "Configuration file can be a yaml or a json file!"
                    )

                self.configs: dict = (
                    read_json(self.config_path)
                    if self.file_ext == "json"
                    else read_yaml(self.config_path)
                )

                # Each section falls back to the stored defaults when the
                # configuration file does not contain it.
                self.dataset_props: dict = self.configs.get(
                    "dataset", self.defaults.dataset_props
                )
                self.model_props: dict = self.configs.get(
                    "model", self.defaults.model_props
                )
                self.training_args: dict = self.configs.get(
                    "training", self.defaults.training_args
                )
                self.model_args = self.model_props.get("arguments")
                # A task given on the command line takes precedence over the
                # model type written in the configuration file.
                self.model_type = self.task or self.model_props.get("type")

        else:
            # Any command other than "train" works on a pre-fitted model.
            self.model_path = cli_args.get(
                "model_path", self.defaults.model_path
            )
            logger.info(f"path of the pre-fitted model => {self.model_path}")
            self.prediction_file = cli_args.get(
                "prediction_file", self.defaults.prediction_file
            )
            # set description.json if provided:
            self.description_file = cli_args.get(
                "description_file", self.defaults.description_file
            )
            # load description file to read stored training parameters
            with open(self.description_file) as f:
                dic = json.load(f)
                self.model_type: str = dic.get("task")  # type of the model
                self.dataset_props: dict = dic.get(
                    "dataset_props"
                )  # dataset props entered while fitting
        # Dispatch to the method named after the command.
        getattr(self, self.cmd)()
Esempio n. 2
0
    def __init__(self, **cli_args):
        """Validate the CLI arguments, load the matching settings and run the command.

        Keys read from ``cli_args``: ``cmd``, ``data_path``, ``yaml_path``
        (only for ``fit``) and ``model_path`` (for every other command).
        ``self.available_commands``, ``self.default_dataset_props``,
        ``self.default_model_props``, ``self.default_model_path`` and
        ``self.description_file`` are read but not set here, so they must
        already be available on the instance.

        The last statement calls the method of this object whose name equals
        the command.

        Raises:
            Exception: if ``cmd`` is missing or not one of
                ``self.available_commands``, or if the command is ``fit``
                and no ``yaml_path`` was given.
        """
        logger.info(f"Entered CLI args: {cli_args}")
        logger.info(f"Executing command: {cli_args.get('cmd')} ...")
        self.data_path: str = cli_args.get('data_path')  # path to the dataset
        logger.info(f"reading data from {self.data_path}")
        self.command = cli_args.get('cmd', None)
        if not self.command or self.command not in self.available_commands:
            raise Exception(f"You must enter a valid command.\n"
                            f"available commands: {self.available_commands}")

        if self.command == "fit":
            self.yml_path = cli_args.get('yaml_path')
            if not self.yml_path:
                # Fail with a readable message instead of an AttributeError
                # from calling .split() on None further down.
                raise Exception("You must provide the path to a yaml or json configuration file "
                                "in order to fit a model.")
            file_ext = self.yml_path.split('.')[-1]
            logger.info(f"You passed the configurations as a {file_ext} file.")

            # Both "yaml" and "yml" (in any letter case) are YAML files;
            # comparing against "yaml" only sent ".yml" files to the JSON
            # reader. Every other extension still goes to the JSON reader.
            is_yaml = file_ext.lower() in ('yaml', 'yml')
            self.yaml_configs = read_yaml(self.yml_path) if is_yaml else read_json(self.yml_path)
            logger.info(f"your chosen configuration: {self.yaml_configs}")

            # dataset options given by the user
            self.dataset_props: dict = self.yaml_configs.get('dataset', self.default_dataset_props)
            # model options given by the user
            self.model_props: dict = self.yaml_configs.get('model', self.default_model_props)
            # list of target(s) to predict
            self.target: list = self.yaml_configs.get('target')

            self.model_type: str = self.model_props.get('type')
            logger.info(f"dataset_props: {self.dataset_props} \n"
                        f"model_props: {self.model_props} \n "
                        f"target: {self.target} \n")

            # handle random numbers generation
            random_num_options = self.dataset_props.get('random_numbers', None)
            if random_num_options:
                generate_reproducible = random_num_options.get('generate_reproducible', None)
                if generate_reproducible:
                    logger.info("You provided the generate reproducible results option.")
                    # 42 is the seed used when the configuration gives none.
                    seed = random_num_options.get('seed', 42)
                    np.random.seed(seed)
                    logger.info(f"Setting a seed = {seed} to generate same random numbers on each experiment..")

        # if entered command is evaluate or predict, then the pre-fitted model needs to be loaded and used
        else:
            self.model_path = cli_args.get('model_path', self.default_model_path)
            logger.info(f"path of the pre-fitted model => {self.model_path}")
            # load description file to read stored training parameters
            # NOTE(review): self.description_file is not assigned in this
            # method; presumably a class-level attribute - confirm.
            with open(self.description_file, 'r') as f:
                dic = json.load(f)
                self.target: list = dic.get("target")  # target to predict as a list
                self.model_type: str = dic.get("type")  # type of the model -> regression, classification or clustering
                self.dataset_props: dict = dic.get('dataset_props')  # dataset props entered while fitting
        # Dispatch to the method named after the command.
        getattr(self, self.command)()