Esempi in Python per _reshape, esempi in Python per igel.utils._reshape

Esempio n. 1

0

Mostra file

    def predict(self):
        """
        Load the pre-fitted model, predict on the prepared data and write the
        predictions to ``self.prediction_file`` as csv, one column per entry
        of ``self.target``.

        Any exception raised on the way is logged and not re-raised.
        @return: None
        """
        try:
            fitted_model = self._load_model(f=self.model_path)
            # the same is used for clustering
            inputs = self._prepare_predict_data()
            y_pred = _reshape(fitted_model.predict(inputs))
            logger.info(
                f"predictions shape: {y_pred.shape} | shape len: {len(y_pred.shape)}"
            )
            logger.info(f"predict on targets: {self.target}")

            # with more than one dimension each target reads its own column,
            # otherwise every target is mapped to the whole prediction array
            multi_column = len(y_pred.shape) > 1
            columns = {}
            for position, name in enumerate(self.target):
                columns[name] = y_pred[:, position] if multi_column else y_pred
            df_pred = pd.DataFrame.from_dict(columns)

            logger.info(f"saving the predictions to {self.prediction_file}")
            df_pred.to_csv(self.prediction_file)

        except Exception as e:
            logger.exception(f"Error while preparing predictions: {e}")

Esempio n. 2

0

Mostra file

    def _get_predictions(self, **kwargs):
        """
        Load the pre-fitted model, predict on the prepared data and collect
        the predictions in a DataFrame with one column per entry of
        ``self.target``.

        When ``self.target`` is empty it is replaced by ``["result"]``.
        ``kwargs`` are accepted but not used here.
        @return: DataFrame holding the predictions; None is returned
                 implicitly when an exception was caught (it is logged)
        """
        try:
            fitted_model = self._load_model(f=self.model_path)
            # the same is used for clustering
            inputs = self._prepare_predict_data()
            y_pred = _reshape(fitted_model.predict(inputs))
            logger.info(
                f"predictions shape: {y_pred.shape} | shape len: {len(y_pred.shape)}"
            )
            logger.info(f"predict on targets: {self.target}")

            # fall back to a single generic column name when no target is set
            if not self.target:
                self.target = ["result"]

            # with more than one dimension each target reads its own column,
            # otherwise every target is mapped to the whole prediction array
            multi_column = len(y_pred.shape) > 1
            columns = {}
            for position, name in enumerate(self.target):
                columns[name] = y_pred[:, position] if multi_column else y_pred
            return pd.DataFrame.from_dict(columns)

        except Exception as e:
            logger.exception(f"Error while preparing predictions: {e}")

Esempio n. 3

0

Mostra file

File: igel.py Progetto: PhilanaKatharina/igel

    def _process_data(self, fit=True):
        """
        Read the csv at ``self.data_path``, optionally fill in missing values
        and separate the target column(s) from the remaining columns.

        @param fit: when falsy the data is returned without splitting
        @return: (x, y) when ``fit`` is falsy, otherwise
                 (x_train, y_train, x_test, y_test) built from the 'split'
                 options of ``self.dataset_props``; None is returned
                 implicitly when an exception was caught (it is logged)
        """
        assert isinstance(self.target, list), "provide target(s) as a list in the yaml file"
        assert len(self.target) > 0, "please provide at least a target to predict"
        try:
            dataset = pd.read_csv(self.data_path)
            logger.info(f"dataset shape: {dataset.shape}")
            attributes = list(dataset.columns)
            logger.info(f"dataset attributes: {attributes}")

            # handle missing values in the dataset
            # (preprocessing strategy: mean, median, mode etc..)
            preprocess_props = self.dataset_props.get('preprocess', None)
            missing_strategy = (
                preprocess_props.get('missing_values') if preprocess_props else None
            )
            if missing_strategy:
                dataset = handle_missing_values(dataset, strategy=missing_strategy)

            # the check uses the column names read before any preprocessing
            if not all(col in attributes for col in self.target):
                raise Exception("chosen target(s) to predict must exist in the dataset")

            # popping removes the target columns from the dataset, so what is
            # left afterwards are the input columns
            target_columns = [dataset.pop(name) for name in self.target]
            targets_frame = pd.concat(target_columns, axis=1)
            x = _reshape(dataset.to_numpy())
            y = _reshape(targets_frame.to_numpy())
            logger.info(f"y shape: {y.shape} and x shape: {x.shape}")
            if not fit:
                return x, y

            split_config = self.dataset_props.get('split')
            split_kwargs = {
                'test_size': split_config.get('test_size'),
                'shuffle': split_config.get('shuffle'),
            }
            stratify = split_config.get('stratify')
            # a missing value or the literal "none" disables stratification
            if not stratify or stratify.lower() == "none":
                stratify = None
            x_train, x_test, y_train, y_test = train_test_split(
                x, y, stratify=stratify, **split_kwargs
            )
            return x_train, y_train, x_test, y_test

        except Exception as e:
            logger.exception(f"error occured while preparing the data: {e.args}")

Esempio n. 4

0

Mostra file

File: igel.py Progetto: PhilanaKatharina/igel

    def _prepare_predict_data(self):
        """
        Read the csv at ``self.data_path`` and pass the loaded data through
        ``_reshape``.

        @return: the result of ``_reshape`` for the loaded data; None is
                 returned implicitly when an exception was caught (it is
                 logged)
        """
        try:
            x_val = pd.read_csv(self.data_path)
            logger.info(f"shape of the prediction data: {x_val.shape}")

            x_pred = _reshape(x_val)
            return x_pred
        except Exception as e:
            logger.exception(f"exception while preparing prediction data: {e}")

Esempio n. 5

0

Mostra file

    def _process_data(self, target='fit'):
        """
        Read the csv at ``self.data_path``, apply the preprocessing configured
        under the 'preprocess' key of ``self.dataset_props`` (encoding,
        missing values, scaling) and return the data needed for ``target``.

        @param target: stage the data is prepared for; 'predict',
                       'fit_cluster' and 'evaluate' are handled specially,
                       any other value goes through the train/test split
        @return: - x only, for 'predict' and 'fit_cluster' (normalized when a
                   'scale' section is configured)
                 - (x, y) for 'evaluate'
                 - (x, y, None, None) when no 'split' options are configured
                 - (x_train, y_train, x_test, y_test) otherwise
                 None is returned implicitly when an exception was caught
                 (it is logged)
        """
        assert isinstance(self.target, list), \
            "provide target(s) as a list in the yaml file"
        if self.model_type != "clustering":
            assert len(self.target) > 0, \
                "please provide at least a target to predict"

        try:
            read_data_options = self.dataset_props.get('read_data_options', None)
            if read_data_options:
                dataset = pd.read_csv(self.data_path, **read_data_options)
            else:
                dataset = pd.read_csv(self.data_path)
            logger.info(f"dataset shape: {dataset.shape}")
            attributes = list(dataset.columns)
            logger.info(f"dataset attributes: {attributes}")

            preprocess_props = self.dataset_props.get('preprocess', None)
            if preprocess_props:
                # handle encoding
                encoding = preprocess_props.get('encoding')
                if encoding:
                    encoding_type = encoding.get('type', None)
                    column = encoding.get('column', None)
                    if column in attributes:
                        dataset, classes_map = encode(
                            df=dataset,
                            encoding_type=encoding_type.lower(),
                            column=column,
                        )
                        if classes_map:
                            self.dataset_props['label_encoding_classes'] = classes_map
                            logger.info(
                                f"adding classes_map to dataset props: \n{classes_map}"
                            )
                        logger.info(
                            f"shape of the dataset after encoding => {dataset.shape}"
                        )

                # handle missing values in the dataset
                # (preprocessing strategy: mean, median, mode etc..)
                missing_strategy = preprocess_props.get('missing_values')
                if missing_strategy:
                    dataset = handle_missing_values(dataset, strategy=missing_strategy)
                    logger.info(
                        f"shape of the dataset after handling missing values => {dataset.shape}"
                    )

            # these stages use every column as input: no target columns are
            # separated and only the optional scaling is applied
            if target in ('predict', 'fit_cluster'):
                x = _reshape(dataset.to_numpy())
                scale_config = (
                    preprocess_props.get('scale', None) if preprocess_props else None
                )
                if not scale_config:
                    return x
                return normalize(x, method=scale_config.get('method', None))

            # the check uses the column names read right after loading
            if not all(col in attributes for col in self.target):
                raise Exception(
                    "chosen target(s) to predict must exist in the dataset")

            # popping removes the target columns from the dataset, so what is
            # left afterwards are the input columns
            target_columns = [dataset.pop(name) for name in self.target]
            targets_frame = pd.concat(target_columns, axis=1)
            x = _reshape(dataset.to_numpy())
            y = _reshape(targets_frame.to_numpy())
            logger.info(f"y shape: {y.shape} and x shape: {x.shape}")

            # handle data scaling
            scale_config = (
                preprocess_props.get('scale', None) if preprocess_props else None
            )
            if scale_config:
                scaling_method = scale_config.get('method', None)
                scaling_target = scale_config.get('target', None)
                # 'all' scales both sides, 'inputs' only x, 'outputs' only y
                if scaling_target in ('all', 'inputs'):
                    x = normalize(x, method=scaling_method)
                if scaling_target in ('all', 'outputs'):
                    y = normalize(y, method=scaling_method)

            if target == 'evaluate':
                return x, y

            split_config = self.dataset_props.get('split', None)
            if not split_config:
                return x, y, None, None

            split_kwargs = {
                'test_size': split_config.get('test_size'),
                'shuffle': split_config.get('shuffle'),
            }
            stratify = split_config.get('stratify')
            # a missing value or the literal "default" disables stratification
            if not stratify or stratify.lower() == "default":
                stratify = None
            x_train, x_test, y_train, y_test = train_test_split(
                x, y, stratify=stratify, **split_kwargs
            )

            return x_train, y_train, x_test, y_test

        except Exception as e:
            logger.exception(
                f"error occured while preparing the data: {e.args}")