Ejemplo n.º 1
0
    def score_from_file(
        self,
        file_path: str,
        target_column: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs,
    ) -> float:
        """ Score the model on the data stored in a file.

        The file is loaded with `X_y_from_file` and the resulting features
        and target are handed to `self.score`.

        Parameters
        ----------
        file_path: str
            Path to the csv or ARFF file holding the evaluation data.
        target_column: str, optional (default=None)
            Name of the column the model should predict.
            When None, the last column is used as the target.
        encoding: str, optional
            Encoding of the file, forwarded to the loader.
        **kwargs:
            Extra arguments forwarded to the loader
            (for calls to pandas.read_csv or arff.load).

        Returns
        -------
        float
            The score on the given data according to the `scoring` metric.
        """
        features, target = X_y_from_file(
            file_path, split_column=target_column, encoding=encoding, **kwargs
        )
        score = self.score(features, target)
        return score
Ejemplo n.º 2
0
    def fit_from_file(
        self,
        file_path: str,
        target_column: Optional[str] = None,
        encoding: Optional[str] = None,
        warm_start: Optional[List[Individual]] = None,
        **kwargs,
    ) -> None:
        """ Find and fit a model to predict the target column from the other columns.

        The file is loaded with `X_y_from_file` and the resulting features
        and target are handed to `self.fit`.

        Parameters
        ----------
        file_path: str
            Path to a csv or ARFF file containing the training data.
        target_column: str, optional (default=None)
            Specifies which column the model should predict.
            If left None, the last column is taken to be the target.
        encoding: str, optional
            Encoding of the file.
        warm_start: List[Individual], optional (default=None)
            A list of individuals to start the search procedure with.
            If None is given, random start candidates are generated.
        **kwargs:
            Any additional arguments for calls to pandas.read_csv or arff.load.

        """
        # Pass the optional arguments by keyword so the call does not depend
        # on the positional order of the loader's parameters.
        x, y = X_y_from_file(
            file_path, split_column=target_column, encoding=encoding, **kwargs
        )
        self.fit(x, y, warm_start)
Ejemplo n.º 3
0
    def predict_proba_from_file(
        self,
        arff_file_path: str,
        target_column: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs,
    ):
        """ Predict the class probabilities for input found in the file.

        The file is loaded with `X_y_from_file`; the target values it returns
        are discarded. The features are passed through
        `self._prepare_for_prediction` and then `self._predict_proba`.

        Parameters
        ----------
        arff_file_path: str
            A file with the same columns as the one that was used in fit.
            Target column must be present in file, but its values are ignored.
        target_column: str, optional (default=None)
            Specifies which column the model should predict.
            If left None, the last column is taken to be the target.
        encoding: str, optional
            Encoding of the file.
        **kwargs:
            Any additional arguments forwarded to `X_y_from_file`
            (for calls to pandas.read_csv or arff.load).

        Returns
        -------
        numpy.ndarray
            Numpy array with class probabilities.
            The array is of shape (N, K) where N is len(X),
            and K is the number of class labels found in `y` of `fit`.
        """
        # Pass the optional arguments by keyword so the call does not depend
        # on the positional order of the loader's parameters.
        x, _ = X_y_from_file(
            arff_file_path,
            split_column=target_column,
            encoding=encoding,
            **kwargs,
        )
        x = self._prepare_for_prediction(x)
        return self._predict_proba(x)
Ejemplo n.º 4
0
    def predict_from_file(
        self,
        file_path: str,
        target_column: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs,
    ) -> np.ndarray:
        """ Predict the target for every row found in a data file.

        The file is loaded with `X_y_from_file`; the target values it returns
        are discarded. The features are passed through
        `self._prepare_for_prediction` and then `self._predict`.

        Parameters
        ----------
        file_path: str
            A csv or ARFF file with the same columns as the one used in fit.
            The target column must be present, but its values are ignored.
        target_column: str, optional (default=None)
            Name of the column the model should predict.
            When None, the last column is used as the target.
        encoding: str, optional
            Encoding of the file, forwarded to the loader.
        **kwargs:
            Extra arguments forwarded to the loader
            (for calls to pandas.read_csv or arff.load).

        Returns
        -------
        numpy.ndarray
            Array with a prediction for each row in the file.
        """
        features, _ = X_y_from_file(
            file_path, split_column=target_column, encoding=encoding, **kwargs
        )
        prepared = self._prepare_for_prediction(features)
        return self._predict(prepared)
Ejemplo n.º 5
0
Archivo: cli.py Proyecto: prabhant/gama
def main():
    """ Run the command line interface: load data, configure GAMA, search, export.

    Steps, in order:
      1. Parse the command line arguments with `parse_args`.
      2. Validate the input file (must exist and end in csv or arff) and
         load it with `X_y_from_file`.
      3. If no mode was given, pick "classification" when the target has a
         categorical dtype and "regression" otherwise.
      4. Build a `GamaRegressor` or `GamaClassifier` from the arguments.
      5. Unless `--dry_run` is set, fit the model, pickle `automl.model` to
         the output file and optionally export a Python script.
         On a dry run only `automl.cleanup("all")` is called.

    Raises
    ------
    FileNotFoundError
        If the (lower-cased) input path does not exist.
    ValueError
        If the file extension is not csv or arff, or the mode is neither
        "regression" nor "classification".
    """
    args = parse_args()

    print("CLI: Processing input")
    # NOTE(review): the path is lower-cased before it is checked and loaded.
    # On a case-sensitive file system this presumably fails for paths that
    # contain upper-case characters; left as-is because the loader's handling
    # of upper-case extensions is not visible here - confirm before changing.
    input_path = args.input_file.lower()
    if not os.path.exists(input_path):
        raise FileNotFoundError(args.input_file)
    if input_path.split(".")[-1] not in ["csv", "arff"]:
        raise ValueError("Unknown file extension. Please use csv or arff.")

    kwargs = {}
    if input_path.endswith(".csv"):
        # The original code checked `args.separator` but then read
        # `args.seperator`, so one of the two lookups raised AttributeError.
        # The argument definition is not visible from here, so accept either
        # spelling and resolve the value exactly once.
        separator = getattr(args, "separator", None)
        if separator is None:
            separator = getattr(args, "seperator", None)
        if separator is not None:
            kwargs["sep"] = separator

    x, y = X_y_from_file(
        file_path=input_path,
        split_column=args.target,
        **kwargs,
    )
    if args.mode is None:
        # Infer the task type from the dtype of the target column.
        if is_categorical_dtype(y.dtype):
            args.mode = "classification"
        else:
            args.mode = "regression"
        print(f"Detected a {args.mode} problem.")

    print("CLI: Initializing GAMA")
    log_level = logging.INFO if args.verbose else logging.WARNING
    configuration = dict(
        regularize_length=args.prefer_short,
        # Time limits are given in minutes on the command line.
        max_total_time=args.time_limit_m * 60,
        max_eval_time=args.max_eval_time_m * 60,
        n_jobs=args.n_jobs,
        verbosity=log_level,
        output_directory=args.outdir,
        store="nothing" if args.dry_run else "logs",
    )
    # Only override the scoring metric when one was explicitly requested.
    if args.metric:
        configuration["scoring"] = args.metric

    if args.mode == "regression":
        automl = GamaRegressor(**configuration)
    elif args.mode == "classification":
        automl = GamaClassifier(**configuration)
    else:
        raise ValueError(f"Mode {args.mode} is not valid (--mode).")

    if not args.dry_run:
        print("CLI: Starting model search")
        automl.fit(x, y)

        # == Model Export ===
        print("CLI: Exporting models.")
        with open(args.output_file, "wb") as fh:
            pickle.dump(automl.model, fh)

        if args.export_python is not None:
            automl.export_script(args.export_python, raise_if_exists=False)
    else:
        automl.cleanup("all")
    print("done!")
Ejemplo n.º 6
0
 def test_X_y_from_file_default_split_column(self):
     """ Without `split_column`, the returned target is named INTERNODE_29. """
     _features, target = X_y_from_file(ARFF_CJS)
     expected_name = "INTERNODE_29"
     assert target.name == expected_name
Ejemplo n.º 7
0
 def test_X_y_from_file_invalid_split_column(self):
     """ An unknown `split_column` raises a ValueError naming the column. """
     expected_message = "No column named NOT_EXIST found"
     with pytest.raises(ValueError, match=expected_message):
         X_y_from_file(ARFF_CJS, split_column="NOT_EXIST")
Ejemplo n.º 8
0
 def test_X_y_from_arff(self):
     """ Load the ARFF fixture split on column TR and run the shared checks. """
     features, target = X_y_from_file(ARFF_CJS, split_column="TR")
     _test_x_y_d23380(features, target)
Ejemplo n.º 9
0
 def test_X_y_from_csv(self):
     """ Load the csv fixture split on column TR and run the shared checks. """
     features, target = X_y_from_file(CSV_CJS_FULL, split_column="TR")
     _test_x_y_d23380(features, target)