def main(readcsv=read_csv, method='defaultDense'):
    """Train a 5-class decision tree with a pruning set and predict on test data.

    Parameters
    ----------
    readcsv : callable
        Loader invoked as ``readcsv(path, columns, t=np.float32)``.
    method : str
        Accepted for signature compatibility; not used in this function.

    Returns
    -------
    tuple
        ``(train_result, predict_result, test_labels)``.
    """
    # Locations of the training, pruning and test data sets
    train_path = "./data/batch/decision_tree_train.csv"
    prune_path = "./data/batch/decision_tree_prune.csv"
    test_path = "./data/batch/decision_tree_test.csv"

    # Columns 0..4 hold the features, column 5 holds the label
    n_features = 5

    def load_xy(path):
        # Features are read first, then the label column, from the same file
        features = readcsv(path, range(n_features), t=np.float32)
        targets = readcsv(path, range(n_features, n_features + 1),
                          t=np.float32)
        return features, targets

    # Set up the training algorithm for 5 classes
    trainer = d4p.decision_tree_classification_training(5)

    # Fit on the training set, handing over the pruning set as well
    train_x, train_y = load_xy(train_path)
    prune_x, prune_y = load_xy(prune_path)
    training_outcome = trainer.compute(train_x, train_y, prune_x, prune_y)

    # Set up prediction and load the test set (same number of features)
    predictor = d4p.decision_tree_classification_prediction()
    test_x, test_y = load_xy(test_path)

    # Predict with the model produced by the training step
    prediction_outcome = predictor.compute(test_x, training_outcome.model)

    # One predicted value is expected per test observation
    expected_shape = (test_x.shape[0], 1)
    assert prediction_outcome.prediction.shape == expected_shape

    return (training_outcome, prediction_outcome, test_y)
Ejemplo n.º 2
0
    def _daal4py_fit(self, X, y, w, pruning_set=None):
        """Train a daal4py decision tree classifier and store its model.

        Parameters
        ----------
        X, y :
            Training features and targets; both are passed through
            ``make2d`` before training.
        w :
            Optional sample weights; when given they are converted with
            ``np.asarray`` and ``make2d``.
        pruning_set : tuple or list of length 2, optional
            ``(features, targets)`` used for reduced-error pruning.
            When ``None`` pruning is disabled.

        Raises
        ------
        ValueError
            If ``pruning_set`` is given but is not a 2-element tuple/list.
        """
        # The floating-point type is taken from X before it is reshaped
        fp_type = getFPType(X)
        X = make2d(X)
        y = make2d(y)

        # Start from the "no pruning" configuration and override if needed
        prune_mode = "none"
        prune_X = None
        prune_y = None
        if pruning_set is not None:
            is_pair = isinstance(pruning_set, (tuple, list)) \
                and len(pruning_set) == 2
            if not is_pair:
                raise ValueError("pruning_set parameter is expected to be "
                                 "a tuple of pruning features and pruning "
                                 "dependent variables")
            prune_X, prune_y = pruning_set
            check_consistent_length(prune_X, prune_y)
            prune_X = make2d(prune_X)
            prune_y = make2d(prune_y)
            prune_mode = "reducedErrorPruning"

        if w is not None:
            w = make2d(np.asarray(w))

        # max_depth of None maps to 0; otherwise the value passed on is
        # max_depth + 1
        if self.max_depth is None:
            depth_limit = 0
        else:
            depth_limit = int(self.max_depth) + 1

        trainer = d4p.decision_tree_classification_training(
            fptype=fp_type,
            method="defaultDense",
            nClasses=int(self.n_classes_),
            splitCriterion=self.split_criterion,
            maxTreeDepth=depth_limit,
            minObservationsInLeafNodes=int(self.min_observations_in_leaf_node),
            pruning=prune_mode)
        outcome = trainer.compute(X,
                                  y,
                                  dataForPruning=prune_X,
                                  labelsForPruning=prune_y,
                                  weights=w)

        # Keep the trained model and reset the cached tree state
        self.daal_model_ = outcome.model
        self._cached_tree_state_ = None
Ejemplo n.º 3
0
    def fit(self, X, y):
        """Validate hyper-parameters and train an AdaBoost classifier.

        The weak learner is a daal4py decision tree classifier; the boosted
        model is stored in ``self.daal_model_``.

        Parameters
        ----------
        X :
            Training features, validated with ``check_X_y``.
        y :
            Class labels; they are encoded with a ``LabelEncoder`` and the
            discovered classes are stored in ``self.classes_``.

        Returns
        -------
        self
            The estimator itself. When only one class is present the method
            returns before any model is trained.

        Raises
        ------
        ValueError
            If any hyper-parameter is outside its allowed range.
        """
        if self.split_criterion not in ('gini', 'infoGain'):
            raise ValueError('Parameter "split_criterion" must be '
                             '"gini" or "infoGain".')
        if not isinstance(self.max_tree_depth, numbers.Integral) or \
                self.max_tree_depth < 0:
            raise ValueError('Parameter "max_tree_depth" must be '
                             'positive integer value or zero.')
        if not isinstance(self.min_observations_in_leaf_node, numbers.Integral) or \
                self.min_observations_in_leaf_node <= 0:
            raise ValueError(
                'Parameter "min_observations_in_leaf_node" must be '
                'non-zero positive integer value.')
        if not isinstance(self.max_iterations, numbers.Integral) or \
                self.max_iterations <= 0:
            raise ValueError('Parameter "max_iterations" must be '
                             'non-zero positive integer value.')
        if self.learning_rate <= 0:
            raise ValueError('Parameter "learning_rate" must be '
                             'non-zero positive value.')
        # it is not clear why it is so but we will get error from
        # Intel(R) oneAPI Data Analytics
        # Library otherwise
        # The valid range is [0, 1): reject values below 0 OR at/above 1.
        # (The two conditions can never hold at once, so "and" would make
        # this check unreachable.)
        if self.accuracy_threshold < 0 or self.accuracy_threshold >= 1:
            raise ValueError('Parameter "accuracy_threshold" must be '
                             'more or equal to 0 and less than 1.')

        # Check that X and y have correct shape
        X, y = check_X_y(X, y, y_numeric=False, dtype=[np.single, np.double])

        check_classification_targets(y)

        # Encode labels
        le = preprocessing.LabelEncoder()
        le.fit(y)
        self.classes_ = le.classes_
        y_ = le.transform(y)

        # Convert to 2d array
        y_ = y_.reshape((-1, 1))

        self.n_classes_ = len(self.classes_)

        self.n_features_in_ = X.shape[1]

        # Classifier can't train when only one class is present.
        # Trivial case
        if self.n_classes_ == 1:
            return self

        # Define type of data
        fptype = getFPType(X)

        # Fit the model
        # Weak learner: decision tree training without pruning
        tr = d4p.decision_tree_classification_training(
            fptype=fptype,
            nClasses=self.n_classes_,
            # this parameter is strict upper bound in DAAL
            maxTreeDepth=self.max_tree_depth + 1,
            minObservationsInLeafNodes=self.min_observations_in_leaf_node,
            splitCriterion=self.split_criterion,
            pruning='none')

        # Matching prediction algorithm for the weak learner
        pr = d4p.decision_tree_classification_prediction(
            fptype=fptype, nClasses=self.n_classes_)

        train_algo = d4p.adaboost_training(
            fptype=fptype,
            nClasses=self.n_classes_,
            weakLearnerTraining=tr,
            weakLearnerPrediction=pr,
            maxIterations=self.max_iterations,
            learningRate=self.learning_rate,
            accuracyThreshold=self.accuracy_threshold)

        train_result = train_algo.compute(X, y_)

        # Store the model
        self.daal_model_ = train_result.model

        # Return the classifier
        return self