Beispiel #1
0
def main(readcsv=read_csv, method='defaultDense'):
    """Train a kd-tree kNN classifier with unit weights, then predict.

    Args:
        readcsv: Callable invoked as ``readcsv(path, columns)`` to load the
            selected columns of a CSV file.
        method: Not referenced anywhere in this function body.

    Returns:
        The tuple ``(train_result, predict_result, predict_labels)``.

    Raises:
        AssertionError: If 170 or more predictions differ from the labels
            read from the test file.
    """
    # Locations of the training and test data sets
    batch_dir = os.path.join('data', 'batch')
    train_path = os.path.join(batch_dir, 'k_nearest_neighbors_train.csv')
    test_path = os.path.join(batch_dir, 'k_nearest_neighbors_test.csv')

    # The first 5 columns are features, the column right after is the label
    n_features = 5
    n_classes = 5
    feature_columns = range(n_features)
    label_columns = range(n_features, n_features + 1)

    # Load the training set
    train_data = readcsv(train_path, feature_columns)
    train_labels = readcsv(train_path, label_columns)

    # Set up the training algorithm
    trainer = d4p.kdtree_knn_classification_training(nClasses=n_classes)
    # Weights are optional; with all of them equal to one the results
    # must be the same as when no weights are given at all
    unit_weights = np.ones((train_data.shape[0], 1))
    train_result = trainer.compute(train_data, train_labels, unit_weights)

    # Load the test set
    predict_data = readcsv(test_path, feature_columns)
    predict_labels = readcsv(test_path, label_columns)

    # Run prediction with the model produced by training
    predictor = d4p.kdtree_knn_classification_prediction()
    predict_result = predictor.compute(predict_data, train_result.model)

    # Sanity check: fewer than 170 mispredicted values are expected
    assert np.count_nonzero(
        predict_labels != predict_result.prediction
    ) < 170

    return (train_result, predict_result, predict_labels)
    def predict(self, X):
        """Predict class labels for the rows of ``X``.

        Args:
            X: Input samples; validated with ``check_array`` as single or
                double precision data.

        Returns:
            ``self.classes_[0]`` repeated once per row when only one class
            is known, otherwise the predictions of the kd-tree kNN algorithm
            mapped back to the entries of ``self.classes_``.

        Raises:
            ValueError: If ``X`` has a different number of columns than
                ``self.n_features_``.
        """
        # The estimator must have been fitted before it can predict
        check_is_fitted(self, ['n_features_', 'n_classes_'])

        # Validate the input and compare its width with the fitted one
        X = check_array(X, dtype=[np.single, np.double])
        if self.n_features_ != X.shape[1]:
            message = 'Shape of input is different from what was seen in `fit`'
            raise ValueError(message)

        # With a single known class every sample gets that class
        if self.n_classes_ == 1:
            only_class = self.classes_[0]
            return np.full(X.shape[0], only_class)

        # From here on the trained model itself is required
        check_is_fitted(self, ['daal_model_'])

        # Floating point type is derived from the validated input
        fptype = getFPType(X)

        # Run the prediction algorithm
        knn_predictor = d4p.kdtree_knn_classification_prediction(
            fptype=fptype,
            k=self.n_neighbors,
        )
        knn_result = knn_predictor.compute(X, self.daal_model_)

        # The algorithm output is flattened and cast to integer codes,
        # which the encoder maps back to the original class labels
        encoded_labels = knn_result.prediction.ravel().astype(
            np.int64, copy=False)
        label_decoder = preprocessing.LabelEncoder()
        label_decoder.classes_ = self.classes_
        return label_decoder.inverse_transform(encoded_labels)
    def predict(self, X):
        """Predict class labels for the rows of ``X``.

        Args:
            X: Input samples; validated with ``check_array`` as single or
                double precision data.

        Returns:
            ``self.classes_[0]`` repeated once per row when only one class
            is known, otherwise the predictions of the kd-tree kNN algorithm
            mapped back to the entries of ``self.classes_``.

        Raises:
            ValueError: If ``X`` has a different number of columns than
                ``self.n_features_``, or if the instance has no
                ``daal_model_`` attribute.
        """
        # The fitted-state check is called without an attribute list
        # from scikit-learn 0.22 onwards, and with one before that
        if LooseVersion(sklearn_version) < LooseVersion("0.22"):
            check_is_fitted(self, ['n_features_', 'n_classes_'])
        else:
            check_is_fitted(self)

        # Validate the input and compare its width with the fitted one
        X = check_array(X, dtype=[np.single, np.double])
        if self.n_features_ != X.shape[1]:
            message = 'Shape of input is different from what was seen in `fit`'
            raise ValueError(message)

        # With a single known class every sample gets that class
        if self.n_classes_ == 1:
            only_class = self.classes_[0]
            return np.full(X.shape[0], only_class)

        # From here on the trained model itself is required
        if not hasattr(self, 'daal_model_'):
            template = ("The class {} instance does not have 'daal_model_' attribute set. "
                        "Call 'fit' with appropriate arguments before using this method.")
            raise ValueError(template.format(type(self).__name__))

        # Floating point type is derived from the validated input
        fptype = getFPType(X)

        # Run the prediction algorithm
        knn_predictor = d4p.kdtree_knn_classification_prediction(
            fptype=fptype,
            k=self.n_neighbors,
        )
        knn_result = knn_predictor.compute(X, self.daal_model_)

        # The algorithm output is flattened and cast to integer codes,
        # which the encoder maps back to the original class labels
        encoded_labels = knn_result.prediction.ravel().astype(
            np.int64, copy=False)
        label_decoder = preprocessing.LabelEncoder()
        label_decoder.classes_ = self.classes_
        return label_decoder.inverse_transform(encoded_labels)
Beispiel #4
0
    def predict(cls, input):
        """Run the kd-tree kNN prediction algorithm on ``input``.

        The algorithm is configured from the JSON file at ``param_path`` and
        applied together with the model returned by ``cls.get_model()``.

        Args:
            input (a pandas dataframe): The data on which to do the
                predictions. There will be one prediction per row in the
                dataframe.

        Returns:
            Whatever ``compute`` of the prediction algorithm returns for
            ``input`` and the model.
        """
        # Only parsing the JSON needs the parameter file to be open
        with open(param_path, "r") as params_file:
            settings = json.load(params_file)

        knn_predictor = kdtree_knn_classification_prediction(
            nClasses=int(settings["nClasses"]),
            fptype=settings["fptype"],
            method=settings["method"],
            dataUseInModel=settings["dataUseInModel"],
            k=int(settings["k"]),
            # Only the exact string "True" switches distributed mode on
            distributed=settings["distributed"] == "True",
        )
        model = cls.get_model()
        return knn_predictor.compute(input, model)
Beispiel #5
0
def main():
    """Train a kd-tree kNN classifier on the sample data, then predict.

    Returns:
        The tuple ``(train_result, predict_result, predict_labels)``, where
        ``predict_labels`` are the labels read from the test file.
    """
    # Locations of the training and test data sets
    batch_dir = os.path.join('data', 'batch')
    train_path = os.path.join(batch_dir, 'k_nearest_neighbors_train.csv')
    test_path = os.path.join(batch_dir, 'k_nearest_neighbors_test.csv')

    # The first 5 columns are features, the column right after is the label
    n_features = 5
    feature_columns = range(n_features)
    label_columns = range(n_features, n_features + 1)

    # Load the training set
    train_data = read_csv(train_path, feature_columns)
    train_labels = read_csv(train_path, label_columns)

    # Train with the default algorithm settings
    trainer = d4p.kdtree_knn_classification_training()
    train_result = trainer.compute(train_data, train_labels)

    # Load the test set
    predict_data = read_csv(test_path, feature_columns)
    predict_labels = read_csv(test_path, label_columns)

    # Run prediction with the model produced by training
    predictor = d4p.kdtree_knn_classification_prediction()
    predict_result = predictor.compute(predict_data, train_result.model)

    return (train_result, predict_result, predict_labels)