def main(readcsv=read_csv, method='defaultDense'):
    # Input data set parameters
    train_file = os.path.join('data', 'batch', 'k_nearest_neighbors_train.csv')
    predict_file = os.path.join('data', 'batch', 'k_nearest_neighbors_test.csv')

    # Read data. Let's use 5 features per observation
    nFeatures = 5
    nClasses = 5
    train_data = readcsv(train_file, range(nFeatures))
    train_labels = readcsv(train_file, range(nFeatures, nFeatures + 1))

    # Create an algorithm object and call compute
    train_algo = d4p.kdtree_knn_classification_training(nClasses=nClasses)
    # 'weights' is an optional argument; with equal weights
    # the results must be the same as without weights
    weights = np.ones((train_data.shape[0], 1))
    train_result = train_algo.compute(train_data, train_labels, weights)

    # Now let's do some prediction
    predict_data = readcsv(predict_file, range(nFeatures))
    predict_labels = readcsv(predict_file, range(nFeatures, nFeatures + 1))

    # Create an algorithm object and call compute
    predict_algo = d4p.kdtree_knn_classification_prediction()
    predict_result = predict_algo.compute(predict_data, train_result.model)

    # We expect fewer than 170 mispredicted values
    assert np.count_nonzero(predict_labels != predict_result.prediction) < 170

    return (train_result, predict_result, predict_labels)

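# The example above takes a read_csv(file, columns) helper as its default
# readcsv argument, but that helper is not shown here. Below is a minimal
# sketch of such a helper together with the imports the example relies on,
# assuming pandas is available and the CSV files have no header row (both
# assumptions, not part of the snippet itself). In a real script this would
# have to appear before main(), since read_csv is evaluated as a default
# argument when main() is defined.
import os

import numpy as np
import pandas as pd
import daal4py as d4p


def read_csv(f, c, t=np.float64):
    # Read only the requested columns and return a contiguous float ndarray,
    # which is the layout daal4py's compute() methods accept directly.
    return pd.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t).values
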
def predict(self, X):
    # Check if fit has been called
    check_is_fitted(self, ['n_features_', 'n_classes_'])

    # Input validation
    X = check_array(X, dtype=[np.single, np.double])
    if X.shape[1] != self.n_features_:
        raise ValueError(
            'Shape of input is different from what was seen in `fit`')

    # Trivial case
    if self.n_classes_ == 1:
        return np.full(X.shape[0], self.classes_[0])

    check_is_fitted(self, ['daal_model_'])

    # Define the floating-point type of the data
    fptype = getFPType(X)

    # Prediction
    predict_algo = d4p.kdtree_knn_classification_prediction(
        fptype=fptype, k=self.n_neighbors)
    predict_result = predict_algo.compute(X, self.daal_model_)

    # Decode labels
    le = preprocessing.LabelEncoder()
    le.classes_ = self.classes_
    return le.inverse_transform(
        predict_result.prediction.ravel().astype(np.int64, copy=False))

def predict(self, X):
    # Check if fit has been called
    if LooseVersion(sklearn_version) >= LooseVersion("0.22"):
        check_is_fitted(self)
    else:
        check_is_fitted(self, ['n_features_', 'n_classes_'])

    # Input validation
    X = check_array(X, dtype=[np.single, np.double])
    if X.shape[1] != self.n_features_:
        raise ValueError('Shape of input is different from what was seen in `fit`')

    # Trivial case
    if self.n_classes_ == 1:
        return np.full(X.shape[0], self.classes_[0])

    if not hasattr(self, 'daal_model_'):
        raise ValueError((
            "The class {} instance does not have 'daal_model_' attribute set. "
            "Call 'fit' with appropriate arguments before using this method."
        ).format(type(self).__name__))

    # Define the floating-point type of the data
    fptype = getFPType(X)

    # Prediction
    predict_algo = d4p.kdtree_knn_classification_prediction(fptype=fptype, k=self.n_neighbors)
    predict_result = predict_algo.compute(X, self.daal_model_)

    # Decode labels
    le = preprocessing.LabelEncoder()
    le.classes_ = self.classes_
    return le.inverse_transform(
        predict_result.prediction.ravel().astype(np.int64, copy=False))

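# Both predict() variants above depend on attributes that fit() is expected to
# set: n_features_, classes_, n_classes_ and daal_model_ (n_neighbors normally
# comes from __init__). The owning class is not shown here, so the following is
# only a minimal sketch of a compatible fit(), assuming the same daal4py
# training API used in the standalone examples and sklearn's check_X_y
# validator; it is an illustration, not the library's actual implementation.
def fit(self, X, y):
    # Validate input and remember the training-time feature count
    X, y = check_X_y(X, y, dtype=[np.single, np.double])
    self.n_features_ = X.shape[1]

    # Encode labels to 0..n_classes_-1, mirroring the LabelEncoder-based
    # decoding performed in predict()
    le = preprocessing.LabelEncoder()
    y_encoded = le.fit_transform(y)
    self.classes_ = le.classes_
    self.n_classes_ = len(self.classes_)

    # Train a kd-tree based kNN model with daal4py
    fptype = getFPType(X)
    train_algo = d4p.kdtree_knn_classification_training(
        fptype=fptype, nClasses=self.n_classes_)
    train_result = train_algo.compute(X, y_encoded.reshape(-1, 1))
    self.daal_model_ = train_result.model
    return self
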
def predict(cls, input):
    """For the input, do the predictions and return them.

    Args:
        input (a pandas dataframe): The data on which to do the predictions.
            There will be one prediction per row in the dataframe.
    """
    with open(param_path, "r") as pf:
        params = json.load(pf)
    predict_algo = kdtree_knn_classification_prediction(
        nClasses=int(params["nClasses"]),
        fptype=params["fptype"],
        method=params["method"],
        dataUseInModel=params["dataUseInModel"],
        k=int(params["k"]),
        distributed=(params["distributed"] == "True"))
    clf = cls.get_model()
    return predict_algo.compute(input, clf)

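# The serving-style predict() above reads its algorithm parameters from a JSON
# file at param_path and loads the trained model via cls.get_model(); neither
# is shown in this snippet. The sketch below only illustrates what such a
# parameter file could look like, with every concrete value (and the
# "knn_params.json" filename) being an assumption rather than something taken
# from the snippet.
import json

example_params = {
    "nClasses": "5",
    "fptype": "double",
    "method": "defaultDense",
    "dataUseInModel": "doNotUse",
    "k": "1",
    "distributed": "False",
}

with open("knn_params.json", "w") as pf:
    # All values are stored as strings, matching the int(...) and
    # string-comparison conversions done in predict() above.
    json.dump(example_params, pf)
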
def main():
    # Input data set parameters
    train_file = os.path.join('data', 'batch', 'k_nearest_neighbors_train.csv')
    predict_file = os.path.join('data', 'batch', 'k_nearest_neighbors_test.csv')

    # Read data. Let's use 5 features per observation
    nFeatures = 5
    train_data = read_csv(train_file, range(nFeatures))
    train_labels = read_csv(train_file, range(nFeatures, nFeatures + 1))

    # Create an algorithm object and call compute
    train_algo = d4p.kdtree_knn_classification_training()
    train_result = train_algo.compute(train_data, train_labels)

    # Now let's do some prediction
    predict_data = read_csv(predict_file, range(nFeatures))
    predict_labels = read_csv(predict_file, range(nFeatures, nFeatures + 1))

    # Create an algorithm object and call compute
    predict_algo = d4p.kdtree_knn_classification_prediction()
    predict_result = predict_algo.compute(predict_data, train_result.model)

    return (train_result, predict_result, predict_labels)

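# A possible driver for either main() above, assuming the CSV files exist under
# ./data/batch; the printed summary is our own choice and not part of the
# original snippets.
if __name__ == "__main__":
    (train_result, predict_result, predict_labels) = main()
    print("kNN classification results (first 5 observations):")
    print(predict_result.prediction[:5])
    print("Ground truth (first 5 observations):")
    print(predict_labels[:5])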