Example #1
import os

import numpy as np
import daal4py as d4p

# read_csv is a small CSV-loading helper used by the daal4py examples;
# a pandas-based sketch is shown after this example.


def main(readcsv=read_csv, method='defaultDense'):
    # Input data set parameters
    train_file = os.path.join('data', 'batch', 'k_nearest_neighbors_train.csv')
    predict_file = os.path.join('data', 'batch',
                                'k_nearest_neighbors_test.csv')

    # Read data. Let's use 5 features per observation
    nFeatures = 5
    nClasses = 5
    train_data = readcsv(train_file, range(nFeatures))
    train_labels = readcsv(train_file, range(nFeatures, nFeatures + 1))

    # Create an algorithm object and call compute
    train_algo = d4p.kdtree_knn_classification_training(nClasses=nClasses)
    # 'weights' is an optional argument; with equal weights the results
    # must match those obtained without weights
    weights = np.ones((train_data.shape[0], 1))
    train_result = train_algo.compute(train_data, train_labels, weights)

    # Now let's do some prediction
    predict_data = readcsv(predict_file, range(nFeatures))
    predict_labels = readcsv(predict_file, range(nFeatures, nFeatures + 1))

    # Create an algorithm object and call compute
    predict_algo = d4p.kdtree_knn_classification_prediction()
    predict_result = predict_algo.compute(predict_data, train_result.model)

    # We expect fewer than 170 mispredicted values
    assert np.count_nonzero(predict_labels != predict_result.prediction) < 170

    return (train_result, predict_result, predict_labels)
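
Both examples pass a read_csv helper that the listing itself does not define. Below is a minimal sketch of such a helper, assuming a pandas-based loader like the one used in the daal4py example sources; the exact name and signature here are an assumption:

# Hypothetical pandas-based read_csv helper assumed by the examples above
import numpy as np
import pandas


def read_csv(f, c, t=np.float64):
    # Read only the requested columns; files are comma-separated with no header
    return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t)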
Example #2
import os

import daal4py as d4p

# Uses the same read_csv helper as Example #1.


def main():
    # Input data set parameters
    train_file = os.path.join('data', 'batch', 'k_nearest_neighbors_train.csv')
    predict_file = os.path.join('data', 'batch',
                                'k_nearest_neighbors_test.csv')

    # Read data. Let's use 5 features per observation
    nFeatures = 5
    train_data = read_csv(train_file, range(nFeatures))
    train_labels = read_csv(train_file, range(nFeatures, nFeatures + 1))

    # Create an algorithm object and call compute
    train_algo = d4p.kdtree_knn_classification_training()
    train_result = train_algo.compute(train_data, train_labels)

    # Now let's do some prediction
    predict_data = read_csv(predict_file, range(nFeatures))
    predict_labels = read_csv(predict_file, range(nFeatures, nFeatures + 1))

    # Create an algorithm object and call compute
    predict_algo = d4p.kdtree_knn_classification_prediction()
    predict_result = predict_algo.compute(predict_data, train_result.model)

    return (train_result, predict_result, predict_labels)
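
A small driver can run either main() variant and inspect the results. The snippet below is a hypothetical usage sketch, not part of the examples above; the 20-row slice is just an illustrative choice:

# Hypothetical driver for Example #2 (Example #1 is called the same way)
if __name__ == "__main__":
    (train_result, predict_result, plabels) = main()
    print("kNN classification results (first 20 observations):")
    print(predict_result.prediction[:20])
    print("Ground truth (first 20 observations):")
    print(plabels[:20])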
Example #3
    # fit() method of a scikit-learn-compatible k-nearest-neighbors classifier
    # backed by daal4py. It assumes module-level imports of numbers, warnings,
    # numpy (np) and daal4py (d4p), the scikit-learn helpers check_X_y,
    # check_classification_targets, preprocessing and check_random_state,
    # and a getFPType helper that maps the array dtype to a daal4py precision.
    def fit(self, X, y):
        # Check the algorithm parameters
        if not ((isinstance(self.n_neighbors, numbers.Integral)) and
                (self.n_neighbors > 0)):
            raise ValueError('Parameter "n_neighbors" must be '
                             'a positive integer value.')
        if not self.weights == 'uniform':
            warnings.warn('Value "{}" for argument "weights" not supported. '
                          'Using default "uniform".'.format(self.weights),
                          RuntimeWarning,
                          stacklevel=2)
            self.weights = 'uniform'
        if not self.algorithm == 'kd_tree':
            warnings.warn('Value "{}" for argument "algorithm" not supported. '
                          'Using default "kd_tree".'.format(self.algorithm),
                          RuntimeWarning,
                          stacklevel=2)
            self.algorithm = 'kd_tree'
        if not self.leaf_size == 31:
            warnings.warn('Value "{}" for argument "leaf_size" not supported. '
                          'Using default "31".'.format(self.leaf_size),
                          RuntimeWarning,
                          stacklevel=2)
            self.leaf_size = 31
        if not self.p == 2:
            warnings.warn('Value "{}" for argument "p" not supported. '
                          'Using default "2".'.format(self.p),
                          RuntimeWarning,
                          stacklevel=2)
            self.p = 2
        if not self.metric == 'minkowski':
            warnings.warn('Value "{}" for argument "metric" not supported. '
                          'Using default "minkowski".'.format(self.metric),
                          RuntimeWarning,
                          stacklevel=2)
            self.metric = 'minkowski'
        if self.metric_params is not None:
            warnings.warn(
                'Argument "metric_params" not (yet) supported. '
                'Ignored.',
                RuntimeWarning,
                stacklevel=2)
            self.metric_params = None
        if self.n_jobs is not None:
            warnings.warn(
                'Argument "n_jobs" not (yet) supported. '
                'Ignored. All available processors will be used.',
                RuntimeWarning,
                stacklevel=2)
            self.n_jobs = None

        # Check that X and y have correct shape
        X, y = check_X_y(X, y, y_numeric=False, dtype=[np.single, np.double])

        check_classification_targets(y)

        # Encode labels
        le = preprocessing.LabelEncoder()
        le.fit(y)
        self.classes_ = le.classes_
        y_ = le.transform(y)

        # Convert to 2d array
        y_ = y_.reshape((-1, 1))

        self.n_classes_ = len(self.classes_)

        self.n_features_ = X.shape[1]

        # Trivial case: the classifier cannot be trained
        # when only one class is present
        if self.n_classes_ == 1:
            return self

        # Get random seed
        rs = check_random_state(None)
        self.seed_ = rs.randint(np.iinfo('i').max)

        # Define type of data
        fptype = getFPType(X)

        # Fit the model
        train_algo = d4p.kdtree_knn_classification_training(
            fptype=fptype, engine=d4p.engines_mcg59(seed=self.seed_))
        train_result = train_algo.compute(X, y_)

        # Store the model
        self.daal_model_ = train_result.model

        # Return the classifier
        return self
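
fit() only validates parameters and stores the trained model in self.daal_model_; a matching predict() would feed that model to the daal4py prediction algorithm. The method below is a minimal sketch under the attribute names used above, not the library's actual implementation, and it additionally assumes check_array is imported from sklearn.utils:

    # Hypothetical predict() counterpart to the fit() method above
    def predict(self, X):
        # Validate input and pick the matching floating-point precision
        X = check_array(X, dtype=[np.single, np.double])
        fptype = getFPType(X)

        # Trivial case: only one class was seen during fit
        if self.n_classes_ == 1:
            return np.full(X.shape[0], self.classes_[0])

        # Run KD-tree kNN prediction with the stored daal4py model
        predict_algo = d4p.kdtree_knn_classification_prediction(fptype=fptype)
        predict_result = predict_algo.compute(X, self.daal_model_)

        # Map the encoded label indices back to the original class labels
        indices = predict_result.prediction.ravel().astype(np.int64)
        return np.take(self.classes_, indices)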