예제 #1
0
def main(readcsv=read_csv, method='defaultDense'):
    """Train a multi-class SVM classifier and predict labels for a test set.

    Both CSV files are read with the first ``nFeatures`` columns as features
    and the following single column as the class label.

    Args:
        readcsv: Callable ``readcsv(path, columns)`` used to load the selected
            columns of a CSV file.
        method: Accepted for interface compatibility; the body does not use
            it (the SVM trainers are created with ``method='thunder'``).

    Returns:
        Tuple ``(pred_result, pred_labels)``: the prediction result object
        and the labels read from the test file.
    """
    nFeatures = 20
    nClasses = 5

    # Read training data: nFeatures feature columns plus one label column.
    train_file = 'data/batch/svm_multi_class_train_dense.csv'
    train_data = readcsv(train_file, range(nFeatures))
    train_labels = readcsv(train_file, range(nFeatures, nFeatures + 1))

    # Create and configure the multi-class training algorithm, built on
    # two-class SVM training/prediction objects.
    algorithm = d4p.multi_class_classifier_training(nClasses=nClasses,
                                                    training=d4p.svm_training(method='thunder'),
                                                    prediction=d4p.svm_prediction())

    # Pass data to training. The training result provides the model.
    train_result = algorithm.compute(train_data, train_labels)
    assert train_result.model.NumberOfFeatures == nFeatures
    assert isinstance(train_result.model.TwoClassClassifierModel(0), d4p.svm_model)

    # Prediction stage: read the test data in the same column layout.
    pred_file = 'data/batch/svm_multi_class_test_dense.csv'
    pred_data = readcsv(pred_file, range(nFeatures))
    pred_labels = readcsv(pred_file, range(nFeatures, nFeatures + 1))

    # Create an algorithm object to predict multi-class SVM values.
    algorithm = d4p.multi_class_classifier_prediction(nClasses,
                                                      training=d4p.svm_training(method='thunder'),
                                                      prediction=d4p.svm_prediction())
    # Pass data to prediction. The prediction result provides the prediction.
    pred_result = algorithm.compute(pred_data, train_result.model)
    # One prediction row is expected per *test* observation, so the check
    # must use the test set's row count, not the training set's.
    assert pred_result.prediction.shape == (pred_data.shape[0], 1)

    return (pred_result, pred_labels)
예제 #2
0
def _daal4py_predict(self, X):
    """Run daal4py SVM prediction for ``X`` using ``self.daal_model_``.

    A two-class SVM predictor is always built from the estimator's kernel.
    When ``self.classes_`` holds exactly two classes it is used directly;
    otherwise it is wrapped in a vote-based, one-against-one multi-class
    predictor.

    Args:
        X: Samples to predict for; also used to pick the floating-point type.

    Returns:
        The flattened prediction array. In the two-class case the values are
        replaced in place by the result of ``prediction > 0``.
    """
    fptype = getFPType(X)
    n_classes = len(self.classes_)
    is_binary = n_classes == 2

    kernel = _daal4py_kf(self.kernel, fptype, gamma=self._gamma)
    two_class_predictor = daal4py.svm_prediction(
        fptype=fptype, method='defaultDense', kernel=kernel)

    if is_binary:
        predictor = two_class_predictor
    else:
        # A non-positive max_iter is replaced by a very large iteration cap.
        iteration_cap = self.max_iter if self.max_iter > 0 else 2**30
        predictor = daal4py.multi_class_classifier_prediction(
            nClasses=n_classes,
            fptype=fptype,
            maxIterations=int(iteration_cap),
            accuracyThreshold=float(self.tol),
            pmethod="voteBased",
            tmethod='oneAgainstOne',
            prediction=two_class_predictor)

    flat = predictor.compute(X, self.daal_model_).prediction.ravel()

    if is_binary:
        # Threshold at zero, writing the comparison result back into the
        # same array.
        np.greater(flat, 0, out=flat)

    return flat
예제 #3
0
def compute(train_indep_data,
            train_dep_data,
            test_indep_data,
            method='defaultDense'):
    """Train a linear-kernel SVM and predict labels for the test data.

    Args:
        train_indep_data: Training features.
        train_dep_data: Training targets.
        test_indep_data: Features to predict for.
        method: Forwarded as the ``method`` argument of ``d4p.svm_training``.

    Returns:
        Tuple ``(predict_labels, decision_result)``: ``decision_result`` is
        the raw prediction output and ``predict_labels`` is ``1`` where it is
        ``>= 0`` and ``-1`` elsewhere.
    """
    # Linear kernel shared by the training and prediction algorithms.
    linear_kernel = d4p.kernel_function_linear(
        fptype='float', method='defaultDense', k=1.0, b=0.0)

    trainer = d4p.svm_training(
        fptype='float',
        method=method,
        kernel=linear_kernel,
        C=1.0,
        accuracyThreshold=1e-3,
        tau=1e-8,
        cacheSize=600000000,
    )
    fitted = trainer.compute(train_indep_data, train_dep_data)

    # Predict with the trained model, reusing the same kernel object.
    predictor = d4p.svm_prediction(fptype='float', kernel=linear_kernel)
    decision_result = predictor.compute(test_indep_data, fitted.model).prediction

    # Map the sign of the decision value to a -1 / +1 label.
    return np.where(decision_result >= 0, 1, -1), decision_result
예제 #4
0
def main(readcsv=read_csv, method='defaultDense'):
    """Train a two-class SVM with a linear kernel and predict on a test file.

    Both CSV files are read with columns 0-19 as features and column 20 as
    the label.

    Args:
        readcsv: Callable ``readcsv(path, columns)`` used to load the selected
            columns of a CSV file.
        method: Accepted for interface compatibility; not used by the body.

    Returns:
        Tuple ``(predict_result, plabels)``: the prediction result object and
        the labels read from the test file.
    """
    train_path = "./data/batch/svm_two_class_train_dense.csv"
    test_path = "./data/batch/svm_two_class_test_dense.csv"
    n_features = 20
    feature_columns = range(n_features)
    label_columns = range(n_features, n_features + 1)

    # Linear kernel with default parameters; held in a local so the object
    # stays alive while the algorithms created from it are in use.
    kern = d4p.kernel_function_linear()
    trainer = d4p.svm_training(doShrinking=True,
                               kernel=kern,
                               cacheSize=600000000)

    # Load the training set and fit the model.
    data = readcsv(train_path, feature_columns)
    labels = readcsv(train_path, label_columns)
    fitted = trainer.compute(data, labels)

    # Load the test set (same number of features) and predict with the
    # model obtained above.
    predictor = d4p.svm_prediction(kernel=kern)
    pdata = readcsv(test_path, feature_columns)
    plabels = readcsv(test_path, label_columns)
    predict_result = predictor.compute(pdata, fitted.model)

    # Sanity check: one prediction row per test observation.
    expected_shape = (pdata.shape[0], 1)
    assert predict_result.prediction.shape == expected_shape

    return (predict_result, plabels)
예제 #5
0
def test_predict(X, training_result, params):
    """Predict with a trained linear-kernel SVM model.

    Args:
        X: Samples to predict for; also used to pick the floating-point type.
        training_result: Object whose ``model`` attribute is the trained model.
        params: Object providing ``n_classes`` and, for the multi-class case,
            ``maxiter`` and ``tol``.

    Returns:
        The flattened predictions. With two classes this is the boolean
        array ``prediction > 0``; otherwise the raw flattened prediction.
    """
    fp = getFPType(X)
    linear_kernel = kernel_function_linear(fptype=fp)
    binary_predictor = svm_prediction(fptype=fp,
                                      method='defaultDense',
                                      kernel=linear_kernel)

    two_class = params.n_classes == 2
    if two_class:
        predictor = binary_predictor
    else:
        # Wrap the two-class predictor in a vote-based, one-against-one
        # multi-class predictor.
        predictor = multi_class_classifier_prediction(
            nClasses=params.n_classes,
            fptype=fp,
            maxIterations=params.maxiter,
            accuracyThreshold=params.tol,
            pmethod='voteBased',
            tmethod='oneAgainstOne',
            prediction=binary_predictor)

    flat = predictor.compute(X, training_result.model).prediction.ravel()
    return np.greater(flat, 0) if two_class else flat
예제 #6
0
def bench(meta_info,
          X_train,
          y_train,
          fit_samples,
          fit_repetitions,
          predict_samples,
          predict_repetitions,
          classes,
          cache_size,
          accuracy_threshold=1e-16,
          max_iterations=2000):
    """Benchmark linear-kernel SVM fit and predict and print one CSV line.

    Fitting is timed ``fit_samples`` times, each sample running
    ``fit_repetitions`` fits; prediction on ``X_train`` is timed
    ``predict_samples`` times with ``predict_repetitions`` runs each. The
    best (minimum) sample time divided by the repetition count is reported.

    Args:
        meta_info: Text placed first on the printed line.
        X_train: Training features; also used as the prediction input.
        y_train: Training labels. A copy is relabelled internally
            (``0 -> -1`` for two classes, ``-1 -> 0`` otherwise); the
            caller's array is left untouched.
        fit_samples: Number of timed fit samples (>= 1).
        fit_repetitions: Fits per timed sample (>= 1).
        predict_samples: Number of timed predict samples (>= 1).
        predict_repetitions: Predictions per timed sample (>= 1).
        classes: Number of classes.
        cache_size: Forwarded as ``cacheSize`` to ``svm_training``.
        accuracy_threshold: Forwarded as ``accuracyThreshold``.
        max_iterations: Forwarded as ``maxIterations``.

    Raises:
        ValueError: If any sample or repetition count is smaller than 1.

    Returns:
        None. The result line is printed: meta info, fit time, predict time,
        accuracy in percent on the training data, ``support.shape[0]`` and
        the length of the per-class support count array.
    """
    # Without at least one fit and one prediction there is no model, no
    # result and no timing to report, so fail early with a clear message.
    if min(fit_samples, fit_repetitions,
           predict_samples, predict_repetitions) < 1:
        raise ValueError(
            "fit_samples, fit_repetitions, predict_samples and "
            "predict_repetitions must all be >= 1")

    kf = kernel_function_linear(fptype='double')

    # Relabel a private copy so the caller's label array is not rewritten.
    y_train = y_train.copy()
    if classes == 2:
        y_train[y_train == 0] = -1
    else:
        y_train[y_train == -1] = 0

    fit_times = []
    for _ in range(fit_samples):
        start = time()
        for __ in range(fit_repetitions):
            svm_train = svm_training(fptype='double',
                                     C=0.01,
                                     maxIterations=max_iterations,
                                     tau=1e-12,
                                     cacheSize=cache_size,
                                     accuracyThreshold=accuracy_threshold,
                                     doShrinking=True,
                                     kernel=kf)

            if classes == 2:
                clf = svm_train
            else:
                clf = multi_class_classifier_training(
                    nClasses=classes,
                    fptype='double',
                    accuracyThreshold=accuracy_threshold,
                    method='oneAgainstOne',
                    maxIterations=max_iterations,
                    training=svm_train)

            training_result = clf.compute(X_train, y_train)

            # Post-processing stays inside the timed region, so the reported
            # fit time includes it.
            support = construct_dual_coefs(training_result.model, classes,
                                           X_train, y_train)
            indices = y_train.take(support, axis=0)
            if classes == 2:
                class_labels = [-1, 1]
            else:
                # Labels were remapped above so that -1 became 0; count the
                # labels that can actually occur.
                class_labels = range(classes)
            n_support_ = np.array(
                [np.sum(indices == c) for c in class_labels],
                dtype=np.int32)
        stop = time()
        fit_times.append(stop - start)

    predict_times = []
    for _ in range(predict_samples):
        svm_predict = svm_prediction(fptype='double',
                                     method='defaultDense',
                                     kernel=kf)
        if classes == 2:
            prdct = svm_predict
        else:
            prdct = multi_class_classifier_prediction(
                nClasses=classes,
                fptype='double',
                maxIterations=max_iterations,
                accuracyThreshold=accuracy_threshold,
                pmethod='voteBased',
                tmethod='oneAgainstOne',
                prediction=svm_predict)

        # Only the compute calls are timed, not the predictor construction.
        start = time()
        for __ in range(predict_repetitions):
            res = prdct.compute(X_train, training_result.model)
        stop = time()
        predict_times.append(stop - start)

    if classes == 2:
        # Compare as booleans: positive prediction vs. positive label.
        y_predict = np.greater(res.prediction.ravel(), 0)
        y_train = np.greater(y_train, 0)
    else:
        y_predict = res.prediction.ravel()

    print("{meta_info},{fit_t:0.6g},{pred_t:0.6g},{acc:0.3f},{sv_len},{cl}".
          format(meta_info=meta_info,
                 fit_t=min(fit_times) / fit_repetitions,
                 pred_t=min(predict_times) / predict_repetitions,
                 acc=100 * accuracy_score(y_train.ravel(), y_predict),
                 sv_len=support.shape[0],
                 cl=n_support_.shape[0]))