예제 #1
0
                    default=100,
                    type=int,
                    help='Data multiplier')
# Upper bound on k-means iterations; test_fit forwards it to the algorithm
# as maxIterations.
parser.add_argument('--maxiter',
                    type=int,
                    default=100,
                    help='Maximum number of iterations')
# NOTE(review): parse_args is a project helper not shown here; presumably
# loop_types/prefix make it add benchmark-specific options — confirm there.
params = parse_args(parser, loop_types=('fit', 'predict'), prefix='daal4py')

# Load generated data
X = np.load(params.filex)
X_init = np.load(params.filei)
# Stack data_multiplier copies of X on top of each other (row-wise).
X_mult = np.vstack((X, ) * params.data_multiplier)
# Loaded from file and later passed to kmeans as accuracyThreshold.
tol = np.load(params.filet)

# Record run metadata on params: a size string built from X's shape, the
# number of rows in X_init as the cluster count, and the dtype of X.
params.size = size_str(X.shape)
params.n_clusters = X_init.shape[0]
params.dtype = X.dtype


# Define functions to time
def test_fit(X, X_init):
    """Configure a kmeans algorithm and run it once on the given data.

    The floating-point type is derived from ``X`` via ``getFPType``; the
    cluster count, iteration cap and accuracy threshold come from the
    module-level ``params`` and ``tol``. ``X`` and ``X_init`` are handed to
    ``compute`` in that order, and its result is returned unchanged.
    """
    fit_options = dict(
        fptype=getFPType(X),
        nClusters=params.n_clusters,
        maxIterations=params.maxiter,
        assignFlag=True,
        accuracyThreshold=tol,
    )
    solver = kmeans(**fit_options)
    return solver.compute(X, X_init)


def test_predict(X, X_init):
예제 #2
0
def main():
    """Benchmark daal4py SVC (linear kernel) fit and predict on NPY data.

    Parses the command-line options, loads the feature and label arrays
    (cast to float64), times ``test_fit`` and ``test_predict`` through
    ``time_mean_min`` and prints a header plus one result row per timed
    function. Prediction is timed on the training data, and the reported
    accuracy compares those predictions against the training labels.
    """
    parser = argparse.ArgumentParser(description='daal4py SVC benchmark with '
                                     'linear kernel')
    parser.add_argument('-x',
                        '--filex',
                        '--fileX',
                        type=argparse.FileType('r'),
                        required=True,
                        help='Input file with features, in NPY format')
    parser.add_argument('-y',
                        '--filey',
                        '--fileY',
                        type=argparse.FileType('r'),
                        required=True,
                        help='Input file with labels, in NPY format')
    parser.add_argument('-C',
                        dest='C',
                        type=float,
                        default=0.01,
                        help='SVM slack parameter')
    parser.add_argument('--kernel',
                        choices=('linear', ),
                        default='linear',
                        help='SVM kernel function')
    parser.add_argument('--maxiter',
                        type=int,
                        default=2000,
                        help='Maximum iterations for the iterative solver. '
                        '-1 means no limit.')
    parser.add_argument('--max-cache-size',
                        type=int,
                        default=64,
                        help='Maximum cache size, in gigabytes, for SVM.')
    parser.add_argument('--tau',
                        type=float,
                        default=1e-12,
                        help='Tau parameter for working set selection scheme')
    parser.add_argument('--tol', type=float, default=1e-16, help='Tolerance')
    parser.add_argument('--no-shrinking',
                        action='store_false',
                        default=True,
                        dest='shrinking',
                        help="Don't use shrinking heuristic")
    params = parse_args(parser,
                        loop_types=('fit', 'predict'),
                        prefix='daal4py')

    # Load data and cast to float64.
    # argparse.FileType has already opened both inputs, but only their names
    # are used (np.load reopens them by path), so release the handles once
    # loading has finished or failed instead of leaking them.
    try:
        X_train = np.load(params.filex.name).astype('f8')
        y_train = np.load(params.filey.name).astype('f8')
    finally:
        params.filex.close()
        params.filey.close()

    cache_size_bytes = get_optimal_cache_size(X_train.shape[0],
                                              max_cache=params.max_cache_size)
    params.cache_size_mb = cache_size_bytes / 2**20
    params.cache_size_bytes = cache_size_bytes
    # Count classes before the labels are remapped below.
    params.n_classes = np.unique(y_train).size

    # This is necessary for daal: relabel class 0 as -1 and turn the labels
    # into a single-column 2-D array.
    y_train[y_train == 0] = -1
    y_train = y_train[:, np.newaxis]

    columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
               'size', 'kernel', 'cache_size_mb', 'C', 'sv_len', 'n_classes',
               'accuracy', 'time')
    params.size = size_str(X_train.shape)
    params.dtype = X_train.dtype

    print_header(columns, params)

    # Time fit and predict
    fit_time, fit_result = time_mean_min(test_fit,
                                         X_train,
                                         y_train,
                                         params,
                                         outer_loops=params.fit_outer_loops,
                                         inner_loops=params.fit_inner_loops)
    # test_fit yields four values; only the first two are needed here.
    model, support, _indices, _n_support = fit_result
    params.sv_len = support.shape[0]
    print_row(columns, params, function='SVM.fit', time=fit_time)

    predict_time, yp = time_mean_min(test_predict,
                                     X_train,
                                     model,
                                     params,
                                     outer_loops=params.predict_outer_loops,
                                     inner_loops=params.predict_inner_loops)
    # Use the 'g' presentation type: a bare '.3' precision renders a perfect
    # score of 100.0 as '1e+02', whereas '.3g' prints it as '100'.
    accuracy_pct = 100 * accuracy_score(yp, y_train)
    print_row(columns,
              params,
              function='SVM.predict',
              time=predict_time,
              accuracy=f'{accuracy_pct:.3g}')