Example #1
def init_kernels():
    global sum_cols_kernel, iadd_kernel, multiply_kernel, m_dot_kernel, \
        mv_batched_kernel, initialized

    if initialized:
        warnings.warn("Kernels already initialized, skipping init")
        return

    from pycuda import autoinit, driver, compiler
    from skcuda import misc

    dev = autoinit.device
    print("GPU found, using %s %s" % (dev.name(), dev.compute_capability()))

    misc.init()

    DTYPES = ["double", "float"]

    def parse_kernels():
        # Read the CUDA source and expand its textual macros: a fixed tile
        # length, plus one kernel instantiation per dtype / transpose variant.
        with open(os.path.join(os.path.dirname(__file__), "kernels.cu")) as f:
            code = f.read()

        code = code.replace("%tile_len%", "32")

        funcs = code.split("__global__ void")
        new_funcs = []
        for f in funcs:
            if "%float_type%" in f:
                for t in DTYPES:
                    new_funcs += [f.replace("%float_type%", t)]
            else:
                new_funcs += [f]

        funcs = new_funcs
        new_funcs = []
        for f in funcs:
            if "%transpose_a%" in f:
                for t_a in ["0", "1"]:
                    new_funcs += [f.replace("%transpose_a%", t_a)]
            else:
                new_funcs += [f]

        funcs = new_funcs
        new_funcs = []
        for f in funcs:
            if "%transpose_b%" in f:
                for t_b in ["0", "1"]:
                    new_funcs += [f.replace("%transpose_b%", t_b)]
            else:
                new_funcs += [f]

        code = "__global__ void".join(new_funcs)

        return code

    try:
        kernels = compiler.SourceModule(parse_kernels())
    except driver.CompileError:
        # On a compile failure, dump the expanded source with line numbers so
        # nvcc's error locations can be matched against it.
        with open("kernel_code.txt", "w") as f:
            for i, line in enumerate(parse_kernels().split("\n")):
                f.write("%03d %s\n" % (i, line))

        raise

    sum_cols_kernel = [
        kernels.get_function("sum_cols_%s" % dtype).prepare("PPiii")
        for dtype in DTYPES
    ]
    iadd_kernel = [
        kernels.get_function("iadd_%s" % dtype).prepare("PPii")
        for dtype in DTYPES
    ]
    multiply_kernel = [
        kernels.get_function("multiply_%s" % dtype).prepare("PPPii")
        for dtype in DTYPES
    ]
    m_dot_kernel = [[[
        kernels.get_function("shared_m_dot_%s_%s_%s" %
                             (dtype, a, b)).prepare("PPPiiii")
        for b in ["0", "1"]
    ] for a in ["0", "1"]] for dtype in DTYPES]
    mv_batched_kernel = [[
        kernels.get_function("mv_batched_%s_%s" % (dtype, a)).prepare("PPPiii")
        for a in ["0", "1"]
    ] for dtype in DTYPES]

    initialized = True
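init_kernels() reads and writes module-level state; a minimal sketch of the surrounding scope it assumes (the names come from its global statement, and kernels.cu must sit next to the module):

import os
import warnings

initialized = False                     # toggled by init_kernels()
sum_cols_kernel = iadd_kernel = multiply_kernel = None
m_dot_kernel = mv_batched_kernel = None

init_kernels()   # compiles kernels.cu and prepares the kernel handles
init_kernels()   # a second call only warns and returns early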
Example #2
## check gpu functionality! ##

global gpu_func
gpu_func = True

try:
    __import__("pycuda")
except ImportError:
    gpu_func = False
else:
    import pycuda.autoinit
    import pycuda.gpuarray as gpuarray
    import skcuda.fft as cu_fft
    from skcuda import misc
    misc.init()

    def to_gpu_c(somedata):
        # all my complex data
        return gpuarray.to_gpu(somedata.astype('complex64'))

    def to_gpu_f(somedata):
        # all my float data
        return gpuarray.to_gpu(somedata.astype('float32'))


## check gpu functionality! ##


class MFTIE:
    """
Example #3
def setUp(self):
    np.random.seed(0)
    misc.init()
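In its project this is a unittest fixture; a minimal self-contained sketch (the class name is hypothetical):

import unittest

import numpy as np
from skcuda import misc


class TestSkcudaOps(unittest.TestCase):
    def setUp(self):
        np.random.seed(0)   # deterministic inputs for every test
        misc.init()         # initialize skcuda before each test


if __name__ == "__main__":
    unittest.main()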
Example #4
assert excProfile.shape == refProfile.shape and excProfile.ndim == 1, \
    "Slice profiles must be one-dimensional vectors and contain the same number of samples"


###########################################################
## Initialization
###########################################################    

if useGPU:
    import pycuda.driver as cuda
    import pycuda.autoinit
    import skcuda.linalg as sklinalg
    import skcuda.misc as skmisc
    from FatFractionLookup_GPU import FatFractionLookup_GPU as FatFractionLookup
    import findmax_ff
    
    skmisc.init()
    NTHREADS = 1
else:
    from FatFractionLookup import FatFractionLookup
    
dicomStack, infos = load3dDicom(baseDir)

etl = int(infos[0].EchoTrainLength)
echoSpacing = float(infos[0].EchoTime)

oldShape = dicomStack.shape
newShape = (oldShape[0], oldShape[1], etl, int(oldShape[2]/etl))

print(newShape)

nSlices = newShape[3] 
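newShape regroups the flat slice axis into (echo, acquisition); a toy illustration of the split (whether the echo index varies slowest depends on the scanner's image ordering, so treat this as hypothetical):

import numpy as np

etl = 4                                    # echoes per train
stack = np.zeros((128, 128, 20))           # 20 images = 4 echoes * 5 slices
new_shape = (128, 128, etl, stack.shape[2] // etl)
echoes = stack.reshape(new_shape)          # echoes[:, :, e, s] is input image e * 5 + s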
Example #5
from sklearn.exceptions import ConvergenceWarning
from ..utils.extmath import safe_sparse_dot
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.multiclass import _check_partial_fit_first_call, unique_labels
from sklearn.utils.multiclass import type_of_target

from ..base import RegressorMixin

import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import pycuda.gpuarray as gpuarray

import skcuda.misc as cumisc
import skcuda.linalg as culinalg
cumisc.init()
culinalg.init()

_STOCHASTIC_SOLVERS = ['sgd', 'adam']


def _pack(coefs_, intercepts_):
    """Pack the parameters into a single vector."""
    return np.hstack([l.ravel() for l in coefs_ + intercepts_])


class BaseMultilayerPerceptron(six.with_metaclass(ABCMeta, BaseEstimator)):
    """Base class for MLP classification and regression.

    Warning: This class should not be used directly.
    Use derived classes instead.
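_pack flattens all weight matrices and bias vectors into one parameter vector; a toy check with hypothetical shapes (3 inputs, 4 hidden units, 1 output):

import numpy as np

coefs_ = [np.ones((3, 4)), np.ones((4, 1))]        # layer weight matrices
intercepts_ = [np.zeros(4), np.zeros(1)]           # layer bias vectors
packed = _pack(coefs_, intercepts_)
assert packed.shape == (3 * 4 + 4 * 1 + 4 + 1,)    # 21 parameters in one vector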
Example #6
import copy
from functools import wraps
import warnings

import numpy as np
from pycuda import gpuarray
from skcuda import cublas, misc

import hessianfree as hf

misc.init()


def debug_wrapper(cpu_func, debug=False):
    """Decorator used to specify an equivalent CPU function that can be used
    to verify the output of a GPU function (for debugging)."""

    def debug_func_parametrized(gpu_func):
        @wraps(gpu_func)
        def debug_func(*args, **kwargs):
            if debug:
                cpu_args = list(args)
                for i, a in enumerate(cpu_args):
                    if isinstance(a, gpuarray.GPUArray):
                        cpu_args[i] = a.get()
                cpu_kwargs = copy.copy(kwargs)
                for k in cpu_kwargs:
                    if isinstance(cpu_kwargs[k], gpuarray.GPUArray):
                        cpu_kwargs[k] = cpu_kwargs[k].get()

                cpu_kwargs.pop("stream", None)
Example #7
def setUp(self):
    np.random.seed(0)
    misc.init()
Example #8
@classmethod
def setUpClass(cls):
    cls.ctx = make_default_context()
    misc.init()
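The context created in setUpClass needs releasing when the suite finishes; a sketch of the matching teardown, assuming pycuda.tools as the source of make_default_context:

from pycuda.tools import clear_context_caches

@classmethod
def tearDownClass(cls):
    cls.ctx.pop()              # release the context made in setUpClass
    clear_context_caches()     # drop cached module/context references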
Example #9
def decompose(d, beta, betaT, _gamma, verbose):
    print('start decomposing')
    sk_misc.init()
    D = gpuarray.to_gpu(d)
    Beta = gpuarray.to_gpu(beta)
    BetaT = gpuarray.to_gpu(betaT)
    l, m, n = D.shape
    step_size = np.maximum(m, n)
    lmn, k = Beta.shape

    kernel_code = kernel_code_template % {
        'MATRIX_A_COL_SIZE': lmn,
        'BLOCK_ROW_SIZE': 1,
        'BLOCK_COL_SIZE': 1,
        'STEP_SIZE': step_size,
    }

    mod = compiler.SourceModule(kernel_code)
    matrixmul = mod.get_function("MatrixMulKernel")
    grid = (1, k)
    block = (step_size, 1, 1)

    D_v = D.reshape(l * m * n, 1)
    _, k = Beta.shape

    tol = 0.2
    max_iter = 10000

    tau = 0.1
    sigma = 1 / (12 * tau)

    x_t = gpuarray.zeros((l, m, n), D.dtype)
    x_a = gpuarray.zeros((k, 1), D.dtype)
    y_t = gpuarray.zeros((3, l, m, n), D.dtype)

    EL, ET, E = computeEnergy(D_v, x_t, _gamma, x_a, Beta)
    print('Initial Energy: E = ' + str(E) + ', EL=' + str(EL) +
          ', ET=' + str(ET))
    Es = E
    change = 10
    t0 = time.perf_counter()  # time.clock() was removed in Python 3.8
    for i in range(max_iter):
        # Primal step (ProxG), then dual step (ProxFSs) on the extrapolated
        # iterate 2 * xt_new - x_t.
        ks_yt = -div(y_t)
        xt_new, xa_new = ProxG(x_t - tau * ks_yt, x_a, D_v, tau, Beta, BetaT,
                               matrixmul, grid, block)
        yt_new = ProxFSs(y_t + sigma * grad(2 * xt_new - x_t), _gamma)

        x_t = xt_new
        x_a = xa_new
        y_t = yt_new

        EL, ET, E = computeEnergy(D_v, x_t, _gamma, x_a, Beta)
        Es = np.append(Es, E)
        length = Es.shape[0]
        El5 = np.mean(Es[np.maximum(0, length - 6):length - 1])
        El5c = np.mean(Es[np.maximum(0, length - 5):length])
        change = np.append(change, El5c - El5)
        t1 = time.perf_counter() - t0

        if np.mod(i + 1, 100) == 0:
            print('Iter ' + str(i + 1) + ': E = ' + str(E) + '; EL=' +
                  str(EL) + ', ET=' + str(ET) + ', avechg = ' +
                  str(change[length - 1]))
        if i >= 100 and np.max(np.abs(
                change[np.maximum(0, length - 3):length])) < tol:
            print('Converged after ' + str(i + 1) + ' iterations.')
            break
    T = x_t
    Alpha = x_a
    L = D - T
    l = L.get()
    t = T.get()
    alpha = Alpha.get()

    sk_misc.shutdown()
    return (l, t, alpha)
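A hypothetical call, assuming d is an (l, m, n) volume and beta an (l*m*n, k) basis with betaT its contiguous transpose; all names and shapes below are illustrative, and kernel_code_template plus the Prox/energy helpers must already be defined:

import numpy as np

d = np.random.rand(8, 16, 16)
beta = np.random.rand(8 * 16 * 16, 5)
betaT = np.ascontiguousarray(beta.T)
l_part, t_part, alpha = decompose(d, beta, betaT, _gamma=0.5, verbose=True)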
Example #10
def decompose(d, beta, betaT, _lambda, _gamma, _lambda_c, _gamma_c, verbose):
    print('start decomposing in GPU')
    sk_misc.init()
    D = gpuarray.to_gpu(d)
    Beta = gpuarray.to_gpu(beta)
    BetaT = gpuarray.to_gpu(betaT)
    l, m, n = D.shape
    step_size = np.maximum(m, n)
    lmn, k = Beta.shape
    _Lambda = gpuarray.to_gpu(_lambda)
    _Gamma = gpuarray.to_gpu(_gamma)

    kernel_code = kernel_code_template % {
        'MATRIX_A_COL_SIZE': lmn,
        'BLOCK_ROW_SIZE': 1,
        'BLOCK_COL_SIZE': 1,
        'STEP_SIZE': step_size,
    }

    mod = compiler.SourceModule(kernel_code)
    matrixmul = mod.get_function("MatrixMulKernel")
    grid = (1, k)
    block = (step_size, 1, 1)

    D_v = D.reshape(lmn, 1)

    tol = 0.1
    max_iter = 10000

    tau = 0.1
    sigma = 1 / (13 * tau)
    x_s = gpuarray.zeros((l, m, n), D.dtype)
    x_t = gpuarray.zeros((l, m, n), D.dtype)
    x_a = gpuarray.zeros((k, 1), D.dtype)
    y_t = gpuarray.zeros((3, l, m, n), D.dtype)
    y_s = x_s

    xs_new = gpuarray.zeros_like(x_s)
    xt_new = gpuarray.zeros_like(x_t)
    xa_new = gpuarray.zeros_like(x_a)
    ys_new = gpuarray.zeros_like(y_s)
    yt_new = gpuarray.zeros_like(y_t)
    assign_matrix(x_s, _Lambda, D)

    EL, ES, ET, Es = computeEnergy(D_v, x_s, x_t, _Lambda, _gamma_c, x_a, Beta)
    print('Initial Energy: E = ' + str(Es) + ', EL=' + str(EL) + ', ES=' +
          str(ES) + ', ET=' + str(ET))
    change = 10
    t0 = time.time()
    print_iters = 200
    if verbose:
        print_iters = 50
    for i in range(max_iter):
        ks_yt = -div(y_t)
        ks_ys = y_s

        xs_new, xt_new, xa_new = ProxG(x_s - tau * ks_ys, x_t - tau * ks_yt,
                                       x_a, D_v, tau, Beta, BetaT, matrixmul,
                                       grid, block)
        assign_matrix(xs_new, _Lambda, D)
        assign_matrix(xt_new, _Gamma, D)

        ys_new, yt_new = ProxFSs(y_s + sigma * (2 * xs_new - x_s),
                                 y_t + sigma * grad(2 * xt_new - x_t), _Lambda,
                                 _gamma_c)

        x_s = xs_new
        x_t = xt_new
        x_a = xa_new
        y_s = ys_new
        y_t = yt_new

        EL, ES, ET, E = computeEnergy(D_v, x_s, x_t, _Lambda, _gamma_c, x_a,
                                      Beta)
        Es = np.append(Es, E)
        length = Es.shape[0]
        El5 = np.mean(Es[np.maximum(0, length - 6):length - 1])
        El5c = np.mean(Es[np.maximum(0, length - 5):length])
        change = np.append(change, El5c - El5)
        t1 = time.time() - t0

        if np.mod(i + 1, print_iters) == 0:
            print('Iter ' + str(i + 1) + ': E = ' + str(E) + '; EL=' +
                  str(EL) + ', ES=' + str(ES) + ', ET=' + str(ET) +
                  ', avechg = ' + str(change[length - 1]))
        if i >= 100 and np.max(np.abs(
                change[np.maximum(0, length - 3):length])) < tol:
            print('Iter ' + str(i + 1) + ': E = ' + str(E) + '; EL=' +
                  str(EL) + ', ES=' + str(ES) + ', ET=' + str(ET) +
                  ', avechg = ' + str(change[length - 1]))
            print('Converged after ' + str(i + 1) + ' iterations.')
            break
            break

    S = x_s
    T = x_t
    Alpha = x_a
    L = D - S - T
    l = L.get()
    s = S.get()
    t = T.get()
    alpha = Alpha.get()

    sk_misc.shutdown()
    return (l, s, t, alpha)
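One plausible reading of the step sizes in the two decompose variants, assuming a Chambolle-Pock style primal-dual scheme: convergence needs tau * sigma * ||K||^2 <= 1, and the 3-D forward-difference gradient satisfies ||grad||^2 <= 4d = 12, which matches sigma = 1/(12*tau) in the earlier variant; counting one more for the identity block acting on x_s here gives the bound 13 and sigma = 1/(13*tau):

\tau\,\sigma\,\|K\|^2 \le 1, \qquad \|\nabla\|^2 \le 4d = 12 \;(d = 3), \qquad \|K\|^2 \le 12 + 1 = 13 \;\Rightarrow\; \sigma = \frac{1}{13\,\tau}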
Example #11
@classmethod
def setUpClass(cls):
    misc.init()
Example #12
@classmethod
def setUpClass(cls):
    cls.ctx = make_default_context()
    misc.init()