Example #1
import os
import warnings

initialized = False  # module-level guard checked by init_kernels() below
def init_kernels():
    global sum_cols_kernel, iadd_kernel, multiply_kernel, m_dot_kernel, \
        mv_batched_kernel, initialized

    if initialized:
        warnings.warn("Kernels already initialized, skipping init")
        return

    from pycuda import autoinit, driver, compiler
    from skcuda import misc

    dev = autoinit.device
    print("GPU found, using %s %s" % (dev.name(), dev.compute_capability()))

    misc.init()

    DTYPES = ["double", "float"]

    def parse_kernels():
        with open(os.path.join(os.path.dirname(__file__), "kernels.cu")) as f:
            code = f.read()

        code = code.replace("%tile_len%", "32")

        funcs = code.split("__global__ void")
        new_funcs = []
        for f in funcs:
            if "%float_type%" in f:
                for t in DTYPES:
                    new_funcs += [f.replace("%float_type%", t)]
            else:
                new_funcs += [f]

        funcs = new_funcs
        new_funcs = []
        for f in funcs:
            if "%transpose_a%" in f:
                for t_a in ["0", "1"]:
                    new_funcs += [f.replace("%transpose_a%", t_a)]
            else:
                new_funcs += [f]

        funcs = new_funcs
        new_funcs = []
        for f in funcs:
            if "%transpose_b%" in f:
                for t_b in ["0", "1"]:
                    new_funcs += [f.replace("%transpose_b%", t_b)]
            else:
                new_funcs += [f]

        code = "__global__ void".join(new_funcs)

        return code

    try:
        kernels = compiler.SourceModule(parse_kernels())
    except driver.CompileError:
        with open("kernel_code.txt", "w") as f:
            for i, line in enumerate(parse_kernels().split("\n")):
                f.write("%03d %s\n" % (i, line))

        raise

    sum_cols_kernel = [
        kernels.get_function("sum_cols_%s" % dtype).prepare("PPiii")
        for dtype in DTYPES
    ]
    iadd_kernel = [
        kernels.get_function("iadd_%s" % dtype).prepare("PPii")
        for dtype in DTYPES
    ]
    multiply_kernel = [
        kernels.get_function("multiply_%s" % dtype).prepare("PPPii")
        for dtype in DTYPES
    ]
    m_dot_kernel = [[[
        kernels.get_function("shared_m_dot_%s_%s_%s" %
                             (dtype, a, b)).prepare("PPPiiii")
        for b in ["0", "1"]
    ] for a in ["0", "1"]] for dtype in DTYPES]
    mv_batched_kernel = [[
        kernels.get_function("mv_batched_%s_%s" % (dtype, a)).prepare("PPPiii")
        for a in ["0", "1"]
    ] for dtype in DTYPES]

    initialized = True
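Usage sketch: once `init_kernels()` has run, each entry above is a PyCUDA prepared function. The `iadd` argument order, semantics, and launch configuration below are assumptions, not taken from the library:

import numpy as np
from pycuda import gpuarray

init_kernels()

rows, cols = 64, 32
a = gpuarray.to_gpu(np.random.rand(rows, cols).astype(np.float32))
b = gpuarray.to_gpu(np.random.rand(rows, cols).astype(np.float32))

# "PPii" = two device pointers followed by two 32-bit ints; index 1 selects
# the "float" variant per DTYPES above.  Grid/block sizes are assumed.
iadd_kernel[1].prepared_call(
    (1, 2), (32, 32, 1),               # grid, block
    a.gpudata, b.gpudata, rows, cols)  # P, P, i, i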
Example #2
## check gpu functionality! ##

gpu_func = True  # module-level flag; set to False below if pycuda is unavailable

try:
    __import__("pycuda")
except ImportError:
    gpu_func = False
else:
    import pycuda.autoinit
    import pycuda.gpuarray as gpuarray
    import skcuda.fft as cu_fft
    from skcuda import misc
    misc.init()

    def to_gpu_c(somedata):
        # all my complex data
        return gpuarray.to_gpu(somedata.astype('complex64'))

    def to_gpu_f(somedata):
        # all my float data
        return gpuarray.to_gpu(somedata.astype('float32'))


## check gpu functionality! ##
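If the GPU check passed, a forward FFT with skcuda looks roughly like this sketch (the shape and tolerances are assumptions):

import numpy as np

if gpu_func:
    x = np.random.rand(256, 256)
    x_gpu = to_gpu_c(x)                            # complex64 on device
    xf_gpu = gpuarray.empty((256, 256), np.complex64)
    plan = cu_fft.Plan((256, 256), np.complex64, np.complex64)
    cu_fft.fft(x_gpu, xf_gpu, plan)
    # compare against the CPU reference (loose tolerance for float32)
    print(np.allclose(np.fft.fft2(x), xf_gpu.get(), rtol=1e-3, atol=1e-2))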


class MFTIE():
    """
Example #3
def setUp(self):
    np.random.seed(0)
    misc.init()
Example #4
assert excProfile.shape == refProfile.shape and excProfile.ndim == 1, \
    "Slice profiles must be one-dimensional vectors and contain the same number of samples"


###########################################################
## Initialization
###########################################################    

if useGPU:
    import pycuda.driver as cuda
    import pycuda.autoinit
    import skcuda.linalg as sklinalg
    import skcuda.misc as skmisc
    from FatFractionLookup_GPU import FatFractionLookup_GPU as FatFractionLookup
    import findmax_ff
    
    skmisc.init()
    NTHREADS = 1
else:
    from FatFractionLookup import FatFractionLookup
    
dicomStack, infos = load3dDicom(baseDir)

etl = int(infos[0].EchoTrainLength)
echoSpacing = float(infos[0].EchoTime)

oldShape = dicomStack.shape
newShape = (oldShape[0], oldShape[1], etl, int(oldShape[2]/etl))

print(newShape)

nSlices = newShape[3] 
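For reference, the new shape computed above splits the third axis into (echo, slice) pairs; a quick check with dummy data (a hypothetical stack, not DICOM data) shows the ordering this assumes:

import numpy as np

rows, cols, etl, n_slices = 2, 2, 3, 4
stack = np.arange(rows * cols * etl * n_slices).reshape(rows, cols,
                                                        etl * n_slices)
reshaped = stack.reshape(rows, cols, etl, n_slices)
# This split assumes old index = echo * n_slices + slice, i.e. all slices of
# echo 0 precede those of echo 1 along the original third axis.
assert reshaped[0, 0, 1, 2] == stack[0, 0, 1 * n_slices + 2]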
Example #5
from sklearn.exceptions import ConvergenceWarning
from ..utils.extmath import safe_sparse_dot
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.multiclass import _check_partial_fit_first_call, unique_labels
from sklearn.utils.multiclass import type_of_target

from ..base import BaseEstimator, RegressorMixin

# imports needed by the excerpt below
from abc import ABCMeta
import numpy as np
import six

import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import pycuda.gpuarray as gpuarray

import skcuda.misc as cumisc
import skcuda.linalg as culinalg
cumisc.init()
culinalg.init()

_STOCHASTIC_SOLVERS = ['sgd', 'adam']


def _pack(coefs_, intercepts_):
    """Pack the parameters into a single vector."""
    return np.hstack([l.ravel() for l in coefs_ + intercepts_])
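A hypothetical inverse of `_pack` (not shown in this excerpt; the layer shapes would come from the fitted model):

def _unpack_sketch(packed, coef_shapes, intercept_shapes):
    """Restore a flat parameter vector into per-layer arrays."""
    arrays, offset = [], 0
    for shape in coef_shapes + intercept_shapes:
        size = int(np.prod(shape))
        arrays.append(packed[offset:offset + size].reshape(shape))
        offset += size
    n = len(coef_shapes)
    return arrays[:n], arrays[n:]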


class BaseMultilayerPerceptron(six.with_metaclass(ABCMeta, BaseEstimator)):
    """Base class for MLP classification and regression.

    Warning: This class should not be used directly.
    Use derived classes instead.
Example #6
import copy
from functools import wraps
import warnings

import numpy as np
from pycuda import gpuarray
from skcuda import cublas, misc

import hessianfree as hf

misc.init()


def debug_wrapper(cpu_func, debug=False):
    """Decorator used to specify an equivalent CPU function that can be used
    to verify the output of a GPU function (for debugging)."""

    def debug_func_parametrized(gpu_func):
        @wraps(gpu_func)
        def debug_func(*args, **kwargs):
            if debug:
                cpu_args = list(args)
                for i, a in enumerate(cpu_args):
                    if isinstance(a, gpuarray.GPUArray):
                        cpu_args[i] = a.get()
                cpu_kwargs = copy.copy(kwargs)
                for k in cpu_kwargs:
                    if isinstance(cpu_kwargs[k], gpuarray.GPUArray):
                        cpu_kwargs[k] = cpu_kwargs[k].get()

                cpu_kwargs.pop("stream", None)
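The listing truncates the decorator here; one plausible completion (an assumption, not the library's actual code) runs both versions and warns on disagreement:

                # assumed continuation: run both implementations and compare
                out_gpu = gpu_func(*args, **kwargs)
                out_cpu = cpu_func(*cpu_args, **cpu_kwargs)
                gpu_val = (out_gpu.get()
                           if isinstance(out_gpu, gpuarray.GPUArray)
                           else out_gpu)
                if not np.allclose(gpu_val, out_cpu, atol=1e-4):
                    warnings.warn("GPU/CPU mismatch in %s" % gpu_func.__name__)
                return out_gpu
            return gpu_func(*args, **kwargs)

        return debug_func

    return debug_func_parametrized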
Example #7
@classmethod
def setUpClass(cls):
    cls.ctx = make_default_context()
    misc.init()
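A matching teardown (assumed; the excerpt doesn't include it) would release what `setUpClass` acquired:

@classmethod
def tearDownClass(cls):
    misc.shutdown()  # counterpart to misc.init()
    cls.ctx.pop()    # release the context from make_default_context()
    cls.ctx = None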
Example #8
def decompose(d, beta, betaT, _gamma, verbose):
    print('start decomposing')
    sk_misc.init()
    D = gpuarray.to_gpu(d)
    Beta = gpuarray.to_gpu(beta)
    BetaT = gpuarray.to_gpu(betaT)
    l, m, n = D.shape
    step_size = np.maximum(m, n)
    lmn, k = Beta.shape

    kernel_code = kernel_code_template % {
        'MATRIX_A_COL_SIZE': lmn,
        'BLOCK_ROW_SIZE': 1,
        'BLOCK_COL_SIZE': 1,
        'STEP_SIZE': step_size,
    }

    mod = compiler.SourceModule(kernel_code)
    matrixmul = mod.get_function("MatrixMulKernel")
    grid = (1, k)
    block = (step_size, 1, 1)

    D_v = D.reshape(l * m * n, 1)
    _, k = Beta.shape

    tol = 0.2
    max_iter = 10000

    tau = 0.1
    sigma = 1 / (12 * tau)

    x_t = gpuarray.zeros((l, m, n), D.dtype)
    x_a = gpuarray.zeros((k, 1), D.dtype)
    y_t = gpuarray.zeros((3, l, m, n), D.dtype)

    EL, ET, E = computeEnergy(D_v, x_t, _gamma, x_a, Beta)
    print('Initial Energy: E = ' + str(E) + ', EL=' + str(EL) + ', ET= ' +
          str(ET))
    Es = E
    change = 10
    t0 = time.perf_counter()  # time.clock() was removed in Python 3.8
    for i in range(max_iter):
        ks_yt = -div(y_t)
        xt_new, xa_new = ProxG(x_t - tau * ks_yt, x_a, D_v, tau, Beta, BetaT,
                               matrixmul, grid, block)
        yt_new = ProxFSs(y_t + sigma * grad(2 * xt_new - x_t), _gamma)

        x_t = xt_new
        x_a = xa_new
        y_t = yt_new

        EL, ET, E = computeEnergy(D_v, x_t, _gamma, x_a, Beta)
        Es = np.append(Es, E)
        length = Es.shape[0]
        El5 = np.mean(Es[np.maximum(0, length - 6):length - 1])
        El5c = np.mean(Es[np.maximum(0, length - 5):length])
        change = np.append(change, El5c - El5)
        t1 = time.perf_counter() - t0

        if np.mod(i + 1, 100) == 0:
            print('Iter ' + str(i + 1) + ': E = ' + str(E) + '; EL=' +
                  str(EL) + ', ET=' + str(ET) + ', avechg = ' +
                  str(change[length - 1]))
        if i >= 100 and np.max(np.abs(
                change[np.maximum(0, length - 3):length])) < tol:
            print('Converged after ' + str(i + 1) + ' iterations.')
            break
    T = x_t
    Alpha = x_a
    L = D - T
    l = L.get()
    t = T.get()
    alpha = Alpha.get()

    sk_misc.shutdown()
    return (l, t, alpha)
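A hypothetical call (all shapes, dtypes, and parameter values below are assumptions):

import numpy as np

l_, m_, n_, k_ = 2, 16, 16, 4
d = np.random.rand(l_, m_, n_).astype(np.float64)
beta = np.random.rand(l_ * m_ * n_, k_).astype(np.float64)

low_rank, texture, alpha = decompose(d, beta, beta.T.copy(), _gamma=0.5,
                                     verbose=False)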
Example #9
def decompose(d, beta, betaT, _lambda, _gamma, _lambda_c, _gamma_c, verbose):
    print('start decomposing on GPU')
    sk_misc.init()
    D = gpuarray.to_gpu(d)
    Beta = gpuarray.to_gpu(beta)
    BetaT = gpuarray.to_gpu(betaT)
    l, m, n = D.shape
    step_size = np.maximum(m, n)
    lmn, k = Beta.shape
    _Lambda = gpuarray.to_gpu(_lambda)
    _Gamma = gpuarray.to_gpu(_gamma)

    kernel_code = kernel_code_template % {
        'MATRIX_A_COL_SIZE': lmn,
        'BLOCK_ROW_SIZE': 1,
        'BLOCK_COL_SIZE': 1,
        'STEP_SIZE': step_size,
    }

    mod = compiler.SourceModule(kernel_code)
    matrixmul = mod.get_function("MatrixMulKernel")
    grid = (1, k)
    block = (step_size, 1, 1)

    D_v = D.reshape(lmn, 1)

    tol = 0.1
    max_iter = 10000

    tau = 0.1
    sigma = 1 / (13 * tau)
    x_s = gpuarray.zeros((l, m, n), D.dtype)
    x_t = gpuarray.zeros((l, m, n), D.dtype)
    x_a = gpuarray.zeros((k, 1), D.dtype)
    y_t = gpuarray.zeros((3, l, m, n), D.dtype)
    y_s = x_s

    xs_new = gpuarray.zeros_like(x_s)
    xt_new = gpuarray.zeros_like(x_t)
    xa_new = gpuarray.zeros_like(x_a)
    ys_new = gpuarray.zeros_like(y_s)
    yt_new = gpuarray.zeros_like(y_t)
    assign_matrix(x_s, _Lambda, D)

    EL, ES, ET, Es = computeEnergy(D_v, x_s, x_t, _Lambda, _gamma_c, x_a, Beta)
    print('Initial Energy: E = ' + str(Es) + ', EL=' + str(EL) + ', ES=' +
          str(ES) + ', ET=' + str(ET))
    change = 10
    t0 = time.time()
    print_iters = 200
    if verbose:
        print_iters = 50
    for i in range(max_iter):
        ks_yt = -div(y_t)
        ks_ys = y_s

        xs_new, xt_new, xa_new = ProxG(x_s - tau * ks_ys, x_t - tau * ks_yt,
                                       x_a, D_v, tau, Beta, BetaT, matrixmul,
                                       grid, block)
        assign_matrix(xs_new, _Lambda, D)
        assign_matrix(xt_new, _Gamma, D)

        ys_new, yt_new = ProxFSs(y_s + sigma * (2 * xs_new - x_s),
                                 y_t + sigma * grad(2 * xt_new - x_t), _Lambda,
                                 _gamma_c)

        x_s = xs_new
        x_t = xt_new
        x_a = xa_new
        y_s = ys_new
        y_t = yt_new

        EL, ES, ET, E = computeEnergy(D_v, x_s, x_t, _Lambda, _gamma_c, x_a,
                                      Beta)
        Es = np.append(Es, E)
        length = Es.shape[0]
        El5 = np.mean(Es[np.maximum(0, length - 6):length - 1])
        El5c = np.mean(Es[np.maximum(0, length - 5):length])
        change = np.append(change, El5c - El5)
        t1 = time.time() - t0

        if np.mod(i + 1, print_iters) == 0:
            print('Iter ' + str(i + 1) + ': E = ' + str(E) + '; EL=' +
                  str(EL) + ', ES=' + str(ES) + ', ET=' + str(ET) +
                  ', avechg = ' + str(change[length - 1]))
        if i >= 100 and np.max(np.abs(
                change[np.maximum(0, length - 3):length])) < tol:
            print('Iter ' + str(i + 1) + ': E = ' + str(E) + '; EL=' +
                  str(EL) + ', ES=' + str(ES) + ', ET=' + str(ET) +
                  ', avechg = ' + str(change[length - 1]))
            print('Converged after ' + str(i + 1) + ' iterations.')
            break

    S = x_s
    T = x_t
    Alpha = x_a
    L = D - S - T
    l = L.get()
    s = S.get()
    t = T.get()
    alpha = Alpha.get()

    sk_misc.shutdown()
    return (l, s, t, alpha)
Example #10
@classmethod
def setUpClass(cls):
    misc.init()