Example #1
    def prepare_gpu(self, type_t="double"):
        from pycuda import gpuarray

        nfuns, npoints = self.I.shape
        nregions, nsubpoints = self.J.shape
        ndim = self.v.shape[1]

        if type_t == "double":
            v, b = np.float64(self.v), np.float64(self.b)
        else:
            v, b = np.float32(self.v), np.float32(self.b)

        constvars = {
            'EPIGRAPH_PROJ': 1,
            'ndim': ndim,
            'nfuns': nfuns,
            'nregions': nregions,
            'npoints': npoints,
            'nsubpoints': nsubpoints,
            'I': self.I,
            'J': self.J,
            'A_STORE': v,
            'B_STORE': b,
            'term_maxiter': ndim * nsubpoints,
            'term_tolerance': 1e-9,
            'TYPE_T': type_t,
        }
        for f in ['fabs']:
            constvars[f.upper()] = f if type_t == "double" else (f + "f")
        files = [resource_stream('opymize.operators', 'proj.cu')]
        templates = [("epigraphproj", "P", (nfuns, nregions, 1), (24, 12, 1))]
        self._kernel = prepare_kernels(files, templates,
                                       constvars)['epigraphproj']
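All of these snippets follow the same pattern: problem sizes, index tables and math-function names are baked into the CUDA source as compile-time constants, and the resulting kernel is launched with a fixed block shape. The opymize helper prepare_kernels itself is not shown above; the following is a minimal, self-contained sketch of that pattern in plain pycuda, with a made-up kernel and defines standing in for constvars and templates (an illustration of the idea, not the actual opymize implementation):

# Hypothetical, self-contained sketch of the constvars/templates pattern
# (not the actual prepare_kernels from opymize).
import numpy as np
import pycuda.autoinit  # noqa: F401 -- initializes a CUDA context
from pycuda import gpuarray
from pycuda.compiler import SourceModule

TYPE_T = "float"   # stands in for constvars['TYPE_T']
N = 1000           # stands in for a problem size such as 'N'

# Constants become preprocessor defines, mirroring how constvars is
# presumably substituted into proj.cu / einsum.cu / diff.cu.
src = """
#define TYPE_T {type_t}
#define N {n}
__global__ void scale(TYPE_T *x)
{{
    int i = blockIdx.x*blockDim.x + threadIdx.x;
    if (i < N) x[i] *= (TYPE_T)2.0;
}}
""".format(type_t=TYPE_T, n=N)

kernel = SourceModule(src).get_function("scale")

x_gpu = gpuarray.to_gpu(np.arange(N, dtype=np.float32))
block = (200, 1, 1)                            # analogous to the block tuples above
grid = ((N + block[0] - 1) // block[0], 1, 1)  # enough blocks to cover all N points
kernel(x_gpu, block=block, grid=grid)
assert np.allclose(x_gpu.get(), 2 * np.arange(N))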
Example #2
def indexedmult_prepare_gpu(B, P, x, type_t="double"):
    J = B.shape[0]
    K = x[0]['shape'][1]
    L = B.shape[2]
    M = B.shape[1]
    N = x[0]['shape'][0]
    constvars = {
        'J': J, 'K': K, 'L': L, 'M': M, 'N': N,
        'P': P, 'B': B, 'TYPE_T': type_t
    }
    files = [resource_stream('opymize.linear', 'indexed.cu')]
    templates = [
        ("indexedmult", "PP", (N, J, M), (32, 24, 1)),
        ("indexedmultadj", "PP", (N, 1, 1), (512, 1, 1))
    ]
    return prepare_kernels(files, templates, constvars)
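Unlike the methods above and below, indexedmult_prepare_gpu returns the whole kernel dictionary instead of storing a single kernel on self. Assuming the returned entries are callables that take pycuda gpuarrays matching their "PP" signature, and guessing plausible shapes for P and the output buffer (all of this is an assumption inferred from the other examples, not documented opymize behavior), usage might look roughly like this:

# Hypothetical usage sketch; the call convention and the shapes of P and
# the output buffer are assumptions, not documented opymize behavior.
import numpy as np
from pycuda import gpuarray

J, K, L, M, N = 5, 3, 2, 4, 1000
B = np.zeros((J, M, L))                # matches J = B.shape[0], M = B.shape[1], L = B.shape[2]
P = np.zeros((J, L), dtype=np.int64)   # index table (layout assumed)
x = [{'shape': (N, K)}]                # mimics x[0]['shape'] as used above

kernels = indexedmult_prepare_gpu(B, P, x, type_t="double")
x_gpu = gpuarray.zeros((N, K), dtype=np.float64)
y_gpu = gpuarray.zeros((N, J, M), dtype=np.float64)  # output shape assumed from the grid dims
kernels['indexedmult'](x_gpu, y_gpu)                 # "PP": two pointer arguments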
Example #3
 def prepare_gpu(self, type_t="double"):
     constvars = {
         'L1_NORMS_PROJ': 1,
         'lbd': self.lbd,
         'N': self.N,
         'M1': self.M[0],
         'M2': self.M[1],
         'matrixnorm': self.matrixnorm[0].upper(),
         'TYPE_T': type_t,
     }
     for f in ['fmin', 'fmax', 'sqrt', 'hypot']:
         constvars[f.upper()] = f if type_t == "double" else (f + "f")
     files = [resource_stream('opymize.operators', 'proj.cu')]
     templates = [("l1normsproj", "P", (self.N, 1, 1), (200, 1, 1))]
     self._kernel = prepare_kernels(files, templates,
                                    constvars)['l1normsproj']
Example #4
 def prepare_gpu(self, type_t="double"):
     if self._kernel is not None: return
     N, K = self.x[0]['shape']
     _, J = self.y[0]['shape']
     constvars = {
         'A': self.A,
         'J': J,
         'K': K,
         'N': N,
         'trans': 't' if self.trans else 'n',
         'MATRIX_MULT_R': 1,
         'TYPE_T': type_t
     }
     files = [resource_stream('opymize.linear', 'einsum.cu')]
     templates = [("matrixmultr", "PP", (N, J, 1), (32, 24, 1))]
     self._kernel = prepare_kernels(files, templates,
                                    constvars)['matrixmultr']
     self.adjoint.prepare_gpu(type_t=type_t)
Example #5
 def prepare_gpu(self, type_t="double"):
     if self._kernel is not None: return
     J, L, K = self.y[0]['shape']
     M = self.x[0]['shape'][2]
     constvars = {
         'A': self.A,
         'J': J,
         'K': K,
         'L': L,
         'M': M,
         'trans': 't' if self.trans else 'n',
         'MATRIX_MULT_R_BATCHED': 1,
         'TYPE_T': type_t
     }
     files = [resource_stream('opymize.linear', 'einsum.cu')]
     templates = [("matrixmultrbatched", "PP", (J, L, K), (8, 16, 4))]
     self._kernel = prepare_kernels(files, templates,
                                    constvars)['matrixmultrbatched']
     self.adjoint.prepare_gpu(type_t=type_t)
Example #6
 def prepare_gpu(self, type_t="double"):
     constvars = {
         'QUAD_EPI_PROJ': 1,
         'alph': np.float64(self.alph),
         'N': self.N,
         'M': self.M,
         'TYPE_T': type_t,
     }
     if self.lbd < np.inf:
         constvars['lbd'] = np.float64(self.lbd)
         constvars['USE_LBD'] = 1
     if self.shift is not None:
         constvars['shift'] = self.shift
         constvars['USE_SHIFT'] = 1
     for f in ['sqrt', 'cbrt', 'acos', 'cos', 'fabs']:
         constvars[f.upper()] = f if type_t == "double" else (f + "f")
     files = [resource_stream('opymize.operators', 'proj.cu')]
     templates = [("quadepiproj", "P", (self.N, 1, 1), (200, 1, 1))]
     self._kernel = prepare_kernels(files, templates,
                                    constvars)['quadepiproj']
Example #7
 def prepare_gpu(self, type_t="double"):
     if self._kernel is not None: return
     npoints = np.prod(self.imagedims)
     ndims = len(self.imagedims)
     skips = imagedim_skips(self.imagedims)
     constvars = {
         'LAPLACIAN': 1,
         'ADJOINT': 1 if self.bc[-4:] == "_adj" else 0,
         'boundary_conditions': self.bc[0].upper(),
         'N': npoints, 'D': ndims, 'C': self.nchannels,
         'skips': np.array(skips, dtype=np.int64, order='C'),
         'imagedims': np.array(self.imagedims, dtype=np.int64, order='C'),
         'imageh': self.imageh,
         'TYPE_T': type_t
     }
     files = [resource_stream('opymize.linear', 'diff.cu')]
     templates = [
         ("laplacian", "PP", (npoints, self.nchannels, 1), (32, 24, 1)),
     ]
     self._kernel = prepare_kernels(files, templates, constvars)['laplacian']
     self.adjoint.prepare_gpu(type_t=type_t)
Example #8
def diff_prepare_gpu(scheme, imagedims, imageh, nchannels, weights, type_t):
    npoints = np.prod(imagedims)
    ndims = len(imagedims)
    skips = imagedim_skips(imagedims)
    constvars = {
        'SCHEME_%s'%scheme.upper(): 1,
        'GRAD_DIV': 1,
        'N': npoints, 'D': ndims, 'C': nchannels, 'dc_skip': ndims*nchannels,
        'skips': np.array(skips, dtype=np.int64, order='C'),
        'imagedims': np.array(imagedims, dtype=np.int64, order='C'),
        'avgskips': staggered_diff_avgskips(imagedims),
        'navgskips': 1 << (ndims - 1),
        'weights': weights,
        'imageh': imageh,
        'TYPE_T': type_t
    }
    files = [resource_stream('opymize.linear', 'diff.cu')]
    templates = [
        ("gradient", "PP", (npoints, nchannels, ndims), (24, 16, 2)),
        ("divergence", "PP", (npoints, nchannels, 1), (32, 24, 1)),
    ]
    return prepare_kernels(files, templates, constvars)
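In every templates entry, the third tuple reads like the total problem dimensions (e.g. (npoints, nchannels, ndims) for the gradient kernel) and the fourth like a CUDA thread-block shape; under that reading, prepare_kernels presumably derives the launch grid by elementwise ceiling division, which the small sketch below reproduces (an inference from the pattern above, not a documented contract):

# Sketch of the grid/block arithmetic the template tuples imply
# (an assumption about what prepare_kernels does with them).
def launch_grid(dims, block):
    """Elementwise ceiling division: how many blocks are needed to cover dims."""
    return tuple((d + b - 1) // b for d, b in zip(dims, block))

# e.g. the "gradient" template from diff_prepare_gpu on a 64x64 image with 3 channels:
npoints, nchannels, ndims = 64 * 64, 3, 2
print(launch_grid((npoints, nchannels, ndims), (24, 16, 2)))  # -> (171, 1, 1)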