def __init__(self, context, devices, dImg):
    """Allocate the matting work buffers, build the program and run LCV once.

    Args:
        context: OpenCL context used for the queue, the buffers and the
            program build.
        devices: Devices passed through to ``createProgram``.
        dImg: Device-side source image. Its ``dim`` attribute sizes every
            buffer and is baked into the program as ``IMAGEW``/``IMAGEH``.
    """
    read_write = cl.mem_flags.READ_WRITE

    self.context = context
    self.queue = cl.CommandQueue(
        context,
        properties=cl.command_queue_properties.PROFILING_ENABLE)

    self.dImg = dImg
    self.dim = dImg.dim

    # The image size is compiled into the kernels as preprocessor defines.
    options = [
        '-D IMAGEW={0}'.format(self.dim[0]),
        '-D IMAGEH={0}'.format(self.dim[1]),
    ]

    # Two uint32 and two float32 work buffers, all at image size.
    self.dFg = Buffer2D(context, read_write, self.dim, np.uint32)
    self.dBg = Buffer2D(context, read_write, self.dim, np.uint32)
    self.dLcv = Buffer2D(context, read_write, self.dim, np.float32)
    self.dAlpha = Buffer2D(context, read_write, self.dim, np.float32)

    # The kernel source sits next to this module.
    source_dir = os.path.dirname(__file__)
    filename = os.path.join(source_dir, 'sharedmatting.cl')
    program = createProgram(context, devices, options, filename)

    self.kernGather = cl.Kernel(program, 'gather')
    self.kernLcv = cl.Kernel(program, 'local_color_variation')
    self.kernRefine = cl.Kernel(program, 'refine')
    self.kernProcessTrimap = cl.Kernel(program, 'process_trimap')
    self.trimapFilter = cl.Kernel(program, 'trimap_filter')

    # Enqueue the local-colour-variation kernel once; the launch is not
    # waited on here.
    gWorksize = roundUp(self.dim, SharedMatting.lw)
    self.kernLcv(self.queue, gWorksize, SharedMatting.lw,
                 self.dImg, self.dLcv, self.dAlpha)
def execute(self, queue, input):
    """Run the dtype-specific kernel over ``input`` and return a new image.

    The kernel receives the ``range``/``hues``/``vals``/``sats`` tables of
    this object as float32 arrays, followed by the input buffer, the output
    image and the input dimensions. The call blocks until the kernel has
    finished.

    Args:
        queue: Command queue the kernel is enqueued on.
        input: ``Buffer2D`` whose ``dtype`` is uint8, uint32, int32 or
            float32. Its ``dim`` sizes the output image.

    Returns:
        A newly allocated READ_WRITE ``Image2D`` (RGBA, UNORM_INT8) with the
        same dimensions as ``input``.

    Raises:
        NotImplementedError: If ``input`` is not a ``Buffer2D`` or its dtype
            has no matching kernel. Both cases are rejected so that an
            output image no kernel has written is never returned.
    """
    if not isinstance(input, Buffer2D):
        raise NotImplementedError(
            'Not implemented for {0}'.format(type(input)))

    # Resolve the kernel before allocating anything. The attribute is looked
    # up lazily so that only the matching kernel has to exist.
    kernel_names = (
        (np.uint8, 'kern_ui8'),
        (np.uint32, 'kern_ui'),
        (np.int32, 'kern_i'),
        (np.float32, 'kern_f'),
    )
    for dtype, kernel_name in kernel_names:
        if input.dtype == dtype:
            kernel = getattr(self, kernel_name)
            break
    else:
        raise NotImplementedError(
            'Not implemented for dtype {0}'.format(input.dtype))

    output = Image2D(
        self.context,
        cl.mem_flags.READ_WRITE,
        cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNORM_INT8),
        input.dim)

    gw = roundUp(input.dim, LWORKGROUP)
    args = [
        np.array(self.range, np.float32),
        np.array(self.hues, np.float32),
        np.array(self.vals, np.float32),
        np.array(self.sats, np.float32),
        input,
        output,
        np.array(input.dim, np.int32),
    ]

    kernel(queue, gw, LWORKGROUP, *args).wait()

    return output
def execute(self, queue, args):
    """Enqueue the trimap filter kernel over the buffer that ends ``args``.

    The last element of ``args`` supplies the dimensions: they size the
    global work size (rounded up to ``LWORKGROUP``) and are passed to the
    kernel as one extra trailing int32 array argument. The launch is not
    waited on.

    Args:
        queue: Command queue the kernel is enqueued on.
        args: Sequence of kernel arguments. Its last element must expose a
            ``dim`` attribute. The sequence is not modified, so the same
            list can be passed again on a later call.
    """
    buf = args[-1]

    # Work on a copy: appending to the caller's list would add one more
    # dimension argument on every call that reuses it.
    kernel_args = list(args)
    kernel_args.append(np.array(buf.dim, np.int32))

    gw = roundUp(buf.dim, LWORKGROUP)

    self.trimapFilter(queue, gw, LWORKGROUP, *kernel_args)
def __init__(self, context, devices, capacity):
    """Build the prefix-sum kernels and allocate the per-level buffers.

    Args:
        context: OpenCL context used for the queue, program and buffers.
        devices: Devices passed through to ``createProgram``.
        capacity: Requested element capacity. It is rounded up with
            ``roundUp`` against ``ELEMENTS_PER_WORKGROUP`` before use.
    """
    self.context = context

    # Profiling is only enabled on the queue when the module flag asks for it.
    if PROFILE_GPU:
        self.queue = cl.CommandQueue(
            context,
            properties=cl.command_queue_properties.PROFILING_ENABLE)
    else:
        self.queue = cl.CommandQueue(context)

    filename = os.path.join(os.path.dirname(__file__), "prefixsum.cl")
    program = createProgram(context, devices, [], filename)

    self.kernScan_pad_to_pow2 = cl.Kernel(program, "scan_pad_to_pow2")
    self.kernScan_subarrays = cl.Kernel(program, "scan_subarrays")
    self.kernScan_inc_subarrays = cl.Kernel(program, "scan_inc_subarrays")

    self.lw = (LEN_WORKGROUP,)

    self.capacity = roundUp(capacity, ELEMENTS_PER_WORKGROUP)

    # One buffer per level: each holds one int per work-group of the level
    # before it, shrinking by ELEMENTS_PER_WORKGROUP until nothing is left.
    # Floor division keeps the sizes integral, so the loop ends even when
    # true division is in effect.
    self.d_parts = []
    n_parts = self.capacity // ELEMENTS_PER_WORKGROUP
    while n_parts > 0:
        self.d_parts.append(
            cl.Buffer(context, cl.mem_flags.READ_WRITE, szInt * n_parts))
        n_parts //= ELEMENTS_PER_WORKGROUP

    self.elapsed = 0
def label(self, d_points, n_points, label):
    """Run the ``label`` kernel over a list of points and wait for it.

    Args:
        d_points: Device-side point list handed to the kernel unchanged.
        n_points: Number of points. It sizes the 1-D launch and is passed
            to the kernel as an int32.
        label: Label value, passed to the kernel as a uint8.
    """
    # One work-item per point, rounded up to the 1-D work-group size.
    global_size = roundUp((n_points,), LWORKGROUP_1D)

    event = self.kernLabel(
        self.queue, global_size, LWORKGROUP_1D,
        self.dLabelsIn,
        self.dStrengthIn,
        d_points,
        np.uint8(label),
        np.int32(n_points),
        self.tilelist.d_tiles,
        np.int32(self.tilelist.iteration))
    event.wait()
def processTrimap(self, dTriOut, dTriIn, dStrength, threshold):
    """Run the ``process_trimap`` kernel and block until the queue drains.

    Args:
        dTriOut: Device buffer passed as the kernel's first argument.
        dTriIn: Device buffer passed as the kernel's second argument.
        dStrength: Device buffer passed as the kernel's third argument.
        threshold: Scalar passed to the kernel as a float32.
    """
    width, height = self.dim[0], self.dim[1]
    gWorksize = roundUp((width, height), SharedMatting.lw)

    # Local scratch of (TILEW + 2*DILATE) x (TILEH + 2*DILATE) elements,
    # each szChar bytes.
    scratch = cl.LocalMemory(
        szChar * (TILEW + 2 * DILATE) * (TILEH + 2 * DILATE))

    self.kernProcessTrimap(
        self.queue, gWorksize, SharedMatting.lw,
        dTriOut, dTriIn, dStrength, np.float32(threshold), scratch)

    self.queue.finish()
def calcMatte(self, dTri):
    """Enqueue ``gather`` then ``refine`` for a trimap and wait for both.

    Args:
        dTri: Device-side trimap, passed to both kernels as their second
            argument.
    """
    local_size = SharedMatting.lw
    gWorksize = roundUp(self.dim, local_size)

    # Both kernels share their first four arguments; refine also takes the
    # alpha and local-colour-variation buffers.
    gather_args = [self.dImg, dTri, self.dFg, self.dBg]
    refine_args = gather_args + [self.dAlpha, self.dLcv]

    self.kernGather(self.queue, gWorksize, local_size, *gather_args)
    self.kernRefine(self.queue, gWorksize, local_size, *refine_args)

    # One finish covers both launches.
    self.queue.finish()
# Driver script: load a test image, upload it as both an image and a buffer,
# and construct a QuickBrush on it.
cm = cl.mem_flags

img = Image.open("/Users/marcdeklerk/msc/code/dataset/processed/source/800x600/GT04.png")
if img.mode != 'RGBA':
    img = img.convert('RGBA')

# The OpenCL context comes from the canvas, so the Qt application and the
# canvas must exist before any CL object is created.
app = QtGui.QApplication(sys.argv)
canvas = CLCanvas(img.size)
window = CLWindow(canvas)

clContext = canvas.context
devices = clContext.get_info(cl.context_info.DEVICES)
queue = cl.CommandQueue(
    clContext,
    properties=cl.command_queue_properties.PROFILING_ENABLE)

# NumPy shape is (rows, cols) and the CL dimensions are (width, height);
# both describe the image after rounding up to the brush's work-group size.
shapeNP = roundUp((img.size[1], img.size[0]), QuickBrush.lWorksize)
shapeCL = (shapeNP[1], shapeNP[0])

# View each RGBA pixel as a single uint32 and pad to the rounded-up shape.
hImg = padArray2D(np.array(img).view(np.uint32).squeeze(), shapeNP, 'edge')

dImg = Image2D(
    clContext,
    cl.mem_flags.READ_ONLY,
    cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNSIGNED_INT8),
    shapeCL)
cl.enqueue_copy(queue, dImg, hImg, origin=(0, 0), region=shapeCL).wait()

dBuf = Buffer2D(clContext, cm.READ_ONLY | cm.COPY_HOST_PTR, hostbuf=hImg)
dStrokes = Buffer2D(clContext, cm.READ_WRITE, shapeCL, dtype=np.uint8)

brush = QuickBrush(clContext, devices, dImg, dStrokes)
def draw(self, p0, p1):
    """Draw a stroke segment, then refit and rescore the foreground GMM.

    After delegating to ``Brush.draw`` the method samples the image at the
    stroke points on the GPU, copies the samples to the host, fits
    ``self.gmmFg`` to them and writes the foreground score for the whole
    image into ``self.dScoreFg``.

    Args:
        p0: Segment start, forwarded to ``Brush.draw``.
        p1: Segment end, forwarded to ``Brush.draw``.
    """
    # NOTE(review): presumably Brush.draw refreshes self.n_points and
    # self.d_points -- confirm against the base class.
    Brush.draw(self, p0, p1)

    local_size = (256,)
    sampler = cl.Sampler(self.context, False, cl.addressing_mode.NONE,
                         cl.filter_mode.NEAREST)

    args = [
        np.int32(self.n_points),
        self.d_points,
        sampler,
        self.d_img,
        self.dSampleFg,
    ]

    # One work-item per stroke point.
    gWorksize = roundUp((self.n_points,), local_size)
    self.kern_get_samples(self.queue, gWorksize, local_size, *args).wait()

    cl.enqueue_copy(self.queue, self.hSampleFg, self.dSampleFg)

    self.gmmFg.fit(self.dSampleFg, self.n_points)
    self.gmmFg.score(self.d_img, self.dScoreFg)
def __init__(self, context, devices, d_img, d_labels):
    """Set up the brush: kernels, GMMs, sample buffers and background model.

    The constructor reads the whole image back to the host, draws random
    background samples from it, fits the background GMM to them, prints
    the fitted parameters and scores the image against that model.

    Args:
        context: OpenCL context used for the queue, program and buffers.
        devices: Devices forwarded to ``Brush.__init__``.
        d_img: Device-side image. Its ``dim`` sizes the score buffers and
            the host copy.
        d_labels: Label buffer forwarded to ``Brush.__init__``.
    """
    Brush.__init__(self, context, devices, d_labels)

    self.context = context
    self.queue = cl.CommandQueue(
        context,
        properties=cl.command_queue_properties.PROFILING_ENABLE)

    nComponentsFg = 4
    nComponentsBg = 4
    nIterations = 65
    capSamplesFg = 10240
    capSamplesBg = 12000
    self.nDim = 3

    self.dim = d_img.dim

    filename = os.path.join(os.path.dirname(__file__), 'quick.cl')
    # NOTE(review): the program is built for context.devices, not for the
    # ``devices`` argument -- confirm this is intended.
    program = createProgram(context, context.devices, [], filename)

    self.kern_get_samples = cl.Kernel(program, 'get_samples')

    self.lWorksize = (16, 16)
    self.gWorksize = roundUp(self.dim, self.lWorksize)

    # Four samples per work-group. Floor division keeps the count an
    # integer even when true division is in effect.
    nSamples = 4 * (self.gWorksize[0] // self.lWorksize[0]) * (
        self.gWorksize[1] // self.lWorksize[1])

    self.gmmFg = GMM(context, nIterations, nComponentsFg, capSamplesFg)
    self.gmmBg = GMM(context, nIterations, nComponentsBg, nSamples)

    self.hScore = np.empty(self.dim, np.float32)
    self.hSampleFg = np.empty((capSamplesFg, ), np.uint32)
    self.hSampleBg = np.empty((capSamplesBg, ), np.uint32)
    self.hA = np.empty((max(nComponentsFg, nComponentsBg), 8), np.float32)

    self.d_img = d_img

    cm = cl.mem_flags
    # Sample buffers are sized at 4 bytes per sample.
    self.dSampleFg = cl.Buffer(context, cm.READ_WRITE, size=4 * capSamplesFg)
    self.dSampleBg = cl.Buffer(context, cm.READ_WRITE, size=4 * capSamplesBg)
    self.dA = cl.Buffer(context, cm.READ_ONLY | cm.COPY_HOST_PTR,
                        hostbuf=self.hA)
    self.dScoreFg = Buffer2D(context, cm.READ_WRITE, self.dim, np.float32)
    self.dScoreBg = Buffer2D(context, cm.READ_WRITE, self.dim, np.float32)

    self.capPoints = 200 * 200 * 300  # brush radius 200, stroke length 300
    self.points = np.empty((self.capPoints), np.uint32)

    # Read the image back into a flat host array so it can be indexed by
    # linear pixel position.
    self.h_img = np.empty(self.dim, np.uint32)
    self.h_img = self.h_img.ravel()
    cl.enqueue_copy(self.queue, self.h_img, self.d_img, origin=(0, 0),
                    region=self.dim).wait()

    # Draw random pixel positions as background samples and upload them.
    self.samples_bg_idx = np.random.randint(
        0, self.dim[0] * self.dim[1], capSamplesBg)
    self.hSampleBg = self.h_img[self.samples_bg_idx]
    cl.enqueue_copy(self.queue, self.dSampleBg, self.hSampleBg).wait()

    w, m, c = self.gmmBg.fit(self.dSampleBg, 300, retParams=True)

    # Single-argument print calls produce the same output on Python 2 and
    # also parse on Python 3.
    print(w)
    print(m)
    print(c)

    self.gmmBg.score(self.d_img, self.dScoreBg)
# Driver script: load a test image, upload it, and define the sample
# rectangle and GMM parameters.
np.set_printoptions(suppress=True)

src = Image.open("/Users/marcdeklerk/msc/code/dataset/processed/source/800x600/GT04.png")
if src.mode != 'RGBA':
    src = src.convert('RGBA')

# ``shape`` is (width, height) as reported by the image; ``shapeNp`` is the
# same size in NumPy (rows, cols) order.
shape = src.size
width, height = src.size[0], src.size[1]
shapeNp = (src.size[1], src.size[0])
size = width * height

lWorksize = (16, 16)

# View each RGBA pixel as one uint32 and pad to a multiple of the
# work-group size.
hSrc = padArray2D(
    np.array(src).view(np.uint32).squeeze(),
    roundUp(shapeNp, lWorksize),
    'edge')
dSrc = cl.Buffer(context, cm.READ_ONLY | cm.COPY_HOST_PTR, hostbuf=hSrc)

# One row per pixel; keep the first three of its four bytes as float32.
rgb = hSrc.reshape(-1, 1).view(np.uint8).astype(np.float32)[:, 0:3]

# Sample rectangle, stored as (rows, cols).
x0, x1 = 0, 500
y0, y1 = 200, 220
rect = (y1 - y0, x1 - x0)
nSamples = rect[0] * rect[1]

nIter = 65
nComp = 4
nDim = 3
def paintGL(self):
    """Composite the visible layers with OpenCL and blit the result.

    Steps:
      1. Clear the read renderbuffer through the framebuffer object.
      2. Acquire the GL-shared renderbuffers for OpenCL.
      3. For each enabled layer with non-zero opacity, iterating
         ``self.layers`` in reverse, run its filters, blend it from the
         read into the write renderbuffer and call ``self.swapRbos()``.
      4. Run the flip kernel, then release the shared renderbuffers.
      5. Blit the visible rectangle to the window-system framebuffer.

    Returns immediately when there are no layers.

    Raises:
        NotImplementedError: If a layer's (filtered) content is neither an
            ``Image2D`` in RGBA/UNORM_INT8 or RGBA/UNSIGNED_INT8 format nor
            a uint32 ``Buffer2D``.
    """
    if not self.layers:
        return

    # Clear the read renderbuffer.
    glBindRenderbuffer(GL_RENDERBUFFER, self.rbos[self.rboRead])
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, self.fbo)
    glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
                              GL_RENDERBUFFER, self.rbos[self.rboRead])
    glClear(GL_COLOR_BUFFER_BIT)

    cl.enqueue_acquire_gl_objects(self.queue, self.rbosCL)

    # The sampler settings never change, so one sampler serves every launch.
    sampler = cl.Sampler(self.context, False, cl.addressing_mode.NONE,
                         cl.filter_mode.NEAREST)

    visible = [layer for layer in self.layers
               if layer.enabled and layer.opacity != 0]

    for layer in reversed(visible):
        # Run the layer's filters in order; each consumes the previous output.
        source = layer.clobj
        for layer_filter in layer.filters:
            source = layer_filter.execute(self.queue, source)

        args = [
            sampler,
            self.rbosCL[self.rboRead],
            self.rbosCL[self.rboWrite],
            np.float32(layer.opacity),
            source,
        ]

        if isinstance(source, Image2D):
            fmt = (source.format.channel_order,
                   source.format.channel_data_type)
            if fmt == (cl.channel_order.RGBA, cl.channel_type.UNORM_INT8):
                blend = self.kernBlendImgf
            elif fmt == (cl.channel_order.RGBA,
                         cl.channel_type.UNSIGNED_INT8):
                blend = self.kernBlendImgui
            else:
                raise NotImplementedError()
        elif isinstance(source, Buffer2D):
            # Buffer kernels take the dimensions as an extra argument.
            args.append(np.array(source.dim, np.int32))
            if source.dtype == np.uint32:
                blend = self.kernBlendBufui
            else:
                raise NotImplementedError()
        else:
            raise NotImplementedError()

        blend(self.queue, source.dim, LWORKGROUP, *args).wait()

        self.queue.finish()
        # NOTE(review): swapRbos presumably exchanges the read and write
        # renderbuffer indices -- confirm.
        self.swapRbos()

    gw = roundUp(self.shape, LWORKGROUP)
    args = [
        self.rbosCL[self.rboRead],
        self.rbosCL[self.rboWrite],
        sampler,
    ]
    self.kernFlip(self.queue, gw, LWORKGROUP, *args)

    # Release only after the flip kernel: it still reads and writes the
    # shared renderbuffers, and OpenCL may only touch them while acquired.
    cl.enqueue_release_gl_objects(self.queue, self.rbosCL)
    self.queue.finish()

    # Prepare to read from the renderbuffer that now holds the result.
    glBindRenderbuffer(GL_RENDERBUFFER, self.rbos[self.rboWrite])
    glBindFramebuffer(GL_READ_FRAMEBUFFER, self.fbo)
    glFramebufferRenderbuffer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
                              GL_RENDERBUFFER, self.rbos[self.rboWrite])

    # Set up to read from the renderbuffer and draw to the window-system
    # framebuffer.
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0)
    glViewport(0, 0, self.viewport[0], self.viewport[1])

    # Do the copy. The source rectangle is the visible rect divided by the
    # zoom, with y measured from the opposite edge.
    glBlitFramebuffer(
        int(self.rect.x() / self.zoom),
        self.height - int((self.rect.y() + self.rect.height()) / self.zoom),
        int((self.rect.x() + self.rect.width()) / self.zoom),
        self.height - int(self.rect.y() / self.zoom),
        0, 0, self.rect.width(), self.rect.height(),
        GL_COLOR_BUFFER_BIT, GL_NEAREST)
def __init__(self, context, devices, img, neighbourhood=NEIGHBOURHOOD.VON_NEUMANN, weight=None):
    """Allocate GrowCut state for an image and build the evolve kernels.

    Args:
        context: OpenCL context used for the queue, buffers and program.
        devices: Devices passed to the tile list and to ``createProgram``.
        img: Source image; must be a ``cl.Image``.
        neighbourhood: ``GrowCut.NEIGHBOURHOOD.VON_NEUMANN`` or
            ``GrowCut.NEIGHBOURHOOD.MOORE``; selects the evolve kernel.
        weight: Expression string compiled into the program as the body of
            the ``G_NORM(X)`` macro. ``None`` selects
            ``GrowCut.WEIGHT_DEFAULT``.

    Raises:
        NotImplementedError: If ``img`` is not a ``cl.Image``.
        ValueError: If ``neighbourhood`` is not one of the two supported
            values. Without this check ``kernEvolve`` would be left unset.
    """
    if neighbourhood == GrowCut.NEIGHBOURHOOD.VON_NEUMANN:
        evolve_kernel_name = 'evolveVonNeumann'
    elif neighbourhood == GrowCut.NEIGHBOURHOOD.MOORE:
        evolve_kernel_name = 'evolveMoore'
    else:
        raise ValueError(
            'Unsupported neighbourhood: {0}'.format(neighbourhood))

    if not isinstance(img, cl.Image):
        raise NotImplementedError('Not implemented for {0}'.format(type(
            img)))

    self.context = context
    self.queue = cl.CommandQueue(
        context,
        properties=cl.command_queue_properties.PROFILING_ENABLE)

    if weight is None:
        weight = GrowCut.WEIGHT_DEFAULT

    self.dImg = img
    width = img.get_image_info(cl.image_info.WIDTH)
    height = img.get_image_info(cl.image_info.HEIGHT)
    dim = (width, height)

    self.tilelist = IncrementalTileList(context, devices, dim,
                                        (TILEW, TILEH))

    # Single-element convergence flag, initialised to 0 and mirrored on the
    # device.
    self.hHasConverged = np.empty((1,), np.int32)
    self.hHasConverged[0] = False

    self.dLabelsIn = Buffer2D(context, cm.READ_WRITE, dim, np.uint8)
    self.dLabelsOut = Buffer2D(context, cm.READ_WRITE, dim, np.uint8)
    self.dStrengthIn = Buffer2D(context, cm.READ_WRITE, dim, np.float32)
    self.dStrengthOut = Buffer2D(context, cm.READ_WRITE, dim, np.float32)
    self.dHasConverged = cl.Buffer(context,
                                   cm.READ_WRITE | cm.COPY_HOST_PTR,
                                   hostbuf=self.hHasConverged)

    self.args = [
        self.tilelist.d_list,
        self.dLabelsIn,
        self.dLabelsOut,
        self.dStrengthIn,
        self.dStrengthOut,
        self.dHasConverged,
        np.int32(self.tilelist.iteration),
        self.tilelist.d_tiles,
        cl.LocalMemory(szInt * 9),
        # Local scratch of (TILEW + 2) x (TILEH + 2) elements each.
        cl.LocalMemory(szInt * (TILEW + 2) * (TILEH + 2)),
        cl.LocalMemory(szFloat * (TILEW + 2) * (TILEH + 2)),
        self.dImg,
        cl.Sampler(context, False, cl.addressing_mode.NONE,
                   cl.filter_mode.NEAREST),
    ]

    self.gWorksize = roundUp(dim, self.lw)
    self.gWorksizeTiles16 = roundUp(dim, self.lWorksizeTiles16)

    # Tile-grid size, image size, tile size and the weight expression are
    # compiled into the program as preprocessor defines.
    options = [
        '-D TILESW=' + str(self.tilelist.dim[0]),
        '-D TILESH=' + str(self.tilelist.dim[1]),
        '-D IMAGEW=' + str(dim[0]),
        '-D IMAGEH=' + str(dim[1]),
        '-D TILEW=' + str(TILEW),
        '-D TILEH=' + str(TILEH),
        '-D G_NORM(X)=' + weight,
    ]

    filename = os.path.join(os.path.dirname(__file__), 'growcut.cl')
    program = createProgram(context, devices, options, filename)

    self.kernEvolve = cl.Kernel(program, evolve_kernel_name)
    self.kernLabel = cl.Kernel(program, 'label')

    self.isComplete = False
from GrowCut import GrowCut
import sys

# Driver script: load an image and its trimap, create the GL/CL canvas, and
# upload the image both as a cl.Image and as a Buffer2D.
img = Image.open("/Users/marcdeklerk/msc/code/dataset/processed/source/800x600/GT04.png").convert('RGBA')
tri = Image.open("/Users/marcdeklerk/msc/code/dataset/processed/trimap1/800x600/GT04.png").convert('RGBA')

# The OpenCL context is taken from the canvas.
app = QtGui.QApplication(sys.argv)
canvas = CLCanvas(img.size)
window = CLWindow(canvas)

context = canvas.context
devices = context.get_info(cl.context_info.DEVICES)
queue = cl.CommandQueue(
    context,
    properties=cl.command_queue_properties.PROFILING_ENABLE)

# ``shape`` is NumPy (rows, cols) order rounded up to the matting work-group
# size; ``dim`` is the same size in (width, height) order.
shape = roundUp((img.size[1], img.size[0]), SharedMatting.lw)
dim = (shape[1], shape[0])

# View each RGBA pixel as one uint32 and pad to the rounded-up shape.
hImg = padArray2D(np.array(img).view(np.uint32).squeeze(), shape, 'edge')

dImgGC = cl.Image(
    context,
    cl.mem_flags.READ_ONLY,
    cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNSIGNED_INT8),
    dim)
cl.enqueue_copy(queue, dImgGC, hImg, origin=(0,0), region=dim)

dImg = Buffer2D(context, cm.READ_WRITE | cm.COPY_HOST_PTR, hostbuf=hImg)
dStrokes = Buffer2D(context, cm.READ_WRITE, dim, dtype=np.uint8)
import Image
import os
import sys
from clutil import roundUp, padArray2D
from Buffer2D import Buffer2D
import numpy as np
import pyopencl as cl
import time

# Driver script: load a test image, pad it to the GraphCut work-group size,
# create the GL/CL canvas and upload the image as a Buffer2D.
cm = cl.mem_flags

img = Image.open("/Users/marcdeklerk/msc/code/dataset/processed/source/800x600/GT04.png")
if img.mode != 'RGBA':
    img = img.convert('RGBA')

# NumPy (rows, cols) shape of the unpadded image.
shape = (img.size[1], img.size[0])

# View each RGBA pixel as one uint32 and pad to a multiple of the
# work-group size.
hImg = padArray2D(
    np.array(img).view(np.uint32).squeeze(),
    roundUp(shape, GraphCut.lWorksize),
    'edge')

# From here on ``dim`` and ``shape`` describe the padded image.
height, width = hImg.shape[0], hImg.shape[1]
dim = (width, height)
shape = (height, width)

# The OpenCL context is taken from the canvas.
app = QtGui.QApplication(sys.argv)
canvas = CLCanvas(dim)
window = CLWindow(canvas)

context = canvas.context
queue = cl.CommandQueue(context)

dImg = Buffer2D(context, cm.READ_WRITE | cm.COPY_HOST_PTR, hostbuf=hImg)