コード例 #1
0
    def render_planar_update(self, rx, ry, rw, rh, x_scale=1, y_scale=1):
        log("%s.render_planar_update%s pixel_format=%s", self, (rx, ry, rw, rh, x_scale, y_scale), self.pixel_format)
        if self.pixel_format not in ("YUV420P", "YUV422P", "YUV444P", "GBRP"):
            #not ready to render yet
            return
        if self.pixel_format == "GBRP":
            self.set_rgbP_paint_state()
        self.gl_marker("painting planar update, format %s", self.pixel_format)
        divs = get_subsampling_divs(self.pixel_format)
        glEnable(GL_FRAGMENT_PROGRAM_ARB)
        for texture, index in ((GL_TEXTURE0, 0), (GL_TEXTURE1, 1), (GL_TEXTURE2, 2)):
            glActiveTexture(texture)
            glBindTexture(GL_TEXTURE_RECTANGLE_ARB, self.textures[index])

        tw, th = self.texture_size
        log("%s.render_planar_update(..) texture_size=%s, size=%s", self, self.texture_size, self.size)
        glBegin(GL_QUADS)
        for x,y in ((0, 0), (0, rh), (rw, rh), (rw, 0)):
            ax = min(tw, x)
            ay = min(th, y)
            for texture, index in ((GL_TEXTURE0, 0), (GL_TEXTURE1, 1), (GL_TEXTURE2, 2)):
                (div_w, div_h) = divs[index]
                glMultiTexCoord2i(texture, ax//div_w, ay//div_h)
            glVertex2i(int(rx+ax*x_scale), int(ry+ay*y_scale))
        glEnd()
        if self.pixel_format == "GBRP":
            self.unset_rgbP_paint_state()
コード例 #2
0
def make_test_image(pixel_format, w, h):
    from xpra.codecs.image_wrapper import ImageWrapper
    from xpra.codecs.codec_constants import get_subsampling_divs
    #import time
    #start = monotonic_time()
    if pixel_format.startswith("YUV") or pixel_format=="GBRP":
        divs = get_subsampling_divs(pixel_format)
        ydiv = divs[0]  #always (1, 1)
        y = makebuf(w//ydiv[0]*h//ydiv[1])
        udiv = divs[1]
        u = makebuf(w//udiv[0]*h//udiv[1])
        vdiv = divs[2]
        v = makebuf(w//vdiv[0]*h//vdiv[1])
        image = ImageWrapper(0, 0, w, h, [y, u, v], pixel_format, 32, [w//ydiv[0], w//udiv[0], w//vdiv[0]], planes=ImageWrapper._3_PLANES, thread_safe=True)
        #l = len(y)+len(u)+len(v)
    elif pixel_format in ("RGB", "BGR", "RGBX", "BGRX", "XRGB", "BGRA", "RGBA", "r210"):
        stride = w*len(pixel_format)
        rgb_data = makebuf(stride*h)
        image = ImageWrapper(0, 0, w, h, rgb_data, pixel_format, 32, stride, planes=ImageWrapper.PACKED, thread_safe=True)
        #l = len(rgb_data)
    else:
        raise Exception("don't know how to create a %s image" % pixel_format)
    #log("make_test_image%30s took %3ims for %6iMBytes",
    #    (pixel_format, w, h), 1000*(monotonic_time()-start), l//1024//1024)
    return image
コード例 #3
0
    def convert_image_yuv(self, image):
        start = time.time()
        iplanes = image.get_planes()
        width = image.get_width()
        height = image.get_height()
        strides = image.get_rowstride()
        pixels = image.get_pixels()
        assert iplanes==ImageWrapper._3_PLANES, "we only handle planar data as input!"
        assert image.get_pixel_format()==self.src_format, "invalid source format: %s (expected %s)" % (image.get_pixel_format(), self.src_format)
        assert len(strides)==len(pixels)==3, "invalid number of planes or strides (should be 3)"
        assert width>=self.src_width and height>=self.src_height, "expected source image with dimensions of at least %sx%s but got %sx%s" % (self.src_width, self.src_height, width, height)

        #adjust work dimensions for subsampling:
        #(we process N pixels at a time in each dimension)
        divs = get_subsampling_divs(self.src_format)
        wwidth = dimdiv(self.dst_width, max(x_div for x_div, _ in divs))
        wheight = dimdiv(self.dst_height, max(y_div for _, y_div in divs))
        globalWorkSize, localWorkSize  = self.get_work_sizes(wwidth, wheight)

        kernelargs = [self.queue, globalWorkSize, localWorkSize]

        iformat = pyopencl.ImageFormat(pyopencl.channel_order.R, pyopencl.channel_type.UNSIGNED_INT8)
        input_images = []
        for i in range(3):
            _, y_div = divs[i]
            plane = pixels[i]
            if type(plane)==str:
                flags = mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR
            else:
                flags = mem_flags.READ_ONLY | mem_flags.USE_HOST_PTR
            shape = strides[i], self.src_height/y_div
            iimage = pyopencl.Image(self.context, flags, iformat, shape=shape, hostbuf=plane)
            input_images.append(iimage)

        #output image:
        oformat = pyopencl.ImageFormat(self.channel_order, pyopencl.channel_type.UNORM_INT8)
        oimage = pyopencl.Image(self.context, mem_flags.WRITE_ONLY, oformat, shape=(self.dst_width, self.dst_height))

        kernelargs += input_images + [numpy.int32(self.src_width), numpy.int32(self.src_height),
                       numpy.int32(self.dst_width), numpy.int32(self.dst_height),
                       self.sampler, oimage]

        kstart = time.time()
        log("convert_image(%s) calling %s%s after upload took %.1fms",
              image, self.kernel_function_name, tuple(kernelargs), 1000.0*(kstart-start))
        self.kernel_function(*kernelargs)
        self.queue.finish()
        #free input images:
        for iimage in input_images:
            iimage.release()
        kend = time.time()
        log("%s took %.1fms", self.kernel_function, 1000.0*(kend-kstart))

        out_array = numpy.empty(self.dst_width*self.dst_height*4, dtype=numpy.byte)
        pyopencl.enqueue_read_image(self.queue, oimage, (0, 0), (self.dst_width, self.dst_height), out_array)
        self.queue.finish()
        log("readback using %s took %.1fms", CHANNEL_ORDER_TO_STR.get(self.channel_order), 1000.0*(time.time()-kend))
        self.time += time.time()-start
        self.frames += 1
        return ImageWrapper(0, 0, self.dst_width, self.dst_height, out_array.data, self.dst_format, 24, self.dst_width*4, planes=ImageWrapper.PACKED)
コード例 #4
0
    def render_planar_update(self, rx, ry, rw, rh, x_scale=1, y_scale=1):
        log("%s.render_planar_update%s pixel_format=%s", self, (rx, ry, rw, rh, x_scale, y_scale), self.pixel_format)
        if self.pixel_format not in ("YUV420P", "YUV422P", "YUV444P", "GBRP"):
            #not ready to render yet
            return
        if self.pixel_format == "GBRP":
            self.set_rgbP_paint_state()
        self.gl_marker("painting planar update, format %s", self.pixel_format)
        divs = get_subsampling_divs(self.pixel_format)
        glEnable(GL_FRAGMENT_PROGRAM_ARB)
        for texture, index in ((GL_TEXTURE0, 0), (GL_TEXTURE1, 1), (GL_TEXTURE2, 2)):
            glActiveTexture(texture)
            glBindTexture(GL_TEXTURE_RECTANGLE_ARB, self.textures[index])

        tw, th = self.texture_size
        log("%s.render_planar_update(..) texture_size=%s, size=%s", self, self.texture_size, self.size)
        glBegin(GL_QUADS)
        for x,y in ((0, 0), (0, rh), (rw, rh), (rw, 0)):
            ax = min(tw, x)
            ay = min(th, y)
            for texture, index in ((GL_TEXTURE0, 0), (GL_TEXTURE1, 1), (GL_TEXTURE2, 2)):
                (div_w, div_h) = divs[index]
                glMultiTexCoord2i(texture, ax//div_w, ay//div_h)
            glVertex2i(int(rx+ax*x_scale), int(ry+ay*y_scale))
        glEnd()
        if self.pixel_format == "GBRP":
            self.unset_rgbP_paint_state()
コード例 #5
0
ファイル: video_scoring.py プロジェクト: ilyapoz/Xpra
def get_quality_score(csc_format, csc_spec, encoder_spec, scaling, target_quality=100, min_quality=0):
    quality = encoder_spec.quality
    if csc_format in ("YUV420P", "YUV422P", "YUV444P"):
        #account for subsampling: reduces quality
        y,u,v = get_subsampling_divs(csc_format)
        div = 0.5   #any colourspace convertion will lose at least some quality (due to rounding)
        for div_x, div_y in (y, u, v):
            div += (div_x+div_y)/2.0/3.0
        quality /= div

    if csc_spec:
        #csc_spec.quality is the upper limit (up to 100):
        quality += csc_spec.quality
        quality /= 2.0

    if scaling==(1, 1) and csc_format not in ("YUV420P", "YUV422P") and target_quality==100 and encoder_spec.has_lossless_mode:
        #we want lossless!
        qscore = quality + 80
    else:
        #how far are we from the current quality heuristics?
        qscore = 100-abs(target_quality - quality)
        if min_quality>=quality:
            #if this encoder's quality is lower than the min_quality
            #then it isn't very suitable, discount its score:
            mqs = (min_quality - quality) // 2
            qscore = max(0, qscore - mqs)
        #when downscaling, YUV420P should always win:
        if csc_format=="YUV420P" and scaling!=(1, 1):
            qscore *= 2.0
    return int(qscore)
コード例 #6
0
    def get_quality_score(self, csc_format, csc_spec, encoder_spec):
        quality = encoder_spec.quality
        if csc_format and csc_format in ("YUV420P", "YUV422P", "YUV444P"):
            #account for subsampling (reduces quality):
            y,u,v = get_subsampling_divs(csc_format)
            div = 0.5   #any colourspace convertion will lose at least some quality (due to rounding)
            for div_x, div_y in (y, u, v):
                div += (div_x+div_y)/2.0/3.0
            quality = quality / div

        if csc_spec:
            #csc_spec.quality is the upper limit (up to 100):
            quality += csc_spec.quality
            quality /= 2.0

        #the lower the current quality
        #the more we need an HQ encoder/csc to improve things:
        qscore = max(0, (100.0-self.get_current_quality()) * quality/100.0)
        mq = self.get_min_quality()
        if mq>=0:
            #if the encoder quality is lower or close to min_quality
            #then it isn't very suitable:
            mqs = max(0, quality - mq)*100/max(1, 100-mq)
            qscore = (qscore + mqs)/2.0
        return qscore
コード例 #7
0
    def render_planar_update(self, rx, ry, rw, rh, x_scale=1, y_scale=1):
        log("%s.render_planar_update%s pixel_format=%s", self, (rx, ry, rw, rh, x_scale, y_scale), self.pixel_format)
        if self.pixel_format not in ("YUV420P", "YUV422P", "YUV444P", "GBRP"):
            #not ready to render yet
            return
        if self.pixel_format == "GBRP":
            # Set GL state for planar RGB: change fragment program
            glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, self.shaders[RGBP2RGB_SHADER])
        self.gl_marker("painting planar update, format %s", self.pixel_format)
        divs = get_subsampling_divs(self.pixel_format)
        glEnable(GL_FRAGMENT_PROGRAM_ARB)
        for texture, index in ((GL_TEXTURE0, TEX_Y), (GL_TEXTURE1, TEX_U), (GL_TEXTURE2, TEX_V)):
            glActiveTexture(texture)
            glBindTexture(GL_TEXTURE_RECTANGLE_ARB, self.textures[index])

        tw, th = self.texture_size
        log("%s.render_planar_update(..) texture_size=%s, size=%s", self, self.texture_size, self.size)
        glBegin(GL_QUADS)
        for x,y in ((0, 0), (0, rh), (rw, rh), (rw, 0)):
            ax = min(tw, x)
            ay = min(th, y)
            for texture, index in ((GL_TEXTURE0, TEX_Y), (GL_TEXTURE1, TEX_U), (GL_TEXTURE2, TEX_V)):
                (div_w, div_h) = divs[index]
                glMultiTexCoord2i(texture, ax//div_w, ay//div_h)
            glVertex2i(int(rx+ax*x_scale), int(ry+ay*y_scale))
        glEnd()
        for texture in (GL_TEXTURE0, GL_TEXTURE1, GL_TEXTURE2):
            glActiveTexture(texture)
            glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0)
        glDisable(GL_FRAGMENT_PROGRAM_ARB)
        if self.pixel_format == "GBRP":
            # Reset state to our default (YUV painting)
            glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, self.shaders[YUV2RGB_SHADER])
        glActiveTexture(GL_TEXTURE0)
コード例 #8
0
ファイル: test_codec.py プロジェクト: Brainiarc7/xpra
def make_planar_input(src_format, w, h, use_strings=False, populate=False, seed=0):
    assert src_format in ("YUV420P", "YUV422P", "YUV444P", "GBRP"), "invalid source format %s" % src_format
    start = time.time()
    Ydivs, Udivs, Vdivs = get_subsampling_divs(src_format)
    Yxd, Yyd = Ydivs
    Uxd, Uyd = Udivs
    Vxd, Vyd = Vdivs
    Ysize = w*h/Yxd/Yyd
    Usize = w*h/Uxd/Uyd
    Vsize = w*h/Vxd/Vyd
    def make_buffer(size):
        if populate:
            return bytearray(get_source_data(size))
        else:
            return bytearray(size)
    Ydata = make_buffer(Ysize)
    Udata = make_buffer(Usize)
    Vdata = make_buffer(Vsize)
    if use_strings:
        pixels = (str(Ydata), str(Udata), str(Vdata))
    else:
        pixels = (Ydata, Udata, Vdata)
    strides = (w/Yxd, w/Uxd, w/Vxd)
    end = time.time()
    if DEBUG:
        print("make_planar_input%s took %.1fms" % ((src_format, w, h, use_strings, populate), end-start))
    return strides, pixels
コード例 #9
0
ファイル: video_scoring.py プロジェクト: svn2github/Xpra
def get_speed_score(csc_format, csc_spec, encoder_spec, scaling, target_speed=100, min_speed=0):
    #score based on speed:
    speed = encoder_spec.speed
    if csc_spec:
        #when subsampling, add the speed gains to the video encoder
        #which now has less work to do:
        mult = 1.0
        if csc_format and csc_format in ("YUV420P", "YUV422P", "YUV444P"):
            #account for subsampling: increases encoding speed
            y,u,v = get_subsampling_divs(csc_format)
            mult = 0.0
            for div_x, div_y in (y, u, v):
                mult += (div_x+div_y)/2.0/3.0
        #average and add 0.25 for the extra cost of doing the csc step:
        speed = (encoder_spec.speed * mult + csc_spec.speed) / 2.25

    #the lower the current speed
    #the more we need a fast encoder/csc to cancel it out:
    sscore = max(0, (100.0-target_speed) * speed/100.0)
    if min_speed>=0:
        #if the encoder speed is lower or close to min_speed
        #then it isn't very suitable:
        mss = max(0, speed - min_speed)*100/max(1, 100-min_speed)
        sscore = (sscore + mss)/2.0
    #then always favour fast encoders:
    sscore += speed
    sscore /= 2
    return int(sscore)
コード例 #10
0
ファイル: video_scoring.py プロジェクト: svn2github/Xpra
def get_quality_score(csc_format, csc_spec, encoder_spec, scaling, target_quality=100, min_quality=0):
    quality = encoder_spec.quality
    if csc_format in ("YUV420P", "YUV422P", "YUV444P"):
        #account for subsampling: reduces quality
        y,u,v = get_subsampling_divs(csc_format)
        div = 0.5   #any colourspace convertion will lose at least some quality (due to rounding)
        for div_x, div_y in (y, u, v):
            div += (div_x+div_y)/2.0/3.0
        quality /= div

    if csc_spec:
        #csc_spec.quality is the upper limit (up to 100):
        quality += csc_spec.quality
        quality /= 2.0

    if scaling==(1, 1) and csc_format not in ("YUV420P", "YUV422P") and target_quality==100 and encoder_spec.has_lossless_mode:
        #we want lossless!
        qscore = quality + 80
    else:
        #how far are we from the current quality heuristics?
        qscore = 100-abs(target_quality - quality)
        if min_quality>=quality:
            #if this encoder's quality is lower than the min_quality
            #then it isn't very suitable, discount its score:
            mqs = (min_quality - quality) // 2
            qscore = max(0, qscore - mqs)
        #when downscaling, YUV420P should always win:
        if csc_format=="YUV420P" and scaling!=(1, 1):
            qscore *= 2.0
    return int(qscore)
コード例 #11
0
def get_speed_score(csc_format,
                    csc_spec,
                    encoder_spec,
                    scaling,
                    target_speed=100,
                    min_speed=0):
    #score based on speed:
    speed = encoder_spec.speed
    if csc_spec:
        #when subsampling, add the speed gains to the video encoder
        #which now has less work to do:
        mult = 1.0
        if csc_format and csc_format in ("YUV420P", "YUV422P", "YUV444P"):
            #account for subsampling: increases encoding speed
            y, u, v = get_subsampling_divs(csc_format)
            mult = 0.0
            for div_x, div_y in (y, u, v):
                mult += (div_x + div_y) / 2.0 / 3.0
        #average and add 0.25 for the extra cost of doing the csc step:
        speed = (encoder_spec.speed * mult + csc_spec.speed) / 2.25

    #the lower the current speed
    #the more we need a fast encoder/csc to cancel it out:
    sscore = max(0, (100.0 - target_speed) * speed / 100.0)
    if min_speed >= 0:
        #if the encoder speed is lower or close to min_speed
        #then it isn't very suitable:
        mss = max(0, speed - min_speed) * 100 / max(1, 100 - min_speed)
        sscore = (sscore + mss) / 2.0
    #then always favour fast encoders:
    sscore += speed
    sscore /= 2
    return int(sscore)
コード例 #12
0
def make_planar_input(src_format, w, h, use_strings=False, populate=False, seed=0):
    assert src_format in ("YUV420P", "YUV422P", "YUV444P", "GBRP"), "invalid source format %s" % src_format
    start = time.time()
    Ydivs, Udivs, Vdivs = get_subsampling_divs(src_format)
    Yxd, Yyd = Ydivs
    Uxd, Uyd = Udivs
    Vxd, Vyd = Vdivs
    Ysize = w*h//Yxd//Yyd
    Usize = w*h//Uxd//Uyd
    Vsize = w*h//Vxd//Vyd
    def make_buffer(size):
        if populate:
            return bytearray(get_source_data(size, seed))
        else:
            return bytearray(size)
    Ydata = make_buffer(Ysize)
    Udata = make_buffer(Usize)
    Vdata = make_buffer(Vsize)
    if use_strings:
        pixels = (str(Ydata), str(Udata), str(Vdata))
    else:
        pixels = (Ydata, Udata, Vdata)
    strides = (w//Yxd, w//Uxd, w//Vxd)
    end = time.time()
    if DEBUG:
        print("make_planar_input%s took %.1fms" % ((src_format, w, h, use_strings, populate), end-start))
    return strides, pixels
コード例 #13
0
    def get_quality_score(self, csc_format, csc_spec, encoder_spec):
        quality = encoder_spec.quality
        if csc_format and csc_format in ("YUV420P", "YUV422P", "YUV444P"):
            #account for subsampling (reduces quality):
            y, u, v = get_subsampling_divs(csc_format)
            div = 0.5  #any colourspace convertion will lose at least some quality (due to rounding)
            for div_x, div_y in (y, u, v):
                div += (div_x + div_y) / 2.0 / 3.0
            quality = quality / div

        if csc_spec:
            #csc_spec.quality is the upper limit (up to 100):
            quality += csc_spec.quality
            quality /= 2.0

        #the lower the current quality
        #the more we need an HQ encoder/csc to improve things:
        qscore = max(0, (100.0 - self.get_current_quality()) * quality / 100.0)
        mq = self.get_min_quality()
        if mq >= 0:
            #if the encoder quality is lower or close to min_quality
            #then it isn't very suitable:
            mqs = max(0, quality - mq) * 100 / max(1, 100 - mq)
            qscore = (qscore + mqs) / 2.0
        return qscore
コード例 #14
0
ファイル: codec_checks.py プロジェクト: chewi/xpra
def make_test_image(pixel_format, w, h):
    from xpra.codecs.image_wrapper import ImageWrapper
    from xpra.codecs.codec_constants import get_subsampling_divs
    #import time
    #start = monotonic()
    if pixel_format.startswith("YUV") or pixel_format.startswith(
            "GBRP") or pixel_format == "NV12":
        divs = get_subsampling_divs(pixel_format)
        try:
            depth = int(pixel_format.split("P")[1])  #ie: YUV444P10 -> 10
        except (IndexError, ValueError):
            depth = 8
        Bpp = roundup(depth, 8) // 8
        nplanes = len(divs)
        ydiv = divs[0]  #always (1, 1)
        y = makebuf(w // ydiv[0] * h // ydiv[1] * Bpp)
        udiv = divs[1]
        u = makebuf(w // udiv[0] * h // udiv[1] * Bpp)
        planes = [y, u]
        strides = [w // ydiv[0] * Bpp, w // udiv[0] * Bpp]
        if nplanes == 3:
            vdiv = divs[2]
            v = makebuf(w // vdiv[0] * h // vdiv[1] * Bpp)
            planes.append(v)
            strides.append(w // vdiv[0] * Bpp)
        image = ImageWrapper(0,
                             0,
                             w,
                             h,
                             planes,
                             pixel_format,
                             32,
                             strides,
                             planes=nplanes,
                             thread_safe=True)
        #l = len(y)+len(u)+len(v)
    elif pixel_format in ("RGB", "BGR", "RGBX", "BGRX", "XRGB", "BGRA", "RGBA",
                          "r210", "BGR48"):
        if pixel_format == "BGR48":
            stride = w * 6
        else:
            stride = w * len(pixel_format)
        rgb_data = makebuf(stride * h)
        image = ImageWrapper(0,
                             0,
                             w,
                             h,
                             rgb_data,
                             pixel_format,
                             32,
                             stride,
                             planes=ImageWrapper.PACKED,
                             thread_safe=True)
        #l = len(rgb_data)
    else:
        raise Exception("don't know how to create a %s image" % pixel_format)
    #log("make_test_image%30s took %3ims for %6iMBytes",
    #    (pixel_format, w, h), 1000*(monotonic()-start), l//1024//1024)
    return image
コード例 #15
0
    def update_planar_textures(self, x, y, width, height, img, pixel_format, scaling=False):
        assert self.textures is not None, "no OpenGL textures!"
        log("%s.update_planar_textures%s", self, (x, y, width, height, img, pixel_format))

        divs = get_subsampling_divs(pixel_format)
        if self.pixel_format is None or self.pixel_format!=pixel_format or self.texture_size!=(width, height):
            self.pixel_format = pixel_format
            self.texture_size = (width, height)
            self.gl_marker("Creating new planar textures, pixel format %s", pixel_format)
            # Create textures of the same size as the window's
            empty_buf = b"\0"*(width*height)
            pixel_data = self.pixels_for_upload(empty_buf)[1]

            for texture, index in ((GL_TEXTURE0, TEX_Y), (GL_TEXTURE1, TEX_U), (GL_TEXTURE2, TEX_V)):
                (div_w, div_h) = divs[index]
                glActiveTexture(texture)
                target = GL_TEXTURE_RECTANGLE_ARB
                glBindTexture(target, self.textures[index])
                mag_filter = GL_NEAREST
                if scaling or (div_w > 1 or div_h > 1):
                    mag_filter = GL_LINEAR
                glTexParameteri(target, GL_TEXTURE_MAG_FILTER, mag_filter)
                glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_NEAREST)
                set_texture_level()
                glTexImage2D(target, 0, GL_LUMINANCE, width//div_w, height//div_h, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, pixel_data)
                #glBindTexture(target, 0)        #redundant: we rebind below:

        self.gl_marker("updating planar textures: %sx%s %s", width, height, pixel_format)
        rowstrides = img.get_rowstride()
        img_data = img.get_pixels()
        assert len(rowstrides)==3 and len(img_data)==3
        for texture, index, tex_name in (
            (GL_TEXTURE0, TEX_Y, "Y"),
            (GL_TEXTURE1, TEX_U, "U"),
            (GL_TEXTURE2, TEX_V, "V"),
            ):
            div_w, div_h = divs[index]
            w = width//div_w
            h = height//div_h
            if w==0 or h==0:
                log.error("Error: zero dimension %ix%i for %s planar texture %s", w, h, pixel_format, tex_name)
                log.error(" screen update %s dropped", (x, y, width, height))
                continue
            glActiveTexture(texture)

            target = GL_TEXTURE_RECTANGLE_ARB
            glBindTexture(target, self.textures[index])
            self.set_alignment(w, rowstrides[index], tex_name)
            upload, pixel_data = self.pixels_for_upload(img_data[index])
            log("texture %s: div=%s, rowstride=%s, %sx%s, data=%s bytes, upload=%s",
                index, divs[index], rowstrides[index], w, h, len(pixel_data), upload)
            glTexParameteri(target, GL_TEXTURE_BASE_LEVEL, 0)
            try:
                glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, 0)
            except:
                pass
            glTexSubImage2D(target, 0, 0, 0, w, h, GL_LUMINANCE, GL_UNSIGNED_BYTE, pixel_data)
            glBindTexture(target, 0)
コード例 #16
0
ファイル: gl_window_backing.py プロジェクト: rudresh2319/Xpra
    def update_planar_textures(self,
                               x,
                               y,
                               width,
                               height,
                               img,
                               pixel_format,
                               scaling=False):
        assert self.textures is not None, "no OpenGL textures!"
        log("%s.update_planar_textures%s", self,
            (x, y, width, height, img, pixel_format))

        divs = get_subsampling_divs(pixel_format)
        if self.pixel_format is None or self.pixel_format != pixel_format or self.texture_size != (
                width, height):
            self.pixel_format = pixel_format
            self.texture_size = (width, height)
            self.gl_marker("Creating new planar textures, pixel format %s" %
                           pixel_format)
            # Create textures of the same size as the window's
            glEnable(GL_TEXTURE_RECTANGLE_ARB)

            for texture, index in ((GL_TEXTURE0, 0), (GL_TEXTURE1, 1),
                                   (GL_TEXTURE2, 2)):
                (div_w, div_h) = divs[index]
                glActiveTexture(texture)
                glBindTexture(GL_TEXTURE_RECTANGLE_ARB, self.textures[index])
                glEnable(GL_TEXTURE_RECTANGLE_ARB)
                mag_filter = GL_NEAREST
                if scaling or (div_w > 1 or div_h > 1):
                    mag_filter = GL_LINEAR
                glTexParameteri(GL_TEXTURE_RECTANGLE_ARB,
                                GL_TEXTURE_MAG_FILTER, mag_filter)
                glTexParameteri(GL_TEXTURE_RECTANGLE_ARB,
                                GL_TEXTURE_MIN_FILTER, GL_NEAREST)
                glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0)
                glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_LUMINANCE,
                             width / div_w, height / div_h, 0, GL_LUMINANCE,
                             GL_UNSIGNED_BYTE, None)

        self.gl_marker("updating planar textures: %sx%s %s" %
                       (width, height, pixel_format))
        rowstrides = img.get_rowstride()
        img_data = img.get_pixels()
        assert len(rowstrides) == 3 and len(img_data) == 3
        for texture, index in ((GL_TEXTURE0, 0), (GL_TEXTURE1, 1),
                               (GL_TEXTURE2, 2)):
            (div_w, div_h) = divs[index]
            glActiveTexture(texture)
            glBindTexture(GL_TEXTURE_RECTANGLE_ARB, self.textures[index])
            glPixelStorei(GL_UNPACK_ROW_LENGTH, rowstrides[index])
            pixel_data = img_data[index]
            log("texture %s: div=%s, rowstride=%s, %sx%s, data=%s bytes",
                index, divs[index], rowstrides[index], width / div_w,
                height / div_h, len(pixel_data))
            glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 0, 0, width / div_w,
                            height / div_h, GL_LUMINANCE, GL_UNSIGNED_BYTE,
                            pixel_data)
コード例 #17
0
 def _test_YUV420P(self,
                   encoding,
                   encoder_module,
                   decoder_module,
                   yuvdata,
                   width=16,
                   height=16):
     in_csc = "YUV420P"
     if in_csc not in encoder_module.get_input_colorspaces(encoding):
         raise Exception("%s does not support %s as input" %
                         (encoder_module, in_csc))
     if in_csc != decoder_module.get_output_colorspace(encoding, in_csc):
         raise Exception("%s does not support %s as output for %s" %
                         (decoder_module, in_csc, in_csc))
     encoder = encoder_module.Encoder()
     options = typedict({"max-delayed": 0})
     encoder.init_context(encoding, width, height, in_csc, options)
     in_image = make_test_image(in_csc, width, height)
     yuv = []
     rowstrides = []
     divs = get_subsampling_divs(in_csc)
     for i, bvalue in enumerate(yuvdata):
         xdiv, ydiv = divs[i]
         rowstride = width // xdiv
         rowstrides.append(rowstride)
         size = rowstride * height // ydiv
         yuv.append(chr(bvalue).encode("latin1") * size)
     in_image.set_pixels(yuv)
     in_image.set_rowstride(rowstrides)
     cdata, client_options = encoder.compress_image(in_image)
     assert cdata
     #decode it:
     decoder = decoder_module.Decoder()
     decoder.init_context(encoding, width, height, in_csc)
     out_image = decoder.decompress_image(cdata, typedict(client_options))
     #print("%s %s : %s" % (encoding, decoder_module, out_image))
     in_planes = in_image.get_pixels()
     out_planes = out_image.get_pixels()
     for i, plane in enumerate(("Y", "U", "V")):
         in_pdata = in_planes[i]
         out_pdata = out_planes[i]
         xdiv, ydiv = divs[i]
         in_stride = in_image.get_rowstride()[i]
         out_stride = out_image.get_rowstride()[i]
         #compare lines at a time since the rowstride may be different:
         for y in range(height // ydiv):
             in_rowdata = in_pdata[in_stride * y:in_stride * y +
                                   width // xdiv]
             out_rowdata = out_pdata[out_stride * y:out_stride * y +
                                     width // xdiv]
             if not cmpp(in_rowdata, out_rowdata):
                 raise Exception(
                     "expected %s but got %s for row %i of plane %s with %s"
                     % (hexstr(in_rowdata), hexstr(out_rowdata), y, plane,
                        encoding))
コード例 #18
0
ファイル: gl_window_backing.py プロジェクト: svn2github/Xpra
    def update_planar_textures(self, x, y, width, height, img, pixel_format, scaling=False):
        assert x==0 and y==0
        assert self.textures is not None, "no OpenGL textures!"
        debug("%s.update_planar_textures%s", self, (x, y, width, height, img, pixel_format))

        divs = get_subsampling_divs(pixel_format)
        if self.pixel_format is None or self.pixel_format!=pixel_format or self.texture_size!=(width, height):
            self.pixel_format = pixel_format
            self.texture_size = (width, height)
            self.gl_marker("Creating new planar textures, pixel format %s" % pixel_format)
            # Create textures of the same size as the window's
            glEnable(GL_TEXTURE_RECTANGLE_ARB)

            for texture, index in ((GL_TEXTURE0, 0), (GL_TEXTURE1, 1), (GL_TEXTURE2, 2)):
                (div_w, div_h) = divs[index]
                glActiveTexture(texture)
                glBindTexture(GL_TEXTURE_RECTANGLE_ARB, self.textures[index])
                glEnable(GL_TEXTURE_RECTANGLE_ARB)
                mag_filter = GL_NEAREST
                if scaling or (div_w > 1 or div_h > 1):
                    mag_filter = GL_LINEAR
                glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, mag_filter)
                glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST)
                glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0)
                glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_LUMINANCE, width/div_w, height/div_h, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, None)


        self.gl_marker("updating planar textures: %sx%s %s" % (width, height, pixel_format))
        U_width = 0
        U_height = 0
        rowstrides = img.get_rowstride()
        img_data = img.get_pixels()
        assert len(rowstrides)==3
        assert len(img_data)==3
        for texture, index in ((GL_TEXTURE0, 0), (GL_TEXTURE1, 1), (GL_TEXTURE2, 2)):
            (div_w, div_h) = divs[index]
            glActiveTexture(texture)
            glBindTexture(GL_TEXTURE_RECTANGLE_ARB, self.textures[index])
            glPixelStorei(GL_UNPACK_ROW_LENGTH, rowstrides[index])
            pixel_data = img_data[index][:]
            debug("texture %s: div=%s, rowstride=%s, %sx%s, data=%s bytes", index, divs[index], rowstrides[index], width/div_w, height/div_h, len(pixel_data))
            glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, x, y, width/div_w, height/div_h, GL_LUMINANCE, GL_UNSIGNED_BYTE, pixel_data)
            if index == 1:
                U_width = width/div_w
                U_height = height/div_h
            elif index == 2:
                if width/div_w != U_width:
                    log.error("Width of V plane is %d, differs from width of corresponding U plane (%d), pixel_format is %d", width/div_w, U_width, pixel_format)
                if height/div_h != U_height:
                    log.error("Height of V plane is %d, differs from height of corresponding U plane (%d), pixel_format is %d", height/div_h, U_height, pixel_format)
コード例 #19
0
    def update_planar_textures(self, x, y, width, height, img, pixel_format, scaling=False):
        assert self.textures is not None, "no OpenGL textures!"
        log("%s.update_planar_textures%s", self, (x, y, width, height, img, pixel_format))

        divs = get_subsampling_divs(pixel_format)
        if self.pixel_format is None or self.pixel_format!=pixel_format or self.texture_size!=(width, height):
            self.pixel_format = pixel_format
            self.texture_size = (width, height)
            self.gl_marker("Creating new planar textures, pixel format %s", pixel_format)
            # Create textures of the same size as the window's
            glEnable(GL_TEXTURE_RECTANGLE_ARB)

            for texture, index in ((GL_TEXTURE0, 0), (GL_TEXTURE1, 1), (GL_TEXTURE2, 2)):
                (div_w, div_h) = divs[index]
                glActiveTexture(texture)
                glBindTexture(GL_TEXTURE_RECTANGLE_ARB, self.textures[index])
                glEnable(GL_TEXTURE_RECTANGLE_ARB)
                mag_filter = GL_NEAREST
                if scaling or (div_w > 1 or div_h > 1):
                    mag_filter = GL_LINEAR
                glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, mag_filter)
                glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST)
                set_texture_level()
                glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_LUMINANCE, width//div_w, height//div_h, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, None)

        self.gl_marker("updating planar textures: %sx%s %s", width, height, pixel_format)
        rowstrides = img.get_rowstride()
        img_data = img.get_pixels()
        assert len(rowstrides)==3 and len(img_data)==3
        for texture, index in ((GL_TEXTURE0, 0), (GL_TEXTURE1, 1), (GL_TEXTURE2, 2)):
            (div_w, div_h) = divs[index]
            glActiveTexture(texture)
            glBindTexture(GL_TEXTURE_RECTANGLE_ARB, self.textures[index])
            glPixelStorei(GL_UNPACK_ROW_LENGTH, rowstrides[index])
            upload, pixel_data = self.pixels_for_upload(img_data[index])
            log("texture %s: div=%s, rowstride=%s, %sx%s, data=%s bytes, upload=%s", index, divs[index], rowstrides[index], width//div_w, height//div_h, len(pixel_data), upload)
            glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_BASE_LEVEL, 0)
            try:
                glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAX_LEVEL, 0)
            except:
                pass
            glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 0, 0, width//div_w, height//div_h, GL_LUMINANCE, GL_UNSIGNED_BYTE, pixel_data)
コード例 #20
0
ファイル: codec_checks.py プロジェクト: svn2github/Xpra
def make_test_image(pixel_format, w, h):
    from xpra.codecs.image_wrapper import ImageWrapper
    from xpra.codecs.codec_constants import get_subsampling_divs
    #import time
    #start = time.time()
    if pixel_format.startswith("YUV") or pixel_format=="GBRP":
        divs = get_subsampling_divs(pixel_format)
        ydiv = divs[0]  #always (1,1)
        y = makebuf(w//ydiv[0]*h//ydiv[1])
        udiv = divs[1]
        u = makebuf(w//udiv[0]*h//udiv[1])
        vdiv = divs[2]
        v = makebuf(w//vdiv[0]*h//vdiv[1])
        image = ImageWrapper(0, 0, w, h, [y, u, v], pixel_format, 32, [w//ydiv[0], w//udiv[0], w//vdiv[0]], planes=ImageWrapper._3_PLANES, thread_safe=True)
        #l = len(y)+len(u)+len(v)
    elif pixel_format in ("RGB", "BGR", "RGBX", "BGRX", "XRGB", "BGRA", "RGBA", "r210"):
        stride = w*len(pixel_format)
        rgb_data = makebuf(stride*h)
        image = ImageWrapper(0, 0, w, h, rgb_data, pixel_format, 32, stride, planes=ImageWrapper.PACKED, thread_safe=True)
        #l = len(rgb_data)
    else:
        raise Exception("don't know how to create a %s image" % pixel_format)
    #log("make_test_image%30s took %3ims for %6iMBytes", (pixel_format, w, h), 1000*(time.time()-start), l//1024//1024)
    return image
コード例 #21
0
    def get_score(self, csc_format, csc_spec, encoder_spec, width, height):
        """
            Given an optional csc step (csc_format and csc_spec), and
            and a required encoding step (encoder_spec and width/height),
            we calculate a score of how well this matches our requirements:
            * our quality target (as per get_currend_quality)
            * our speed target (as per get_current_speed)
            * how expensive it would be to switch to this pipeline option
            Note: we know the current pipeline settings, so the "switching
            cost" will be lower for pipelines that share components with the
            current one.
        """
        # first discard if we cannot handle this size:
        if csc_spec and not csc_spec.can_handle(width, height):
            return -1
        if not encoder_spec.can_handle(width, height):
            return -1
        # debug("get_score%s", (csc_format, csc_spec, encoder_spec,
        #          width, height, min_quality, target_quality, min_speed, target_speed))
        def clamp(v):
            return max(0, min(100, v))

        # evaluate output quality:
        quality = clamp(encoder_spec.quality)
        if csc_format and csc_format in ("YUV420P", "YUV422P", "YUV444P"):
            # account for subsampling (reduces quality):
            y, u, v = get_subsampling_divs(csc_format)
            div = 0.5  # any colourspace convertion will lose at least some quality (due to rounding)
            for div_x, div_y in (y, u, v):
                div += (div_x + div_y) / 2.0 / 3.0
            quality = quality / div
        if csc_spec and csc_spec.quality < 100:
            # csc_spec.quality is the upper limit (up to 100):
            quality *= csc_spec.quality / 100.0
        # score based on how far we are:
        if quality < self.get_min_quality():
            qscore = 0
        else:
            qscore = 100 - abs(quality - self.get_current_quality())

        # score based on speed:
        speed = clamp(encoder_spec.speed)
        if csc_spec:
            speed *= csc_spec.speed / 100.0
        if speed < self.get_min_speed():
            sscore = 0
        else:
            sscore = 100 - abs(speed - self.get_current_speed())

        # score for "edge resistance" via setup cost:
        ecsc_score = 100
        if csc_spec:
            # OR the masks so we have a chance of making it work
            width_mask = csc_spec.width_mask & encoder_spec.width_mask
            height_mask = csc_spec.height_mask & encoder_spec.height_mask
            csc_width = width & width_mask
            csc_height = height & height_mask
            if (
                self._csc_encoder is None
                or self._csc_encoder.get_dst_format() != csc_format
                or type(self._csc_encoder) != csc_spec.codec_class
                or self._csc_encoder.get_src_width() != csc_width
                or self._csc_encoder.get_src_height() != csc_height
            ):
                # if we have to change csc, account for new csc setup cost:
                ecsc_score = max(0, 80 - csc_spec.setup_cost * 80.0 / 100.0)
            else:
                ecsc_score = 80
            enc_width, enc_height = self.get_encoder_dimensions(csc_spec, encoder_spec, csc_width, csc_height)
        else:
            # not using csc at all!
            ecsc_score = 100
            width_mask = encoder_spec.width_mask
            height_mask = encoder_spec.height_mask
            enc_width = width & width_mask
            enc_height = height & height_mask
        ee_score = 100
        if (
            self._video_encoder is None
            or type(self._video_encoder) != encoder_spec.codec_class
            or self._video_encoder.get_src_format() != csc_format
            or self._video_encoder.get_width() != enc_width
            or self._video_encoder.get_height() != enc_height
        ):
            # account for new encoder setup cost:
            ee_score = 100 - encoder_spec.setup_cost
        # edge resistance score: average of csc and encoder score:
        er_score = (ecsc_score + ee_score) / 2.0
        debug(
            "get_score%s %s/%s/%s",
            (csc_format, csc_spec, encoder_spec, width, height),
            int(qscore),
            int(sscore),
            int(er_score),
        )
        return int((qscore + sscore + er_score) / 3.0)
コード例 #22
0
    def convert_image_rgb(self, image):
        global program
        start = time.time()
        iplanes = image.get_planes()
        w = image.get_width()
        h = image.get_height()
        stride = image.get_rowstride()
        pixels = image.get_pixels()
        debug("convert_image(%s) planes=%s, pixels=%s, size=%s", image, iplanes, type(pixels), len(pixels))
        assert iplanes==ImageWrapper.PACKED, "must use packed format as input"
        assert image.get_pixel_format()==self.src_format, "invalid source format: %s (expected %s)" % (image.get_pixel_format(), self.src_format)
        divs = get_subsampling_divs(self.dst_format)

        #copy packed rgb pixels to GPU:
        upload_start = time.time()
        stream = driver.Stream()
        mem = numpy.frombuffer(pixels, dtype=numpy.byte)
        in_buf = driver.mem_alloc(len(pixels))
        hmem = driver.register_host_memory(mem, driver.mem_host_register_flags.DEVICEMAP)
        pycuda.driver.memcpy_htod_async(in_buf, mem, stream)

        out_bufs = []
        out_strides = []
        out_sizes = []
        for i in range(3):
            x_div, y_div = divs[i]
            out_stride = roundup(self.dst_width/x_div, 4)
            out_height = roundup(self.dst_height/y_div, 2)
            out_buf, out_stride = driver.mem_alloc_pitch(out_stride, out_height, 4)
            out_bufs.append(out_buf)
            out_strides.append(out_stride)
            out_sizes.append((out_stride, out_height))
        #ensure uploading has finished:
        stream.synchronize()
        #we can now unpin the host memory:
        hmem.base.unregister()
        debug("allocation and upload took %.1fms", 1000.0*(time.time() - upload_start))

        kstart = time.time()
        kargs = [in_buf, numpy.int32(stride)]
        for i in range(3):
            kargs.append(out_bufs[i])
            kargs.append(numpy.int32(out_strides[i]))
        blockw, blockh = 16, 16
        #figure out how many pixels we process at a time in each dimension:
        xdiv = max([x[0] for x in divs])
        ydiv = max([x[1] for x in divs])
        gridw = max(1, w/blockw/xdiv)
        if gridw*2*blockw<w:
            gridw += 1
        gridh = max(1, h/blockh/ydiv)
        if gridh*2*blockh<h:
            gridh += 1
        debug("calling %s%s, with grid=%s, block=%s", self.kernel_function_name, tuple(kargs), (gridw, gridh), (blockw, blockh, 1))
        self.kernel_function(*kargs, block=(blockw,blockh,1), grid=(gridw, gridh))

        #we can now free the GPU source buffer:
        in_buf.free()
        kend = time.time()
        debug("%s took %.1fms", self.kernel_function_name, (kend-kstart)*1000.0)
        self.frames += 1

        #copy output YUV channel data to host memory:
        read_start = time.time()
        pixels = []
        strides = []
        for i in range(3):
            x_div, y_div = divs[i]
            out_size = out_sizes[i]
            #direct full plane async copy keeping current GPU padding:
            plane = driver.aligned_empty(out_size, dtype=numpy.byte)
            driver.memcpy_dtoh_async(plane, out_bufs[i], stream)
            pixels.append(plane.data)
            stride = out_strides[min(len(out_strides)-1, i)]
            strides.append(stride)
        stream.synchronize()
        #the copying has finished, we can now free the YUV GPU memory:
        #(the host memory will be freed by GC when 'pixels' goes out of scope)
        for out_buf in out_bufs:
            out_buf.free()
        self.cuda_context.synchronize()
        read_end = time.time()
        debug("strides=%s", strides)
        debug("read back took %.1fms, total time: %.1f", (read_end-read_start)*1000.0, 1000.0*(time.time()-start))
        return ImageWrapper(0, 0, self.dst_width, self.dst_height, pixels, self.dst_format, 24, strides, planes=ImageWrapper._3_PLANES)
コード例 #23
0
    def convert_image_rgb(self, image):
        start = time.time()
        iplanes = image.get_planes()
        width = image.get_width()
        height = image.get_height()
        stride = image.get_rowstride()
        pixels = image.get_pixels()
        #log("convert_image(%s) planes=%s, pixels=%s, size=%s", image, iplanes, type(pixels), len(pixels))
        assert iplanes==ImageWrapper.PACKED, "we only handle packed data as input!"
        assert image.get_pixel_format()==self.src_format, "invalid source format: %s (expected %s)" % (image.get_pixel_format(), self.src_format)
        assert width>=self.src_width and height>=self.src_height, "expected source image with dimensions of at least %sx%s but got %sx%s" % (self.src_width, self.src_height, width, height)

        #adjust work dimensions for subsampling:
        #(we process N pixels at a time in each dimension)
        divs = get_subsampling_divs(self.dst_format)
        wwidth = dimdiv(self.dst_width, max([x_div for x_div, _ in divs]))
        wheight = dimdiv(self.dst_height, max([y_div for _, y_div in divs]))
        globalWorkSize, localWorkSize  = self.get_work_sizes(wwidth, wheight)

        #input image:
        iformat = pyopencl.ImageFormat(self.channel_order, pyopencl.channel_type.UNSIGNED_INT8)
        shape = (stride/4, self.src_height)
        log("convert_image() input image format=%s, shape=%s, work size: local=%s, global=%s", iformat, shape, localWorkSize, globalWorkSize)
        if type(pixels)==str:
            #str is not a buffer, so we have to copy the data
            #alternatively, we could copy it first ourselves using this:
            #pixels = numpy.fromstring(pixels, dtype=numpy.byte).data
            #but I think this would be even slower
            flags = mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR
        else:
            flags = mem_flags.READ_ONLY | mem_flags.USE_HOST_PTR
        iimage = pyopencl.Image(self.context, flags, iformat, shape=shape, hostbuf=pixels)

        kernelargs = [self.queue, globalWorkSize, localWorkSize,
                      iimage, numpy.int32(self.src_width), numpy.int32(self.src_height),
                      numpy.int32(self.dst_width), numpy.int32(self.dst_height),
                      self.sampler]

        #calculate plane strides and allocate output buffers:
        strides = []
        out_buffers = []
        out_sizes = []
        for i in range(3):
            x_div, y_div = divs[i]
            p_stride = roundup(self.dst_width / x_div, max(2, localWorkSize[0]))
            p_height = roundup(self.dst_height / y_div, 2)
            p_size = p_stride * p_height
            #log("output buffer for channel %s: stride=%s, height=%s, size=%s", i, p_stride, p_height, p_size)
            out_buf = pyopencl.Buffer(self.context, mem_flags.WRITE_ONLY, p_size)
            out_buffers.append(out_buf)
            kernelargs += [out_buf, numpy.int32(p_stride)]
            strides.append(p_stride)
            out_sizes.append(p_size)

        kstart = time.time()
        log("convert_image(%s) calling %s%s after %.1fms", image, self.kernel_function_name, tuple(kernelargs), 1000.0*(kstart-start))
        self.kernel_function(*kernelargs)
        self.queue.finish()
        #free input image:
        iimage.release()
        kend = time.time()
        log("%s took %.1fms", self.kernel_function_name, 1000.0*(kend-kstart))

        #read back:
        pixels = []
        for i in range(3):
            out_array = numpy.empty(out_sizes[i], dtype=numpy.byte)
            pixels.append(out_array.data)
            pyopencl.enqueue_read_buffer(self.queue, out_buffers[i], out_array, is_blocking=False)
        readstart = time.time()
        log("queue read events took %.1fms (3 planes of size %s, with strides=%s)", 1000.0*(readstart-kend), out_sizes, strides)
        self.queue.finish()
        readend = time.time()
        log("wait for read events took %.1fms", 1000.0*(readend-readstart))
        #free output buffers:
        for out_buf in out_buffers:
            out_buf.release()
        return ImageWrapper(0, 0, self.dst_width, self.dst_height, pixels, self.dst_format, 24, strides, planes=ImageWrapper._3_PLANES)
コード例 #24
0
    def get_score(self, csc_format, csc_spec, encoder_spec, width, height):
        """
            Given an optional csc step (csc_format and csc_spec), and
            and a required encoding step (encoder_spec and width/height),
            we calculate a score of how well this matches our requirements:
            * our quality target (as per get_currend_quality)
            * our speed target (as per get_current_speed)
            * how expensive it would be to switch to this pipeline option
            Note: we know the current pipeline settings, so the "switching
            cost" will be lower for pipelines that share components with the
            current one.
        """
        #first discard if we cannot handle this size:
        if csc_spec and not csc_spec.can_handle(width, height):
            return -1
        if not encoder_spec.can_handle(width, height):
            return -1
        #debug("get_score%s", (csc_format, csc_spec, encoder_spec,
        #          width, height, min_quality, target_quality, min_speed, target_speed))
        def clamp(v):
            return max(0, min(100, v))

        #evaluate output quality:
        quality = clamp(encoder_spec.quality)
        if csc_format and csc_format in ("YUV420P", "YUV422P", "YUV444P"):
            #account for subsampling (reduces quality):
            y, u, v = get_subsampling_divs(csc_format)
            div = 0.5  #any colourspace convertion will lose at least some quality (due to rounding)
            for div_x, div_y in (y, u, v):
                div += (div_x + div_y) / 2.0 / 3.0
            quality = quality / div
        if csc_spec and csc_spec.quality < 100:
            #csc_spec.quality is the upper limit (up to 100):
            quality *= csc_spec.quality / 100.0
        #score based on how far we are:
        if quality < self.get_min_quality():
            qscore = 0
        else:
            qscore = 100 - abs(quality - self.get_current_quality())

        #score based on speed:
        speed = clamp(encoder_spec.speed)
        if csc_spec:
            speed *= csc_spec.speed / 100.0
        if speed < self.get_min_speed():
            sscore = 0
        else:
            sscore = 100 - abs(speed - self.get_current_speed())

        #score for "edge resistance" via setup cost:
        ecsc_score = 100
        if csc_spec:
            #OR the masks so we have a chance of making it work
            width_mask = csc_spec.width_mask & encoder_spec.width_mask
            height_mask = csc_spec.height_mask & encoder_spec.height_mask
            csc_width = width & width_mask
            csc_height = height & height_mask
            if self._csc_encoder is None or self._csc_encoder.get_dst_format()!=csc_format or \
               type(self._csc_encoder)!=csc_spec.codec_class or \
               self._csc_encoder.get_src_width()!=csc_width or self._csc_encoder.get_src_height()!=csc_height:
                #if we have to change csc, account for new csc setup cost:
                ecsc_score = max(0, 80 - csc_spec.setup_cost * 80.0 / 100.0)
            else:
                ecsc_score = 80
            enc_width, enc_height = self.get_encoder_dimensions(
                csc_spec, encoder_spec, csc_width, csc_height)
        else:
            #not using csc at all!
            ecsc_score = 100
            width_mask = encoder_spec.width_mask
            height_mask = encoder_spec.height_mask
            enc_width = width & width_mask
            enc_height = height & height_mask
        ee_score = 100
        if self._video_encoder is None or type(self._video_encoder)!=encoder_spec.codec_class or \
           self._video_encoder.get_src_format()!=csc_format or \
           self._video_encoder.get_width()!=enc_width or self._video_encoder.get_height()!=enc_height:
            #account for new encoder setup cost:
            ee_score = 100 - encoder_spec.setup_cost
        #edge resistance score: average of csc and encoder score:
        er_score = (ecsc_score + ee_score) / 2.0
        debug("get_score%s %s/%s/%s",
              (csc_format, csc_spec, encoder_spec, width, height), int(qscore),
              int(sscore), int(er_score))
        return int((qscore + sscore + er_score) / 3.0)
コード例 #25
0
    def convert_image_rgb(self, image):
        global program
        start = time.time()
        iplanes = image.get_planes()
        w = image.get_width()
        h = image.get_height()
        stride = image.get_rowstride()
        pixels = image.get_pixels()
        debug("convert_image(%s) planes=%s, pixels=%s, size=%s", image,
              iplanes, type(pixels), len(pixels))
        assert iplanes == ImageWrapper.PACKED, "must use packed format as input"
        assert image.get_pixel_format(
        ) == self.src_format, "invalid source format: %s (expected %s)" % (
            image.get_pixel_format(), self.src_format)
        divs = get_subsampling_divs(self.dst_format)

        #copy packed rgb pixels to GPU:
        upload_start = time.time()
        stream = driver.Stream()
        mem = numpy.frombuffer(pixels, dtype=numpy.byte)
        in_buf = driver.mem_alloc(len(pixels))
        hmem = driver.register_host_memory(
            mem, driver.mem_host_register_flags.DEVICEMAP)
        pycuda.driver.memcpy_htod_async(in_buf, mem, stream)

        out_bufs = []
        out_strides = []
        out_sizes = []
        for i in range(3):
            x_div, y_div = divs[i]
            out_stride = roundup(self.dst_width / x_div, 4)
            out_height = roundup(self.dst_height / y_div, 2)
            out_buf, out_stride = driver.mem_alloc_pitch(
                out_stride, out_height, 4)
            out_bufs.append(out_buf)
            out_strides.append(out_stride)
            out_sizes.append((out_stride, out_height))
        #ensure uploading has finished:
        stream.synchronize()
        #we can now unpin the host memory:
        hmem.base.unregister()
        debug("allocation and upload took %.1fms",
              1000.0 * (time.time() - upload_start))

        kstart = time.time()
        kargs = [in_buf, numpy.int32(stride)]
        for i in range(3):
            kargs.append(out_bufs[i])
            kargs.append(numpy.int32(out_strides[i]))
        blockw, blockh = 16, 16
        #figure out how many pixels we process at a time in each dimension:
        xdiv = max([x[0] for x in divs])
        ydiv = max([x[1] for x in divs])
        gridw = max(1, w / blockw / xdiv)
        if gridw * 2 * blockw < w:
            gridw += 1
        gridh = max(1, h / blockh / ydiv)
        if gridh * 2 * blockh < h:
            gridh += 1
        debug("calling %s%s, with grid=%s, block=%s",
              self.kernel_function_name, tuple(kargs), (gridw, gridh),
              (blockw, blockh, 1))
        self.kernel_function(*kargs,
                             block=(blockw, blockh, 1),
                             grid=(gridw, gridh))

        #we can now free the GPU source buffer:
        in_buf.free()
        kend = time.time()
        debug("%s took %.1fms", self.kernel_function_name,
              (kend - kstart) * 1000.0)
        self.frames += 1

        #copy output YUV channel data to host memory:
        read_start = time.time()
        pixels = []
        strides = []
        for i in range(3):
            x_div, y_div = divs[i]
            out_size = out_sizes[i]
            #direct full plane async copy keeping current GPU padding:
            plane = driver.aligned_empty(out_size, dtype=numpy.byte)
            driver.memcpy_dtoh_async(plane, out_bufs[i], stream)
            pixels.append(plane.data)
            stride = out_strides[min(len(out_strides) - 1, i)]
            strides.append(stride)
        stream.synchronize()
        #the copying has finished, we can now free the YUV GPU memory:
        #(the host memory will be freed by GC when 'pixels' goes out of scope)
        for out_buf in out_bufs:
            out_buf.free()
        self.cuda_context.synchronize()
        read_end = time.time()
        debug("strides=%s", strides)
        debug("read back took %.1fms, total time: %.1f",
              (read_end - read_start) * 1000.0, 1000.0 * (time.time() - start))
        return ImageWrapper(0,
                            0,
                            self.dst_width,
                            self.dst_height,
                            pixels,
                            self.dst_format,
                            24,
                            strides,
                            planes=ImageWrapper._3_PLANES)