Example #1
0
    def test_pow_ps(self):
        """Compare the assembled ``pow_ps`` program with ``math.pow``.

        Runs 1000 rounds. Each round writes four random bases in [0, 3) to
        ``v1`` and four random exponents in [0, 3) to ``v2``, runs the
        program, and expects every element of ``v1`` to match
        ``math.pow(base, exponent)`` to one decimal place.
        """
        machine_code = Tdasm().assemble(POW_CODE_PS)
        runtime = Runtime()
        load_math_func("fast_pow_ps", runtime)
        data = runtime.load("pow_ps", machine_code)

        for _ in range(1000):
            # Bases are drawn before exponents, one value per element.
            bases = tuple(random.random() * 3 for _ in range(4))
            exponents = tuple(random.random() * 3 for _ in range(4))
            data["v1"] = bases
            data["v2"] = exponents
            runtime.run("pow_ps")
            computed = data["v1"]
            expected = [math.pow(b, e) for b, e in zip(bases, exponents)]

            for lane in range(4):
                self.assertAlmostEqual(computed[lane], expected[lane], 1)
Example #2
0
 def _create_struct(self, shape):
     """Assemble the struct definition of ``shape`` and return the struct.

     The text returned by ``shape.asm_struct()`` is placed in the data
     section of a program with an empty code section; the struct named by
     ``shape.asm_struct_name()`` is then fetched from the assembled result.
     """
     struct_source = shape.asm_struct()
     program = " #DATA " + struct_source + "\n     #CODE\n     #END\n     "
     machine_code = Tdasm().assemble(program)
     return machine_code.get_struct(shape.asm_struct_name())
Example #3
0
    def prepare(self, runtimes):
        """Load this shader and everything it needs into ``runtimes``.

        Steps, in order: load the color functions, call the optional
        ``self.loader`` hook, prepare the nested shaders, load the external
        assembly functions listed in ``self._functions``, then load this
        shader's machine code into every runtime that does not already
        expose the global ``self._name``.

        Machine code is assembled at most once per ``self._name`` and kept
        in ``self._mc_cache``. ``self._ds`` is replaced only when at least
        one runtime was loaded during this call.
        """
        self._load_color_funcs(runtimes)

        if self.loader:
            self.loader(runtimes)

        for shader in self._shaders:
            shader.prepare(runtimes)

        self._runtimes = runtimes
        assembler = Tdasm()
        load_name = 'shader' + str(id(self))

        for fun_name, fun_label, avx, bit in self._functions:
            load_asm_function(fun_name, fun_label, runtimes, avx, bit)

        loaded = []
        for runtime in runtimes:
            if runtime.global_exists(self._name):
                continue
            # Assemble lazily: only when some runtime actually needs the code.
            if self._name not in self._mc_cache:
                self._mc_cache[self._name] = assembler.assemble(
                    self._code, self._func)
            loaded.append(runtime.load(load_name, self._mc_cache[self._name]))
        if loaded:
            self._ds = loaded
Example #4
0
    def test_pow_ps(self):
        """Check ``pow_ps`` on random four-element inputs against ``math.pow``.

        ``v1`` receives the bases and ``v2`` the exponents (all in [0, 3));
        after the run ``v1`` is expected to hold the element-wise powers,
        accurate to one decimal place. Repeated 1000 times.
        """
        def draw():
            # Tuple elements are evaluated left to right: four successive draws.
            return (random.random() * 3, random.random() * 3,
                    random.random() * 3, random.random() * 3)

        assembler = Tdasm()
        program = assembler.assemble(POW_CODE_PS)
        runtime = Runtime()
        load_math_func("fast_pow_ps", runtime)
        ds = runtime.load("pow_ps", program)

        for _ in range(1000):
            bases = draw()
            exponents = draw()
            ds["v1"], ds["v2"] = bases, exponents
            runtime.run("pow_ps")
            got = ds["v1"]
            want = tuple(map(math.pow, bases, exponents))

            for index, reference in enumerate(want):
                self.assertAlmostEqual(got[index], reference, places=1)
Example #5
0
    def test_sincos_ps(self):
        """Compare the assembled ``sincos_ps`` program with ``math.sin``/``math.cos``.

        Each of the 1000 rounds writes four random values in [0, 2000) to
        ``v1`` and runs the program. Afterwards ``v1`` is expected to hold
        the sines and ``v2`` the cosines of those values, to three decimal
        places.
        """
        machine_code = Tdasm().assemble(SINCOS_CODE_PS)
        runtime = Runtime()
        load_math_func("fast_sincos_ps", runtime)
        data = runtime.load("sincos_ps", machine_code)

        for _ in range(1000):
            angles = tuple(random.random() * 2000 for _ in range(4))
            data["v1"] = angles
            runtime.run("sincos_ps")
            sines = data["v1"]
            cosines = data["v2"]
            expected_sin = [math.sin(a) for a in angles]
            expected_cos = [math.cos(a) for a in angles]

            # All sine checks run before the cosine checks.
            for lane in range(4):
                self.assertAlmostEqual(sines[lane], expected_sin[lane], 3)
            for lane in range(4):
                self.assertAlmostEqual(cosines[lane], expected_cos[lane], 3)
Example #6
0
 def _create_struct(self, shape):
     """Return the assembled struct described by ``shape``.

     Builds a program whose data section is ``shape.asm_struct()`` and whose
     code section is empty, assembles it, and looks up the struct called
     ``shape.asm_struct_name()`` in the result.
     """
     data_header = " #DATA "
     empty_code_section = "\n     #CODE\n     #END\n     "
     source = data_header + shape.asm_struct() + empty_code_section
     assembled = Tdasm().assemble(source)
     struct_name = shape.asm_struct_name()
     return assembled.get_struct(struct_name)
Example #7
0
    def prepare(self, runtimes):
        """Make this shader runnable in each of ``runtimes``.

        Loads the color functions, runs ``self.loader`` when one is set,
        prepares the nested shaders and the external assembly functions, and
        finally loads the shader's machine code into each runtime where the
        global ``self._name`` does not exist yet. The data sections returned
        by those loads become ``self._ds``; when nothing was loaded the
        previous ``self._ds`` is left alone.
        """
        def cached_machine_code():
            # Reuse code assembled earlier under the same name; otherwise
            # assemble now and remember the result.
            if self._name in self._mc_cache:
                return self._mc_cache[self._name]
            fresh = asm.assemble(self._code, self._func)
            self._mc_cache[self._name] = fresh
            return fresh

        self._load_color_funcs(runtimes)

        if self.loader:
            self.loader(runtimes)

        for nested in self._shaders:
            nested.prepare(runtimes)

        self._runtimes = runtimes
        asm = Tdasm()
        name = 'shader' + str(id(self))

        for entry in self._functions:
            load_asm_function(entry[0], entry[1], runtimes, *entry[2:])

        sections = []
        for rt in runtimes:
            if not rt.global_exists(self._name):
                sections.append(rt.load(name, cached_machine_code()))
        if sections:
            self._ds = sections
Example #8
0
 def _create_struct(self, struct_def, name):
     """Assemble ``struct_def`` and return the struct called ``name``.

     ``struct_def`` is the struct definition text; it is wrapped in a
     program with an empty code section so that it can be assembled.
     """
     program = " #DATA \n" + struct_def + "\n     #CODE\n     #END\n     "
     return Tdasm().assemble(program).get_struct(name)
Example #9
0
    def test_sincos_ps(self):
        """Verify ``sincos_ps`` against the ``math`` module on random inputs.

        1000 rounds of four random values in [0, 2000): the values go into
        ``v1``; after running, ``v1`` must contain their sines and ``v2``
        their cosines, each to three decimal places.
        """
        asm = Tdasm()
        code = asm.assemble(SINCOS_CODE_PS)
        runtime = Runtime()
        load_math_func("fast_sincos_ps", runtime)
        ds = runtime.load("sincos_ps", code)

        for _ in range(1000):
            inputs = (random.random() * 2000, random.random() * 2000,
                      random.random() * 2000, random.random() * 2000)
            ds["v1"] = inputs
            runtime.run("sincos_ps")
            results = ((ds["v1"], math.sin), (ds["v2"], math.cos))
            references = [tuple(map(fn, inputs)) for _, fn in results]

            # Sine results are checked first, then cosine results.
            for (actual, _), wanted in zip(results, references):
                for index, reference in enumerate(wanted):
                    self.assertAlmostEqual(actual[index], reference, places=3)
Example #10
0
 def __init__(self):
     """Assemble three programs and load them into one runtime.

     ``MEMCPY``, ``BLTRGBA`` and ``BLTFLOATRGBA`` are loaded under the names
     ``memcpy``, ``bltrgba`` and ``bltfloatrgba``; the values returned by
     the loads are kept as ``self.ds``, ``self.ds2`` and ``self.ds3``.
     """
     assembler = Tdasm()
     memcpy_code = assembler.assemble(MEMCPY)
     self.r = Runtime()
     self.ds = self.r.load("memcpy", memcpy_code)
     self.ds2 = self.r.load("bltrgba", assembler.assemble(BLTRGBA))
     self.ds3 = self.r.load("bltfloatrgba", assembler.assemble(BLTFLOATRGBA))
Example #11
0
def create_float_image(runtime):
    """Create a 150x150 ``ImageFloatRGBA`` and run the ``write`` program on it.

    The image's pixel routine is registered in ``runtime`` under the label
    ``set_pixel``, then the program assembled from ``ASM`` is loaded as
    ``write`` and executed once.

    Returns:
        The image object.
    """
    image = renmas.gui.ImageFloatRGBA(150, 150)
    image.set_pixel_asm(runtime, "set_pixel")

    runtime.load("write", Tdasm().assemble(ASM))
    runtime.run("write")
    return image
Example #12
0
def create_float_image(runtime):
    """Return a 150x150 float RGBA image after running ``write`` once.

    ``set_pixel_asm`` installs the image's pixel routine in ``runtime`` as
    ``set_pixel`` before the ``ASM`` program is assembled, loaded under the
    name ``write`` and run.
    """
    side = 150
    program_name = "write"

    img = renmas.gui.ImageFloatRGBA(side, side)
    img.set_pixel_asm(runtime, "set_pixel")

    assembler = Tdasm()
    machine_code = assembler.assemble(ASM)
    runtime.load(program_name, machine_code)
    runtime.run(program_name)
    return img
Example #13
0
 def compile(self, shaders=None):
     """Generate assembly for ``self._code`` and assemble it.

     The parsed statements are handed to ``CodeGenerator.generate_code``.
     The assembly text it returns is stored in ``self._asm_code``, the
     return type in ``self._ret_type`` and the assembled machine code in
     ``self._mc``.

     Args:
         shaders: Value forwarded to the code generator as ``shaders``.
             ``None`` (the default) is replaced by a new empty list on
             every call, so no list object is shared between calls.
     """
     if shaders is None:
         shaders = []
     statements = parse(self._code)
     generator = CodeGenerator()
     asm_code, ret_type = generator.generate_code(
         statements,
         args=self._args,
         is_func=self._is_func,
         name=self._name,
         func_args=self._func_args,
         shaders=shaders,
     )
     self._asm_code = asm_code
     self._ret_type = ret_type
     self._mc = Tdasm().assemble(self._asm_code, self._is_func)
Example #14
0
 def _create_struct(self, struct_def, name):
     """Assemble ``struct_def`` for the running interpreter and return ``name``.

     The definition is wrapped in a program with an empty code section.
     ``ia32`` is passed to the assembler as ``False`` when
     ``platform.architecture()`` reports ``'64bit'`` and ``True`` otherwise.
     """
     program = "".join((" #DATA \n", struct_def, "\n     #CODE\n     #END\n     "))
     use_ia32 = platform.architecture()[0] != '64bit'
     machine_code = Tdasm().assemble(program, ia32=use_ia32)
     return machine_code.get_struct(name)
Example #15
0
def regular_sampler():
    """Set up a ``RegularSampler(2, 2)`` together with a runtime to drive it.

    The sampler's routine is registered in a new runtime as ``get_sample``,
    a ``Tile(0, 0, 2, 2)`` is split and assigned to the sampler, and the
    program assembled from ``ASM_CODE`` is loaded under the name ``test``.

    Returns:
        tuple: ``(sampler, runtime, 'test')``.
    """
    size = 2
    program_name = 'test'

    runtime = Runtime()
    sampler = renmas2.samplers.RegularSampler(size, size, pixel=1.0)
    sampler.get_sample_asm([runtime], 'get_sample')

    tile = renmas2.core.Tile(0, 0, size, size)
    tile.split(1)
    sampler.set_tile(tile)

    runtime.load(program_name, Tdasm().assemble(ASM_CODE))
    return sampler, runtime, program_name
Example #16
0
def _conv_rgba_bgra_asm():
    """Assemble the conversion routine for the running interpreter's word size.

    Uses the 64-bit source when ``platform.architecture()`` reports
    ``'64bit'`` and the 32-bit source otherwise; ``ia32`` is set to match.

    Returns:
        tuple: ``(runtime, ds)`` where ``ds`` is the value returned by
        loading the code into a new runtime under the name ``convert``.
    """
    is_64bit = platform.architecture()[0] == '64bit'
    source = _conv_rgba_bgra_asm64() if is_64bit else _conv_rgba_bgra_asm32()
    machine_code = Tdasm().assemble(source, ia32=not is_64bit)

    runtime = Runtime()
    return runtime, runtime.load("convert", machine_code)
Example #17
0
 def prepare(self, runtimes):
     """Assemble this shader and load it into every runtime.

     Nested shaders are prepared first. The shader's code is assembled
     once and loaded into each runtime under a name derived from
     ``id(self)``; the values returned by the loads are collected in
     ``self._ds`` in runtime order.
     """
     for nested in self._shaders:
         nested.prepare(runtimes)
     self._ds = []
     machine_code = Tdasm().assemble(self._code, self._func)
     load_name = 'shader' + str(id(self))
     self._runtimes = runtimes
     # TODO check if the shader already exists in the runtime
     # TODO if the shader is a function, load it as a function
     self._ds.extend(rt.load(load_name, machine_code) for rt in runtimes)
Example #18
0
 def __init__(self, width, height, pitch, address):
     """Record the surface description and load the ``set_pixel`` program.

     ``address``, ``width`` and ``height`` are kept as attributes. The
     program assembled from ``ASM_STR`` is loaded into a new runtime, and
     its data fields ``color``, ``address``, ``width``, ``height`` and
     ``pitch`` are initialised.
     """
     self.addr = address
     self.width = width
     self.height = height
     machine_code = Tdasm().assemble(ASM_STR)
     self.r = Runtime()
     self.ds = self.r.load("set_pixel", machine_code)
     # NOTE(review): the previous comment called 0xFF00FF00 "red"; the
     # channel order used by ASM_STR is not visible here - confirm.
     initial_fields = (
         ("color", 0xFF00FF00),
         ("address", address),
         ("width", width),
         ("height", height),
         ("pitch", pitch),
     )
     for field, value in initial_fields:
         self.ds[field] = value
Example #19
0
 def compile(self, shaders=None):
     """Turn ``self._code`` into assembly text and machine code.

     Results are stored on the instance: ``self._asm_code`` (assembly text
     from the code generator), ``self._ret_type`` (return type reported by
     the generator) and ``self._mc`` (output of the assembler).

     Args:
         shaders: Passed through to ``CodeGenerator.generate_code``. The
             default is ``None`` rather than a list literal so that calls
             never share one default list; ``None`` is forwarded as a new
             empty list.
     """
     shaders = [] if shaders is None else shaders
     stms = parse(self._code)
     cgen = CodeGenerator()
     generated, ret_type = cgen.generate_code(stms,
                                              args=self._args,
                                              is_func=self._is_func,
                                              name=self._name,
                                              func_args=self._func_args,
                                              shaders=shaders)
     self._asm_code = generated
     self._ret_type = ret_type
     assembler = Tdasm()
     self._mc = assembler.assemble(self._asm_code, self._is_func)
Example #20
0
 def prepare(self, runtimes):
     """Load this shader's machine code into each of ``runtimes``.

     Nested shaders are prepared before this one. ``self._ds`` is reset and
     then filled with one load result per runtime; ``self._runtimes``
     remembers the runtimes that were used.
     """
     for child in self._shaders:
         child.prepare(runtimes)
     self._ds = []
     assembler = Tdasm()
     mc = assembler.assemble(self._code, self._func)
     name = 'shader' + str(id(self))
     self._runtimes = runtimes
     append_section = self._ds.append
     for runtime in runtimes:
         # TODO check if the shader already exists in the runtime
         # TODO if the shader is a function, load it as a function
         append_section(runtime.load(name, mc))
Example #21
0
    def test_atan(self):
        """Compare the assembled ``atan`` program with ``math.atan``.

        For 1000 random inputs in [0, 2000) the input is written to ``x``,
        the program is run, and ``x`` is then expected to equal
        ``math.atan(input)`` to three decimal places.
        """
        machine_code = Tdasm().assemble(ATAN_CODE)
        runtime = Runtime()
        load_math_func("fast_atan_ss", runtime)
        data = runtime.load("atan", machine_code)

        for _ in range(1000):
            value = random.random() * 2000
            data["x"] = value
            runtime.run("atan")
            self.assertAlmostEqual(data["x"], math.atan(value), 3)
Example #22
0
    def test_exp(self):
        """Compare the assembled ``exp`` program with ``math.exp``.

        For 1000 random inputs in [0, 4) the input is written to ``x``, the
        program is run, and ``x`` is then expected to equal
        ``math.exp(input)`` to two decimal places.
        """
        machine_code = Tdasm().assemble(EXP_CODE)
        runtime = Runtime()
        load_math_func("fast_exp_ss", runtime)
        data = runtime.load("exp", machine_code)

        for _ in range(1000):
            value = random.random() * 4
            data["x"] = value
            runtime.run("exp")
            self.assertAlmostEqual(data["x"], math.exp(value), 2)
Example #23
0
    def test_exp(self):
        """Check ``exp`` against ``math.exp`` on 1000 random values in [0, 4).

        The program reads its input from ``x`` and the test expects the
        result in ``x`` as well, accurate to two decimal places.
        """
        asm = Tdasm()
        program = asm.assemble(EXP_CODE)
        runtime = Runtime()
        load_math_func("fast_exp_ss", runtime)
        ds = runtime.load("exp", program)

        # The generator is lazy, so each value is drawn right before its run.
        for sample in (random.random() * 4 for _ in range(1000)):
            ds["x"] = sample
            runtime.run("exp")
            actual = ds["x"]
            reference = math.exp(sample)
            self.assertAlmostEqual(actual, reference, places=2)
Example #24
0
    def test_log(self):
        """Compare the assembled ``log`` program with ``math.log``.

        For 1000 random inputs the input is written to ``x``, the program is
        run, and ``x`` is then expected to equal ``math.log(input)`` to
        three decimal places.

        Inputs are drawn from (0.0, 1.0]. ``random.random()`` alone covers
        [0.0, 1.0) and can return exactly 0.0, for which ``math.log`` raises
        ``ValueError`` and would abort the test with an error unrelated to
        the routine under test.
        """
        machine_code = Tdasm().assemble(LOG_CODE)
        runtime = Runtime()
        load_math_func("fast_log_ss", runtime)
        data = runtime.load("log", machine_code)

        for _ in range(1000):
            # 1.0 - [0.0, 1.0) == (0.0, 1.0]: never zero.
            value = 1.0 - random.random()
            data["x"] = value
            runtime.run("log")
            self.assertAlmostEqual(data["x"], math.log(value), 3)
Example #25
0
    def test_atan(self):
        """Check ``atan`` against ``math.atan`` on 1000 random values in [0, 2000).

        The program reads its input from ``x`` and the test expects the
        result in ``x`` as well, accurate to three decimal places.
        """
        asm = Tdasm()
        program = asm.assemble(ATAN_CODE)
        runtime = Runtime()
        load_math_func("fast_atan_ss", runtime)
        ds = runtime.load("atan", program)

        # The generator is lazy, so each value is drawn right before its run.
        for sample in (random.random() * 2000 for _ in range(1000)):
            ds["x"] = sample
            runtime.run("atan")
            actual = ds["x"]
            reference = math.atan(sample)
            self.assertAlmostEqual(actual, reference, places=3)
Example #26
0
    def test_log(self):
        """Check ``log`` against ``math.log`` on 1000 random positive values.

        The program reads its input from ``x`` and the test expects the
        result in ``x`` as well, accurate to three decimal places.

        The input is ``1.0 - random.random()``, i.e. a value in (0.0, 1.0].
        Plain ``random.random()`` may yield 0.0, and ``math.log(0.0)``
        raises ``ValueError`` instead of producing a reference value.
        """
        asm = Tdasm()
        program = asm.assemble(LOG_CODE)
        runtime = Runtime()
        load_math_func("fast_log_ss", runtime)
        ds = runtime.load("log", program)

        for _ in range(1000):
            sample = 1.0 - random.random()  # strictly positive
            ds["x"] = sample
            runtime.run("log")
            actual = ds["x"]
            reference = math.log(sample)
            self.assertAlmostEqual(actual, reference, places=3)
Example #27
0
 def compile(self, shaders=None, color_mgr=None):
     """Generate assembly for ``self._code`` and assemble it.

     The code generator returns the assembly text, the return type and a
     third value; they are stored in ``self._asm_code``, ``self._ret_type``
     and ``self._ext_functions``. The assembled machine code goes to
     ``self._mc``; the assembler gets ``naked=self._is_func`` and targets
     ia32 unless the generator's ``BIT64`` flag is set.

     Args:
         shaders: Forwarded to the code generator as ``shaders``. ``None``
             (the default) is replaced by a new empty list on every call,
             so no list object is shared between calls.
         color_mgr: Forwarded to the code generator unchanged.
     """
     if shaders is None:
         shaders = []
     statements = parse(self._code)
     generator = CodeGenerator()
     asm_code, ret_type, ext_functions = generator.generate_code(
         statements,
         args=self._args,
         is_func=self._is_func,
         name=self._name,
         func_args=self._func_args,
         shaders=shaders,
         color_mgr=color_mgr,
     )
     self._asm_code = asm_code
     self._ret_type = ret_type
     self._ext_functions = ext_functions
     self._mc = Tdasm().assemble(self._asm_code, naked=self._is_func,
                                 ia32=not generator.BIT64)
Example #28
0
    def set_pixel_asm(self, runtime, label):
        """Assemble a pixel-store routine named ``label`` and load it into ``runtime``.

        According to the generated assembly, the routine takes x in ``eax``,
        y in ``ebx`` and the value in ``xmm0``, and stores ``xmm0`` at
        ``ptr_buffer + y * pitch + x * 16``. Pointer-sized pieces (the
        ``ptr_buffer`` declaration and the address register) follow
        ``platform.architecture()``; the store uses ``vmovaps`` when
        ``util.AVX`` is truthy and ``movaps`` otherwise.

        The loaded data section is kept in ``self.ds`` and initialised with
        the pixel buffer pointer and ``self.pitch``.
        """
        if platform.architecture()[0] == "64bit":
            address_reg = "rcx"
            buffer_decl = "uint64 ptr_buffer"
            load_buffer = "mov rcx, qword [ptr_buffer]"
            add_offset = "add rcx, rax"
        else:
            address_reg = "ecx"
            buffer_decl = "uint32 ptr_buffer"
            load_buffer = "mov ecx, dword [ptr_buffer]"
            add_offset = "add ecx, eax"

        store_op = "vmovaps" if util.AVX else "movaps"
        store_line = store_op + " oword [" + address_reg + "], xmm0"

        # The whitespace inside these pieces is part of the assembled text.
        pieces = [
            "\n        #DATA\n        ",
            buffer_decl,
            "\n        uint32 pitch\n        #CODE\n"
            "        ; eax = x , ebx = y, value = xmm0\n        ",
            "global " + label + ": \n",
            "\n        imul ebx, dword [pitch]\n        imul eax , eax, 16\n        ",
            load_buffer,
            "\n        add eax, ebx\n        ",
            add_offset + "\n",
            store_line,
            "\n        ret\n        ",
        ]
        asm_code = "".join(pieces)

        machine_code = Tdasm().assemble(asm_code, True)
        load_name = "ImageFloatRGBA" + str(hash(self))
        self.ds = runtime.load(load_name, machine_code)
        self.ds["ptr_buffer"] = self.pixels.ptr()
        self.ds["pitch"] = self.pitch
Example #29
0
    def set_pixel_asm(self, runtime, label):
        """Load a routine exported as ``label`` that writes ``xmm0`` into this image.

        The generated assembly documents its inputs as x in ``eax``, y in
        ``ebx`` and the value in ``xmm0``; it computes
        ``y * pitch + x * 16``, adds it to ``ptr_buffer`` and stores
        ``xmm0`` there. Register width and the ``ptr_buffer`` type depend on
        ``platform.architecture()``, the store mnemonic on ``util.AVX``.

        ``self.ds`` receives the loaded data section; its ``ptr_buffer`` and
        ``pitch`` fields are set from ``self.pixels.ptr()`` and
        ``self.pitch``.
        """
        # (address register, buffer declaration, pointer load, pointer advance)
        per_arch = {
            True: ("rcx", "uint64 ptr_buffer",
                   "mov rcx, qword [ptr_buffer]", "add rcx, rax"),
            False: ("ecx", "uint32 ptr_buffer",
                    "mov ecx, dword [ptr_buffer]", "add ecx, eax"),
        }
        ecx, l1, l2, l3 = per_arch[platform.architecture()[0] == "64bit"]

        mnemonic = "movaps"
        if util.AVX:
            mnemonic = "vmovaps"
        line = mnemonic + " oword [" + ecx + "], xmm0"

        # Data section. The whitespace is part of the assembled text.
        asm_code = "\n        #DATA\n        " + l1
        asm_code += "\n        uint32 pitch\n        #CODE\n"
        asm_code += "        ; eax = x , ebx = y, value = xmm0\n        "
        # Exported entry point.
        asm_code += "global " + label + ": \n"
        # Byte offset of the pixel, then the store itself.
        asm_code += "\n        imul ebx, dword [pitch]\n"
        asm_code += "        imul eax , eax, 16\n        "
        asm_code += l2 + "\n        add eax, ebx\n        "
        asm_code += l3 + "\n"
        asm_code += line + "\n        ret\n        "

        asm = Tdasm()
        mc = asm.assemble(asm_code, True)
        self.ds = runtime.load("ImageFloatRGBA" + str(hash(self)), mc)
        self.ds["ptr_buffer"] = self.pixels.ptr()
        self.ds["pitch"] = self.pitch
Example #30
0
    def test_pow(self):
        """Compare the assembled ``pow`` program with ``math.pow``.

        For 1000 random (base, exponent) pairs, both in [0, 3), the base is
        written to ``x`` and the exponent to ``y``; after the run ``x`` is
        expected to equal ``math.pow(base, exponent)`` to one decimal place.
        """
        machine_code = Tdasm().assemble(POW_CODE)
        runtime = Runtime()
        load_math_func("fast_pow_ss", runtime)
        data = runtime.load("pow", machine_code)

        for _ in range(1000):
            base = random.random() * 3
            exponent = random.random() * 3
            data["x"] = base
            data["y"] = exponent
            runtime.run("pow")
            self.assertAlmostEqual(data["x"], math.pow(base, exponent), 1)
Example #31
0
    def test_pow(self):
        """Check ``pow`` against ``math.pow`` on 1000 random input pairs.

        ``x`` holds the base and ``y`` the exponent (both in [0, 3)); the
        result is read back from ``x`` and must agree with ``math.pow`` to
        one decimal place.
        """
        asm = Tdasm()
        program = asm.assemble(POW_CODE)
        runtime = Runtime()
        load_math_func("fast_pow_ss", runtime)
        ds = runtime.load("pow", program)

        for _ in range(1000):
            # Left-to-right evaluation: the base is drawn before the exponent.
            operands = (random.random() * 3, random.random() * 3)
            ds["x"], ds["y"] = operands
            runtime.run("pow")
            actual = ds["x"]
            reference = math.pow(*operands)
            self.assertAlmostEqual(actual, reference, places=1)
Example #32
0
    def test_sincos(self):
        """Compare the assembled ``sincos`` program with ``math.sin``/``math.cos``.

        For 1000 random inputs in [0, 2000) the input is written to ``x``;
        after the run ``x`` is expected to hold the sine and ``y`` the
        cosine of the input, each to three decimal places.
        """
        machine_code = Tdasm().assemble(SINCOS_CODE)
        runtime = Runtime()
        load_math_func("fast_sincos_ss", runtime)
        data = runtime.load("sincos", machine_code)

        for _ in range(1000):
            angle = random.random() * 2000
            data["x"] = angle
            runtime.run("sincos")
            sine, cosine = data["x"], data["y"]

            self.assertAlmostEqual(sine, math.sin(angle), 3)
            self.assertAlmostEqual(cosine, math.cos(angle), 3)
Example #33
0
    def test_sincos(self):
        """Check ``sincos`` against the ``math`` module on 1000 random inputs.

        The input (in [0, 2000)) is written to ``x``. The sine is read back
        from ``x`` and the cosine from ``y``; both must match to three
        decimal places.
        """
        asm = Tdasm()
        program = asm.assemble(SINCOS_CODE)
        runtime = Runtime()
        load_math_func("fast_sincos_ss", runtime)
        ds = runtime.load("sincos", program)

        for _ in range(1000):
            sample = random.random() * 2000
            ds["x"] = sample
            runtime.run("sincos")
            actual = (ds["x"], ds["y"])

            reference = (math.sin(sample), math.cos(sample))
            for got, want in zip(actual, reference):
                self.assertAlmostEqual(got, want, places=3)
Example #34
0
 def compile(self, shaders=None, color_mgr=None):
     """Turn ``self._code`` into assembly text and machine code.

     Stores on the instance: ``self._asm_code`` (assembly text),
     ``self._ret_type`` (return type reported by the generator),
     ``self._ext_functions`` (third value returned by the generator) and
     ``self._mc`` (assembler output, built with ``naked=self._is_func`` and
     ``ia32=not cgen.BIT64``).

     Args:
         shaders: Passed through to ``CodeGenerator.generate_code``. The
             default is ``None`` rather than a list literal so that calls
             never share one default list; ``None`` is forwarded as a new
             empty list.
         color_mgr: Passed through to the code generator unchanged.
     """
     shaders = [] if shaders is None else shaders
     stms = parse(self._code)
     cgen = CodeGenerator()
     generated, ret_type, fns = cgen.generate_code(stms,
                                                   args=self._args,
                                                   is_func=self._is_func,
                                                   name=self._name,
                                                   func_args=self._func_args,
                                                   shaders=shaders,
                                                   color_mgr=color_mgr)
     self._asm_code = generated
     self._ret_type = ret_type
     self._ext_functions = fns
     assembler = Tdasm()
     self._mc = assembler.assemble(self._asm_code,
                                   naked=self._is_func,
                                   ia32=not cgen.BIT64)
Example #35
0
def random_sampler():
    """Drive a 1x1 ``RandomSampler`` with one sample per pixel.

    The sampler's routine is registered in a new runtime as ``get_sample``,
    a matching tile is split and assigned, and the ``ASM_CODE`` program is
    loaded as ``test``. ``get_sample`` is then called once per expected
    sample and one additional time.
    """
    width, height, spp = 1, 1, 1

    runtime = Runtime()
    sampler = renmas2.samplers.RandomSampler(width, height, spp=spp, pixel=1.0)
    sampler.get_sample_asm([runtime], 'get_sample')

    tile = renmas2.core.Tile(0, 0, width, height)
    tile.split(1)
    sampler.set_tile(tile)

    runtime.load("test", Tdasm().assemble(ASM_CODE))

    expected_samples = width * height * spp
    # The "+ 1" is the extra call made after all expected samples were requested.
    for _ in range(expected_samples + 1):
        get_sample(sampler, runtime, "test")
Example #36
0
class Structures:
    """Supplies assembly struct definitions, some depending on renderer settings.

    Names present in the module-level ``structures`` mapping are returned
    from it unchanged. ``"spectrum"`` and ``"hitpoint"`` are generated on
    demand because the length of the ``values`` array follows the renderer.
    """

    def __init__(self, renderer):
        self.tdasm = Tdasm()
        self.renderer = renderer

        self._line1 = "struct spectrum \n"
        self._line3 = "end struct \n"

    def _spectrum_definition(self):
        """Return the ``spectrum`` struct text for the renderer's current mode."""
        if self.renderer.spectral_rendering:
            length = str(self.renderer.nspectrum_samples)
        else:
            length = "4"
        return self._line1 + "float values[" + length + "] \n" + self._line3

    def get_struct(self, name):
        """Return the definition text for ``name``, or ``None`` if unknown."""
        if name in structures:
            return structures[name]
        if name == "spectrum":
            return self._spectrum_definition()
        if name == "hitpoint":
            # The hitpoint text is preceded by the spectrum definition.
            return self._spectrum_definition() + HITPOINT
        return None

    def get_compiled_struct(self, name):
        """Assemble and return struct ``name``.

        Only names present in ``structures`` are handled; any other name
        yields ``None``.
        """
        if name not in structures:
            return None
        source = (" #DATA\n            "
                  + self.get_struct(name)
                  + "\n            #CODE\n            #END\n            ")
        return self.tdasm.assemble(source).get_struct(name)

    def structs(self, names):
        """Concatenate the definitions of ``names`` in the given order.

        Raises:
            ValueError: if one of the names has no definition.
        """
        parts = []
        for name in names:
            definition = self.get_struct(name)
            if definition is None:
                raise ValueError(f"Structure {name!s} doesn't exist!")
            parts.append(definition)
        return "".join(parts)
Example #37
0
    def test_log_ps(self):
        """Compare the assembled ``log_ps`` program with ``math.log``.

        Each of the 1000 rounds writes four random values to ``v1``, runs
        the program and expects ``v1`` to hold their natural logarithms, to
        three decimal places.

        Inputs are drawn from (0.0, 1.0]. ``random.random()`` alone covers
        [0.0, 1.0) and can return exactly 0.0, for which ``math.log`` raises
        ``ValueError`` and would abort the test with an error unrelated to
        the routine under test.
        """
        machine_code = Tdasm().assemble(LOG_CODE_PS)
        runtime = Runtime()
        load_math_func("fast_log_ps", runtime)
        data = runtime.load("log_ps", machine_code)

        for _ in range(1000):
            # 1.0 - [0.0, 1.0) == (0.0, 1.0]: never zero.
            values = tuple(1.0 - random.random() for _ in range(4))
            data["v1"] = values
            runtime.run("log_ps")
            computed = data["v1"]
            expected = [math.log(v) for v in values]

            for lane in range(4):
                self.assertAlmostEqual(computed[lane], expected[lane], 3)
Example #38
0
    def test_log_ps(self):
        """Check ``log_ps`` on random four-element inputs against ``math.log``.

        ``v1`` receives the inputs and is expected to contain their natural
        logarithms after the run, accurate to three decimal places.
        Repeated 1000 times.

        Every input is ``1.0 - random.random()``, i.e. a value in
        (0.0, 1.0]. Plain ``random.random()`` may yield 0.0, and
        ``math.log(0.0)`` raises ``ValueError`` instead of producing a
        reference value.
        """
        def positive_sample():
            return 1.0 - random.random()

        asm = Tdasm()
        program = asm.assemble(LOG_CODE_PS)
        runtime = Runtime()
        load_math_func("fast_log_ps", runtime)
        ds = runtime.load("log_ps", program)

        for _ in range(1000):
            inputs = (positive_sample(), positive_sample(),
                      positive_sample(), positive_sample())
            ds["v1"] = inputs
            runtime.run("log_ps")
            got = ds["v1"]
            want = tuple(map(math.log, inputs))

            for index, reference in enumerate(want):
                self.assertAlmostEqual(got[index], reference, places=3)
Example #39
0
class Structures:
    """Looks up or generates assembly struct definitions for a renderer.

    Definitions come from the module-level ``structures`` mapping, except
    for ``"spectrum"`` and ``"hitpoint"``, whose ``values`` array length is
    taken from the renderer when it renders spectrally and is 4 otherwise.
    """

    def __init__(self, renderer):
        self.tdasm = Tdasm()
        self.renderer = renderer

        self._line1 = "struct spectrum \n"
        self._line3 = "end struct \n"

    def get_struct(self, name):
        """Return the definition text of ``name``; ``None`` when there is none."""
        if name in structures:
            return structures[name]
        if name not in ("spectrum", "hitpoint"):
            return None

        renderer = self.renderer
        samples = renderer.nspectrum_samples if renderer.spectral_rendering else 4
        spectrum = "".join(
            (self._line1, "float values[", str(samples), "] \n", self._line3))
        # "hitpoint" is the spectrum definition followed by the HITPOINT text.
        return spectrum if name == "spectrum" else spectrum + HITPOINT

    def get_compiled_struct(self, name):
        """Return the assembled struct ``name`` if it is listed in ``structures``.

        Names missing from ``structures`` give ``None``.
        """
        if name in structures:
            asm_code = " #DATA\n            "
            asm_code += self.get_struct(name)
            asm_code += "\n            #CODE\n            #END\n            "
            machine_code = self.tdasm.assemble(asm_code)
            return machine_code.get_struct(name)
        return None

    def structs(self, names):
        """Return the definitions of all ``names`` joined in order.

        Raises:
            ValueError: when a name has no definition.
        """
        collected = []
        for name in names:
            text = self.get_struct(name)
            if text is None:
                raise ValueError("Structure " + str(name) + " doesn't exist!")
            collected.append(text)
        return "".join(collected)
Example #40
0
def sin_ss():
    """Assemble the ``fast_sin_ss`` routine and return its machine code.

    Two instruction listings share one ``#DATA`` section: ``asm_code`` uses
    the plain mnemonics and ``avx_code`` the ``v``-prefixed three-operand
    forms of the same sequence. ``proc.AVX`` decides which one is
    assembled.

    As written, the routine reads its argument from ``xmm0``, leaves its
    result in ``xmm0`` and overwrites ``xmm1``-``xmm7``. Judging by the
    label and the constants (a 2/pi factor and four polynomial
    coefficients) it is presumably a single-precision sine approximation -
    verify against the callers.

    Returns:
        The object returned by ``Tdasm.assemble`` for the selected listing.
    """
    # Constants shared by both listings. Every constant is declared as a
    # four-element array, although the code below only loads one dword of each.
    data = """
    #DATA

    uint32 _ps_am_inv_sign_mask[4] = 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF
    uint32 _ps_am_sign_mask[4] = 0x80000000, 0x80000000, 0x80000000, 0x80000000
    float _ps_am_2_o_pi[4] = 0.63661977236, 0.63661977236, 0.63661977236, 0.63661977236
    uint32 _epi32_1[4] = 1, 1, 1, 1
    float _ps_am_1[4] = 1.0, 1.0, 1.0, 1.0
    uint32 _epi32_2[4] = 2, 2, 2, 2

    float _ps_sincos_p3[4] = -0.00468175413, -0.00468175413, -0.00468175413, -0.00468175413
    float _ps_sincos_p2[4] = 0.0796926262, 0.0796926262, 0.0796926262, 0.0796926262
    float _ps_sincos_p1[4] = -0.64596409750621,-0.64596409750621,-0.64596409750621,-0.64596409750621 
    float _ps_sincos_p0[4] = 1.570796326794896, 1.570796326794896, 1.570796326794896, 1.570796326794896

    """

    # Listing with the plain (non-``v``) mnemonics.
    asm_code = data + """

    #CODE
    global fast_sin_ss:
	movaps	xmm7, xmm0
	movss	xmm1, dword [_ps_am_inv_sign_mask]
	movss	xmm2, dword [_ps_am_sign_mask]
	movss	xmm3, dword [_ps_am_2_o_pi]
	andps	xmm0, xmm1
	andps	xmm7, xmm2
	mulss	xmm0, xmm3

	pxor	xmm3, xmm3
	movd	xmm5, dword [_epi32_1]
	movss	xmm4, dword [_ps_am_1]
	cvttps2dq	xmm2, xmm0
	pand	xmm5, xmm2
	movd	xmm1, dword [_epi32_2]
	pcmpeqd	xmm5, xmm3
	cvtdq2ps	xmm6, xmm2
	pand	xmm2, xmm1
	pslld	xmm2, 30

	subss	xmm0, xmm6
	movss	xmm3, dword [_ps_sincos_p3]
	minss	xmm0, xmm4
	subss	xmm4, xmm0
	andps	xmm0, xmm5
	andnps	xmm5, xmm4
	orps	xmm0, xmm5

	movaps	xmm1, xmm0
	movss	xmm4, dword [_ps_sincos_p2]
	mulss	xmm0, xmm0
	xorps	xmm2, xmm7
	movss	xmm5, dword [_ps_sincos_p1]
	orps	xmm1, xmm2
	movaps	xmm7, xmm0
	mulss	xmm0, xmm3
	movss	xmm6, dword [_ps_sincos_p0]
	addss	xmm0, xmm4
	mulss	xmm0, xmm7
	addss	xmm0, xmm5
	mulss	xmm0, xmm7
	addss	xmm0, xmm6
	mulss	xmm0, xmm1
    ret
    """

    # Same sequence written with the ``v``-prefixed three-operand mnemonics.
    avx_code = data + """

    #CODE
    global fast_sin_ss:
    vmovaps	xmm7, xmm0 
	vmovss	xmm1, dword [_ps_am_inv_sign_mask]
	vmovss	xmm2, dword [_ps_am_sign_mask]
	vmovss	xmm3, dword [_ps_am_2_o_pi]

	vandps	xmm0, xmm0, xmm1
	vandps	xmm7, xmm7, xmm2 
	vmulss	xmm0, xmm0, xmm3

	vpxor	xmm3, xmm3, xmm3 
	vmovd	xmm5, dword [_epi32_1]
	vmovss	xmm4, dword [_ps_am_1]
	vcvttps2dq	xmm2, xmm0
	vpand	xmm5, xmm5, xmm2
	vmovd	xmm1, dword [_epi32_2]
	vpcmpeqd	xmm5, xmm5, xmm3
	vcvtdq2ps	xmm6, xmm2
	vpand	xmm2, xmm2, xmm1
	vpslld	xmm2, xmm2, 30

	vsubss	xmm0, xmm0, xmm6
	vmovss	xmm3, dword [_ps_sincos_p3]
	vminss	xmm0, xmm0, xmm4
	vsubss	xmm4, xmm4, xmm0
	vandps	xmm0, xmm0, xmm5
	vandnps	xmm5, xmm5, xmm4
	vorps	xmm0, xmm0, xmm5

	vmovaps	xmm1, xmm0
	vmovss	xmm4, dword [_ps_sincos_p2]
	vmulss	xmm0, xmm0, xmm0
	vxorps	xmm2, xmm2, xmm7
	vmovss	xmm5, dword [_ps_sincos_p1]
	vorps	xmm1, xmm1, xmm2
	vmovaps	xmm7, xmm0
	vmulss	xmm0, xmm0, xmm3
	vmovss	xmm6, dword [_ps_sincos_p0]
	vaddss	xmm0, xmm0, xmm4
	vmulss	xmm0, xmm0, xmm7
	vaddss	xmm0, xmm0, xmm5
	vmulss	xmm0, xmm0, xmm7
	vaddss	xmm0, xmm0, xmm6
	vmulss	xmm0, xmm0, xmm1
    ret
    """

    asm = Tdasm()
    # NOTE(review): the second positional argument is presumably the
    # assembler's "naked" flag (function-style code) - confirm in Tdasm.
    if proc.AVX:
        mc = asm.assemble(avx_code, True)
    else:
        mc = asm.assemble(asm_code, True)

    return mc
Example #41
0
def tan_ps():
    """Assemble the ``fast_tan_ps`` routine and return the assembled code.

    Two listings of the same algorithm are kept side by side: one using
    legacy SSE mnemonics and one using the three-operand ``v``-prefixed (AVX)
    mnemonics.  ``proc.AVX`` decides which one is handed to the assembler.
    Both share the ``#DATA`` section of 4-element constants defined first.

    The generated routine works on packed single-precision values: it reads
    its argument from ``xmm0`` and leaves the result in ``xmm0``.  It
    overwrites ``xmm1``-``xmm7`` and ``eax`` along the way.

    Returns:
        Whatever ``Tdasm.assemble`` returns for the selected listing.
    """
    # Constants shared by both listings; every value is replicated four times
    # because the code loads them as full 128-bit (oword) operands.
    data = """
    #DATA
    uint32 _ps_am_inv_sign_mask[4] = 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF
    float _ps_am_4_o_pi[4] = 1.273239544735, 1.273239544735, 1.273239544735, 1.273239544735
    float _ps_am_1[4] = 1.0, 1.0, 1.0, 1.0
    float _ps_am_pi_o_4[4] = 0.78539816339, 0.78539816339, 0.78539816339, 0.78539816339
    int32 _epi32_1[4] = 1, 1, 1, 1
    int32 _epi32_7[4] = 7, 7, 7, 7
    int32 _epi32_2[4] = 2, 2, 2, 2
    uint32 _ps_am_sign_mask[4] = 0x80000000, 0x80000000, 0x80000000, 0x80000000
    float _ps_tan_p0[4] = -17956525.197648, -17956525.197648, -17956525.197648, -17956525.197648 
    float _ps_tan_q0[4] = -53869575.592945, -53869575.592945, -53869575.592945, -53869575.592945 
    float _ps_tan_p1[4] = 1153516.64838587, 1153516.64838587, 1153516.64838587, 1153516.64838587
    float _ps_tan_q1[4] = 25008380.18233579, 25008380.18233579, 25008380.18233579, 25008380.18233579
    float _ps_tan_p2[4] = -13093.693918138, -13093.693918138, -13093.693918138, -13093.693918138
    float _ps_tan_q2[4] = -1320892.3444021, -1320892.3444021, -1320892.3444021, -1320892.3444021
    float _ps_tan_q3[4] = 13681.296347069, 13681.296347069, 13681.296347069, 13681.296347069
    float _ps_tan_poleval[4] = 36893500000000000000.0, 36893500000000000000.0, 36893500000000000000.0, 36893500000000000000.0

    """
    # SSE listing.  The `l_pole` branch is taken when the movmskps result is
    # not 0xf, i.e. when at least one lane of the xmm6 mask is clear.
    asm_code = data + """

    #CODE
    global fast_tan_ps:
    movaps	xmm7, xmm0
    andps	xmm0, oword [_ps_am_inv_sign_mask]
    andps	xmm7, oword [_ps_am_sign_mask]
    movaps	xmm1, xmm0
    mulps	xmm0, oword [_ps_am_4_o_pi]

    cvttps2dq	xmm0, xmm0
    movdqa	xmm4, oword [_epi32_1]
    movdqa	xmm5, oword [_epi32_7]

    pand	xmm4, xmm0
    pand	xmm5, xmm0
    movaps	xmm3, oword [_ps_am_1]
    paddd	xmm0, xmm4
    paddd	xmm5, xmm4

    cvtdq2ps	xmm0, xmm0

    mulps	xmm0, oword [_ps_am_pi_o_4]
    xorps	xmm6, xmm6
    subps	xmm1, xmm0
    movaps	xmm2, oword [_ps_tan_p2]
    minps	xmm1, xmm3
    movaps	xmm3, oword [_ps_tan_q3]
    movaps	xmm0, xmm1
    mulps	xmm1, xmm1

    mulps	xmm2, xmm1
    addps	xmm3, xmm1
    addps	xmm2, oword [_ps_tan_p1]
    mulps	xmm3, xmm1
    mulps	xmm2, xmm1
    addps	xmm3, oword [_ps_tan_q2]
    addps	xmm2, oword [_ps_tan_p0]
    mulps	xmm3, xmm1
    mulps	xmm2, xmm1
    addps	xmm3, oword [_ps_tan_q1]
    xorps	xmm0, xmm7
    mulps	xmm3, xmm1
    pand	xmm5, oword [_epi32_2]
    addps	xmm3, oword [_ps_tan_q0]
    mulps	xmm2, xmm0

    cmpps xmm6, xmm1, 4
    rcpps	xmm4, xmm3
    pxor	xmm7, xmm7
    mulps	xmm3, xmm4
    pcmpeqd	xmm5, xmm7
    mulps	xmm3, xmm4
    addps	xmm4, xmm4
    orps	xmm6, xmm5
    subps	xmm4, xmm3

    mulps	xmm2, xmm4
    movaps	xmm1, oword [_ps_am_sign_mask]
    movmskps	eax, xmm6
    addps	xmm2, xmm0

    rcpps	xmm4, xmm2
    cmp		eax, 0xf
    movaps	xmm0, xmm2
    mulps	xmm2, xmm4
    mulps	xmm2, xmm4
    addps	xmm4, xmm4
    subps	xmm4, xmm2
    jne		l_pole

    xorps	xmm4, xmm1

    andps	xmm0, xmm5
    andnps	xmm5, xmm4
    orps	xmm0, xmm5

    ret	

    l_pole:
    movaps	xmm7, xmm1
    movaps	xmm3, oword [_ps_tan_poleval]
    andps	xmm1, xmm0
    orps	xmm3, xmm1
    andps	xmm4, xmm6
    andnps	xmm6, xmm3
    orps	xmm4, xmm6

    xorps	xmm4, xmm7

    andps	xmm0, xmm5
    andnps	xmm5, xmm4
    orps	xmm0, xmm5

    ret	


    """

    # AVX listing: the same instruction sequence as above, written with the
    # three-operand v-prefixed forms (destination repeated as first source).
    avx_code = data + """

    #CODE
    global fast_tan_ps:
    vmovaps	xmm7, xmm0
    vandps	xmm0, xmm0, oword [_ps_am_inv_sign_mask]
    vandps	xmm7, xmm7, oword [_ps_am_sign_mask]
    vmovaps	xmm1, xmm0
    vmulps	xmm0, xmm0, oword [_ps_am_4_o_pi]

    vcvttps2dq	xmm0, xmm0
    vmovdqa	xmm4, oword [_epi32_1]
    vmovdqa	xmm5, oword [_epi32_7]

    vpand	xmm4, xmm4, xmm0
    vpand	xmm5, xmm5, xmm0
    vmovaps	xmm3, oword [_ps_am_1]
    vpaddd	xmm0, xmm0, xmm4
    vpaddd	xmm5, xmm5, xmm4

    vcvtdq2ps	xmm0, xmm0

    vmulps	xmm0, xmm0, oword [_ps_am_pi_o_4]
    vxorps	xmm6, xmm6, xmm6
    vsubps	xmm1, xmm1, xmm0
    vmovaps	xmm2, oword [_ps_tan_p2]
    vminps	xmm1, xmm1, xmm3
    vmovaps	xmm3, oword [_ps_tan_q3]
    vmovaps	xmm0, xmm1
    vmulps	xmm1, xmm1, xmm1

    vmulps	xmm2, xmm2, xmm1
    vaddps	xmm3, xmm3, xmm1
    vaddps	xmm2, xmm2, oword [_ps_tan_p1]
    vmulps	xmm3, xmm3, xmm1
    vmulps	xmm2, xmm2, xmm1
    vaddps	xmm3, xmm3, oword [_ps_tan_q2]
    vaddps	xmm2, xmm2, oword [_ps_tan_p0]
    vmulps	xmm3, xmm3, xmm1
    vmulps	xmm2, xmm2, xmm1
    vaddps	xmm3, xmm3, oword [_ps_tan_q1]
    vxorps	xmm0, xmm0, xmm7
    vmulps	xmm3, xmm3, xmm1
    vpand	xmm5, xmm5, oword [_epi32_2]
    vaddps	xmm3, xmm3, oword [_ps_tan_q0]
    vmulps	xmm2, xmm2, xmm0

    vcmpps xmm6, xmm6, xmm1, 4
    vrcpps	xmm4, xmm3
    vpxor	xmm7, xmm7, xmm7
    vmulps	xmm3, xmm3, xmm4
    vpcmpeqd	xmm5, xmm5, xmm7
    vmulps	xmm3, xmm3, xmm4
    vaddps	xmm4, xmm4, xmm4
    vorps	xmm6, xmm6, xmm5
    vsubps	xmm4, xmm4, xmm3

    vmulps	xmm2, xmm2, xmm4
    vmovaps	xmm1, oword [_ps_am_sign_mask]
    vmovmskps	eax, xmm6
    vaddps	xmm2, xmm2, xmm0

    vrcpps	xmm4, xmm2
    cmp		eax, 0xf
    vmovaps	xmm0, xmm2
    vmulps	xmm2, xmm2, xmm4
    vmulps	xmm2, xmm2, xmm4
    vaddps	xmm4, xmm4, xmm4
    vsubps	xmm4, xmm4, xmm2
    jne		l_pole

    vxorps	xmm4, xmm4, xmm1

    vandps	xmm0, xmm0, xmm5
    vandnps	xmm5, xmm5, xmm4
    vorps	xmm0, xmm0, xmm5

    ret	

    l_pole:
    vmovaps	xmm7, xmm1
    vmovaps	xmm3, oword [_ps_tan_poleval]
    vandps	xmm1, xmm1, xmm0
    vorps	xmm3, xmm3, xmm1
    vandps	xmm4, xmm4, xmm6
    vandnps	xmm6, xmm6, xmm3
    vorps	xmm4, xmm4, xmm6

    vxorps	xmm4, xmm4, xmm7

    vandps	xmm0, xmm0, xmm5
    vandnps	xmm5, xmm5, xmm4
    vorps	xmm0, xmm0, xmm5

    ret	


    """

    # Assemble exactly one of the two listings.
    # NOTE(review): the meaning of the second positional argument (True) is
    # not visible in this file -- confirm against Tdasm.assemble.
    asm = Tdasm()
    if proc.AVX:
        mc = asm.assemble(avx_code, True)
    else:
        mc = asm.assemble(asm_code, True)

    return mc
Example #42
0
    _update_distance:
    mov eax, dword [esp + 8]
    mov ebx, dword [eax + hitpoint.t]

    mov edx, dword [esp + 16] ;populate new minimum distance
    mov dword [edx], ebx
    jmp _next_object
    
    _end_objects:
    add esp, 20 
    ret


"""

# Register the shapes' intersection routine with the runtime under the name
# "multiple_isect", then assemble the ASM listing defined above.
# NOTE(review): presumably ASM calls "multiple_isect" -- confirm.
asm = Tdasm()
renmas.shapes.multiple_isect_asm(runtime, "multiple_isect")
mc = asm.assemble(ASM)

def v4(v3):
    """Return ``(x, y, z, 0.0)`` built from the ``x``/``y``/``z`` attributes of ``v3``."""
    x, y, z = v3.x, v3.y, v3.z
    return (x, y, z, 0.0)

# Load the assembled program into the runtime under the name "test"; the
# returned object is used below to set the program's data by key.
ds = runtime.load("test", mc)

# Populate the inputs: one random ray (origin and direction padded to four
# components by v4), the number of shapes, and the `adrese` value.
ray = ren.random_ray()
ds["ray1.origin"] = v4(ray.origin)
ds["ray1.dir"] = v4(ray.dir)
ds["num"] = len(lst_shapes)
ds["addrs"] = adrese

# Execute the loaded program.
runtime.run("test")
Example #43
0
def asin_ps():
    """Assemble the ``fast_asin_ps`` routine and return the assembled code.

    The routine works on packed single-precision values in ``xmm0``.  It
    first replaces the input ``x`` by ``x * rsqrt((1 + x) * (1 - x))`` and
    then runs the sequence marked ``;atan`` in the listing on that value.
    The result is left in ``xmm0``; ``xmm1``-``xmm7`` are overwritten.

    An SSE listing and an equivalent AVX (v-prefixed) listing are defined;
    ``proc.AVX`` selects which one is assembled.

    Returns:
        Whatever ``Tdasm.assemble`` returns for the selected listing.
    """
    # Constants shared by both listings, each replicated across four lanes.
    data = """
    #DATA
    uint32 _ps_am_sign_mask[4] = 0x80000000, 0x80000000, 0x80000000, 0x80000000
    float _ps_am_1[4] = 1.0, 1.0, 1.0, 1.0
    float _ps_am_m1[4] = -1.0, -1.0, -1.0, -1.0
    float _ps_atan_t0[4] = -0.091646118527, -0.091646118527, -0.091646118527, -0.091646118527
    float _ps_atan_s0[4] = 1.2797564625, 1.2797564625, 1.2797564625, 1.2797564625
    float _ps_atan_s1[4] = 2.1972168858, 2.1972168858, 2.1972168858, 2.1972168858
    float _ps_atan_t1[4] = -1.395694568, -1.395694568, -1.395694568, -1.395694568
    float _ps_atan_s2[4] = 6.8193064723, 6.8193064723, 6.8193064723 ,6.8193064723
    float _ps_atan_t2[4] = -94.3939261227, -94.3939261227, -94.3939261227, -94.3939261227
    float _ps_atan_s3[4] = 28.205206687, 28.205206687, 28.205206687, 28.205206687
    float _ps_atan_t3[4] = 12.888383034, 12.888383034, 12.888383034, 12.888383034
    float _ps_am_pi_o_2[4] = 1.57079632679, 1.57079632679, 1.57079632679, 1.57079632679

    """
    # SSE listing.
    asm_code = data + """

    #CODE
    global fast_asin_ps:
    movaps xmm1, oword [_ps_am_1]
    movaps xmm2, xmm1
    addps xmm1, xmm0
    subps xmm2, xmm0
    mulps xmm1, xmm2
    rsqrtps xmm1, xmm1
    mulps xmm0, xmm1

    ;atan
    movaps	xmm5, oword [_ps_am_1]
	movaps	xmm6, oword [_ps_am_m1]
	rcpps	xmm4, xmm0

	cmpps	xmm5, xmm0, 1
	cmpps	xmm6, xmm0, 6
	movaps	xmm1, oword [_ps_atan_s0]
	orps	xmm5, xmm6

	andps	xmm4, xmm5
	movaps	xmm2, oword [_ps_atan_t0]
	movaps	xmm7, xmm5
	andnps	xmm5, xmm0
	movaps	xmm3, oword [_ps_atan_s1]
	orps	xmm4, xmm5
	movaps	xmm0, xmm4

	movaps	xmm6, oword [_ps_atan_t1]
	mulps	xmm4, xmm4

	addps	xmm1, xmm4
	movaps	xmm5, oword [_ps_atan_s2]
	rcpps	xmm1, xmm1
	mulps	xmm1, xmm2
	movaps	xmm2, oword [_ps_atan_t2]
	addps	xmm3, xmm4
	addps	xmm1, xmm3

	movaps	xmm3, oword [_ps_atan_s3]
	rcpps	xmm1, xmm1
	mulps	xmm1, xmm6
	movaps	xmm6, oword [_ps_atan_t3]
	addps	xmm5, xmm4
	addps	xmm1, xmm5

	movaps	xmm5, oword [_ps_am_sign_mask]
	rcpps	xmm1, xmm1
	mulps	xmm1, xmm2
	addps	xmm3, xmm4
	movaps	xmm4, oword [_ps_am_pi_o_2]
	mulps	xmm6, xmm0
	addps	xmm1, xmm3

	andps	xmm0, xmm5
	rcpps	xmm1, xmm1
	mulps	xmm1, xmm6

	orps	xmm0, xmm4
	subps	xmm0, xmm1

	andps	xmm0, xmm7
	andnps	xmm7, xmm1
	orps	xmm0, xmm7
	ret

    """

    # AVX listing: the same instruction sequence in three-operand form.
    avx_code = data + """

    #CODE
    global fast_asin_ps:
    vmovaps xmm1, oword [_ps_am_1]
    vmovaps xmm2, xmm1
    vaddps xmm1, xmm1, xmm0
    vsubps xmm2, xmm2, xmm0
    vmulps xmm1, xmm1, xmm2
    vrsqrtps xmm1, xmm1
    vmulps xmm0, xmm0, xmm1

    ;atan
    vmovaps	xmm5, oword [_ps_am_1]
	vmovaps	xmm6, oword [_ps_am_m1]
	vrcpps	xmm4, xmm0

	vcmpps	xmm5, xmm5, xmm0, 1
	vcmpps	xmm6, xmm6, xmm0, 6
	vmovaps	xmm1, oword [_ps_atan_s0]
	vorps	xmm5, xmm5, xmm6

	vandps	xmm4, xmm4, xmm5
	vmovaps	xmm2, oword [_ps_atan_t0]
	vmovaps	xmm7, xmm5
	vandnps	xmm5, xmm5, xmm0
	vmovaps	xmm3, oword [_ps_atan_s1]
	vorps	xmm4, xmm4, xmm5
	vmovaps	xmm0, xmm4

	vmovaps	xmm6, oword [_ps_atan_t1]
	vmulps	xmm4, xmm4, xmm4

	vaddps	xmm1, xmm1, xmm4
	vmovaps	xmm5, oword [_ps_atan_s2]
	vrcpps	xmm1, xmm1
	vmulps	xmm1, xmm1, xmm2
	vmovaps	xmm2, oword [_ps_atan_t2]
	vaddps	xmm3, xmm3, xmm4
	vaddps	xmm1, xmm1, xmm3

	vmovaps	xmm3, oword [_ps_atan_s3]
	vrcpps	xmm1, xmm1
	vmulps	xmm1, xmm1, xmm6
	vmovaps	xmm6, oword [_ps_atan_t3]
	vaddps	xmm5, xmm5, xmm4
	vaddps	xmm1, xmm1, xmm5

	vmovaps	xmm5, oword [_ps_am_sign_mask]
	vrcpps	xmm1, xmm1
	vmulps	xmm1, xmm1, xmm2
	vaddps	xmm3, xmm3, xmm4
	vmovaps	xmm4, oword [_ps_am_pi_o_2]
	vmulps	xmm6, xmm6, xmm0
	vaddps	xmm1, xmm1, xmm3

	vandps	xmm0, xmm0, xmm5
	vrcpps	xmm1, xmm1
	vmulps	xmm1, xmm1, xmm6

	vorps	xmm0, xmm0, xmm4
	vsubps	xmm0, xmm0, xmm1

	vandps	xmm0, xmm0, xmm7
	vandnps	xmm7, xmm7, xmm1
	vorps	xmm0, xmm0, xmm7
	ret

    """

    # Assemble exactly one of the two listings.
    # NOTE(review): the meaning of the second positional argument (True) is
    # not visible in this file -- confirm against Tdasm.assemble.
    asm = Tdasm()
    if proc.AVX:
        mc = asm.assemble(avx_code, True)
    else:
        mc = asm.assemble(asm_code, True)
    
    return mc
Example #44
0
        add dword [sy], 1
        add dword [y], 1
        jmp _bltrgba
        _endblt:
        ret 
    """ 
    return code

def _blt_floatrgba_code(bgra=True):
    """Return the blit assembly source matching the interpreter's word size.

    ``bgra`` is forwarded unchanged to the 64-bit or 32-bit code generator.
    """
    word_size = platform.architecture()[0]
    generator = _blt_floatrgba_code64 if word_size == '64bit' else _blt_floatrgba_code32
    return generator(bgra)

# Assemble both variants once at import time and load them into one shared
# runtime: the default (bgra=True) listing under "blt_prgba_to_bgra" ...
_asm = Tdasm()
_mc = _asm.assemble(_blt_floatrgba_code())
_runtime = Runtime()
_data_section = _runtime.load("blt_prgba_to_bgra", _mc)

# ... and the bgra=False listing under "blt_prgba_to_rgba".
_mc2 = _asm.assemble(_blt_floatrgba_code(bgra=False))
_data_section2 = _runtime.load("blt_prgba_to_rgba", _mc2)

# blt float rgba to byte bgra
def blt_prgba_to_bgra(src, dest):

    assert isinstance(src, ImagePRGBA)
    assert isinstance(dest, ImageBGRA)

    sa, spitch = src.address_info() 
    da, dpitch = dest.address_info()
Example #45
0
    ds[name+ ".origin"] = (o.x, o.y, o.z, 0.0)
    ds[name+ ".dir"] = (d.x, d.y, d.z, 0.0)

def sphere_ds(ds, sphere, name):
    """Write a sphere's fields into ``ds`` under keys prefixed with ``name``.

    Stores ``<name>.origin`` as the 4-tuple ``(x, y, z, 0.0)``,
    ``<name>.radius`` from ``sphere.radius`` and ``<name>.mat_index`` from
    ``sphere.material``.
    """
    center = sphere.origin
    origin4 = (center.x, center.y, center.z, 0.0)
    ds[name + ".origin"] = origin4
    ds[name + ".radius"] = sphere.radius
    ds[name + ".mat_index"] = sphere.material

    
# Build one test ray and one test sphere.
ray = get_ray()
sph = get_sphere()

# Register the sphere's intersection routine with the runtime under the name
# 'ray_sphere_intersection', then assemble and load the test program.
runtime = Runtime()
sph.isect_asm([runtime], 'ray_sphere_intersection')
asm = Tdasm()
mc = asm.assemble(ASM_CODE)
ds = runtime.load('test', mc)

# Copy the Python-side ray and sphere into the program's data section.
ray_ds(ds, ray, 'ray1')
sphere_ds(ds, sph, 'sph1')

runtime.run('test')

# Python-side result for the same ray and sphere.
hp = sph.isect(ray)

if hp:
    print(hp.t, ds['hp1.t'])
    print(hp.hit_point)
    print(ds['hp1.hit'])
    print(hp.normal)
Example #46
0
def get_asm():
    """Return the module-wide ``Tdasm`` instance, creating it on first use.

    On the first call a new assembler is stored in the global ``assembler``
    and the renmas macros are registered on it; later calls return that same
    object unchanged.
    """
    from renmas.macros import eq32, eq128, eq32_32, eq32_128, eq128_128, eq128_32
    from renmas.macros import dot_product, macro_if, broadcast
    global assembler
    if assembler is not None:
        return assembler

    assembler = Tdasm()
    # (macro name, handler) pairs, in registration order.
    macro_table = (
        ("eq128", eq128),
        ("eq32", eq32),
        ("eq128_32", eq128_32),
        ("eq32_128", eq32_128),
        ("eq128_128", eq128_128),
        ("eq32_32", eq32_32),
        ("dot", dot_product),
        ("if", macro_if),
        ("broadcast", broadcast),
    )
    for macro_name, handler in macro_table:
        assembler.register_macro(macro_name, handler)

    return assembler
Example #47
0
def cos_ss():
    """Assemble the ``fast_cos_ss`` routine and return the assembled code.

    The routine is written with scalar single-precision arithmetic
    (``...ss`` instructions): it reads its argument from ``xmm0`` and leaves
    the result in the low element of ``xmm0``.  ``xmm1``-``xmm7`` are
    overwritten.  It starts by clearing the sign bit of the input, adding
    pi/2 and scaling by 2/pi.

    An SSE listing and an equivalent AVX (v-prefixed) listing are defined;
    ``proc.AVX`` selects which one is assembled.

    Returns:
        Whatever ``Tdasm.assemble`` returns for the selected listing.
    """
    # Constants shared by both listings.  They are declared four-wide even
    # though the code below only loads one dword from each.
    data = """
    #DATA
    uint32 _ps_am_inv_sign_mask[4] = 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF
    float _ps_am_pi_o_2[4] = 1.57079632679, 1.57079632679, 1.57079632679, 1.57079632679
    float _ps_am_2_o_pi[4] = 0.63661977236, 0.63661977236, 0.63661977236, 0.63661977236
    uint32 _epi32_1[4] = 1, 1, 1, 1
    float _ps_am_1[4] = 1.0, 1.0, 1.0, 1.0
    uint32 _epi32_2[4] = 2, 2, 2, 2

    float _ps_sincos_p3[4] = -0.00468175413, -0.00468175413, -0.00468175413, -0.00468175413
    float _ps_sincos_p2[4] = 0.0796926262, 0.0796926262, 0.0796926262, 0.0796926262
    float _ps_sincos_p1[4] = -0.64596409750621,-0.64596409750621,-0.64596409750621,-0.64596409750621 
    float _ps_sincos_p0[4] = 1.570796326794896, 1.570796326794896, 1.570796326794896, 1.570796326794896
    """

    # SSE listing.
    asm_code = data + """

    #CODE
    global fast_cos_ss:
    movss	xmm1, dword [_ps_am_inv_sign_mask]
    movss	xmm2, dword [_ps_am_pi_o_2]
    movss	xmm3, dword [_ps_am_2_o_pi]
    andps	xmm0, xmm1
    addss	xmm0, xmm2
    mulss	xmm0, xmm3

    pxor	xmm3, xmm3
    movd	xmm5, dword [_epi32_1]
    movss	xmm4, dword [_ps_am_1]
    cvttps2dq	xmm2, xmm0
    pand	xmm5, xmm2
    movd	xmm1, dword [_epi32_2]
    pcmpeqd	xmm5, xmm3
    cvtdq2ps	xmm6, xmm2
    pand	xmm2, xmm1
    pslld	xmm2, 30 

    subss	xmm0, xmm6
    movss	xmm3, dword [_ps_sincos_p3]
    minss	xmm0, xmm4
    subss	xmm4, xmm0
    andps	xmm0, xmm5
    andnps	xmm5, xmm4
    orps	xmm0, xmm5

    movaps	xmm1, xmm0
    movss	xmm4, dword [_ps_sincos_p2]
    mulss	xmm0, xmm0
    movss	xmm5, dword [_ps_sincos_p1]
    orps	xmm1, xmm2
    movaps	xmm7, xmm0
    mulss	xmm0, xmm3
    movss	xmm6, dword [_ps_sincos_p0]
    addss	xmm0, xmm4
    mulss	xmm0, xmm7
    addss	xmm0, xmm5
    mulss	xmm0, xmm7
    addss	xmm0, xmm6
    mulss	xmm0, xmm1
    ret
    """

    # AVX listing: the same instruction sequence in three-operand form.
    avx_code = data + """

    #CODE
    global fast_cos_ss:
    vmovss	xmm1, dword [_ps_am_inv_sign_mask]
    vmovss	xmm2, dword [_ps_am_pi_o_2]
    vmovss	xmm3, dword [_ps_am_2_o_pi]
    vandps	xmm0, xmm0, xmm1
    vaddss	xmm0, xmm0, xmm2
    vmulss	xmm0, xmm0, xmm3

    vpxor	xmm3, xmm3, xmm3
    vmovd	xmm5, dword [_epi32_1]
    vmovss	xmm4, dword [_ps_am_1]
    vcvttps2dq	xmm2, xmm0
    vpand	xmm5, xmm5, xmm2
    vmovd	xmm1, dword [_epi32_2]
    vpcmpeqd	xmm5, xmm5, xmm3
    vcvtdq2ps	xmm6, xmm2
    vpand	xmm2, xmm2, xmm1
    vpslld	xmm2, xmm2, 30 

    vsubss	xmm0, xmm0, xmm6
    vmovss	xmm3, dword [_ps_sincos_p3]
    vminss	xmm0, xmm0, xmm4
    vsubss	xmm4, xmm4, xmm0
    vandps	xmm0, xmm0, xmm5
    vandnps	xmm5, xmm5, xmm4
    vorps	xmm0, xmm0, xmm5

    vmovaps	xmm1, xmm0
    vmovss	xmm4, dword [_ps_sincos_p2]
    vmulss	xmm0, xmm0, xmm0
    vmovss	xmm5, dword [_ps_sincos_p1]
    vorps	xmm1, xmm1, xmm2
    vmovaps	xmm7, xmm0
    vmulss	xmm0, xmm0, xmm3
    vmovss	xmm6, dword [_ps_sincos_p0]
    vaddss	xmm0, xmm0, xmm4
    vmulss	xmm0, xmm0, xmm7
    vaddss	xmm0, xmm0, xmm5
    vmulss	xmm0, xmm0, xmm7
    vaddss	xmm0, xmm0, xmm6
    vmulss	xmm0, xmm0, xmm1
    ret
    """

    # Assemble exactly one of the two listings.
    # NOTE(review): the meaning of the second positional argument (True) is
    # not visible in this file -- confirm against Tdasm.assemble.
    asm = Tdasm()
    if proc.AVX:
        mc = asm.assemble(avx_code, True)
    else:
        mc = asm.assemble(asm_code, True)
   
    return mc
Example #48
0
        ret 
    """
    return code


def _blt_rect_code():
    """Return the rectangle-blit assembly source for the interpreter's word size."""
    is_64bit = platform.architecture()[0] == '64bit'
    if is_64bit:
        return _blt_rect_code64()
    return _blt_rect_code32()


# Assemble the blit routine once at import time.  The assembler is put in
# ia32 mode on every platform whose reported word size is not '64bit'.
bits = platform.architecture()[0]
_mc = Tdasm().assemble(_blt_rect_code(), ia32=(bits != '64bit'))
_runtime = Runtime()
_data_section = _runtime.load("bltrgba", _mc)


def blt_image(src, dest, sx=0, sy=0, sw=-1, sh=-1, dx=0, dy=0, fliped=False):
    """
        Transfer block of image from source to destination.

        @param src - source image 
        @param dest - destination image
        @param sx - x position in source image
        @param sy - y position in source image
        @param sw - width of source image
Example #49
0
def log_ps():
    """Assemble the ``fast_log_ps`` routine and return the assembled code.

    The routine works on packed single-precision values: it reads its
    argument from ``xmm0`` and leaves the result in ``xmm0``, overwriting
    ``xmm1``-``xmm7``.  Its first instruction clamps the input from below to
    the bit pattern 0x00800000 ("cut off denormalized stuff" in the listing).

    An SSE listing and an equivalent AVX (v-prefixed) listing are defined;
    ``proc.AVX`` selects which one is assembled.

    Returns:
        Whatever ``Tdasm.assemble`` returns for the selected listing.
    """
    # Constants shared by both listings, each replicated across four lanes.
    data = """
    #DATA
    float _ps_am_1[4] = 1.0, 1.0, 1.0, 1.0
    uint32 _ps_am_min_norm_pos[4] = 0x00800000, 0x00800000, 0x00800000, 0x00800000
    uint32 _ps_am_inv_mant_mask[4] = 0x807FFFFF, 0x807FFFFF, 0x807FFFFF, 0x807FFFFF 
    uint32 _epi32_0x7f[4] = 0x7F, 0x7F, 0x7F, 0x7F
    float _ps_log_p0[4] = -0.789580278884, -0.789580278884, -0.789580278884, -0.789580278884
    float _ps_log_q0[4] = -35.6722798256, -35.6722798256, -35.6722798256, -35.6722798256
    float _ps_log_p1[4] = 16.38666456995, 16.38666456995, 16.38666456995, 16.38666456995
    float _ps_log_q1[4] = 312.0937663722, 312.0937663722, 312.0937663722, 312.0937663722
    float _ps_log_p2[4] = -64.14099529587, -64.14099529587, -64.14099529587, -64.14099529587
    float _ps_log_q2[4] = -769.69194355046, -769.69194355046, -769.69194355046, -769.69194355046
    float _ps_log_c0[4] = 0.6931471805599, 0.6931471805599, 0.6931471805599, 0.6931471805599 

    """
    # SSE listing.
    asm_code = data + """

    #CODE
    global fast_log_ps:
    maxps	xmm0, oword [_ps_am_min_norm_pos]  ; cut off denormalized stuff
    movaps	xmm1, oword [_ps_am_1]
    movaps	xmm3, xmm0

    andps	xmm0, oword [_ps_am_inv_mant_mask]
    orps	xmm0, xmm1

    movaps	xmm4, xmm0
    subps	xmm0, xmm1
    addps	xmm4, xmm1
    psrld	xmm3, 23
    rcpps	xmm4, xmm4
    mulps	xmm0, xmm4
    psubd	xmm3, oword [_epi32_0x7f]
    addps	xmm0, xmm0

    movaps	xmm2, xmm0
    mulps	xmm0, xmm0

    movaps	xmm4, oword [_ps_log_p0]
    movaps	xmm6, oword [_ps_log_q0]

    mulps	xmm4, xmm0
    movaps	xmm5, oword [_ps_log_p1]
    mulps	xmm6, xmm0
    movaps	xmm7, oword [_ps_log_q1]

    addps	xmm4, xmm5
    addps	xmm6, xmm7

    movaps	xmm5, oword [_ps_log_p2]
    mulps	xmm4, xmm0
    movaps	xmm7, oword [_ps_log_q2]
    mulps	xmm6, xmm0

    addps	xmm4, xmm5
    movaps	xmm5, oword [_ps_log_c0]
    addps	xmm6, xmm7
    cvtdq2ps	xmm1, xmm3

    mulps	xmm0, xmm4
    rcpps	xmm6, xmm6

    mulps	xmm0, xmm6
    mulps	xmm0, xmm2

    mulps	xmm1, xmm5

    addps	xmm0, xmm2
    addps	xmm0, xmm1

    ret	


    """

    # AVX listing: the same instruction sequence in three-operand form.
    avx_code = data + """

    #CODE
    global fast_log_ps:
    vmaxps	xmm0, xmm0, oword [_ps_am_min_norm_pos]  ; cut off denormalized stuff
    vmovaps	xmm1, oword [_ps_am_1]
    vmovaps	xmm3, xmm0

    vandps	xmm0, xmm0, oword [_ps_am_inv_mant_mask]
    vorps	xmm0, xmm0, xmm1

    vmovaps	xmm4, xmm0
    vsubps	xmm0, xmm0, xmm1
    vaddps	xmm4, xmm4, xmm1
    vpsrld	xmm3, xmm3, 23
    vrcpps	xmm4, xmm4
    vmulps	xmm0, xmm0, xmm4
    vpsubd	xmm3, xmm3, oword [_epi32_0x7f]
    vaddps	xmm0, xmm0, xmm0

    vmovaps	xmm2, xmm0
    vmulps	xmm0, xmm0, xmm0

    vmovaps	xmm4, oword [_ps_log_p0]
    vmovaps	xmm6, oword [_ps_log_q0]

    vmulps	xmm4, xmm4, xmm0
    vmovaps	xmm5, oword [_ps_log_p1]
    vmulps	xmm6, xmm6, xmm0
    vmovaps	xmm7, oword [_ps_log_q1]

    vaddps	xmm4, xmm4, xmm5
    vaddps	xmm6, xmm6, xmm7

    vmovaps	xmm5, oword [_ps_log_p2]
    vmulps	xmm4, xmm4, xmm0
    vmovaps	xmm7, oword [_ps_log_q2]
    vmulps	xmm6, xmm6, xmm0

    vaddps	xmm4, xmm4, xmm5
    vmovaps	xmm5, oword [_ps_log_c0]
    vaddps	xmm6, xmm6, xmm7
    vcvtdq2ps	xmm1, xmm3

    vmulps	xmm0, xmm0, xmm4
    vrcpps	xmm6, xmm6

    vmulps	xmm0, xmm0, xmm6
    vmulps	xmm0, xmm0, xmm2

    vmulps	xmm1, xmm1, xmm5

    vaddps	xmm0, xmm0, xmm2
    vaddps	xmm0, xmm0, xmm1

    ret	


    """

    # Assemble exactly one of the two listings.
    # NOTE(review): the meaning of the second positional argument (True) is
    # not visible in this file -- confirm against Tdasm.assemble.
    asm = Tdasm()
    if proc.AVX:
        mc = asm.assemble(avx_code, True)
    else:
        mc = asm.assemble(asm_code, True)

    return mc
Example #50
0
def cos_ps():
    """Assemble the ``fast_cos_ps`` routine and return the assembled code.

    The routine works on packed single-precision values: it reads its
    argument from ``xmm0`` and leaves the result in ``xmm0``, overwriting
    ``xmm1``-``xmm7``.  It starts by clearing the sign bits of the input,
    adding pi/2 and scaling by 2/pi.

    An SSE listing and an equivalent AVX (v-prefixed) listing are defined;
    ``proc.AVX`` selects which one is assembled.

    Returns:
        Whatever ``Tdasm.assemble`` returns for the selected listing.
    """

    # Constants shared by both listings, each replicated across four lanes.
    data = """
    #DATA
    uint32 _ps_am_inv_sign_mask[4] = 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF
    float _ps_am_pi_o_2[4] = 1.57079632679, 1.57079632679, 1.57079632679, 1.57079632679
    float _ps_am_2_o_pi[4] = 0.63661977236, 0.63661977236, 0.63661977236, 0.63661977236
    uint32 _epi32_1[4] = 1, 1, 1, 1
    float _ps_am_1[4] = 1.0, 1.0, 1.0, 1.0
    uint32 _epi32_2[4] = 2, 2, 2, 2

    float _ps_sincos_p3[4] = -0.00468175413, -0.00468175413, -0.00468175413, -0.00468175413
    float _ps_sincos_p2[4] = 0.0796926262, 0.0796926262, 0.0796926262, 0.0796926262
    float _ps_sincos_p1[4] = -0.64596409750621,-0.64596409750621,-0.64596409750621,-0.64596409750621 
    float _ps_sincos_p0[4] = 1.570796326794896, 1.570796326794896, 1.570796326794896, 1.570796326794896
    """

    # SSE listing.
    asm_code = data + """

    #CODE
    global fast_cos_ps:
    andps	xmm0, oword [_ps_am_inv_sign_mask]
    addps	xmm0, oword [_ps_am_pi_o_2]
    mulps	xmm0, oword [_ps_am_2_o_pi]

    pxor	xmm3, xmm3
    movdqa	xmm5, oword [_epi32_1]
    movaps	xmm4, oword [_ps_am_1]
    cvttps2dq	xmm2, xmm0
    pand	xmm5, xmm2
    pcmpeqd	xmm5, xmm3
    cvtdq2ps	xmm6, xmm2
    pand	xmm2, oword [_epi32_2]
    pslld	xmm2, 30 

    subps	xmm0, xmm6
    minps	xmm0, xmm4
    subps	xmm4, xmm0
    andps	xmm0, xmm5
    andnps	xmm5, xmm4
    orps	xmm0, xmm5

    movaps	xmm1, xmm0
    mulps	xmm0, xmm0
    orps	xmm1, xmm2
    movaps	xmm7, xmm0
    mulps	xmm0, oword [_ps_sincos_p3]
    addps	xmm0, oword [_ps_sincos_p2]
    mulps	xmm0, xmm7
    addps	xmm0, oword [_ps_sincos_p1]
    mulps	xmm0, xmm7
    addps	xmm0, oword [_ps_sincos_p0]
    mulps	xmm0, xmm1
    ret
    """

    # AVX listing: the same instruction sequence in three-operand form.
    avx_code = data + """

    #CODE
    global fast_cos_ps:
    vandps	xmm0, xmm0, oword [_ps_am_inv_sign_mask]
    vaddps	xmm0, xmm0, oword [_ps_am_pi_o_2]
    vmulps	xmm0, xmm0, oword [_ps_am_2_o_pi]

    vpxor	xmm3, xmm3, xmm3
    vmovdqa	xmm5, oword [_epi32_1]
    vmovaps	xmm4, oword [_ps_am_1]
    vcvttps2dq	xmm2, xmm0
    vpand	xmm5, xmm5, xmm2
    vpcmpeqd	xmm5, xmm5, xmm3
    vcvtdq2ps	xmm6, xmm2
    vpand	xmm2, xmm2, oword [_epi32_2]
    vpslld	xmm2, xmm2, 30 

    vsubps	xmm0, xmm0, xmm6
    vminps	xmm0, xmm0, xmm4
    vsubps	xmm4, xmm4, xmm0
    vandps	xmm0, xmm0, xmm5
    vandnps	xmm5, xmm5, xmm4
    vorps	xmm0, xmm0, xmm5

    vmovaps	xmm1, xmm0
    vmulps	xmm0, xmm0, xmm0
    vorps	xmm1, xmm1, xmm2
    vmovaps	xmm7, xmm0
    vmulps	xmm0, xmm0, oword [_ps_sincos_p3]
    vaddps	xmm0, xmm0, oword [_ps_sincos_p2]
    vmulps	xmm0, xmm0, xmm7
    vaddps	xmm0, xmm0, oword [_ps_sincos_p1]
    vmulps	xmm0, xmm0, xmm7
    vaddps	xmm0, xmm0, oword [_ps_sincos_p0]
    vmulps	xmm0, xmm0, xmm1
    ret
    """

    # Assemble exactly one of the two listings.
    # NOTE(review): the meaning of the second positional argument (True) is
    # not visible in this file -- confirm against Tdasm.assemble.
    asm = Tdasm()
    if proc.AVX:
        mc = asm.assemble(avx_code, True)
    else:
        mc = asm.assemble(asm_code, True)

    return mc
Example #51
0
            #END
        """
    else:
        code = """
            #DATA
            uint32 sa, da, n

            #CODE
            mov ecx, dword [n]
            mov esi, dword [sa] 
            mov edi, dword [da]
            rep movs byte [edi], byte [esi]

            #END
        """
    return code 

# Assemble the copy routine once at import time and load it into a private
# runtime under the name "memcpy"; _data_section is the handle used to set
# the routine's sa/da/n inputs.
_mc = Tdasm().assemble(_memcpy_code())
_runtime = Runtime()
_data_section = _runtime.load("memcpy", _mc)

def memcpy(da, sa, n):
    """
        Copy n bytes from source address(sa) to destination address(da).

        The three arguments are written to the loaded routine's data section
        (in the order da, sa, n) and the routine is then executed.
    """
    for field, value in (("da", da), ("sa", sa), ("n", n)):
        _data_section[field] = value
    _runtime.run("memcpy")

Example #52
0
def sincos_ss():
    """Assemble the ``fast_sincos_ss`` routine and return the assembled code.

    The routine is written with scalar single-precision arithmetic and reads
    its argument from ``xmm0``.  It evaluates two polynomials in parallel and
    finishes with one result in ``xmm0`` and the other in ``xmm6``; the
    memory stores at the end of the listing are commented out, so nothing is
    written to memory.  ``xmm1``-``xmm5`` and ``xmm7`` are overwritten.

    NOTE(review): presumably ``xmm0`` holds the sine (it is the value that
    gets the input's sign bit applied) and ``xmm6`` the cosine -- confirm
    against the callers.

    An SSE listing and an equivalent AVX (v-prefixed) listing are defined;
    ``proc.AVX`` selects which one is assembled.

    Returns:
        Whatever ``Tdasm.assemble`` returns for the selected listing.
    """
    # Constants shared by both listings.  They are declared four-wide even
    # though the code below only loads one dword from each.
    data = """
    #DATA
    uint32 _ps_am_inv_sign_mask[4] = 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF
    uint32 _ps_am_sign_mask[4] = 0x80000000, 0x80000000, 0x80000000, 0x80000000
    float _ps_am_pi_o_2[4] = 1.57079632679, 1.57079632679, 1.57079632679, 1.57079632679
    float _ps_am_2_o_pi[4] = 0.63661977236, 0.63661977236, 0.63661977236, 0.63661977236
    uint32 _epi32_1[4] = 1, 1, 1, 1
    float _ps_am_1[4] = 1.0, 1.0, 1.0, 1.0
    uint32 _epi32_2[4] = 2, 2, 2, 2

    float _ps_sincos_p3[4] = -0.00468175413, -0.00468175413, -0.00468175413, -0.00468175413
    float _ps_sincos_p2[4] = 0.0796926262, 0.0796926262, 0.0796926262, 0.0796926262
    float _ps_sincos_p1[4] = -0.64596409750621,-0.64596409750621,-0.64596409750621,-0.64596409750621 
    float _ps_sincos_p0[4] = 1.570796326794896, 1.570796326794896, 1.570796326794896, 1.570796326794896
    """
    # SSE listing.
    asm_code = data + """
    #CODE
    global fast_sincos_ss:
    movaps	xmm7, xmm0
    movss	xmm1, dword [_ps_am_inv_sign_mask]
    movss	xmm2, dword [_ps_am_sign_mask]
    movss	xmm3, dword [_ps_am_2_o_pi]
    andps	xmm0, xmm1
    andps	xmm7, xmm2
    mulss	xmm0, xmm3

    pxor	xmm3, xmm3
    movd	xmm5, dword [_epi32_1]
    movss	xmm4, dword [_ps_am_1]

    cvttps2dq	xmm2, xmm0
    pand	xmm5, xmm2
    movd	xmm1, dword [_epi32_2]
    pcmpeqd	xmm5, xmm3
    movd	xmm3, dword [_epi32_1]
    cvtdq2ps	xmm6, xmm2
    paddd	xmm3, xmm2
    pand	xmm2, xmm1
    pand	xmm3, xmm1
    subss	xmm0, xmm6
    pslld	xmm2, 30
    minss	xmm0, xmm4
    ;mov		eax, [esp + 4 + 16]
    ;mov		edx, [esp + 4 + 16 + 4]
    subss	xmm4, xmm0
    pslld	xmm3, 30

    movaps	xmm6, xmm4
    xorps	xmm2, xmm7
    movaps	xmm7, xmm5
    andps	xmm6, xmm7
    andnps	xmm7, xmm0
    andps	xmm0, xmm5
    andnps	xmm5, xmm4
    movss	xmm4, dword [_ps_sincos_p3]
    orps	xmm6, xmm7
    orps	xmm0, xmm5
    movss	xmm5, dword [_ps_sincos_p2]

    movaps	xmm1, xmm0
    movaps	xmm7, xmm6
    mulss	xmm0, xmm0
    mulss	xmm6, xmm6
    orps	xmm1, xmm2
    orps	xmm7, xmm3
    movaps	xmm2, xmm0
    movaps	xmm3, xmm6
    mulss	xmm0, xmm4
    mulss	xmm6, xmm4
    movss	xmm4, dword [_ps_sincos_p1]
    addss	xmm0, xmm5
    addss	xmm6, xmm5
    movss	xmm5, dword [_ps_sincos_p0]
    mulss	xmm0, xmm2
    mulss	xmm6, xmm3
    addss	xmm0, xmm4
    addss	xmm6, xmm4
    mulss	xmm0, xmm2
    mulss	xmm6, xmm3
    addss	xmm0, xmm5
    addss	xmm6, xmm5
    mulss	xmm0, xmm1
    mulss	xmm6, xmm7

    ;use full stores since caller might reload with full loads
    ;movaps	[eax], xmm0
    ;movaps	[edx], xmm6

    ret	
    """

    # AVX listing: the same instruction sequence in three-operand form.
    avx_code = data + """
    #CODE
    global fast_sincos_ss:
    vmovaps	xmm7, xmm0
    vmovss	xmm1, dword [_ps_am_inv_sign_mask]
    vmovss	xmm2, dword [_ps_am_sign_mask]
    vmovss	xmm3, dword [_ps_am_2_o_pi]
    vandps	xmm0, xmm0, xmm1
    vandps	xmm7, xmm7, xmm2
    vmulss	xmm0, xmm0, xmm3

    vpxor	xmm3, xmm3, xmm3
    vmovd	xmm5, dword [_epi32_1]
    vmovss	xmm4, dword [_ps_am_1]

    vcvttps2dq	xmm2, xmm0
    vpand	xmm5, xmm5, xmm2
    vmovd	xmm1, dword [_epi32_2]
    vpcmpeqd	xmm5, xmm5, xmm3
    vmovd	xmm3, dword [_epi32_1]
    vcvtdq2ps	xmm6, xmm2
    vpaddd	xmm3, xmm3, xmm2
    vpand	xmm2, xmm2, xmm1
    vpand	xmm3, xmm3, xmm1
    vsubss	xmm0, xmm0, xmm6
    vpslld	xmm2, xmm2, 30
    vminss	xmm0, xmm0, xmm4
    ;mov		eax, [esp + 4 + 16]
    ;mov		edx, [esp + 4 + 16 + 4]
    vsubss	xmm4, xmm4, xmm0
    vpslld	xmm3, xmm3, 30

    vmovaps	xmm6, xmm4
    vxorps	xmm2, xmm2, xmm7
    vmovaps	xmm7, xmm5
    vandps	xmm6, xmm6, xmm7
    vandnps	xmm7, xmm7, xmm0
    vandps	xmm0, xmm0, xmm5
    vandnps	xmm5, xmm5, xmm4
    vmovss	xmm4, dword [_ps_sincos_p3]
    vorps	xmm6, xmm6, xmm7
    vorps	xmm0, xmm0, xmm5
    vmovss	xmm5, dword [_ps_sincos_p2]

    vmovaps	xmm1, xmm0
    vmovaps	xmm7, xmm6
    vmulss	xmm0, xmm0, xmm0
    vmulss	xmm6, xmm6, xmm6
    vorps	xmm1, xmm1, xmm2
    vorps	xmm7, xmm7, xmm3
    vmovaps	xmm2, xmm0
    vmovaps	xmm3, xmm6
    vmulss	xmm0, xmm0, xmm4
    vmulss	xmm6, xmm6, xmm4
    vmovss	xmm4, dword [_ps_sincos_p1]
    vaddss	xmm0, xmm0, xmm5
    vaddss	xmm6, xmm6, xmm5
    vmovss	xmm5, dword [_ps_sincos_p0]
    vmulss	xmm0, xmm0, xmm2
    vmulss	xmm6, xmm6, xmm3
    vaddss	xmm0, xmm0, xmm4
    vaddss	xmm6, xmm6, xmm4
    vmulss	xmm0, xmm0, xmm2
    vmulss	xmm6, xmm6, xmm3
    vaddss	xmm0, xmm0, xmm5
    vaddss	xmm6, xmm6, xmm5
    vmulss	xmm0, xmm0, xmm1
    vmulss	xmm6, xmm6, xmm7

    ;use full stores since caller might reload with full loads
    ;movaps	[eax], xmm0
    ;movaps	[edx], xmm6

    ret	
    """

    # Assemble exactly one of the two listings.
    # NOTE(review): the meaning of the second positional argument (True) is
    # not visible in this file -- confirm against Tdasm.assemble.
    asm = Tdasm()
    if proc.AVX:
        mc = asm.assemble(avx_code, True)
    else:
        mc = asm.assemble(asm_code, True)
    
    return mc
Example #53
0
from tdasm import Tdasm
import renmas.core
from renmas.core import AsmStructures


# Query the host CPU's instruction-set support once, at import time, through
# an assembler instance; the results are exposed as module-level flags.
asm = Tdasm()
AVX = asm.avx_supported()
# NOTE(review): the detected value is overwritten unconditionally on the next
# line, so AVX is always False here regardless of the CPU. This looks like a
# leftover debugging switch (compare the commented-out overrides below) --
# confirm whether it is intentional.
AVX = False 

SSSE3 = asm.cpu["ssse3"]
#SSSE3 = False
SSE3 = asm.cpu["sse3"]
SSE41 = asm.cpu["sse41"]
#SSE41 = False
SSE2 = asm.cpu["sse2"]

def structs(*lst_structs):
    """Return the concatenated definitions of the named assembly structures.

    Each name is looked up through ``AsmStructures.get_struct`` and the
    results are joined in argument order.  With no arguments the result is
    the empty string.

    Raises:
        ValueError: if ``get_struct`` returns ``None`` for one of the names.
    """
    registry = AsmStructures()
    definitions = []

    for struct_name in lst_structs:
        definition = registry.get_struct(struct_name)
        if definition is None:
            raise ValueError("Structure " + str(struct_name) + " doesn't exist!")
        definitions.append(definition)

    return "".join(definitions)
    
# Module-level assembler slot; starts out unset.
# NOTE(review): presumably filled in lazily by an accessor elsewhere -- confirm.
assembler = None
Example #54
0
            code += lst_inst2[l] + "\n"
        for l in range(len(lst_inst2), len(lst_inst1)):
            code += lst_inst1[l] + "\n"

    return code

def arth128_32(tokens):
    """Delegate to ``arth_mix`` with the size arguments 128 and 32, in that order."""
    sizes = (128, 32)
    return arth_mix(tokens, *sizes)

def arth32_128(tokens):
    """Delegate to ``arth_mix`` with the size arguments 32 and 128, in that order."""
    sizes = (32, 128)
    return arth_mix(tokens, *sizes)

def arth128_128(tokens):
    """Delegate to ``arth_mix`` with both size arguments set to 128."""
    sizes = (128, 128)
    return arth_mix(tokens, *sizes)

def arth32_32(tokens):
    """Delegate to ``arth_mix`` with both size arguments set to 32."""
    sizes = (32, 32)
    return arth_mix(tokens, *sizes)

if __name__ == "__main__":
    # Manual smoke test: register the two macros, assemble ASM_CODE, run it
    # and print the "rez" entry of its data section.
    asm = Tdasm()
    asm.register_macro("arth128", arth128)
    asm.register_macro("arth32", arth32)
    mc = asm.assemble(ASM_CODE)

    run = Runtime()
    ds = run.load("test", mc)
    run.run("test")

    print(ds["rez"])

Example #55
0
    if gamma < 0.0: return False

    if beta + gamma > 1.0: return False

    e3 = a * p - b * r + d * s
    t = e3 * inv_denom

    if t < 0.00001: return False  # self-intersection

    return (beta, gamma, t)


# Generate the assembly source for the intersection routine, assemble it and
# load it into a fresh runtime under the name 'ray_triangle'.
# NOTE(review): the meaning of assemble()'s second argument (True) is not
# visible in this file -- confirm against Tdasm.assemble.
code = ray_triangle_intersection("ray_triangle_intersection")

asm = Tdasm()
mc = asm.assemble(code, True)

runtime = Runtime()
runtime.load('ray_triangle', mc)

# xmm3 - origin
# xmm4 - direction
# xmm5 - p0
# xmm6 - p1
# xmm7 - p2
# edx - min_distance

test_code = """
#DATA
float p0[4]