def D_asm(self, runtime):
        """Build the assembly fragment that evaluates the distribution term D.

        The fragment is returned as source text; it is not assembled here.
        The generated code expects eax to point to the hitpoint and ebx to
        point to the half vector H, and leaves its scalar result in xmm0:

            alpha = fast_acos_ss(dot(hitpoint.normal, H))
            D     = fast_exp_ss(-(fast_tan_ss(alpha) / m) ** 2)
                    / (m * m * fast_cos_ss(alpha) ** 4)

        Every data label is prefixed with "DistBeck" + str(hash(self)).
        The ``m`` slot is declared without an initial value in this fragment.

        Args:
            runtime: passed to ``util.load_func`` so the four math helpers
                called by the fragment are loaded.

        Returns:
            The ``#DATA`` / ``#CODE`` assembly source as a string.
        """
        util.load_func(runtime, "fast_acos_ss", "fast_cos_ss", "fast_tan_ss", "fast_exp_ss")
        # eax pointer to hitpoint
        # ebx pointer to H (half vector)
        prefix = "DistBeck" + str(hash(self))
        m_var = prefix + "m"
        minus_one_var = prefix + "minus_one"
        alpha_var = prefix + "alpha"
        temp_var = prefix + "temp"

        # Every emitted line ends with " \n"; the terminator is appended once
        # in the join below.
        statements = [
            "float " + m_var,
            "float " + minus_one_var + " = -1.0",
            "float " + alpha_var,
            "float " + temp_var,
            "#CODE",
            "macro dot xmm0 = eax.hitpoint.normal * ebx",
            "call fast_acos_ss",
            "macro eq32 " + alpha_var + " = xmm0",
            "call fast_tan_ss",
            "macro eq32 xmm0 = xmm0 / " + m_var,
            "macro eq32 xmm0 = xmm0 * xmm0",
            "macro eq32 xmm0 = xmm0 * " + minus_one_var,
            "call fast_exp_ss",
            # keep the exponential while alpha is reused for the cosine
            "macro eq32 " + temp_var + " = xmm0",
            "macro eq32 xmm0 = " + alpha_var,
            "call fast_cos_ss",
            "macro eq32 xmm0 = xmm0 * xmm0",
            "macro eq32 xmm1 = xmm0 * xmm0",
            "macro eq32 xmm1 = xmm1 * " + m_var,
            "macro eq32 xmm1 = xmm1 * " + m_var,
            "macro eq32 xmm0 = " + temp_var,
            "macro eq32 xmm0 = xmm0 / xmm1",
        ]

        return "\n        #DATA\n        " + "".join(
            statement + " \n" for statement in statements
        )
Exemple #2
0
    def D_asm(self, runtime):
        """Return assembly source that computes the distribution term D.

        Generated-code contract: eax points to the hitpoint, ebx points to
        the half vector H, and the scalar result is left in xmm0. With
        ``alpha = acos(normal . H)`` the fragment evaluates
        ``exp(-(tan(alpha) / m)^2) / (m^2 * cos(alpha)^4)`` through the
        ``fast_*_ss`` helpers.

        All data labels carry the prefix "DistBeck" + str(hash(self)).

        Args:
            runtime: forwarded to ``util.load_func`` to load the helpers the
                fragment calls.

        Returns:
            The assembly source text (``#DATA`` and ``#CODE`` sections).
        """
        util.load_func(runtime, "fast_acos_ss", "fast_cos_ss", "fast_tan_ss",
                       "fast_exp_ss")
        #eax pointer to hitpoint
        #ebx pointer to H (half vector)
        template = (
            "\n        #DATA\n        "
            "float {p}m \n"
            "float {p}minus_one = -1.0 \n"
            "float {p}alpha \n"
            "float {p}temp \n"
            "#CODE \n"
            "macro dot xmm0 = eax.hitpoint.normal * ebx \n"
            "call fast_acos_ss \n"
            "macro eq32 {p}alpha = xmm0 \n"
            "call fast_tan_ss \n"
            "macro eq32 xmm0 = xmm0 / {p}m \n"
            "macro eq32 xmm0 = xmm0 * xmm0 \n"
            "macro eq32 xmm0 = xmm0 * {p}minus_one \n"
            "call fast_exp_ss \n"
            "macro eq32 {p}temp = xmm0 \n"
            "macro eq32 xmm0 = {p}alpha \n"
            "call fast_cos_ss \n"
            "macro eq32 xmm0 = xmm0 * xmm0 \n"
            "macro eq32 xmm1 = xmm0 * xmm0 \n"
            "macro eq32 xmm1 = xmm1 * {p}m \n"
            "macro eq32 xmm1 = xmm1 * {p}m \n"
            "macro eq32 xmm0 = {p}temp \n"
            "macro eq32 xmm0 = xmm0 / xmm1 \n"
        )

        # The per-instance prefix keeps labels of different instances apart.
        return template.format(p="DistBeck" + str(hash(self)))
Exemple #3
0
    def light_sample_asm(self, runtime):
        """Assemble and load the routine that writes a light sample into a hitpoint.

        The generated routine expects eax to point to a hitpoint structure.
        It calls ``random`` and computes
        ``point + edge_a * r[0] + edge_b * r[1]``, where ``r`` is the value
        ``random`` leaves in xmm0. That result is stored in
        ``hitpoint.light_sample``, and the ``normal`` and ``pdf`` data slots
        are copied to ``hitpoint.light_normal`` and ``hitpoint.light_pdf``.

        Side effects on ``self``: ``self.ds`` is set to the value returned by
        ``runtime.load``, ``self._populate_ds()`` is called, and
        ``self.sample_ptr`` is set to ``runtime.modules[name][0]``.

        Args:
            runtime: object used to load the ``random`` helper and the
                assembled module.
        """
        util.load_func(runtime, "random")
        # eax - pointer to hitpoint structure
        asm_structs = util.structs("hitpoint")
        ASM = """
        #DATA
        """
        # eax is saved in hp_ptr before `call random` and reloaded afterwards.
        ASM += asm_structs + """
        float normal[4]
        float edge_a[4]
        float edge_b[4]
        float point[4]
        float pdf
        uint32 hp_ptr
        #CODE
        mov dword [hp_ptr], eax
        call random
        macro eq128 xmm1 = xmm0
        macro broadcast xmm0 = xmm0[0] 
        macro broadcast xmm1 = xmm1[1]
        macro eq128 xmm0 = xmm0 * edge_a {xmm1}
        macro eq128 xmm1 = xmm1 * edge_b {xmm0}
        macro eq128 xmm0 = xmm0 + point {xmm1}
        macro eq128 xmm0 = xmm0 + xmm1 
        mov eax, dword [hp_ptr]
        macro eq128 eax.hitpoint.light_sample = xmm0
        macro eq128 eax.hitpoint.light_normal = normal
        macro eq32 eax.hitpoint.light_pdf = pdf
        ret
        
        """
        assembler = util.get_asm()
        mc = assembler.assemble(ASM, True)
        #mc.print_machine_code()
        # util.unique() makes the module name distinct for each load.
        name = "recangle_sample" + str(util.unique())

        self.ds = runtime.load(name, mc)
        self._populate_ds()

        #FIXME - add method to runtime class so we can ask runtime for address of module
        self.sample_ptr = runtime.modules[name][0]
Exemple #4
0
    def brdf_asm(self, runtime):
        """Return the assembly fragment that evaluates this BRDF.

        The fragment is returned as source text; it is not assembled here.
        The generated code expects eax to point to the hitpoint and leaves a
        four-lane result in xmm0:

            r = 2 * ndotwi * normal - wi
            d = dot(r, wo)
            result = fast_pow_ss(d, e) * spectrum / ndotwi   if d > 0
                     zero_spectrum                           otherwise

        When ``self.k`` is not None the non-zero result is also multiplied
        by the ``k`` data slot. All labels are prefixed with
        "phong" + str(hash(self)).

        The hitpoint pointer is saved in ``<prefix>hp_ptr`` on entry and
        reloaded into eax right after ``call fast_pow_ss``, because the code
        that follows reads ``eax.hitpoint.ndotwi``. The original fragment
        saved the pointer but never restored it.

        Args:
            runtime: passed to ``util.load_func`` to load ``fast_pow_ss``.

        Returns:
            The ``#DATA`` / ``#CODE`` assembly source as a string.
        """
        util.load_func(runtime, "fast_pow_ss")
        # eax pointer to hitpoint
        name = "phong" + str(hash(self))

        ASM = """
        #DATA
        """
        ASM += "float " + name + "spectrum[4] \n"
        ASM += "float " + name + "k[4] \n"
        ASM += "float " + name + "zero_spectrum[4] = 0.0, 0.0, 0.0, 0.0 \n"
        ASM += "float " + name + "e\n"
        ASM += "float " + name + "two = 2.0 \n"
        ASM += "uint32 " + name + "hp_ptr \n"
        ASM += "#CODE \n"
        ASM += "mov dword [" + name + "hp_ptr], eax \n"
        ASM += "macro eq32 xmm0 = " + name + "two * eax.hitpoint.ndotwi \n"
        ASM += "macro broadcast xmm0 = xmm0[0] \n"
        ASM += "macro eq128 xmm0 = xmm0 * eax.hitpoint.normal\n"
        ASM += "macro eq128 xmm0 = xmm0 - eax.hitpoint.wi \n"
        ASM += "macro dot xmm0 = xmm0 * eax.hitpoint.wo \n"
        ASM += "macro if xmm0 > " + name + "zero_spectrum goto " + name + "accept \n"
        ASM += "macro eq128 xmm0 = " + name + "zero_spectrum \n"
        ASM += "jmp " + name + "end \n"

        ASM += name + "accept:\n"
        ASM += "macro eq32 xmm1 = " + name + "e\n"
        ASM += "call fast_pow_ss \n"
        # NOTE(review): nothing visible here shows that fast_pow_ss preserves
        # eax, so restore the saved hitpoint pointer before it is used again.
        ASM += "mov eax, dword [" + name + "hp_ptr] \n"
        ASM += "macro broadcast xmm0 = xmm0[0] \n"
        ASM += "macro eq128 xmm0 = xmm0 *" + name + "spectrum \n"
        ASM += "macro eq32 xmm1 = eax.hitpoint.ndotwi \n"
        ASM += "macro broadcast xmm1 = xmm1[0] \n"
        ASM += "macro eq128 xmm0 = xmm0 / xmm1 \n"
        if self.k is not None:
            ASM += "macro eq128 xmm0 = xmm0 * " + name + "k\n"

        ASM += name + "end: \n"

        return ASM
Exemple #5
0
    def brdf_asm(self, runtime):
        """Build the assembly source for this BRDF's evaluation fragment.

        Generated-code contract: eax points to the hitpoint on entry and the
        four-lane result is left in xmm0. The fragment forms
        ``r = 2 * ndotwi * normal - wi`` and ``d = dot(r, wo)``. If ``d`` is
        not greater than zero the result is ``zero_spectrum``. Otherwise it
        is ``fast_pow_ss(d, e) * spectrum / ndotwi``, additionally multiplied
        by ``k`` when ``self.k`` is not None.

        Labels are prefixed with "phong" + str(hash(self)).

        The saved hitpoint pointer (``<prefix>hp_ptr``) is reloaded into eax
        after ``call fast_pow_ss``. The original code stored it but never
        read it back, although ``eax.hitpoint.ndotwi`` is used after the
        call.

        Args:
            runtime: passed to ``util.load_func`` to load ``fast_pow_ss``.

        Returns:
            The assembly source text (``#DATA`` and ``#CODE`` sections).
        """
        util.load_func(runtime, "fast_pow_ss")
        #eax pointer to hitpoint
        name = "phong" + str(hash(self))

        chunks = [
            "\n        #DATA\n        ",
            "float " + name + "spectrum[4] \n",
            "float " + name + "k[4] \n",
            "float " + name + "zero_spectrum[4] = 0.0, 0.0, 0.0, 0.0 \n",
            "float " + name + "e\n",
            "float " + name + "two = 2.0 \n",
            "uint32 " + name + "hp_ptr \n",
            "#CODE \n",
            "mov dword [" + name + "hp_ptr], eax \n",
            "macro eq32 xmm0 = " + name + "two * eax.hitpoint.ndotwi \n",
            "macro broadcast xmm0 = xmm0[0] \n",
            "macro eq128 xmm0 = xmm0 * eax.hitpoint.normal\n",
            "macro eq128 xmm0 = xmm0 - eax.hitpoint.wi \n",
            "macro dot xmm0 = xmm0 * eax.hitpoint.wo \n",
            "macro if xmm0 > " + name + "zero_spectrum goto " + name + "accept \n",
            "macro eq128 xmm0 = " + name + "zero_spectrum \n",
            "jmp " + name + "end \n",
            name + "accept:\n",
            "macro eq32 xmm1 = " + name + "e\n",
            "call fast_pow_ss \n",
            # NOTE(review): eax is not known (from this file) to survive the
            # call; restore the hitpoint pointer before dereferencing it.
            "mov eax, dword [" + name + "hp_ptr] \n",
            "macro broadcast xmm0 = xmm0[0] \n",
            "macro eq128 xmm0 = xmm0 *" + name + "spectrum \n",
            "macro eq32 xmm1 = eax.hitpoint.ndotwi \n",
            "macro broadcast xmm1 = xmm1[0] \n",
            "macro eq128 xmm0 = xmm0 / xmm1 \n",
        ]
        if self.k is not None:
            chunks.append("macro eq128 xmm0 = xmm0 * " + name + "k\n")
        chunks.append(name + "end: \n")

        return "".join(chunks)
Exemple #6
0
    def brdf_asm(self, runtime):
        """Return the assembly fragment that evaluates this BRDF.

        The fragment is returned as source text; it is not assembled here.
        The generated code expects eax to point to the hitpoint and leaves a
        four-lane result in xmm0. It computes:

            theta_o = acos(dot(normal, wo));  theta_i = acos(ndotwi)
            alpha = max(theta_o, theta_i);    beta = min(theta_o, theta_i)
            f  = sin(alpha) * tan(beta)
            v1 = wo - dot(normal, wo) * normal   (then util.normalization)
            v2 = wi - ndotwi * normal            (then util.normalization)
            result = spectrum * (A + B * f * max(dot(v2, v1), 0))

        When ``self.k`` is not None the result is also multiplied by ``k``.
        Labels are prefixed with "oren" + str(hash(self)). The hitpoint
        pointer is saved on entry and reloaded after the helper calls.

        Args:
            runtime: passed to ``util.load_func`` to load the math helpers.

        Returns:
            The ``#DATA`` / ``#CODE`` assembly source as a string.
        """
        #eax pointer to hitpoint
        prefix = "oren" + str(hash(self))

        util.load_func(runtime, "fast_acos_ps", "fast_sin_ss", "fast_tan_ss")

        hp_slot = "[" + prefix + "hp_ptr]"
        alpha_slot = prefix + "alpha"
        beta_slot = prefix + "beta"

        chunks = [
            "\n        #DATA\n        ",
            "float " + prefix + "spectrum[4] \n",
            "float " + prefix + "k[4] \n",
            "float " + prefix + "zero[4] = 0.0, 0.0, 0.0, 0.0 \n",
            "float " + prefix + "A \n",
            "float " + prefix + "B \n",
            "float " + alpha_slot + " \n",
            "float " + beta_slot + " \n",
            "uint32 " + prefix + "hp_ptr \n",
            "#CODE \n",
            "mov dword " + hp_slot + ", eax \n",
            # both cosines are packed into one register for a single acos call
            "macro dot xmm0 = eax.hitpoint.normal * eax.hitpoint.wo \n",
            "macro eq32 xmm1 = eax.hitpoint.ndotwi \n",
            "movlhps xmm0, xmm1 \n",
            "call fast_acos_ps \n",
            "movhlps xmm1, xmm0 \n",
            "macro eq32 xmm2 = xmm0 \n",
            "macro eq32 xmm3 = xmm1 \n",
            "minss xmm0, xmm1 \n",  # _beta
            "maxss xmm2, xmm3 \n",  # _alpha
            "macro eq32 " + alpha_slot + " = xmm2 \n",
            "call fast_tan_ss \n",
            "macro eq32 " + beta_slot + " = xmm0 \n",
            "macro eq32 xmm0 = " + alpha_slot + " \n",
            "call fast_sin_ss \n",
            "macro eq32 xmm0 = xmm0 * " + beta_slot + " \n",
            "macro eq32 " + alpha_slot + " = xmm0 \n",  # sin(alpha) * tan(beta)
            "mov eax, dword " + hp_slot + "\n",
            "macro dot xmm0 = eax.hitpoint.normal * eax.hitpoint.wo \n",
            "macro broadcast xmm0 = xmm0[0] \n",
            "macro eq128 xmm0 = xmm0 * eax.hitpoint.normal \n",
            "macro eq128 xmm1 = eax.hitpoint.wo \n",
            "macro eq128 xmm1 = xmm1 - xmm0 \n",
            util.normalization("xmm1", "xmm4", "xmm5"),  #v1
            "macro eq32 xmm2 = eax.hitpoint.ndotwi \n",
            "macro broadcast xmm2 = xmm2[0] \n",
            "macro eq128 xmm2 = xmm2 * eax.hitpoint.normal {xmm1} \n",
            "macro eq128 xmm3 = eax.hitpoint.wi \n",
            "macro eq128 xmm3 = xmm3 - xmm2 {xmm1} \n",
            util.normalization("xmm3", "xmm5", "xmm6"),  #v2
            "macro dot xmm3 = xmm3 * xmm1 \n",
            "maxss xmm3, dword [" + prefix + "zero] \n",
            "macro eq32 xmm3 = xmm3 * " + alpha_slot + " \n",
            "macro eq32 xmm3 = xmm3 * " + prefix + "B \n",
            "macro eq32 xmm3 = xmm3 + " + prefix + "A \n",
            "macro broadcast xmm0 = xmm3[0] \n",
            "macro eq128 xmm0 = xmm0 * " + prefix + "spectrum \n",
        ]

        if self.k is not None:
            chunks.append("macro eq128 xmm0 = xmm0 * " + prefix + "k\n")

        return "".join(chunks)
Exemple #7
0
    def get_sample_asm(self, runtime):
        """Assemble and load the routine that writes a sampled direction into a hitpoint.

        The generated routine expects eax to point to a hitpoint. It keeps
        four precomputed coefficient lanes in ``pu``/``pv``/``pw`` and a
        counter ``idx``. Each call decrements ``idx``; when the result goes
        negative the four lanes are recomputed from two ``random`` calls and
        ``idx`` is reset to 3. The lane selected by ``idx`` is then combined
        as ``B * pu + A * pv + normal * pw``. ``A`` is the normalized
        ``util.cross_product`` of ``tvector`` and the hitpoint normal, and
        ``B`` is the ``util.cross_product`` of ``A`` and the normal (operand
        order is whatever ``util.cross_product`` implements). The sum is
        normalized and stored in ``hitpoint.wi``; its dot product with the
        normal is stored in ``hitpoint.ndotwi``.

        Lane recomputation: ``pw`` is the square root of the first random
        vector when ``self.e == 1``, otherwise the result of ``fast_pow_ps``
        called with that vector in xmm0 and ``ie`` in xmm1. ``pu`` and ``pv``
        start as ``sqrt(1 - pw * pw)`` and are then multiplied by the two
        outputs of ``fast_sincos_ps`` (read from xmm6 and xmm0), evaluated at
        ``random * pi * two``.

        Side effects on ``self``: ``self.ds`` is set to the value returned by
        ``runtime.load``, ``self.ds["ie"]`` is filled with ``self.ie`` in all
        four lanes, and ``self.func_ptr`` is set to
        ``runtime.modules[name][0]``.

        Args:
            runtime: object used to load helper functions and the assembled
                module.
        """
        
        # eax - pointer to hitpoint
        asm_structs = renmas.utils.structs("hitpoint")
        util.load_func(runtime, "random")
        util.load_func(runtime, "fast_sincos_ps")

        # line1 loads one float from [ecx + 4*ebx]; the VEX-encoded form is
        # used when util.AVX is set.
        if util.AVX:
            line1 = "vmovss xmm1, dword [ecx + 4*ebx] \n"
        else:
            line1 = "movss xmm1, dword [ecx + 4*ebx] \n"


        ASM = """ 
        #DATA
        """
        ASM += asm_structs + """
            float ie[4]
            float pi[4] = 3.14159265359, 3.14159265359, 3.14159265359, 3.14159265359
            float one[4] = 1.0, 1.0, 1.0, 1.0
            float two[4] = 2.0, 2.0, 2.0, 2.0
            float tvector[4] = 0.0034, 1.0, 0.0071, 0.0

            float pu[4]
            float pv[4]
            float pw[4]

            uint32 ptr_hp
            uint32 idx = 0

            #CODE
        """
        # Entry: save the hitpoint pointer, consume one precomputed lane, and
        # branch to the refill code when none is left.
        ASM += """
            mov dword [ptr_hp], eax
            sub dword [idx], 1
            js _calculate_samples
            _gen_direction:
            mov eax, dword [ptr_hp]
            macro eq128 xmm1 = eax.hitpoint.normal
            macro eq128 xmm7 = xmm1
            macro eq128 xmm0 = tvector
            
            """
        ASM += util.cross_product("xmm0", "xmm1", "xmm2", "xmm3") 
        ASM += util.normalization("xmm0", "xmm1", "xmm2") + """
            macro eq128 xmm1 = xmm7
            macro eq128 xmm6 = xmm0
        """
        ASM += util.cross_product("xmm0", "xmm1", "xmm2", "xmm3") 
        ASM += """
            mov ebx, dword [idx]
            mov ecx, pu
            ; in line we load pu, pv or pw
        """
        ASM += line1 + """
            macro broadcast xmm1 = xmm1[0]
            macro eq128 xmm0 = xmm0 * xmm1 {xmm6, xmm7}
            mov ecx, pv
        """
        ASM += line1 + """
            macro broadcast xmm1 = xmm1[0]
            macro eq128 xmm6 = xmm6 * xmm1 {xmm0, xmm7}
            macro eq128 xmm0 = xmm0 + xmm6
            mov ecx, pw
        """
        ASM += line1 + """
            macro broadcast xmm1 = xmm1[0]
            macro eq128 xmm1 = xmm1 * xmm7 {xmm0}
            macro eq128 xmm0 = xmm0 + xmm1 {xmm7}
        """
        ASM += util.normalization("xmm0", "xmm1", "xmm2") + """
            macro eq128 eax.hitpoint.wi = xmm0  
            macro dot xmm0 = xmm0 * xmm7
            macro eq32 eax.hitpoint.ndotwi = xmm0

            ret

            _calculate_samples:
            call random 

        """
        # With e == 1 a plain square root is emitted instead of the call to
        # fast_pow_ps, which is only loaded on the other branch.
        if self.e == 1:
            if util.AVX:
                ASM += "vsqrtps xmm0, xmm0 \n"
            else:
                ASM += "sqrtps xmm0, xmm0 \n"
        else:
            util.load_func(runtime, "fast_pow_ps")
            ASM += "macro eq128 xmm1 = ie \n" 
            ASM += "call fast_pow_ps \n"

        ASM += """
            macro eq128 pw = xmm0
            macro eq128 xmm0 = xmm0 * xmm0
            macro eq128 xmm1 = one - xmm0
            """
        if util.AVX:
            ASM += "vsqrtps xmm0, xmm1 \n"
        else:
            ASM += "sqrtps xmm0, xmm1 \n"

        ASM += """
            macro eq128 pu = xmm0
            macro eq128 pv = xmm0

            call random 

            macro eq128 xmm0 = xmm0 * pi
            macro eq128 xmm0 = xmm0 * two
            call fast_sincos_ps
            macro eq128 xmm0 = xmm0 * pv {xmm6}
            macro eq128 xmm6 = xmm6 * pu {xmm0}

            macro eq128 pv = xmm0
            macro eq128 pu = xmm6 
            mov dword [idx], 3
            jmp _gen_direction 
        """
        
        assembler = util.get_asm()
        mc = assembler.assemble(ASM, True)
        #mc.print_machine_code()
        # util.unique() makes the module name distinct for each load.
        name = "brdf_hemisphere" + str(util.unique())
        self.ds = runtime.load(name, mc)

        self.ds["ie"] = (self.ie, self.ie, self.ie, self.ie)

        #FIXME - add method to runtime class so we can ask runtime for address of module
        self.func_ptr = runtime.modules[name][0]
Exemple #8
0
    def _ray_tri_asm(self, runtime, label):
        """Assemble and load a routine, exported as ``label``, that tests a ray against a list of triangles.

        Register contract of the generated routine (from its inline
        comment): eax = ray, ebx = hitpoint, esi = array of triangle
        indices, edi = number of entries. ecx is overwritten with the
        address of ``min_dist``.

        For every index the routine computes the triangle record address
        ``tr_ptr + index * triangle_size``. It pushes the material index
        (record offset 12), the address of the normal (record offset 16) and
        the three vertex addresses ``vb_ptr + vertex_index * vertices_size``,
        then calls ``ray_triangle_mesh``. When that call returns non-zero in
        eax, ``hitpoint.t`` is copied into ``min_dist``. After the loop the
        routine returns 1 in eax when ``min_dist`` is below ``max_dist`` and
        not below ``epsilon``, otherwise 0.

        NOTE(review): the generated code uses ebp as scratch and does not
        restore it; confirm callers do not rely on ebp.

        Side effects on ``self``: ``self.ds`` is set to the value returned by
        ``runtime.load``, and its ``vertices_size``, ``triangle_size``,
        ``vb_ptr`` and ``tr_ptr`` entries are filled from
        ``self.vertex_buffer`` and ``self.triangles``.

        Args:
            runtime: object used to load ``ray_triangle_mesh`` and the
                assembled module.
            label: name emitted as the routine's ``global`` label.
        """

        util.load_func(runtime, "ray_triangle_mesh")

        asm_structs = util.structs("ray", "hitpoint")

        ASM = """
        #DATA
        """
        ASM += asm_structs + """
        float min_dist = 999999.0
        float max_dist = 999999.0
        float zero = 0.0
        float one = 1.0
        float epsilon = 0.00001
        float minus_nesto = 0.0001

        ; pointer to vertex and triangle buffers
        uint32 vb_ptr
        uint32 tr_ptr
        uint32 vertices_size
        uint32 triangle_size

        #CODE
        """
        # Stack layout inside the loop, after the five pushes:
        # [esp] = remaining count, [esp+4] = index array cursor,
        # [esp+8] = hitpoint, [esp+12] = ray, [esp+16] = address of min_dist.
        ASM += " global " + label + ":\n" + """
        ; eax - ray,  ebx - hp, ecx - min_dist, esi - ptr_arr, edi - nobj
        ; 64-bit version will bi i little different beacuse of different size of array
        macro eq32 min_dist = max_dist + one
        mov ecx, min_dist

        push ecx
        push eax
        push ebx
        push esi
        push edi
        mov edx, dword [minus_nesto]
        mov dword [ebx + hitpoint.t], edx

        _objects_loop:
        mov eax, dword [esp + 12] ; address of ray
        mov ecx, dword [esp + 16] ; address of minimum distance
        mov edx, dword [esp + 8]  ; address of hitpoint
        mov esi, dword [esp + 4] ; array of indexes of triangles 

        mov ebx, dword [esi]  ; put index of triangle in ebx

        ; prepeare call - address of  parameters
        ;addres of points, normal a value of material index
        ;addr = self.address.ptr() + index * self.tri_size 
        
        imul ebx, dword [triangle_size]
        add ebx, dword [tr_ptr]
        ; trbuffer tr_ptr=v0, tr_ptr+4=v1, tr_ptr+8=v2, tr_ptr+12=mat_idx , tr_ptr+16=normal
        mov ebp, dword [ebx + 12]
        push ebp

        mov ebp, ebx
        add ebp, 16
        push ebp

        mov ebp, dword [ebx + 8] 
        imul ebp, dword [vertices_size]
        add ebp, dword [vb_ptr]
        push ebp
        
        mov ebp, dword [ebx + 4] 
        imul ebp, dword [vertices_size]
        add ebp, dword [vb_ptr]
        push ebp
        mov ebp, dword [ebx] 
        imul ebp, dword [vertices_size]
        add ebp, dword [vb_ptr]
        push ebp
        
        call ray_triangle_mesh
        add esp, 20

        cmp eax, 0  ; 0 - no intersection ocur 1 - intersection ocur
        jne _update_distance
        _next_object:
        sub dword [esp], 1  
        jz _end_objects
        add dword [esp + 4], 4  ;increment array by 4 - index of triangle
        jmp _objects_loop


        _update_distance:
        mov eax, dword [esp + 8]
        mov ebx, dword [eax + hitpoint.t]

        mov edx, dword [esp + 16] ;populate new minimum distance
        mov dword [edx], ebx
        jmp _next_object
        
        _end_objects:
        add esp, 20 
        macro eq32 xmm0 = min_dist
        macro if xmm0 < max_dist goto _accept
        mov eax, 0
        ret

        _accept:
        macro if xmm0 < epsilon goto _reject
        mov eax, 1
        ret

        _reject:
        mov eax, 0
        ret

        """

        asm = util.get_asm()
        mc = asm.assemble(ASM, True)
        #mc.print_machine_code()
        # util.unique() makes the module name distinct for each load.
        name = "ray_tri_intersection" + str(util.unique())
        self.ds = runtime.load(name, mc)

        # Publish buffer geometry so the routine can turn indices into addresses.
        self.ds['vertices_size'] = self.vertex_buffer.vsize() 
        self.ds['triangle_size'] = self.triangles.tsize()
        self.ds['vb_ptr'] = self.vertex_buffer.addr()
        self.ds['tr_ptr'] = self.triangles.addr()
Exemple #9
0
    def brdf_asm(self, runtime):
        """Build the assembly source for this BRDF's evaluation fragment.

        Generated-code contract: eax points to the hitpoint on entry and the
        four-lane result is left in xmm0. The fragment takes the arccosines
        of ``dot(normal, wo)`` and ``ndotwi`` with one ``fast_acos_ps``
        call, names the larger angle alpha and the smaller beta, and forms
        ``sin(alpha) * tan(beta)``. It then builds
        ``v1 = wo - dot(normal, wo) * normal`` and
        ``v2 = wi - ndotwi * normal``, each passed through
        ``util.normalization``, and evaluates
        ``spectrum * (A + B * sin(alpha) * tan(beta) * max(dot(v2, v1), 0))``.
        The result is multiplied by ``k`` when ``self.k`` is not None.

        Labels are prefixed with "oren" + str(hash(self)).

        Args:
            runtime: passed to ``util.load_func`` to load the math helpers.

        Returns:
            The assembly source text (``#DATA`` and ``#CODE`` sections).
        """
        #eax pointer to hitpoint
        name = "oren" + str(hash(self))

        util.load_func(runtime, "fast_acos_ps", "fast_sin_ss", "fast_tan_ss")

        pieces = []
        emit = pieces.append

        emit("\n        #DATA\n        ")
        for declaration in (
            "float " + name + "spectrum[4]",
            "float " + name + "k[4]",
            "float " + name + "zero[4] = 0.0, 0.0, 0.0, 0.0",
            "float " + name + "A",
            "float " + name + "B",
            "float " + name + "alpha",
            "float " + name + "beta",
            "uint32 " + name + "hp_ptr",
        ):
            emit(declaration + " \n")

        emit("#CODE \n")
        emit("mov dword [" + name + "hp_ptr], eax \n")

        # angles: both cosines go through a single packed acos call
        emit("macro dot xmm0 = eax.hitpoint.normal * eax.hitpoint.wo \n")
        emit("macro eq32 xmm1 = eax.hitpoint.ndotwi \n")
        emit("movlhps xmm0, xmm1 \n")
        emit("call fast_acos_ps \n")
        emit("movhlps xmm1, xmm0 \n")
        emit("macro eq32 xmm2 = xmm0 \n")
        emit("macro eq32 xmm3 = xmm1 \n")
        emit("minss xmm0, xmm1 \n")  # _beta
        emit("maxss xmm2, xmm3 \n")  # _alpha
        emit("macro eq32 " + name + "alpha = xmm2 \n")
        emit("call fast_tan_ss \n")
        emit("macro eq32 " + name + "beta = xmm0 \n")
        emit("macro eq32 xmm0 = " + name + "alpha \n")
        emit("call fast_sin_ss \n")
        emit("macro eq32 xmm0 = xmm0 * " + name + "beta \n")
        emit("macro eq32 " + name + "alpha = xmm0 \n")  # sin(alpha) * tan(beta)

        # the hitpoint pointer is reloaded after the helper calls
        emit("mov eax, dword [" + name + "hp_ptr]\n")
        emit("macro dot xmm0 = eax.hitpoint.normal * eax.hitpoint.wo \n")
        emit("macro broadcast xmm0 = xmm0[0] \n")
        emit("macro eq128 xmm0 = xmm0 * eax.hitpoint.normal \n")
        emit("macro eq128 xmm1 = eax.hitpoint.wo \n")
        emit("macro eq128 xmm1 = xmm1 - xmm0 \n")
        emit(util.normalization("xmm1", "xmm4", "xmm5"))  #v1
        emit("macro eq32 xmm2 = eax.hitpoint.ndotwi \n")
        emit("macro broadcast xmm2 = xmm2[0] \n")
        emit("macro eq128 xmm2 = xmm2 * eax.hitpoint.normal {xmm1} \n")
        emit("macro eq128 xmm3 = eax.hitpoint.wi \n")
        emit("macro eq128 xmm3 = xmm3 - xmm2 {xmm1} \n")
        emit(util.normalization("xmm3", "xmm5", "xmm6"))  #v2

        # combine: A + B * sin(alpha) * tan(beta) * max(dot(v2, v1), 0)
        emit("macro dot xmm3 = xmm3 * xmm1 \n")
        emit("maxss xmm3, dword [" + name + "zero] \n")
        emit("macro eq32 xmm3 = xmm3 * " + name + "alpha \n")
        emit("macro eq32 xmm3 = xmm3 * " + name + "B \n")
        emit("macro eq32 xmm3 = xmm3 + " + name + "A \n")
        emit("macro broadcast xmm0 = xmm3[0] \n")
        emit("macro eq128 xmm0 = xmm0 * " + name + "spectrum \n")

        if self.k is not None:
            emit("macro eq128 xmm0 = xmm0 * " + name + "k\n")

        return "".join(pieces)
Exemple #10
0
    def get_sample_asm(self, runtime, label):
        """Assemble and load the sample-generating routine exported as ``label``.

        The generated routine expects eax to point to a ``sample``
        structure. While ``curn`` is non-zero it emits another sample for
        the current pixel and decrements ``curn``. Otherwise it resets
        ``curn`` to ``n - 1`` and advances the pixel stored in ``cur_xyxy``:
        x is incremented until it equals ``tile_endx``, then y is incremented
        and x is reset to ``tilex`` until y equals ``tile_endy``.

        Every emitted sample stores
        ``(float(cur_xyxy) + w2h2 + random) * pixel_size`` in ``sample.xyxy``
        and the integer pixel coordinates in ``sample.ix`` / ``sample.iy``,
        and the routine returns 1 in eax. Once both end coordinates are
        reached it returns 0 in eax.

        Side effects on ``self``: ``self.ds`` is set to the value returned by
        ``runtime.load`` and ``self._populate_ds()`` is called.

        Args:
            runtime: object used to load ``random`` and the assembled module.
            label: name emitted as the routine's ``global`` label.

        Returns:
            True.
        """
        # eax - pointer to sample structure
        util.load_func(runtime, "random")

        # line1 converts the packed integers in xmm4 to floats; the
        # VEX-encoded form is used when util.AVX is set.
        if util.AVX:
            line1 = "vcvtdq2ps xmm4, xmm4 \n"
        else:
            line1 = "cvtdq2ps xmm4, xmm4 \n"

        asm_structs = util.structs("sample")
        code = """
            #DATA
        """
        code += asm_structs + """
            uint32 n, curn
            uint32 tile_endx, tile_endy
            uint32 tilex, tiley
            uint32 cur_xyxy[4] ; we just use first two numbers for now
            float pixel_size[4]
            float w2h2[4]

            #CODE
        """
        # Path 1: more samples remain for the current pixel.
        code += " global " + label + ":\n" + """
            cmp dword [curn], 0
            jbe _next_pixel

            ; calculate sample
            call random
            ; random number is in xmm0
            macro eq128 xmm4 = cur_xyxy {xmm0}
            """
        # Path 2 (_next_pixel): step to the next x in the current row.
        code += line1 + """
            macro eq128_128 xmm1 = pixel_size, xmm2 = w2h2 {xmm0}
            macro eq128 xmm3 = xmm4 + xmm2 {xmm1, xmm0}
            macro eq128 xmm3 = xmm3 + xmm0 {xmm1}
            mov ebx, dword [cur_xyxy]
            mov ecx, dword [cur_xyxy + 4]
            macro eq128 eax.sample.xyxy = xmm3 * xmm1
            mov dword [eax + sample.ix] ,ebx
            mov dword [eax + sample.iy] ,ecx
            sub dword [curn], 1
            mov eax,  1 
            ret
            
            
            _next_pixel:
            mov edx, dword [n] ; self.curn = self.n - 1
            sub edx, 1
            mov dword [curn], edx

            mov ebx, dword [cur_xyxy]
            cmp ebx, dword [tile_endx]
            je _checky
            ; increase curx 
            add ebx, 1
            mov dword [cur_xyxy], ebx
            ; calculate sample
           
            call random
            macro eq128 xmm4 = cur_xyxy {xmm0}
        """
        # Path 3 (_checky): row finished, move to the next y or stop.
        code += line1 + """
            macro eq128_128 xmm1 = pixel_size, xmm2 = w2h2 {xmm0}
            macro eq128 xmm3 = xmm4 + xmm2 {xmm1, xmm0}
            macro eq128 xmm3 = xmm3 + xmm0 {xmm1}
            mov ecx, dword [cur_xyxy + 4]
            macro eq128 eax.sample.xyxy = xmm3 * xmm1
            mov dword [eax + sample.ix] ,ebx
            mov dword [eax + sample.iy] ,ecx
            mov eax, 1 
            ret

            _checky:
            mov ecx, dword [cur_xyxy + 4]
            cmp ecx, dword [tile_endy]
            je _end_sampling
            ; increase cury
            add ecx, 1
            mov ebx, dword [tilex]
            mov dword [cur_xyxy+ 4], ecx 
            mov dword [cur_xyxy], ebx

            call random
            macro eq128 xmm4 = cur_xyxy {xmm0}
        """
        code += line1 + """
            macro eq128_128 xmm1 = pixel_size, xmm2 = w2h2 {xmm0}
            macro eq128 xmm3 = xmm4 + xmm2 {xmm1, xmm0}
            macro eq128 xmm3 = xmm3 + xmm0 {xmm1}
            macro eq128 eax.sample.xyxy = xmm3 * xmm1
            mov dword [eax + sample.ix] ,ebx
            mov dword [eax + sample.iy] ,ecx
            mov eax, 1 
            ret

            _end_sampling:
            xor eax, eax 
            ret

        """
        assembler = util.get_asm()
        mc = assembler.assemble(code, True)
        #mc.print_machine_code()
        # util.unique() makes the module name distinct for each load.
        name = "get_sample" + str(util.unique())
        self.ds = runtime.load(name, mc)
        self._populate_ds()
        return True
Exemple #11
0
    def get_sample_asm(self, runtime, label):
        """Assemble and load the routine, exported as ``label``, that produces the next sample.

        Generated-code contract: eax points to a ``sample`` structure. The
        routine returns 1 in eax after filling the structure, or 0 in eax
        when the current pixel is at (``tile_endx``, ``tile_endy``) and no
        samples remain for it.

        Control flow of the generated code:
          * ``curn`` non-zero: emit a sample for the current pixel and
            decrement ``curn``.
          * ``curn`` zero: set ``curn`` to ``n - 1``, then either increment
            the x stored in ``cur_xyxy`` (when it differs from
            ``tile_endx``), or increment y and reset x to ``tilex`` (when y
            differs from ``tile_endy``), and emit a sample for the new pixel.

        An emitted sample is ``(float(cur_xyxy) + w2h2 + random) * pixel_size``
        written to ``sample.xyxy``, with the integer pixel coordinates
        written to ``sample.ix`` and ``sample.iy``.

        Side effects on ``self``: ``self.ds`` is set to the value returned by
        ``runtime.load`` and ``self._populate_ds()`` is called.

        Args:
            runtime: object used to load ``random`` and the assembled module.
            label: name emitted as the routine's ``global`` label.

        Returns:
            True.
        """
        # eax - pointer to sample structure
        util.load_func(runtime, "random")

        # The integer-to-float conversion of xmm4 is emitted in VEX-encoded
        # form when util.AVX is set, and in plain SSE form otherwise.
        if util.AVX:
            line1 = "vcvtdq2ps xmm4, xmm4 \n"
        else:
            line1 = "cvtdq2ps xmm4, xmm4 \n"

        asm_structs = util.structs("sample")
        code = """
            #DATA
        """
        code += asm_structs + """
            uint32 n, curn
            uint32 tile_endx, tile_endy
            uint32 tilex, tiley
            uint32 cur_xyxy[4] ; we just use first two numbers for now
            float pixel_size[4]
            float w2h2[4]

            #CODE
        """
        # First path: the current pixel still has samples left.
        code += " global " + label + ":\n" + """
            cmp dword [curn], 0
            jbe _next_pixel

            ; calculate sample
            call random
            ; random number is in xmm0
            macro eq128 xmm4 = cur_xyxy {xmm0}
            """
        # Second path (_next_pixel): advance x within the row.
        code += line1 + """
            macro eq128_128 xmm1 = pixel_size, xmm2 = w2h2 {xmm0}
            macro eq128 xmm3 = xmm4 + xmm2 {xmm1, xmm0}
            macro eq128 xmm3 = xmm3 + xmm0 {xmm1}
            mov ebx, dword [cur_xyxy]
            mov ecx, dword [cur_xyxy + 4]
            macro eq128 eax.sample.xyxy = xmm3 * xmm1
            mov dword [eax + sample.ix] ,ebx
            mov dword [eax + sample.iy] ,ecx
            sub dword [curn], 1
            mov eax,  1 
            ret
            
            
            _next_pixel:
            mov edx, dword [n] ; self.curn = self.n - 1
            sub edx, 1
            mov dword [curn], edx

            mov ebx, dword [cur_xyxy]
            cmp ebx, dword [tile_endx]
            je _checky
            ; increase curx 
            add ebx, 1
            mov dword [cur_xyxy], ebx
            ; calculate sample
           
            call random
            macro eq128 xmm4 = cur_xyxy {xmm0}
        """
        # Third path (_checky): the row is done; advance y or finish.
        code += line1 + """
            macro eq128_128 xmm1 = pixel_size, xmm2 = w2h2 {xmm0}
            macro eq128 xmm3 = xmm4 + xmm2 {xmm1, xmm0}
            macro eq128 xmm3 = xmm3 + xmm0 {xmm1}
            mov ecx, dword [cur_xyxy + 4]
            macro eq128 eax.sample.xyxy = xmm3 * xmm1
            mov dword [eax + sample.ix] ,ebx
            mov dword [eax + sample.iy] ,ecx
            mov eax, 1 
            ret

            _checky:
            mov ecx, dword [cur_xyxy + 4]
            cmp ecx, dword [tile_endy]
            je _end_sampling
            ; increase cury
            add ecx, 1
            mov ebx, dword [tilex]
            mov dword [cur_xyxy+ 4], ecx 
            mov dword [cur_xyxy], ebx

            call random
            macro eq128 xmm4 = cur_xyxy {xmm0}
        """
        code += line1 + """
            macro eq128_128 xmm1 = pixel_size, xmm2 = w2h2 {xmm0}
            macro eq128 xmm3 = xmm4 + xmm2 {xmm1, xmm0}
            macro eq128 xmm3 = xmm3 + xmm0 {xmm1}
            macro eq128 eax.sample.xyxy = xmm3 * xmm1
            mov dword [eax + sample.ix] ,ebx
            mov dword [eax + sample.iy] ,ecx
            mov eax, 1 
            ret

            _end_sampling:
            xor eax, eax 
            ret

        """
        assembler = util.get_asm()
        mc = assembler.assemble(code, True)
        #mc.print_machine_code()
        # util.unique() makes the module name distinct for each load.
        name = "get_sample" + str(util.unique())
        self.ds = runtime.load(name, mc)
        self._populate_ds()
        return True